]> git.donarmstrong.com Git - mothur.git/blob - shhhercommand.h
changed random forest output filename
[mothur.git] / shhhercommand.h
1 #ifndef SHHHER_H
2 #define SHHHER_H
3
4 /*
5  *  shhher.h
6  *  Mothur
7  *
8  *  Created by Pat Schloss on 12/27/10.
9  *  Copyright 2010 Schloss Lab. All rights reserved.
10  *
11  */
12
13 #include "mothur.h"
14 #include "command.hpp"
15 #include "readcolumn.h"
16 #include "readmatrix.hpp"
17 #include "rabundvector.hpp"
18 #include "sabundvector.hpp"
19 #include "listvector.hpp"
20 #include "cluster.hpp"
21 #include <cfloat>
22
23 //**********************************************************************************************************************
24
25 #define NUMBINS 1000
26 #define HOMOPS 10
27 #define MIN_COUNT 0.1
28 #define MIN_WEIGHT 0.1
29 #define MIN_TAU 0.0001
30 #define MIN_ITER 10
31 //**********************************************************************************************************************
32
33 class ShhherCommand : public Command {
34         
35 public:
36         ShhherCommand(string);
37         ShhherCommand();
38         ~ShhherCommand() {}
39         
40         vector<string> setParameters();
41         string getCommandName()                 { return "shhh.flows";  }
42         string getCommandCategory()             { return "Sequence Processing";         }
43         
44         string getHelpString(); 
45     string getOutputPattern(string);    
46         string getCitation() { return "Schloss PD, Gevers D, Westcott SL (2011).  Reducing the effects of PCR amplification and sequencing artifacts on 16S rRNA-based studies.  PLoS ONE.  6:e27310.\nQuince C, Lanzen A, Davenport RJ, Turnbaugh PJ (2011).  Removing noise from pyrosequenced amplicons.  BMC Bioinformatics  12:38.\nQuince C, LanzĂ©n A, Curtis TP, Davenport RJ, Hall N, Head IM, Read LF, Sloan WT (2009).  Accurate determination of microbial diversity from 454 pyrosequencing data.  Nat. Methods 6:639.\nhttp://www.mothur.org/wiki/Shhh.flows"; }
47         string getDescription()         { return "shhh.flows"; }
48
49         
50         int execute(); 
51         void help() { m->mothurOut(getHelpString()); }          
52 private:
53         
54     struct linePair {
55                 int start;
56                 int end;
57                 linePair(int i, int j) : start(i), end(j) {}
58         };
59     
60         bool abort, large;
61         string outputDir, flowFileName, flowFilesFileName, lookupFileName, compositeFASTAFileName, compositeNamesFileName;
62
63         int processors, maxIters, largeSize;
64         float cutoff, sigma, minDelta;
65         string flowOrder;
66     
67     vector<string> outputNames;
68         vector<double> singleLookUp;
69         vector<double> jointLookUp;
70     vector<string> flowFileVector;
71         
72     vector<string> parseFlowFiles(string);
73     int driver(vector<string>, string, string);
74     int createProcesses(vector<string>);
75     int getFlowData(string, vector<string>&, vector<int>&, vector<short>&, map<string, int>&, int&);
76     int getUniques(int, int, vector<short>&, vector<int>&, vector<int>&, vector<int>&, vector<int>&, vector<int>&, vector<double>&, vector<short>&);
77     int flowDistParentFork(int, string, int, vector<int>&, vector<int>&, vector<int>&, vector<double>&, vector<short>&);
78     float calcPairwiseDist(int, int, int, vector<int>&, vector<int>&, vector<double>&, vector<short>&);
79     int createNamesFile(int, int, string, vector<string>&, vector<int>&, vector<int>&);
80     int cluster(string, string, string);
81     int getOTUData(int numSeqs, string,  vector<int>&, vector<int>&, vector<int>&, vector<vector<int> >&, vector<vector<int> >&, vector<int>&, vector<int>&,map<string, int>&);
82     int calcCentroidsDriver(int numOTUs, vector<int>&, vector<int>&, vector<int>&, vector<short>&, vector<int>&, vector<double>&, vector<int>&, vector<short>&, vector<short>&, vector<int>&, int, vector<int>&);
83     double getDistToCentroid(int, int, int, vector<short>&, vector<short>&, int);
84     double getNewWeights(int, vector<int>&, vector<int>&, vector<double>&, vector<int>&, vector<double>&);
85     
86     double getLikelihood(int, int, vector<int>&, vector<int>&, vector<int>&, vector<int>&, vector<double>&, vector<double>&);
87     int checkCentroids(int, vector<int>&, vector<double>&);
88     void calcNewDistances(int, int, vector<int>& , vector<double>&,vector<double>& , vector<short>& change, vector<int>&,vector<vector<int> >&, vector<double>&, vector<vector<int> >&, vector<int>&, vector<int>&, vector<short>&, vector<short>&, int, vector<int>&);
89     int fill(int, vector<int>&, vector<int>&, vector<int>&, vector<int>&, vector<vector<int> >&, vector<vector<int> >&);
90     void setOTUs(int, int, vector<int>&, vector<int>&, vector<int>&, vector<int>&,
91                  vector<int>&, vector<double>&, vector<double>&, vector<vector<int> >&, vector<vector<int> >&);
92     void writeQualities(int, int, string, vector<int>, vector<int>&, vector<int>&, vector<double>&, vector<short>&, vector<short>&, vector<int>&, vector<int>&, vector<string>&, vector<int>&, vector<vector<int> >&);
93     void writeSequences(string, int, int, string, vector<int>, vector<short>&, vector<string>&, vector<vector<int> >&, vector<int>&);
94     void writeNames(string, int, string, vector<int>, vector<string>&, vector<vector<int> >&, vector<int>&);
95     void writeGroups(string, string, int, vector<string>&);
96     void writeClusters(string, int, int, vector<int>, vector<int>&, vector<short>&, vector<string>&, vector<vector<int> >&, vector<int>&, vector<int>&, vector<short>&);
97     
98         void getSingleLookUp();
99         void getJointLookUp();
100     double getProbIntensity(int);
101         
102         
103 #ifdef USE_MPI
104         string flowDistMPI(int, int);
105         void calcNewDistancesChildMPI(int, int, vector<int>&);
106
107         int pid, ncpus; 
108     
109      void getFlowData();
110      void getUniques();
111      
112      float calcPairwiseDist(int, int);
113      void flowDistParentFork(string, int, int);
114      
115      string createDistFile(int);
116      string createNamesFile();
117      string cluster(string, string);
118      
119      void getOTUData(string);
120      void initPyroCluster();
121      void fill();
122      void calcCentroids();
123      void calcCentroidsDriver(int, int);
124      double getDistToCentroid(int, int, int);
125      double getNewWeights();
126      double getLikelihood();
127      void checkCentroids();
128      void calcNewDistances();
129      void calcNewDistancesParent(int, int);
130      void calcNewDistancesChild(int, int, vector<int>&, vector<int>&, vector<double>&);
131      
132      
133      void setOTUs();
134      void writeQualities(vector<int>);
135      void writeSequences(vector<int>);
136      void writeNames(vector<int>);
137      void writeGroups();
138      void writeClusters(vector<int>);
139     
140     vector<string> seqNameVector;
141         vector<int> lengths;
142         vector<short> flowDataIntI;
143         vector<double> flowDataPrI;
144         map<string, int> nameMap;
145         vector<int> otuData;
146         vector<int> cumNumSeqs;
147         vector<int> nSeqsPerOTU;
148         vector<vector<int> > aaP;       //tMaster->aanP:        each row is a different otu / each col contains the sequence indices
149         vector<vector<int> > aaI;       //tMaster->aanI:        that are in each otu - can't differentiate between aaP and aaI 
150         vector<int> seqNumber;          //tMaster->anP:         the sequence id number sorted by OTU
151         vector<int> seqIndex;           //tMaster->anI;         the index that corresponds to seqNumber
152         vector<double> dist;            //adDist - distance of sequences to centroids
153         vector<short> change;           //did the centroid sequence change? 0 = no; 1 = yes
154         vector<int> centroids;          //the representative flowgram for each cluster m
155         vector<double> weight;
156         vector<double> singleTau;       //tMaster->adTau:       1-D Tau vector (1xnumSeqs)
157         vector<short> uniqueFlowgrams;
158         vector<int> uniqueCount;
159         vector<int> mapSeqToUnique;
160         vector<int> mapUniqueToSeq;
161         vector<int> uniqueLengths;
162     int numSeqs, numUniques, numOTUs, numFlowCells;
163     vector<int> nSeqsBreaks;
164         vector<int> nOTUsBreaks;
165
166 #endif
167         
168 };
169
170 /**************************************************************************************************
171 //custom data structure for threads to use.
172 // This is passed by void pointer so it can be any data type
173 // that can be passed using a single void pointer (LPVOID).
174 struct shhhFlowsData {
175         int threadID, maxIters;
176         float cutoff, sigma, minDelta;
177         string flowOrder;
178         vector<double> singleLookUp;
179         vector<double> jointLookUp;
180     vector<string> filenames;
181     vector<string> outputNames;
182     string thisCompositeFASTAFileName, thisCompositeNameFileName, outputDir;
183     int start, stop;
184         MothurOut* m;
185         
186         shhhFlowsData(){}
187         shhhFlowsData(vector<string> f, string cf, string cn, string ou, string flor, vector<double> jl, vector<double> sl, MothurOut* mout, int st, int sp, float cut, float si, float mD, int mx, int tid) {
188                 filenames = f;
189         thisCompositeFASTAFileName = cf;
190         thisCompositeNameFileName = cn;
191         outputDir = ou;
192         flowOrder = flor;
193                 m = mout;
194                 start = st;
195                 stop = sp;
196                 cutoff= cut;
197         sigma = si;
198         minDelta = mD;
199         maxIters = mx;
200         jointLookUp = jl;
201         singleLookUp = sl;
202                 threadID = tid;
203         }
204 };
205
206 /**************************************************************************************************
207 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
208 #else
209 static DWORD WINAPI ShhhFlowsThreadFunction(LPVOID lpParam){ 
210         shhhFlowsData* pDataArray;
211         pDataArray = (shhhFlowsData*)lpParam;
212         
213         try {
214         
215                 for(int l=pDataArray->start;l<pDataArray->stop;l++){
216                         
217                         if (pDataArray->m->control_pressed) { break; }
218                         
219                         string flowFileName = pDataArray->filenames[l];
220             
221                         pDataArray->m->mothurOut("\n>>>>>\tProcessing " + flowFileName + " (file " + toString(l+1) + " of " + toString(pDataArray->filenames.size()) + ")\t<<<<<\n");
222                         pDataArray->m->mothurOut("Reading flowgrams...\n");
223                         
224             vector<string> seqNameVector;
225             vector<int> lengths;
226             vector<short> flowDataIntI;
227             vector<double> flowDataPrI;
228             map<string, int> nameMap;
229             vector<short> uniqueFlowgrams;
230             vector<int> uniqueCount;
231             vector<int> mapSeqToUnique;
232             vector<int> mapUniqueToSeq;
233             vector<int> uniqueLengths;
234             int numFlowCells;
235             
236             //int numSeqs = getFlowData(flowFileName, seqNameVector, lengths, flowDataIntI, nameMap, numFlowCells);
237             /*****************************************************************************************************
238             
239             ifstream flowFile;
240            // cout << "herethread " << flowFileName << '\t' << &flowFile << endl;
241             pDataArray->m->openInputFile(flowFileName, flowFile);
242             
243            // cout << "herethread " << flowFileName << endl;
244             string seqName;
245             int currentNumFlowCells;
246             float intensity;
247                         
248             flowFile >> numFlowCells;
249             int index = 0;//pcluster
250             while(!flowFile.eof()){
251                 
252                 if (pDataArray->m->control_pressed) { flowFile.close(); return 0; }
253                 
254                 flowFile >> seqName >> currentNumFlowCells;
255                 lengths.push_back(currentNumFlowCells);
256              //  cout << "herethread " << seqName << endl;  
257                 seqNameVector.push_back(seqName);
258                 nameMap[seqName] = index++;//pcluster
259                 
260                 for(int i=0;i<numFlowCells;i++){
261                     flowFile >> intensity;
262                     if(intensity > 9.99)        {       intensity = 9.99;       }
263                     int intI = int(100 * intensity + 0.0001);
264                     flowDataIntI.push_back(intI);
265                 }
266                 pDataArray->m->gobble(flowFile);
267             }
268             flowFile.close();
269             
270             int numSeqs = seqNameVector.size();         
271           //  cout << numSeqs << endl;   
272             for(int i=0;i<numSeqs;i++){
273                 
274                 if (pDataArray->m->control_pressed) { return 0; }
275                 
276                 int iNumFlowCells = i * numFlowCells;
277                 for(int j=lengths[i];j<numFlowCells;j++){
278                     flowDataIntI[iNumFlowCells + j] = 0;
279                 }
280             }
281           //  cout << "here" << endl; 
282             /*****************************************************************************************************
283         
284                         if (pDataArray->m->control_pressed) { return 0; }
285                         
286                         pDataArray->m->mothurOut("Identifying unique flowgrams...\n");
287                         //int numUniques = getUniques(numSeqs, numFlowCells, uniqueFlowgrams, uniqueCount, uniqueLengths, mapSeqToUnique, mapUniqueToSeq, lengths, flowDataPrI, flowDataIntI);
288             /*****************************************************************************************************
289             int numUniques = 0;
290             uniqueFlowgrams.assign(numFlowCells * numSeqs, -1);
291             uniqueCount.assign(numSeqs, 0);                                                     //      anWeights
292             uniqueLengths.assign(numSeqs, 0);
293             mapSeqToUnique.assign(numSeqs, -1);
294             mapUniqueToSeq.assign(numSeqs, -1);
295             
296             vector<short> uniqueFlowDataIntI(numFlowCells * numSeqs, -1);
297             
298             for(int i=0;i<numSeqs;i++){
299                 
300                 if (pDataArray->m->control_pressed) { return 0; }
301                 
302                 int index = 0;
303                 
304                 vector<short> current(numFlowCells);
305                 for(int j=0;j<numFlowCells;j++){
306                     current[j] = short(((flowDataIntI[i * numFlowCells + j] + 50.0)/100.0));
307                 }
308                 
309                 for(int j=0;j<numUniques;j++){
310                     int offset = j * numFlowCells;
311                     bool toEnd = 1;
312                     
313                     int shorterLength;
314                     if(lengths[i] < uniqueLengths[j])   {       shorterLength = lengths[i];                     }
315                     else                                                                {       shorterLength = uniqueLengths[j];       }
316                     
317                     for(int k=0;k<shorterLength;k++){
318                         if(current[k] != uniqueFlowgrams[offset + k]){
319                             toEnd = 0;
320                             break;
321                         }
322                     }
323                     
324                     if(toEnd){
325                         mapSeqToUnique[i] = j;
326                         uniqueCount[j]++;
327                         index = j;
328                         if(lengths[i] > uniqueLengths[j])       {       uniqueLengths[j] = lengths[i];  }
329                         break;
330                     }
331                     index++;
332                 }
333                 
334                 if(index == numUniques){
335                     uniqueLengths[numUniques] = lengths[i];
336                     uniqueCount[numUniques] = 1;
337                     mapSeqToUnique[i] = numUniques;//anMap
338                     mapUniqueToSeq[numUniques] = i;//anF
339                     
340                     for(int k=0;k<numFlowCells;k++){
341                         uniqueFlowgrams[numUniques * numFlowCells + k] = current[k];
342                         uniqueFlowDataIntI[numUniques * numFlowCells + k] = flowDataIntI[i * numFlowCells + k];
343                     }
344                     
345                     numUniques++;
346                 }
347             }
348             uniqueFlowDataIntI.resize(numFlowCells * numUniques);
349             uniqueLengths.resize(numUniques);   
350             
351             flowDataPrI.resize(numSeqs * numFlowCells, 0);
352             for(int i=0;i<flowDataPrI.size();i++)       {       
353                 if (pDataArray->m->control_pressed) { return 0; } 
354                 //flowDataPrI[i] = getProbIntensity(flowDataIntI[i]);   
355                 
356                 flowDataPrI[i] = 100000000; 
357             
358                 for(int j=0;j<HOMOPS;j++){//loop signal strength
359                     
360                     if (pDataArray->m->control_pressed) { return 0; }
361                     
362                     float negLogProb = pDataArray->singleLookUp[j * NUMBINS + flowDataIntI[i]];
363                     if(negLogProb < flowDataPrI[i])     {       flowDataPrI[i] = negLogProb; }
364                 }
365             }            
366             
367             /*****************************************************************************************************
368                         
369                         if (pDataArray->m->control_pressed) { return 0; }
370                         
371                         pDataArray->m->mothurOut("Calculating distances between flowgrams...\n");
372             string distFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.dist";
373             unsigned long long begTime = time(NULL);
374             double begClock = clock();
375             
376             //flowDistParentFork(numFlowCells, distFileName, numUniques, mapUniqueToSeq, mapSeqToUnique, lengths, flowDataPrI, flowDataIntI);   
377             /*****************************************************************************************************
378             ostringstream outStream;
379             outStream.setf(ios::fixed, ios::floatfield);
380             outStream.setf(ios::dec, ios::basefield);
381             outStream.setf(ios::showpoint);
382             outStream.precision(6);
383             
384             int thisbegTime = time(NULL);
385             double thisbegClock = clock();
386             
387             for(int i=0;i<numUniques;i++){
388                 
389                 if (pDataArray->m->control_pressed) { break; }
390                 
391                 for(int j=0;j<i;j++){
392                     //float flowDistance = calcPairwiseDist(numFlowCells, mapUniqueToSeq[i], mapUniqueToSeq[j], mapSeqToUnique, lengths, flowDataPrI, flowDataIntI);
393                     /*****************************************************************************************************
394                     int seqA = mapUniqueToSeq[i]; int seqB = mapUniqueToSeq[j];
395                     int minLength = lengths[mapSeqToUnique[seqA]];
396                     if(lengths[seqB] < minLength){      minLength = lengths[mapSeqToUnique[seqB]];      }
397                     
398                     int ANumFlowCells = seqA * numFlowCells;
399                     int BNumFlowCells = seqB * numFlowCells;
400                     
401                     float flowDistance = 0;
402                     
403                     for(int k=0;k<minLength;k++){
404                         
405                         if (pDataArray->m->control_pressed) { break; }
406                         
407                         int flowAIntI = flowDataIntI[ANumFlowCells + k];
408                         float flowAPrI = flowDataPrI[ANumFlowCells + k];
409                         
410                         int flowBIntI = flowDataIntI[BNumFlowCells + k];
411                         float flowBPrI = flowDataPrI[BNumFlowCells + k];
412                         flowDistance += pDataArray->jointLookUp[flowAIntI * NUMBINS + flowBIntI] - flowAPrI - flowBPrI;
413                     }
414                     
415                     flowDistance /= (float) minLength;
416                     /*****************************************************************************************************
417
418                     if(flowDistance < 1e-6){
419                         outStream << mapUniqueToSeq[i] << '\t' << mapUniqueToSeq[j] << '\t' << 0.000000 << endl;
420                     }
421                     else if(flowDistance <= pDataArray->cutoff){
422                         outStream << mapUniqueToSeq[i] << '\t' << mapUniqueToSeq[j] << '\t' << flowDistance << endl;
423                     }
424                 }
425                 if(i % 100 == 0){
426                     pDataArray->m->mothurOut(toString(i) + "\t" + toString(time(NULL) - thisbegTime));
427                     pDataArray->m->mothurOut("\t" + toString((clock()-thisbegClock)/CLOCKS_PER_SEC));
428                     pDataArray->m->mothurOutEndLine();
429                 }
430             }
431             
432             ofstream distFile(distFileName.c_str());
433             distFile << outStream.str();                
434             distFile.close();
435             
436             if (pDataArray->m->control_pressed) {}
437             else {
438                 pDataArray->m->mothurOut(toString(numUniques-1) + "\t" + toString(time(NULL) - thisbegTime));
439                 pDataArray->m->mothurOut("\t" + toString((clock()-thisbegClock)/CLOCKS_PER_SEC));
440                 pDataArray->m->mothurOutEndLine();
441             }
442             /*****************************************************************************************************
443
444             pDataArray->m->mothurOutEndLine();
445             pDataArray->m->mothurOut("Total time: " + toString(time(NULL) - begTime) + '\t' + toString((clock() - begClock)/CLOCKS_PER_SEC) + '\n');
446             
447                         string namesFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.names";
448                         //createNamesFile(numSeqs, numUniques, namesFileName, seqNameVector, mapSeqToUnique, mapUniqueToSeq);
449             /*****************************************************************************************************
450             vector<string> duplicateNames(numUniques, "");
451             for(int i=0;i<numSeqs;i++){
452                 duplicateNames[mapSeqToUnique[i]] += seqNameVector[i] + ',';
453             }
454             
455             ofstream nameFile;
456             pDataArray->m->openOutputFile(namesFileName, nameFile);
457             
458             for(int i=0;i<numUniques;i++){
459                 if (pDataArray->m->control_pressed) { nameFile.close(); return 0; }
460                 nameFile << mapUniqueToSeq[i] << '\t' << duplicateNames[i].substr(0, duplicateNames[i].find_last_of(',')) << endl;
461             }
462             nameFile.close();
463             /*****************************************************************************************************
464
465                         if (pDataArray->m->control_pressed) { return 0; }
466                         
467                         pDataArray->m->mothurOut("\nClustering flowgrams...\n");
468             string listFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.list";
469                         //cluster(listFileName, distFileName, namesFileName);
470             /*****************************************************************************************************
471             ReadMatrix* read = new ReadColumnMatrix(distFileName);      
472             read->setCutoff(pDataArray->cutoff);
473             
474             NameAssignment* clusterNameMap = new NameAssignment(namesFileName);
475             clusterNameMap->readMap();
476             read->read(clusterNameMap);
477             
478             ListVector* list = read->getListVector();
479             SparseMatrix* matrix = read->getMatrix();
480             
481             delete read; 
482             delete clusterNameMap; 
483             
484             RAbundVector* rabund = new RAbundVector(list->getRAbundVector());
485             
486             Cluster* cluster = new CompleteLinkage(rabund, list, matrix, pDataArray->cutoff, "furthest"); 
487             string tag = cluster->getTag();
488             
489             double clusterCutoff = pDataArray->cutoff;
490             while (matrix->getSmallDist() <= clusterCutoff && matrix->getNNodes() > 0){
491                 
492                 if (pDataArray->m->control_pressed) { break; }
493                 
494                 cluster->update(clusterCutoff);
495             }
496             
497             list->setLabel(toString(pDataArray->cutoff));
498             
499             ofstream listFileOut;
500             pDataArray->m->openOutputFile(listFileName, listFileOut);
501             list->print(listFileOut);
502             listFileOut.close();
503             
504             delete matrix;      delete cluster; delete rabund; delete list;
505             /*****************************************************************************************************
506
507                         if (pDataArray->m->control_pressed) { return 0; }
508             
509             vector<int> otuData;
510             vector<int> cumNumSeqs;
511             vector<int> nSeqsPerOTU;
512             vector<vector<int> > aaP;   //tMaster->aanP:        each row is a different otu / each col contains the sequence indices
513             vector<vector<int> > aaI;   //tMaster->aanI:        that are in each otu - can't differentiate between aaP and aaI 
514             vector<int> seqNumber;              //tMaster->anP:         the sequence id number sorted by OTU
515             vector<int> seqIndex;               //tMaster->anI;         the index that corresponds to seqNumber
516             
517                         
518                         //int numOTUs = getOTUData(numSeqs, listFileName, otuData, cumNumSeqs, nSeqsPerOTU, aaP, aaI, seqNumber, seqIndex, nameMap);
519                         /*****************************************************************************************************
520             ifstream listFile;
521             pDataArray->m->openInputFile(listFileName, listFile);
522             string label;
523             int numOTUs;
524             
525             listFile >> label >> numOTUs;
526             
527             otuData.assign(numSeqs, 0);
528             cumNumSeqs.assign(numOTUs, 0);
529             nSeqsPerOTU.assign(numOTUs, 0);
530             aaP.clear();aaP.resize(numOTUs);
531             
532             seqNumber.clear();
533             aaI.clear();
534             seqIndex.clear();
535             
536             string singleOTU = "";
537             
538             for(int i=0;i<numOTUs;i++){
539                 
540                 if (pDataArray->m->control_pressed) { break; }
541                 
542                 listFile >> singleOTU;
543                 
544                 istringstream otuString(singleOTU);
545                 
546                 while(otuString){
547                     
548                     string seqName = "";
549                     
550                     for(int j=0;j<singleOTU.length();j++){
551                         char letter = otuString.get();
552                         
553                         if(letter != ','){
554                             seqName += letter;
555                         }
556                         else{
557                             map<string,int>::iterator nmIt = nameMap.find(seqName);
558                             int index = nmIt->second;
559                             
560                             nameMap.erase(nmIt);
561                             
562                             otuData[index] = i;
563                             nSeqsPerOTU[i]++;
564                             aaP[i].push_back(index);
565                             seqName = "";
566                         }
567                     }
568                     
569                     map<string,int>::iterator nmIt = nameMap.find(seqName);
570                     
571                     int index = nmIt->second;
572                     nameMap.erase(nmIt);
573                     
574                     otuData[index] = i;
575                     nSeqsPerOTU[i]++;
576                     aaP[i].push_back(index);    
577                     
578                     otuString.get();
579                 }
580                 
581                 sort(aaP[i].begin(), aaP[i].end());
582                 for(int j=0;j<nSeqsPerOTU[i];j++){
583                     seqNumber.push_back(aaP[i][j]);
584                 }
585                 for(int j=nSeqsPerOTU[i];j<numSeqs;j++){
586                     aaP[i].push_back(0);
587                 }
588                 
589                 
590             }
591             
592             for(int i=1;i<numOTUs;i++){
593                 cumNumSeqs[i] = cumNumSeqs[i-1] + nSeqsPerOTU[i-1];
594             }
595             aaI = aaP;
596             seqIndex = seqNumber;
597             
598             listFile.close();       
599             /*****************************************************************************************************
600
601                         if (pDataArray->m->control_pressed) { return 0; }
602                         
603                         pDataArray->m->mothurRemove(distFileName);
604                         pDataArray->m->mothurRemove(namesFileName);
605                         pDataArray->m->mothurRemove(listFileName);
606                         
607             vector<double> dist;                //adDist - distance of sequences to centroids
608             vector<short> change;               //did the centroid sequence change? 0 = no; 1 = yes
609             vector<int> centroids;              //the representative flowgram for each cluster m
610             vector<double> weight;
611             vector<double> singleTau;   //tMaster->adTau:       1-D Tau vector (1xnumSeqs)
612             vector<int> nSeqsBreaks;
613             vector<int> nOTUsBreaks;
614             
615                         dist.assign(numSeqs * numOTUs, 0);
616             change.assign(numOTUs, 1);
617             centroids.assign(numOTUs, -1);
618             weight.assign(numOTUs, 0);
619             singleTau.assign(numSeqs, 1.0);
620             
621             nSeqsBreaks.assign(2, 0);
622             nOTUsBreaks.assign(2, 0);
623             
624             nSeqsBreaks[0] = 0;
625             nSeqsBreaks[1] = numSeqs;
626             nOTUsBreaks[1] = numOTUs;
627                         
628                         if (pDataArray->m->control_pressed) { break; }
629                         
630                         double maxDelta = 0;
631                         int iter = 0;
632                         
633                         begClock = clock();
634                         begTime = time(NULL);
635             
636                         pDataArray->m->mothurOut("\nDenoising flowgrams...\n");
637                         pDataArray->m->mothurOut("iter\tmaxDelta\tnLL\t\tcycletime\n");
638                         
639                         while((pDataArray->maxIters == 0 && maxDelta > pDataArray->minDelta) || iter < MIN_ITER || (maxDelta > pDataArray->minDelta && iter < pDataArray->maxIters)){
640                                 
641                                 if (pDataArray->m->control_pressed) { break; }
642                                 
643                                 double cycClock = clock();
644                                 unsigned long long cycTime = time(NULL);
645                                 //fill(numOTUs, seqNumber, seqIndex, cumNumSeqs, nSeqsPerOTU, aaP, aaI);
646                 /*****************************************************************************************************
647                 int indexFill = 0;
648                 for(int i=0;i<numOTUs;i++){
649                     
650                     if (pDataArray->m->control_pressed) { return 0; }
651                     
652                     cumNumSeqs[i] = indexFill;
653                     for(int j=0;j<nSeqsPerOTU[i];j++){
654                         seqNumber[indexFill] = aaP[i][j];
655                         seqIndex[indexFill] = aaI[i][j];
656                         
657                         indexFill++;
658                     }
659                 }
660                 /*****************************************************************************************************
661
662                                 
663                                 if (pDataArray->m->control_pressed) { break; }
664                 
665                                 //calcCentroidsDriver(numOTUs, cumNumSeqs, nSeqsPerOTU, seqIndex, change, centroids, singleTau, mapSeqToUnique, uniqueFlowgrams, flowDataIntI, lengths, numFlowCells, seqNumber);
666                 /*****************************************************************************************************
667                 for(int i=0;i<numOTUs;i++){
668                     
669                     if (pDataArray->m->control_pressed) { break; }
670                     
671                     double count = 0;
672                     int position = 0;
673                     int minFlowGram = 100000000;
674                     double minFlowValue = 1e8;
675                     change[i] = 0; //FALSE
676                     
677                     for(int j=0;j<nSeqsPerOTU[i];j++){
678                         count += singleTau[seqNumber[cumNumSeqs[i] + j]];
679                     }
680                     
681                     if(nSeqsPerOTU[i] > 0 && count > MIN_COUNT){
682                         vector<double> adF(nSeqsPerOTU[i]);
683                         vector<int> anL(nSeqsPerOTU[i]);
684                         
685                         for(int j=0;j<nSeqsPerOTU[i];j++){
686                             int index = cumNumSeqs[i] + j;
687                             int nI = seqIndex[index];
688                             int nIU = mapSeqToUnique[nI];
689                             
690                             int k;
691                             for(k=0;k<position;k++){
692                                 if(nIU == anL[k]){
693                                     break;
694                                 }
695                             }
696                             if(k == position){
697                                 anL[position] = nIU;
698                                 adF[position] = 0.0000;
699                                 position++;
700                             }                                           
701                         }
702                         
703                         for(int j=0;j<nSeqsPerOTU[i];j++){
704                             int index = cumNumSeqs[i] + j;
705                             int nI = seqIndex[index];
706                             
707                             double tauValue = singleTau[seqNumber[index]];
708                             
709                             for(int k=0;k<position;k++){
710                                // double dist = getDistToCentroid(anL[k], nI, lengths[nI], uniqueFlowgrams, flowDataIntI, numFlowCells);
711                                 /*****************************************************************************************************
712                                 int flowAValue = anL[k] * numFlowCells;
713                                 int flowBValue = nI * numFlowCells;
714                                 
715                                 double dist = 0;
716                                 
717                                 for(int l=0;l<lengths[nI];l++){
718                                     dist += pDataArray->singleLookUp[uniqueFlowgrams[flowAValue] * NUMBINS + flowDataIntI[flowBValue]];
719                                     flowAValue++;
720                                     flowBValue++;
721                                 }
722                                 
723                                 dist = dist / (double)lengths[nI];
724                                 /*****************************************************************************************************
725                                 adF[k] += dist * tauValue;
726                             }
727                         }
728                         
729                         for(int j=0;j<position;j++){
730                             if(adF[j] < minFlowValue){
731                                 minFlowGram = j;
732                                 minFlowValue = adF[j];
733                             }
734                         }
735                         
736                         if(centroids[i] != anL[minFlowGram]){
737                             change[i] = 1;
738                             centroids[i] = anL[minFlowGram];
739                         }
740                     }
741                     else if(centroids[i] != -1){
742                         change[i] = 1;
743                         centroids[i] = -1;                      
744                     }
745                 }
746                 /*****************************************************************************************************
747
748                                 if (pDataArray->m->control_pressed) { break; }
749                 
750                                 //maxDelta = getNewWeights(numOTUs, cumNumSeqs, nSeqsPerOTU, singleTau, seqNumber, weight);  
751                 /*****************************************************************************************************
752                 double maxChange = 0;
753                 
754                 for(int i=0;i<numOTUs;i++){
755                     
756                     if (pDataArray->m->control_pressed) { break; }
757                     
758                     double difference = weight[i];
759                     weight[i] = 0;
760                     
761                     for(int j=0;j<nSeqsPerOTU[i];j++){
762                         int index = cumNumSeqs[i] + j;
763                         double tauValue = singleTau[seqNumber[index]];
764                         weight[i] += tauValue;
765                     }
766                     
767                     difference = fabs(weight[i] - difference);
768                     if(difference > maxChange){ maxChange = difference; }
769                 }
770                 maxDelta = maxChange;
771                 /*****************************************************************************************************
772
773                 if (pDataArray->m->control_pressed) { break; }
774                 
775                                 //double nLL = getLikelihood(numSeqs, numOTUs, nSeqsPerOTU, seqNumber, cumNumSeqs, seqIndex, dist, weight); 
776                 /*****************************************************************************************************
777                 vector<long double> P(numSeqs, 0);
778                 int effNumOTUs = 0;
779                 
780                 for(int i=0;i<numOTUs;i++){
781                     if(weight[i] > MIN_WEIGHT){
782                         effNumOTUs++;
783                     }
784                 }
785                 
786                 string hold;
787                 for(int i=0;i<numOTUs;i++){
788                     
789                     if (pDataArray->m->control_pressed) { break; }
790                     
791                     for(int j=0;j<nSeqsPerOTU[i];j++){
792                         int index = cumNumSeqs[i] + j;
793                         int nI = seqIndex[index];
794                         double singleDist = dist[seqNumber[index]];
795                         
796                         P[nI] += weight[i] * exp(-singleDist * pDataArray->sigma);
797                     }
798                 }
799                 double nLL = 0.00;
800                 for(int i=0;i<numSeqs;i++){
801                     if(P[i] == 0){      P[i] = DBL_EPSILON;     }
802                     
803                     nLL += -log(P[i]);
804                 }
805                 
806                 nLL = nLL -(double)numSeqs * log(pDataArray->sigma);
807                 /*****************************************************************************************************
808
809                 if (pDataArray->m->control_pressed) { break; }
810                 
811                                 //checkCentroids(numOTUs, centroids, weight);
812                 /*****************************************************************************************************
813                 vector<int> unique(numOTUs, 1);
814                 
815                 for(int i=0;i<numOTUs;i++){
816                     if(centroids[i] == -1 || weight[i] < MIN_WEIGHT){
817                         unique[i] = -1;
818                     }
819                 }
820                 
821                 for(int i=0;i<numOTUs;i++){
822                     
823                     if (pDataArray->m->control_pressed) { break; }
824                     
825                     if(unique[i] == 1){
826                         for(int j=i+1;j<numOTUs;j++){
827                             if(unique[j] == 1){
828                                 
829                                 if(centroids[j] == centroids[i]){
830                                     unique[j] = 0;
831                                     centroids[j] = -1;
832                                     
833                                     weight[i] += weight[j];
834                                     weight[j] = 0.0;
835                                 }
836                             }
837                         }
838                     }
839                 }
840                 /*****************************************************************************************************
841
842                                 if (pDataArray->m->control_pressed) { break; }
843                                 
844                                 //calcNewDistances(numSeqs, numOTUs, nSeqsPerOTU,  dist, weight, change, centroids, aaP, singleTau, aaI, seqNumber, seqIndex, uniqueFlowgrams, flowDataIntI, numFlowCells, lengths);
845                 /*****************************************************************************************************
846                 int total = 0;
847                 vector<double> newTau(numOTUs,0);
848                 vector<double> norms(numSeqs, 0);
849                 nSeqsPerOTU.assign(numOTUs, 0);
850                 
851                 for(int i=0;i<numSeqs;i++){
852                     
853                     if (pDataArray->m->control_pressed) { break; }
854                     
855                     int indexOffset = i * numOTUs;
856                     
857                     double offset = 1e8;
858                     
859                     for(int j=0;j<numOTUs;j++){
860                         
861                         if(weight[j] > MIN_WEIGHT && change[j] == 1){
862                             //dist[indexOffset + j] = getDistToCentroid(centroids[j], i, lengths[i], uniqueFlowgrams, flowDataIntI, numFlowCells);
863                             /*****************************************************************************************************
864                             int flowAValue = centroids[j] * numFlowCells;
865                             int flowBValue = i * numFlowCells;
866                             
867                             double distTemp = 0;
868                             
869                             for(int l=0;l<lengths[i];l++){
870                                 distTemp += pDataArray->singleLookUp[uniqueFlowgrams[flowAValue] * NUMBINS + flowDataIntI[flowBValue]];
871                                 flowAValue++;
872                                 flowBValue++;
873                             }
874                             
875                             dist[indexOffset + j] = distTemp / (double)lengths[i];
876                             /*****************************************************************************************************
877
878                         }
879                         
880                         if(weight[j] > MIN_WEIGHT && dist[indexOffset + j] < offset){
881                             offset = dist[indexOffset + j];
882                         }
883                     }
884                     
885                     for(int j=0;j<numOTUs;j++){
886                         if(weight[j] > MIN_WEIGHT){
887                             newTau[j] = exp(pDataArray->sigma * (-dist[indexOffset + j] + offset)) * weight[j];
888                             norms[i] += newTau[j];
889                         }
890                         else{
891                             newTau[j] = 0.0;
892                         }
893                     }
894                     
895                     for(int j=0;j<numOTUs;j++){
896                         newTau[j] /= norms[i];
897                     }
898                     
899                     for(int j=0;j<numOTUs;j++){
900                         if(newTau[j] > MIN_TAU){
901                             
902                             int oldTotal = total;
903                             
904                             total++;
905                             
906                             singleTau.resize(total, 0);
907                             seqNumber.resize(total, 0);
908                             seqIndex.resize(total, 0);
909                             
910                             singleTau[oldTotal] = newTau[j];
911                             
912                             aaP[j][nSeqsPerOTU[j]] = oldTotal;
913                             aaI[j][nSeqsPerOTU[j]] = i;
914                             nSeqsPerOTU[j]++;
915                         }
916                     }
917                     
918                 }
919
920                 /*****************************************************************************************************
921
922                                 if (pDataArray->m->control_pressed) { break; }
923                                 
924                                 iter++;
925                                 
926                                 pDataArray->m->mothurOut(toString(iter) + '\t' + toString(maxDelta) + '\t' + toString(nLL) + '\t' + toString(time(NULL) - cycTime) + '\t' + toString((clock() - cycClock)/(double)CLOCKS_PER_SEC) + '\n');
927                 
928                         }       
929                         
930                         if (pDataArray->m->control_pressed) { break; }
931                         
932                         pDataArray->m->mothurOut("\nFinalizing...\n");
933                         //fill(numOTUs, seqNumber, seqIndex, cumNumSeqs, nSeqsPerOTU, aaP, aaI);
934             /*****************************************************************************************************
935             int indexFill = 0;
936             for(int i=0;i<numOTUs;i++){
937                 
938                 if (pDataArray->m->control_pressed) { return 0; }
939                 
940                 cumNumSeqs[i] = indexFill;
941                 for(int j=0;j<nSeqsPerOTU[i];j++){
942                     seqNumber[indexFill] = aaP[i][j];
943                     seqIndex[indexFill] = aaI[i][j];
944                     
945                     indexFill++;
946                 }
947             }
948             /*****************************************************************************************************
949
950                         if (pDataArray->m->control_pressed) { break; }
951                         
952                         //setOTUs(numOTUs, numSeqs, seqNumber, seqIndex, cumNumSeqs, nSeqsPerOTU, otuData, singleTau, dist, aaP, aaI);
953             /*****************************************************************************************************
954             vector<double> bigTauMatrix(numOTUs * numSeqs, 0.0000);
955             
956             for(int i=0;i<numOTUs;i++){
957                 
958                 if (pDataArray->m->control_pressed) { break; }
959                 
960                 for(int j=0;j<nSeqsPerOTU[i];j++){
961                     int index = cumNumSeqs[i] + j;
962                     double tauValue = singleTau[seqNumber[index]];
963                     int sIndex = seqIndex[index];
964                     bigTauMatrix[sIndex * numOTUs + i] = tauValue;                              
965                 }
966             }
967             
968             for(int i=0;i<numSeqs;i++){
969                 double maxTau = -1.0000;
970                 int maxOTU = -1;
971                 for(int j=0;j<numOTUs;j++){
972                     if(bigTauMatrix[i * numOTUs + j] > maxTau){
973                         maxTau = bigTauMatrix[i * numOTUs + j];
974                         maxOTU = j;
975                     }
976                 }
977                 
978                 otuData[i] = maxOTU;
979             }
980             
981             nSeqsPerOTU.assign(numOTUs, 0);             
982             
983             for(int i=0;i<numSeqs;i++){
984                 int index = otuData[i];
985                 
986                 singleTau[i] = 1.0000;
987                 dist[i] = 0.0000;
988                 
989                 aaP[index][nSeqsPerOTU[index]] = i;
990                 aaI[index][nSeqsPerOTU[index]] = i;
991                 
992                 nSeqsPerOTU[index]++;
993             }
994             
995             //fill(numOTUs, seqNumber, seqIndex, cumNumSeqs, nSeqsPerOTU, aaP, aaI);    
996             /*****************************************************************************************************
997             indexFill = 0;
998             for(int i=0;i<numOTUs;i++){
999                 
1000                 if (pDataArray->m->control_pressed) { return 0; }
1001                 
1002                 cumNumSeqs[i] = indexFill;
1003                 for(int j=0;j<nSeqsPerOTU[i];j++){
1004                     seqNumber[indexFill] = aaP[i][j];
1005                     seqIndex[indexFill] = aaI[i][j];
1006                     
1007                     indexFill++;
1008                 }
1009             }
1010             /*****************************************************************************************************/
1011
1012             /*****************************************************************************************************
1013
1014                         if (pDataArray->m->control_pressed) { break; }
1015                         
1016                         vector<int> otuCounts(numOTUs, 0);
1017                         for(int i=0;i<numSeqs;i++)      {       otuCounts[otuData[i]]++;        }
1018                         
1019                         //calcCentroidsDriver(numOTUs, cumNumSeqs, nSeqsPerOTU, seqIndex, change, centroids, singleTau, mapSeqToUnique, uniqueFlowgrams, flowDataIntI, lengths, numFlowCells, seqNumber);       
1020             /*****************************************************************************************************
1021             for(int i=0;i<numOTUs;i++){
1022                 
1023                 if (pDataArray->m->control_pressed) { break; }
1024                 
1025                 double count = 0;
1026                 int position = 0;
1027                 int minFlowGram = 100000000;
1028                 double minFlowValue = 1e8;
1029                 change[i] = 0; //FALSE
1030                 
1031                 for(int j=0;j<nSeqsPerOTU[i];j++){
1032                     count += singleTau[seqNumber[cumNumSeqs[i] + j]];
1033                 }
1034                 
1035                 if(nSeqsPerOTU[i] > 0 && count > MIN_COUNT){
1036                     vector<double> adF(nSeqsPerOTU[i]);
1037                     vector<int> anL(nSeqsPerOTU[i]);
1038                     
1039                     for(int j=0;j<nSeqsPerOTU[i];j++){
1040                         int index = cumNumSeqs[i] + j;
1041                         int nI = seqIndex[index];
1042                         int nIU = mapSeqToUnique[nI];
1043                         
1044                         int k;
1045                         for(k=0;k<position;k++){
1046                             if(nIU == anL[k]){
1047                                 break;
1048                             }
1049                         }
1050                         if(k == position){
1051                             anL[position] = nIU;
1052                             adF[position] = 0.0000;
1053                             position++;
1054                         }                                               
1055                     }
1056                     
1057                     for(int j=0;j<nSeqsPerOTU[i];j++){
1058                         int index = cumNumSeqs[i] + j;
1059                         int nI = seqIndex[index];
1060                         
1061                         double tauValue = singleTau[seqNumber[index]];
1062                         
1063                         for(int k=0;k<position;k++){
1064                             // double dist = getDistToCentroid(anL[k], nI, lengths[nI], uniqueFlowgrams, flowDataIntI, numFlowCells);
1065                             /*****************************************************************************************************
1066                             int flowAValue = anL[k] * numFlowCells;
1067                             int flowBValue = nI * numFlowCells;
1068                             
1069                             double dist = 0;
1070                             
1071                             for(int l=0;l<lengths[nI];l++){
1072                                 dist += pDataArray->singleLookUp[uniqueFlowgrams[flowAValue] * NUMBINS + flowDataIntI[flowBValue]];
1073                                 flowAValue++;
1074                                 flowBValue++;
1075                             }
1076                             
1077                             dist = dist / (double)lengths[nI];
1078                             /*****************************************************************************************************
1079                             adF[k] += dist * tauValue;
1080                         }
1081                     }
1082                     
1083                     for(int j=0;j<position;j++){
1084                         if(adF[j] < minFlowValue){
1085                             minFlowGram = j;
1086                             minFlowValue = adF[j];
1087                         }
1088                     }
1089                     
1090                     if(centroids[i] != anL[minFlowGram]){
1091                         change[i] = 1;
1092                         centroids[i] = anL[minFlowGram];
1093                     }
1094                 }
1095                 else if(centroids[i] != -1){
1096                     change[i] = 1;
1097                     centroids[i] = -1;                  
1098                 }
1099             }
1100
1101             /*****************************************************************************************************
1102
1103             if (pDataArray->m->control_pressed) { break; }
1104             
1105                         //writeQualities(numOTUs, numFlowCells, flowFileName, otuCounts, nSeqsPerOTU, seqNumber, singleTau, flowDataIntI, uniqueFlowgrams, cumNumSeqs, mapUniqueToSeq, seqNameVector, centroids, aaI); 
1106             if (pDataArray->m->control_pressed) { break; }
1107             /*****************************************************************************************************
1108             string thisOutputDir = pDataArray->outputDir;
1109             if (pDataArray->outputDir == "") {  thisOutputDir += pDataArray->m->hasPath(flowFileName);  }
1110             string qualityFileName = thisOutputDir + pDataArray->m->getRootName(pDataArray->m->getSimpleName(flowFileName)) + "shhh.qual";
1111             
1112             ofstream qualityFile;
1113             pDataArray->m->openOutputFile(qualityFileName, qualityFile);
1114             
1115             qualityFile.setf(ios::fixed, ios::floatfield);
1116             qualityFile.setf(ios::showpoint);
1117             qualityFile << setprecision(6);
1118             
1119             vector<vector<int> > qualities(numOTUs);
1120             vector<double> pr(HOMOPS, 0);
1121             
1122             
1123             for(int i=0;i<numOTUs;i++){
1124                 
1125                 if (pDataArray->m->control_pressed) { break; }
1126                 
1127                 int index = 0;
1128                 int base = 0;
1129                 
1130                 if(nSeqsPerOTU[i] > 0){
1131                     qualities[i].assign(1024, -1);
1132                     
1133                     while(index < numFlowCells){
1134                         double maxPrValue = 1e8;
1135                         short maxPrIndex = -1;
1136                         double count = 0.0000;
1137                         
1138                         pr.assign(HOMOPS, 0);
1139                         
1140                         for(int j=0;j<nSeqsPerOTU[i];j++){
1141                             int lIndex = cumNumSeqs[i] + j;
1142                             double tauValue = singleTau[seqNumber[lIndex]];
1143                             int sequenceIndex = aaI[i][j];
1144                             short intensity = flowDataIntI[sequenceIndex * numFlowCells + index];
1145                             
1146                             count += tauValue;
1147                             
1148                             for(int s=0;s<HOMOPS;s++){
1149                                 pr[s] += tauValue * pDataArray->singleLookUp[s * NUMBINS + intensity];
1150                             }
1151                         }
1152                         
1153                         maxPrIndex = uniqueFlowgrams[centroids[i] * numFlowCells + index];
1154                         maxPrValue = pr[maxPrIndex];
1155                         
1156                         if(count > MIN_COUNT){
1157                             double U = 0.0000;
1158                             double norm = 0.0000;
1159                             
1160                             for(int s=0;s<HOMOPS;s++){
1161                                 norm += exp(-(pr[s] - maxPrValue));
1162                             }
1163                             
1164                             for(int s=1;s<=maxPrIndex;s++){
1165                                 int value = 0;
1166                                 double temp = 0.0000;
1167                                 
1168                                 U += exp(-(pr[s-1]-maxPrValue))/norm;
1169                                 
1170                                 if(U>0.00){
1171                                     temp = log10(U);
1172                                 }
1173                                 else{
1174                                     temp = -10.1;
1175                                 }
1176                                 temp = floor(-10 * temp);
1177                                 value = (int)floor(temp);
1178                                 if(value > 100){        value = 100;    }
1179                                 
1180                                 qualities[i][base] = (int)value;
1181                                 base++;
1182                             }
1183                         }
1184                         
1185                         index++;
1186                     }
1187                 }
1188                 
1189                 
1190                 if(otuCounts[i] > 0){
1191                     qualityFile << '>' << seqNameVector[mapUniqueToSeq[i]] << endl;
1192                     
1193                     int j=4;    //need to get past the first four bases
1194                     while(qualities[i][j] != -1){
1195                         qualityFile << qualities[i][j] << ' ';
1196                         j++;
1197                     }
1198                     qualityFile << endl;
1199                 }
1200             }
1201             qualityFile.close();
1202             pDataArray->outputNames.push_back(qualityFileName);
1203             /*****************************************************************************************************
1204
1205            // writeSequences(thisCompositeFASTAFileName, numOTUs, numFlowCells, flowFileName, otuCounts, uniqueFlowgrams, seqNameVector, aaI, centroids);
1206             if (pDataArray->m->control_pressed) { break; }
1207             /*****************************************************************************************************
1208             thisOutputDir = pDataArray->outputDir;
1209             if (pDataArray->outputDir == "") {  thisOutputDir += pDataArray->m->hasPath(flowFileName);  }
1210             string fastaFileName = thisOutputDir + pDataArray->m->getRootName(pDataArray->m->getSimpleName(flowFileName)) + "shhh.fasta";
1211             ofstream fastaFile;
1212             pDataArray->m->openOutputFile(fastaFileName, fastaFile);
1213             
1214             vector<string> names(numOTUs, "");
1215             
1216             for(int i=0;i<numOTUs;i++){
1217                 
1218                 if (pDataArray->m->control_pressed) { break; }
1219                 
1220                 int index = centroids[i];
1221                 
1222                 if(otuCounts[i] > 0){
1223                     fastaFile << '>' << seqNameVector[aaI[i][0]] << endl;
1224                     
1225                     string newSeq = "";
1226                     
1227                     for(int j=0;j<numFlowCells;j++){
1228                         
1229                         char base = pDataArray->flowOrder[j % 4];
1230                         for(int k=0;k<uniqueFlowgrams[index * numFlowCells + j];k++){
1231                             newSeq += base;
1232                         }
1233                     }
1234                     
1235                     fastaFile << newSeq.substr(4) << endl;
1236                 }
1237             }
1238             fastaFile.close();
1239             
1240             pDataArray->outputNames.push_back(fastaFileName);
1241             
1242             if(pDataArray->thisCompositeFASTAFileName != ""){
1243                 pDataArray->m->appendFiles(fastaFileName, pDataArray->thisCompositeFASTAFileName);
1244             }
1245
1246             /*****************************************************************************************************
1247
1248             //writeNames(thisCompositeNamesFileName, numOTUs, flowFileName, otuCounts, seqNameVector, aaI, nSeqsPerOTU);                                
1249             if (pDataArray->m->control_pressed) { break; }
1250             /*****************************************************************************************************
1251             thisOutputDir = pDataArray->outputDir;
1252             if (pDataArray->outputDir == "") {  thisOutputDir += pDataArray->m->hasPath(flowFileName);  }
1253             string nameFileName = thisOutputDir + pDataArray->m->getRootName(pDataArray->m->getSimpleName(flowFileName)) + "shhh.names";
1254             ofstream nameFileOut;
1255             pDataArray->m->openOutputFile(nameFileName, nameFileOut);
1256             
1257             for(int i=0;i<numOTUs;i++){
1258                 
1259                 if (pDataArray->m->control_pressed) { break; }
1260                 
1261                 if(otuCounts[i] > 0){
1262                     nameFileOut << seqNameVector[aaI[i][0]] << '\t' << seqNameVector[aaI[i][0]];
1263                     
1264                     for(int j=1;j<nSeqsPerOTU[i];j++){
1265                         nameFileOut << ',' << seqNameVector[aaI[i][j]];
1266                     }
1267                     
1268                     nameFileOut << endl;
1269                 }
1270             }
1271             nameFileOut.close();
1272             pDataArray->outputNames.push_back(nameFileName);
1273             
1274             
1275             if(pDataArray->thisCompositeNameFileName != ""){
1276                 pDataArray->m->appendFiles(nameFileName, pDataArray->thisCompositeNameFileName);
1277             }           
1278             /*****************************************************************************************************
1279
1280             //writeClusters(flowFileName, numOTUs, numFlowCells,otuCounts, centroids, uniqueFlowgrams, seqNameVector, aaI, nSeqsPerOTU, lengths, flowDataIntI);                 
1281             if (pDataArray->m->control_pressed) { break; }
1282             /*****************************************************************************************************
1283             thisOutputDir = pDataArray->outputDir;
1284             if (pDataArray->outputDir == "") {  thisOutputDir += pDataArray->m->hasPath(flowFileName);  }
1285             string otuCountsFileName = thisOutputDir + pDataArray->m->getRootName(pDataArray->m->getSimpleName(flowFileName)) + "shhh.counts";
1286             ofstream otuCountsFile;
1287             pDataArray->m->openOutputFile(otuCountsFileName, otuCountsFile);
1288             
1289             string bases = pDataArray->flowOrder;
1290             
1291             for(int i=0;i<numOTUs;i++){
1292                 
1293                 if (pDataArray->m->control_pressed) {
1294                     break;
1295                 }
1296                 //output the translated version of the centroid sequence for the otu
1297                 if(otuCounts[i] > 0){
1298                     int index = centroids[i];
1299                     
1300                     otuCountsFile << "ideal\t";
1301                     for(int j=8;j<numFlowCells;j++){
1302                         char base = bases[j % 4];
1303                         for(int s=0;s<uniqueFlowgrams[index * numFlowCells + j];s++){
1304                             otuCountsFile << base;
1305                         }
1306                     }
1307                     otuCountsFile << endl;
1308                     
1309                     for(int j=0;j<nSeqsPerOTU[i];j++){
1310                         int sequence = aaI[i][j];
1311                         otuCountsFile << seqNameVector[sequence] << '\t';
1312                         
1313                         string newSeq = "";
1314                         
1315                         for(int k=0;k<lengths[sequence];k++){
1316                             char base = bases[k % 4];
1317                             int freq = int(0.01 * (double)flowDataIntI[sequence * numFlowCells + k] + 0.5);
1318                             
1319                             for(int s=0;s<freq;s++){
1320                                 newSeq += base;
1321                                 //otuCountsFile << base;
1322                             }
1323                         }
1324                         otuCountsFile << newSeq.substr(4) << endl;
1325                     }
1326                     otuCountsFile << endl;
1327                 }
1328             }
1329             otuCountsFile.close();
1330             pDataArray->outputNames.push_back(otuCountsFileName)
1331             /*****************************************************************************************************
1332
1333             //writeGroups(flowFileName, numSeqs, seqNameVector);                                                
1334             if (pDataArray->m->control_pressed) { break; }
1335             /*****************************************************************************************************
1336             thisOutputDir = pDataArray->outputDir;
1337             if (pDataArray->outputDir == "") {  thisOutputDir += pDataArray->m->hasPath(flowFileName);  }
1338             string fileRoot = thisOutputDir + pDataArray->m->getRootName(pDataArray->m->getSimpleName(flowFileName));
1339             string groupFileName = fileRoot + "shhh.groups";
1340             ofstream groupFile;
1341             pDataArray->m->openOutputFile(groupFileName, groupFile);
1342             
1343             for(int i=0;i<numSeqs;i++){
1344                 if (pDataArray->m->control_pressed) { break; }
1345                 groupFile << seqNameVector[i] << '\t' << fileRoot << endl;
1346             }
1347             groupFile.close();
1348             pDataArray->outputNames.push_back(groupFileName);
1349             /*****************************************************************************************************
1350
1351             pDataArray->m->mothurOut("Total time to process " + flowFileName + ":\t" + toString(time(NULL) - begTime) + '\t' + toString((clock() - begClock)/(double)CLOCKS_PER_SEC) + '\n');
1352                 }
1353                 
1354         if (pDataArray->m->control_pressed) { for (int i = 0; i < pDataArray->outputNames.size(); i++) { pDataArray->m->mothurRemove(pDataArray->outputNames[i]); } return 0; }
1355         
1356         return 0;
1357                 
1358         }
1359         catch(exception& e) {
1360                 pDataArray->m->errorOut(e, "ShhherCommand", "ShhhFlowsThreadFunction");
1361                 exit(1);
1362         }
1363
1364 #endif
1365 */
1366
1367 #endif
1368