]> git.donarmstrong.com Git - mothur.git/blob - chimeraslayercommand.h
changed random forest output filename
[mothur.git] / chimeraslayercommand.h
1 #ifndef CHIMERASLAYERCOMMAND_H
2 #define CHIMERASLAYERCOMMAND_H
3
4 /*
5  *  chimeraslayercommand.h
6  *  Mothur
7  *
8  *  Created by westcott on 3/31/10.
9  *  Copyright 2010 Schloss Lab. All rights reserved.
10  *
11  */
12
13 #include "mothur.h"
14 #include "command.hpp"
15 #include "chimera.h"
16 #include "chimeraslayer.h"
17 #include "sequenceparser.h"
18 #include "sequencecountparser.h"
19
20 /***********************************************************/
21
22 class ChimeraSlayerCommand : public Command {
23 public:
24         ChimeraSlayerCommand(string);
25         ChimeraSlayerCommand();
26         ~ChimeraSlayerCommand() {}
27         
28         vector<string> setParameters();
29         string getCommandName()                 { return "chimera.slayer";              }
30         string getCommandCategory()             { return "Sequence Processing"; }
31         
32         string getHelpString(); 
33     string getOutputPattern(string);    
34         string getCitation() { return "Haas BJ, Gevers D, Earl A, Feldgarden M, Ward DV, Giannokous G, Ciulla D, Tabbaa D, Highlander SK, Sodergren E, Methe B, Desantis TZ, Petrosino JF, Knight R, Birren BW (2011). Chimeric 16S rRNA sequence formation and detection in Sanger and 454-pyrosequenced PCR amplicons. Genome Res  21:494.\nhttp://www.mothur.org/wiki/Chimera.slayer"; }
35         string getDescription()         { return "detect chimeric sequences"; }
36         
37         int execute(); 
38         void help() { m->mothurOut(getHelpString()); }          
39         
40 private:
41
42         struct linePair {
43                 unsigned long long start;
44                 unsigned long long end;
45                 linePair(unsigned long long i, unsigned long long j) : start(i), end(j) {}
46         };
47
48         vector<int> processIDS;   //processid
49         vector<linePair> lines;
50         
51         int driver(linePair, string, string, string, string, map<string, int>&);
52         int createProcesses(string, string, string, string, map<string, int>&);
53         int divideInHalf(Sequence, string&, string&);
54         map<string, int> sortFastaFile(string, string);
55         map<string, int> sortFastaFile(vector<Sequence>&, map<string, string>&, string newFile);
56     int sortFastaFile(vector<Sequence>&, map<string, int>&, string newFile);
57         string getNamesFile(string&);
58         //int setupChimera(string,);
59         int MPIExecute(string, string, string, string, map<string, int>&);
60         int deconvoluteResults(map<string, string>&, string, string, string);
61         map<string, int> priority;
62         int setUpForSelfReference(SequenceParser*&, map<string, string>&, map<string, map<string, int> >&, int);
63     int setUpForSelfReference(SequenceCountParser*&, map<string, string>&, map<string, map<string, int> >&, int);
64         int driverGroups(string, string, string, map<string, map<string, int> >&, map<string, string>&, string);
65         int createProcessesGroups(string, string, string, map<string, map<string, int> >&, map<string, string>&, string, string);
66         int MPIExecuteGroups(string, string, string, map<string, map<string, int> >&, map<string, string>&, string, string);
67
68                 
69         #ifdef USE_MPI
70         int driverMPI(int, int, MPI_File&, MPI_File&, MPI_File&, MPI_File&, set<string>&, vector<unsigned long long>&, string, map<string, int>&, bool);
71         #endif
72
73         bool abort, realign, trim, trimera, save, hasName, hasCount, dups;
74         string fastafile, groupfile, templatefile, outputDir, search, namefile, countfile, blastlocation;
75         int processors, window, iters, increment, numwanted, ksize, match, mismatch, parents, minSimilarity, minCoverage, minBS, minSNP, numSeqs, templateSeqsLength;
76         float divR;
77         
78     map<string, map<string, string> > group2NameMap;
79         vector<string> outputNames;
80         vector<string> fastaFileNames;
81         vector<string> nameFileNames;
82         vector<string> groupFileNames;
83         
84 };
85
86 /***********************************************************/
87
88 //custom data structure for threads to use.
89 // This is passed by void pointer so it can be any data type
90 // that can be passed using a single void pointer (LPVOID).
91 struct slayerData {
92         string outputFName; 
93         string fasta; 
94         string accnos;
95         string filename, countlist;
96         string templatefile;
97         string search;
98         string blastlocation;
99         bool trimera;
100         bool trim, realign, dups, hasCount;
101         unsigned long long start;
102         unsigned long long end;
103         int ksize, match, mismatch, window, minSimilarity, minCoverage, minBS, minSNP, parents, iters, increment, numwanted;
104         MothurOut* m;
105         float divR;
106         map<string, int> priority;
107         int count;
108         int numNoParents;
109         int threadId;
110         map<string, map<string, int> > fileToPriority;
111         map<string, string> fileGroup;
112     map<string, map<string, string> > group2NameMap;
113         
114         slayerData(){}
115         slayerData(string o, string fa, string ac, string f, string te, string se, string bl, bool tri, bool trm, bool re, MothurOut* mout, unsigned long long st, unsigned long long en, int ks, int ma, int mis, int win, int minS, int minC, int miBS, int minSN, int par, int it, int inc, int numw, float div, map<string, int> prior, int tid) {
116                 outputFName = o;
117                 fasta = fa;
118                 accnos = ac;
119                 filename = f;
120                 templatefile = te;
121                 search = se;
122                 blastlocation = bl;
123                 trimera = tri;
124                 trim = trm;
125                 realign = re;
126                 m = mout;
127                 start = st;
128                 end = en;
129                 ksize = ks;
130                 match = ma; 
131                 mismatch = mis;
132                 window = win;
133                 minSimilarity = minS;
134                 minCoverage = minC;
135                 minBS = miBS;
136                 minSNP = minSN;
137                 parents = par;
138                 iters = it;
139                 increment = inc;
140                 numwanted = numw;
141                 divR = div;
142                 priority = prior;
143                 threadId = tid;
144                 count = 0;
145                 numNoParents = 0;
146         }
147         slayerData(map<string, map<string, string> > g2n, bool hc, bool dps, string cl, string o, string fa, string ac, string te, string se, string bl, bool tri, bool trm, bool re, MothurOut* mout, map<string, map<string, int> >& fPriority, map<string, string>& fileG, int ks, int ma, int mis, int win, int minS, int minC, int miBS, int minSN, int par, int it, int inc, int numw, float div, map<string, int> prior, int tid) {
148                 outputFName = o;
149                 fasta = fa;
150                 accnos = ac;
151                 templatefile = te;
152                 search = se;
153                 blastlocation = bl;
154         countlist = cl;
155         dups = dps;
156         hasCount = hc;
157         group2NameMap = g2n;
158                 trimera = tri;
159                 trim = trm;
160                 realign = re;
161                 m = mout;
162                 fileGroup = fileG;
163                 fileToPriority = fPriority;
164                 ksize = ks;
165                 match = ma; 
166                 mismatch = mis;
167                 window = win;
168                 minSimilarity = minS;
169                 minCoverage = minC;
170                 minBS = miBS;
171                 minSNP = minSN;
172                 parents = par;
173                 iters = it;
174                 increment = inc;
175                 numwanted = numw;
176                 divR = div;
177                 priority = prior;
178                 threadId = tid;
179                 count = 0;
180                 numNoParents = 0;
181         }
182         
183 };
184
185 /**************************************************************************************************/
186 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
187 #else
188 static DWORD WINAPI MySlayerThreadFunction(LPVOID lpParam){ 
189         slayerData* pDataArray;
190         pDataArray = (slayerData*)lpParam;
191         
192         try {
193                 ofstream out;
194                 pDataArray->m->openOutputFile(pDataArray->outputFName, out);
195                 
196                 ofstream out2;
197                 pDataArray->m->openOutputFile(pDataArray->accnos, out2);
198                 
199                 ofstream out3;
200                 if (pDataArray->trim) {  pDataArray->m->openOutputFile(pDataArray->fasta, out3); }
201                 
202                 ifstream inFASTA;
203                 pDataArray->m->openInputFile(pDataArray->filename, inFASTA);
204                 
205                 
206                 
207                 Chimera* chimera;
208                 if (pDataArray->templatefile != "self") { //you want to run slayer with a reference template
209                         chimera = new ChimeraSlayer(pDataArray->filename, pDataArray->templatefile, pDataArray->trim, pDataArray->search, pDataArray->ksize, pDataArray->match, pDataArray->mismatch, pDataArray->window, pDataArray->divR, pDataArray->minSimilarity, pDataArray->minCoverage, pDataArray->minBS, pDataArray->minSNP, pDataArray->parents, pDataArray->iters, pDataArray->increment, pDataArray->numwanted, pDataArray->realign, pDataArray->blastlocation, pDataArray->threadId);     
210                 }else {
211                         chimera = new ChimeraSlayer(pDataArray->filename, pDataArray->templatefile, pDataArray->trim, pDataArray->priority, pDataArray->search, pDataArray->ksize, pDataArray->match, pDataArray->mismatch, pDataArray->window, pDataArray->divR, pDataArray->minSimilarity, pDataArray->minCoverage, pDataArray->minBS, pDataArray->minSNP, pDataArray->parents, pDataArray->iters, pDataArray->increment, pDataArray->numwanted, pDataArray->realign, pDataArray->blastlocation, pDataArray->threadId);       
212                 }
213                 
214                 //print header if you are process 0
215                 if ((pDataArray->start == 0) || (pDataArray->start == 1)) {
216                         chimera->printHeader(out); 
217                         inFASTA.seekg(0);
218                 }else { //this accounts for the difference in line endings. 
219                         inFASTA.seekg(pDataArray->start-1); pDataArray->m->gobble(inFASTA); 
220                 }
221                 
222                 if (pDataArray->m->control_pressed) { out.close(); out2.close(); if (pDataArray->trim) { out3.close(); } inFASTA.close(); delete chimera;  return 0;    }
223                 
224                 if (chimera->getUnaligned()) { 
225                         pDataArray->m->mothurOut("Your template sequences are different lengths, please correct."); pDataArray->m->mothurOutEndLine(); 
226                         out.close(); out2.close(); if (pDataArray->trim) { out3.close(); } inFASTA.close();
227                         delete chimera;
228                         return 0; 
229                 }
230                 int templateSeqsLength = chimera->getLength();
231                 
232                 if (pDataArray->start == 0) { chimera->printHeader(out); }
233                 
234                 pDataArray->count = 0;
235                 for(int i = 0; i < pDataArray->end; i++){
236                         
237                         if (pDataArray->m->control_pressed) {   out.close(); out2.close(); if (pDataArray->trim) { out3.close(); } inFASTA.close(); delete chimera; return 1;   }
238                         
239                         Sequence* candidateSeq = new Sequence(inFASTA);  pDataArray->m->gobble(inFASTA);
240                         string candidateAligned = candidateSeq->getAligned();
241                         
242                         if (candidateSeq->getName() != "") { //incase there is a commented sequence at the end of a file
243                                 if (candidateSeq->getAligned().length() != templateSeqsLength) {  
244                                         pDataArray->m->mothurOut(candidateSeq->getName() + " is not the same length as the template sequences. Skipping."); pDataArray->m->mothurOutEndLine();
245                                 }else{
246                                         //find chimeras
247                                         chimera->getChimeras(candidateSeq);
248                                         
249                                         if (pDataArray->m->control_pressed) {   delete candidateSeq; delete chimera; return 1;  }
250                                         
251                                         //if you are not chimeric, then check each half
252                                         data_results wholeResults = chimera->getResults();
253                                         
254                                         //determine if we need to split
255                                         bool isChimeric = false;
256                                         
257                                         if (wholeResults.flag == "yes") {
258                                                 string chimeraFlag = "no";
259                                                 if(  (wholeResults.results[0].bsa >= pDataArray->minBS && wholeResults.results[0].divr_qla_qrb >= pDataArray->divR)
260                                                    ||
261                                                    (wholeResults.results[0].bsb >= pDataArray->minBS && wholeResults.results[0].divr_qlb_qra >= pDataArray->divR) ) { chimeraFlag = "yes"; }
262                                                 
263                                                 
264                                                 if (chimeraFlag == "yes") {     
265                                                         if ((wholeResults.results[0].bsa >= pDataArray->minBS) || (wholeResults.results[0].bsb >= pDataArray->minBS)) { isChimeric = true; }
266                                                 }
267                                         }
268                                         
269                                         if ((!isChimeric) && pDataArray->trimera) {
270                                                 
271                                                 //split sequence in half by bases
272                                                 string leftQuery, rightQuery;
273                                                 Sequence tempSeq(candidateSeq->getName(), candidateAligned);
274                                                 //divideInHalf(tempSeq, leftQuery, rightQuery);
275                                                 string queryUnAligned = tempSeq.getUnaligned();
276                                                 int numBases = int(queryUnAligned.length() * 0.5);
277                                                 
278                                                 string queryAligned = tempSeq.getAligned();
279                                                 leftQuery = tempSeq.getAligned();
280                                                 rightQuery = tempSeq.getAligned();
281                                                 
282                                                 int baseCount = 0;
283                                                 int leftSpot = 0;
284                                                 for (int i = 0; i < queryAligned.length(); i++) {
285                                                         //if you are a base
286                                                         if (isalpha(queryAligned[i])) {         
287                                                                 baseCount++; 
288                                                         }
289                                                         
290                                                         //if you have half
291                                                         if (baseCount >= numBases) {  leftSpot = i; break; } //first half
292                                                 }
293                                                 
294                                                 //blank out right side
295                                                 for (int i = leftSpot; i < leftQuery.length(); i++) { leftQuery[i] = '.'; }
296                                                 
297                                                 //blank out left side
298                                                 for (int i = 0; i < leftSpot; i++) { rightQuery[i] = '.'; }
299                                                 
300                                                 //run chimeraSlayer on each piece
301                                                 Sequence* left = new Sequence(candidateSeq->getName(), leftQuery);
302                                                 Sequence* right = new Sequence(candidateSeq->getName(), rightQuery);
303                                                 
304                                                 //find chimeras
305                                                 chimera->getChimeras(left);
306                                                 data_results leftResults = chimera->getResults();
307                                                 
308                                                 chimera->getChimeras(right);
309                                                 data_results rightResults = chimera->getResults();
310                                                 
311                                                 //if either piece is chimeric then report
312                                                 Sequence trimmed = chimera->print(out, out2, leftResults, rightResults);
313                                                 if (pDataArray->trim) { trimmed.printSequence(out3);  }
314                                                 
315                                                 delete left; delete right;
316                                                 
317                                         }else { //already chimeric
318                                                 //print results
319                                                 Sequence trimmed = chimera->print(out, out2);
320                                                 if (pDataArray->trim) { trimmed.printSequence(out3);  }
321                                         }
322                                         
323                                         
324                                 }
325                                 pDataArray->count++;
326                         }
327                         
328                         delete candidateSeq;
329                         //report progress
330                         if((pDataArray->count) % 100 == 0){     pDataArray->m->mothurOutJustToScreen("Processing sequence: " + toString(pDataArray->count) +"\n");      }
331                 }
332                 //report progress
333                 if((pDataArray->count) % 100 != 0){     pDataArray->m->mothurOutJustToScreen("Processing sequence: " + toString(pDataArray->count)+"\n");               }
334                 
335                 pDataArray->numNoParents = chimera->getNumNoParents();
336                 if (pDataArray->numNoParents == pDataArray->count) {    pDataArray->m->mothurOut("[WARNING]: megablast returned 0 potential parents for all your sequences. This could be due to formatdb.exe not being setup properly, please check formatdb.log for errors.\n"); }
337
338                 out.close();
339                 out2.close();
340                 if (pDataArray->trim) { out3.close(); }
341                 inFASTA.close();
342                 delete chimera;
343                 
344                 return 0;
345                 
346         }
347         catch(exception& e) {
348                 pDataArray->m->errorOut(e, "ChimeraSlayerCommand", "MySlayerThreadFunction");
349                 exit(1);
350         }
351
352
353 /**************************************************************************************************/
354
355 static DWORD WINAPI MySlayerGroupThreadFunction(LPVOID lpParam){ 
356         slayerData* pDataArray;
357         pDataArray = (slayerData*)lpParam;
358         
359         try {
360         ofstream outCountList;
361         if (pDataArray->hasCount && pDataArray->dups) { pDataArray->m->openOutputFile(pDataArray->countlist, outCountList); }
362
363                 int totalSeqs = 0;
364         pDataArray->end = 0;
365                 
366                 for (map<string, map<string, int> >::iterator itFile = pDataArray->fileToPriority.begin(); itFile != pDataArray->fileToPriority.end(); itFile++) {
367                         
368                         if (pDataArray->m->control_pressed) {  return 0;  }
369                         
370                         int start = time(NULL);
371                         string thisFastaName = itFile->first;
372                         map<string, int> thisPriority = itFile->second;
373                         string thisoutputFileName = pDataArray->m->getRootName(pDataArray->m->getSimpleName(thisFastaName)) + pDataArray->fileGroup[thisFastaName] + "slayer.chimera";
374                         string thisaccnosFileName = pDataArray->m->getRootName(pDataArray->m->getSimpleName(thisFastaName)) + pDataArray->fileGroup[thisFastaName] + "slayer.accnos";
375                         string thistrimFastaFileName = pDataArray->m->getRootName(pDataArray->m->getSimpleName(thisFastaName)) + pDataArray->fileGroup[thisFastaName] + "slayer.fasta";
376                         
377                         pDataArray->m->mothurOutEndLine(); pDataArray->m->mothurOut("Checking sequences from group: " + pDataArray->fileGroup[thisFastaName] + "."); pDataArray->m->mothurOutEndLine(); 
378                 
379                         //int numSeqs = driver(lines[0], thisoutputFileName, thisFastaName, thisaccnosFileName, thistrimFastaFileName, thisPriority);
380                         ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
381                         
382                         ofstream out;
383                         pDataArray->m->openOutputFile(thisoutputFileName, out);
384                         
385                         ofstream out2;
386                         pDataArray->m->openOutputFile(thisaccnosFileName, out2);
387                         
388                         ofstream out3;
389                         if (pDataArray->trim) {  pDataArray->m->openOutputFile(thistrimFastaFileName, out3); }
390                         
391                         ifstream inFASTA;
392                         pDataArray->m->openInputFile(thisFastaName, inFASTA);
393                         
394                         Chimera* chimera;
395                         chimera = new ChimeraSlayer(thisFastaName, pDataArray->templatefile, pDataArray->trim, thisPriority, pDataArray->search, pDataArray->ksize, pDataArray->match, pDataArray->mismatch, pDataArray->window, pDataArray->divR, pDataArray->minSimilarity, pDataArray->minCoverage, pDataArray->minBS, pDataArray->minSNP, pDataArray->parents, pDataArray->iters, pDataArray->increment, pDataArray->numwanted, pDataArray->realign, pDataArray->blastlocation, pDataArray->threadId);      
396                         chimera->printHeader(out); 
397                         
398                         int numSeqs = 0;
399                         
400                         if (pDataArray->m->control_pressed) { out.close(); out2.close(); if (pDataArray->trim) { out3.close(); } inFASTA.close(); delete chimera;  return 0;    }
401                         
402                         if (chimera->getUnaligned()) { 
403                                 pDataArray->m->mothurOut("Your template sequences are different lengths, please correct."); pDataArray->m->mothurOutEndLine(); 
404                                 out.close(); out2.close(); if (pDataArray->trim) { out3.close(); } inFASTA.close();
405                                 delete chimera;
406                                 return 0; 
407                         }
408                         int templateSeqsLength = chimera->getLength();
409                         
410                         bool done = false;
411                         while (!done) {
412                                 
413                                 if (pDataArray->m->control_pressed) {   out.close(); out2.close(); if (pDataArray->trim) { out3.close(); } inFASTA.close(); delete chimera; return 1;   }
414                                 
415                                 Sequence* candidateSeq = new Sequence(inFASTA);  pDataArray->m->gobble(inFASTA);
416                                 string candidateAligned = candidateSeq->getAligned();
417                                 
418                                 if (candidateSeq->getName() != "") { //incase there is a commented sequence at the end of a file
419                                         if (candidateSeq->getAligned().length() != templateSeqsLength) {  
420                                                 pDataArray->m->mothurOut(candidateSeq->getName() + " is not the same length as the template sequences. Skipping."); pDataArray->m->mothurOutEndLine();
421                                         }else{
422                                                 //find chimeras
423                                                 chimera->getChimeras(candidateSeq);
424                                                 
425                                                 if (pDataArray->m->control_pressed) {   out.close(); out2.close(); if (pDataArray->trim) { out3.close(); } inFASTA.close(); delete candidateSeq; delete chimera; return 1;      }
426                                                 
427                                                 //if you are not chimeric, then check each half
428                                                 data_results wholeResults = chimera->getResults();
429                                                 
430                                                 //determine if we need to split
431                                                 bool isChimeric = false;
432                                                 
433                                                 if (wholeResults.flag == "yes") {
434                                                         string chimeraFlag = "no";
435                                                         if(  (wholeResults.results[0].bsa >= pDataArray->minBS && wholeResults.results[0].divr_qla_qrb >= pDataArray->divR)
436                                                            ||
437                                                            (wholeResults.results[0].bsb >= pDataArray->minBS && wholeResults.results[0].divr_qlb_qra >= pDataArray->divR) ) { chimeraFlag = "yes"; }
438                                                         
439                                                         
440                                                         if (chimeraFlag == "yes") {     
441                                                                 if ((wholeResults.results[0].bsa >= pDataArray->minBS) || (wholeResults.results[0].bsb >= pDataArray->minBS)) { isChimeric = true; }
442                                                         }
443                                                 }
444                                                 
445                                                 if ((!isChimeric) && pDataArray->trimera) {
446                                                         
447                                                         //split sequence in half by bases
448                                                         string leftQuery, rightQuery;
449                                                         Sequence tempSeq(candidateSeq->getName(), candidateAligned);
450                                                         //divideInHalf(tempSeq, leftQuery, rightQuery);
451                                                         string queryUnAligned = tempSeq.getUnaligned();
452                                                         int numBases = int(queryUnAligned.length() * 0.5);
453                                                         
454                                                         string queryAligned = tempSeq.getAligned();
455                                                         leftQuery = tempSeq.getAligned();
456                                                         rightQuery = tempSeq.getAligned();
457                                                         
458                                                         int baseCount = 0;
459                                                         int leftSpot = 0;
460                                                         for (int i = 0; i < queryAligned.length(); i++) {
461                                                                 //if you are a base
462                                                                 if (isalpha(queryAligned[i])) {         
463                                                                         baseCount++; 
464                                                                 }
465                                                                 
466                                                                 //if you have half
467                                                                 if (baseCount >= numBases) {  leftSpot = i; break; } //first half
468                                                         }
469                                                         
470                                                         //blank out right side
471                                                         for (int i = leftSpot; i < leftQuery.length(); i++) { leftQuery[i] = '.'; }
472                                                         
473                                                         //blank out left side
474                                                         for (int i = 0; i < leftSpot; i++) { rightQuery[i] = '.'; }
475                                                         
476                                                         //run chimeraSlayer on each piece
477                                                         Sequence* left = new Sequence(candidateSeq->getName(), leftQuery);
478                                                         Sequence* right = new Sequence(candidateSeq->getName(), rightQuery);
479                                                         
480                                                         //find chimeras
481                                                         chimera->getChimeras(left);
482                                                         data_results leftResults = chimera->getResults();
483                                                         
484                                                         chimera->getChimeras(right);
485                                                         data_results rightResults = chimera->getResults();
486                                                         
487                                                         //if either piece is chimeric then report
488                                                         Sequence trimmed = chimera->print(out, out2, leftResults, rightResults);
489                                                         if (pDataArray->trim) { trimmed.printSequence(out3);  }
490                                                         
491                                                         delete left; delete right;
492                                                         
493                                                 }else { //already chimeric
494                                                         //print results
495                                                         Sequence trimmed = chimera->print(out, out2);
496                                                         if (pDataArray->trim) { trimmed.printSequence(out3);  }
497                                                 }
498                                                 
499                                                 
500                                         }
501                                         numSeqs++;
502                                 }
503                                 
504                                 delete candidateSeq;
505                                 
506                                 if (inFASTA.eof()) { break; }
507                                 
508                                 //report progress
509                                 if((numSeqs) % 100 == 0){       pDataArray->m->mothurOutJustToScreen("Processing sequence: " + toString(numSeqs)+"\n"); pDataArray->m->mothurOutEndLine();              }
510                         }
511                         //report progress
512                         if((numSeqs) % 100 != 0){       pDataArray->m->mothurOutJustToScreen("Processing sequence: " + toString(numSeqs)+"\n");                 }
513                         
514                         pDataArray->numNoParents = chimera->getNumNoParents();
515                         if (pDataArray->numNoParents == numSeqs) {      pDataArray->m->mothurOut("[WARNING]: megablast returned 0 potential parents for all your sequences. This could be due to formatdb.exe not being setup properly, please check formatdb.log for errors.\n"); }
516                         
517                         out.close();
518                         out2.close();
519                         if (pDataArray->trim) { out3.close(); }
520                         inFASTA.close();
521                         delete chimera;
522                         pDataArray->end++;
523                         
524                         ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
525                         
526             //if we provided a count file with group info and set dereplicate=t, then we want to create a *.pick.count_table
527             //This table will zero out group counts for seqs determined to be chimeric by that group.
528             if (pDataArray->dups) {
529                 if (!pDataArray->m->isBlank(thisaccnosFileName)) {
530                     ifstream in;
531                     pDataArray->m->openInputFile(thisaccnosFileName, in);
532                     string name;
533                     if (pDataArray->hasCount) {
534                         while (!in.eof()) {
535                             in >> name; pDataArray->m->gobble(in);
536                             outCountList << name << '\t' << pDataArray->fileGroup[thisFastaName] << endl;
537                         }
538                         in.close();
539                     }else {
540                         map<string, map<string, string> >::iterator itGroupNameMap = pDataArray->group2NameMap.find(pDataArray->fileGroup[thisFastaName]);
541                         if (itGroupNameMap != pDataArray->group2NameMap.end()) {
542                             map<string, string> thisnamemap = itGroupNameMap->second;
543                             map<string, string>::iterator itN;
544                             ofstream out;
545                             pDataArray->m->openOutputFile(thisaccnosFileName+".temp", out);
546                             while (!in.eof()) {
547                                 in >> name; pDataArray->m->gobble(in);
548                                 //pDataArray->m->mothurOut("here = " + name + '\t');
549                                 itN = thisnamemap.find(name);
550                                 if (itN != thisnamemap.end()) {
551                                     vector<string> tempNames; pDataArray->m->splitAtComma(itN->second, tempNames);
552                                     for (int j = 0; j < tempNames.size(); j++) { out << tempNames[j] << endl; }
553                                     //pDataArray->m->mothurOut(itN->second + '\n');
554                                     
555                                 }else { pDataArray->m->mothurOut("[ERROR]: parsing cannot find " + name + ".\n"); pDataArray->m->control_pressed = true; }
556                             }
557                             out.close();
558                             in.close();
559                             pDataArray->m->renameFile(thisaccnosFileName+".temp", thisaccnosFileName);
560                         }else { pDataArray->m->mothurOut("[ERROR]: parsing cannot find " + pDataArray->fileGroup[thisFastaName] + ".\n"); pDataArray->m->control_pressed = true; }
561                     }
562                     
563                 }
564             }
565             
566             
567                         //append files
568                         pDataArray->m->appendFiles(thisoutputFileName, pDataArray->outputFName); pDataArray->m->mothurRemove(thisoutputFileName); 
569                         pDataArray->m->appendFiles(thisaccnosFileName, pDataArray->accnos); pDataArray->m->mothurRemove(thisaccnosFileName);
570                         if (pDataArray->trim) { pDataArray->m->appendFiles(thistrimFastaFileName, pDataArray->fasta); pDataArray->m->mothurRemove(thistrimFastaFileName); }
571                         pDataArray->m->mothurRemove(thisFastaName);
572                         
573                         totalSeqs += numSeqs;
574                         
575                         pDataArray->m->mothurOutEndLine(); pDataArray->m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences from group " + pDataArray->fileGroup[thisFastaName] + "."); pDataArray->m->mothurOutEndLine();
576                 }
577                 
578                 pDataArray->count = totalSeqs;
579         if (pDataArray->hasCount && pDataArray->dups) { outCountList.close(); }
580                 
581                 return 0;
582                 
583         }
584         catch(exception& e) {
585                 pDataArray->m->errorOut(e, "ChimeraSlayerCommand", "MySlayerGroupThreadFunction");
586                 exit(1);
587         }
588
589
590 #endif
591
592 /**************************************************************************************************/
593
594
595 #endif
596
597