]> git.donarmstrong.com Git - mothur.git/blob - prcseqscommand.cpp
Merge remote-tracking branch 'origin/master'
[mothur.git] / prcseqscommand.cpp
1 //
2 //  prcseqscommand.cpp
3 //  Mothur
4 //
5 //  Created by Sarah Westcott on 3/14/12.
6 //  Copyright (c) 2012 Schloss Lab. All rights reserved.
7 //
8
9 #include "pcrseqscommand.h"
10
11 //**********************************************************************************************************************
12 vector<string> PcrSeqsCommand::setParameters(){ 
13         try {
14                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","fasta",false,true,true); parameters.push_back(pfasta);
15                 CommandParameter poligos("oligos", "InputTypes", "", "", "ecolioligos", "none", "none","",false,false,true); parameters.push_back(poligos);
16         CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none","name",false,false,true); parameters.push_back(pname);
17         CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none","count",false,false,true); parameters.push_back(pcount);
18                 CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none","group",false,false,true); parameters.push_back(pgroup);
19         CommandParameter ptax("taxonomy", "InputTypes", "", "", "none", "none", "none","taxonomy",false,false,true); parameters.push_back(ptax);
20         CommandParameter pecoli("ecoli", "InputTypes", "", "", "ecolioligos", "none", "none","",false,false); parameters.push_back(pecoli);
21                 CommandParameter pstart("start", "Number", "", "-1", "", "", "","",false,false); parameters.push_back(pstart);
22                 CommandParameter pend("end", "Number", "", "-1", "", "", "","",false,false); parameters.push_back(pend);
23                 CommandParameter pnomatch("nomatch", "Multiple", "reject-keep", "reject", "", "", "","",false,false); parameters.push_back(pnomatch);
24                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
25                 CommandParameter pkeepprimer("keepprimer", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pkeepprimer);
26         CommandParameter pkeepdots("keepdots", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pkeepdots);
27         CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
28                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
29         
30                 vector<string> myArray;
31                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
32                 return myArray;
33         }
34         catch(exception& e) {
35                 m->errorOut(e, "PcrSeqsCommand", "setParameters");
36                 exit(1);
37         }
38 }
39 //**********************************************************************************************************************
40 string PcrSeqsCommand::getHelpString(){ 
41         try {
42                 string helpString = "";
43                 helpString += "The pcr.seqs command reads a fasta file.\n";
44         helpString += "The pcr.seqs command parameters are fasta, oligos, name, group, count, taxonomy, ecoli, start, end, nomatch, processors, keepprimer and keepdots.\n";
45                 helpString += "The ecoli parameter is used to provide a fasta file containing a single reference sequence (e.g. for e. coli) this must be aligned. Mothur will trim to the start and end positions of the reference sequence.\n";
46         helpString += "The start parameter allows you to provide a starting position to trim to.\n";
47         helpString += "The end parameter allows you to provide a ending position to trim from.\n";
48         helpString += "The nomatch parameter allows you to decide what to do with sequences where the primer is not found. Default=reject, meaning remove from fasta file.  if nomatch=true, then do nothing to sequence.\n";
49         helpString += "The processors parameter allows you to use multiple processors.\n";
50         helpString += "The keepprimer parameter allows you to keep the primer, default=false.\n";
51         helpString += "The keepdots parameter allows you to keep the leading and trailing .'s, default=true.\n";
52                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
53                 helpString += "For more details please check out the wiki http://www.mothur.org/wiki/Pcr.seqs .\n";
54                 return helpString;
55         }
56         catch(exception& e) {
57                 m->errorOut(e, "PcrSeqsCommand", "getHelpString");
58                 exit(1);
59         }
60 }
61 //**********************************************************************************************************************
62 string PcrSeqsCommand::getOutputPattern(string type) {
63     try {
64         string pattern = "";
65         
66         if (type == "fasta")            {   pattern = "[filename],pcr,[extension]-[filename],[tag],pcr,[extension]";    }
67         else if (type == "taxonomy")    {   pattern = "[filename],pcr,[extension]";    }
68         else if (type == "name")        {   pattern = "[filename],pcr,[extension]";    }
69         else if (type == "group")       {   pattern = "[filename],pcr,[extension]";    }
70         else if (type == "count")       {   pattern = "[filename],pcr,[extension]";    }
71         else if (type == "accnos")      {   pattern = "[filename],bad.accnos";    }
72         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
73         
74         return pattern;
75     }
76     catch(exception& e) {
77         m->errorOut(e, "PcrSeqsCommand", "getOutputPattern");
78         exit(1);
79     }
80 }
81 //**********************************************************************************************************************
82
83 PcrSeqsCommand::PcrSeqsCommand(){       
84         try {
85                 abort = true; calledHelp = true; 
86                 setParameters();
87                 vector<string> tempOutNames;
88                 outputTypes["fasta"] = tempOutNames;
89                 outputTypes["taxonomy"] = tempOutNames;
90                 outputTypes["group"] = tempOutNames;
91                 outputTypes["name"] = tempOutNames;
92         outputTypes["count"] = tempOutNames;
93         outputTypes["accnos"] = tempOutNames;
94         }
95         catch(exception& e) {
96                 m->errorOut(e, "PcrSeqsCommand", "PcrSeqsCommand");
97                 exit(1);
98         }
99 }
100 //***************************************************************************************************************
101
102 PcrSeqsCommand::PcrSeqsCommand(string option)  {
103         try {
104                 
105                 abort = false; calledHelp = false;   
106                 
107                 //allow user to run help
108                 if(option == "help") { help(); abort = true; calledHelp = true; }
109                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
110                 
111                 else {
112                         vector<string> myArray = setParameters();
113                         
114                         OptionParser parser(option);
115                         map<string,string> parameters = parser.getParameters();
116                         
117                         ValidParameters validParameter;
118                         map<string,string>::iterator it;
119                         
120                         //check to make sure all parameters are valid for command
121                         for (it = parameters.begin(); it != parameters.end(); it++) { 
122                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
123                         }
124                         
125                         //initialize outputTypes
126                         vector<string> tempOutNames;
127                         outputTypes["fasta"] = tempOutNames;
128                         outputTypes["taxonomy"] = tempOutNames;
129                         outputTypes["group"] = tempOutNames;
130                         outputTypes["name"] = tempOutNames;
131             outputTypes["accnos"] = tempOutNames;
132             outputTypes["count"] = tempOutNames;
133                         
134                         //if the user changes the input directory command factory will send this info to us in the output parameter 
135                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
136                         if (inputDir == "not found"){   inputDir = "";          }
137                         else {
138                                 string path;
139                                 it = parameters.find("fasta");
140                                 //user has given a template file
141                                 if(it != parameters.end()){ 
142                                         path = m->hasPath(it->second);
143                                         //if the user has not given a path then, add inputdir. else leave path alone.
144                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
145                                 }
146                                 
147                                 it = parameters.find("oligos");
148                                 //user has given a template file
149                                 if(it != parameters.end()){ 
150                                         path = m->hasPath(it->second);
151                                         //if the user has not given a path then, add inputdir. else leave path alone.
152                                         if (path == "") {       parameters["oligos"] = inputDir + it->second;           }
153                                 }
154                 
155                 it = parameters.find("ecoli");
156                                 //user has given a template file
157                                 if(it != parameters.end()){ 
158                                         path = m->hasPath(it->second);
159                                         //if the user has not given a path then, add inputdir. else leave path alone.
160                                         if (path == "") {       parameters["ecoli"] = inputDir + it->second;            }
161                                 }
162                                 
163                                 it = parameters.find("taxonomy");
164                                 //user has given a template file
165                                 if(it != parameters.end()){ 
166                                         path = m->hasPath(it->second);
167                                         //if the user has not given a path then, add inputdir. else leave path alone.
168                                         if (path == "") {       parameters["taxonomy"] = inputDir + it->second;         }
169                                 }
170                                 
171                                 it = parameters.find("name");
172                                 //user has given a template file
173                                 if(it != parameters.end()){ 
174                                         path = m->hasPath(it->second);
175                                         //if the user has not given a path then, add inputdir. else leave path alone.
176                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
177                                 }
178                 
179                 it = parameters.find("group");
180                                 //user has given a template file
181                                 if(it != parameters.end()){ 
182                                         path = m->hasPath(it->second);
183                                         //if the user has not given a path then, add inputdir. else leave path alone.
184                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
185                                 }
186                 
187                 it = parameters.find("count");
188                                 //user has given a template file
189                                 if(it != parameters.end()){ 
190                                         path = m->hasPath(it->second);
191                                         //if the user has not given a path then, add inputdir. else leave path alone.
192                                         if (path == "") {       parameters["count"] = inputDir + it->second;            }
193                                 }
194                                 
195                         }
196             
197                         
198                         //check for required parameters
199                         fastafile = validParameter.validFile(parameters, "fasta", true);
200                         if (fastafile == "not found") {                                 
201                                 fastafile = m->getFastaFile(); 
202                                 if (fastafile != "") { m->mothurOut("Using " + fastafile + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
203                                 else {  m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
204                         }else if (fastafile == "not open") { fastafile = ""; abort = true; }    
205                         else { m->setFastaFile(fastafile); }
206                         
207             //if the user changes the output directory command factory will send this info to us in the output parameter 
208                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = m->hasPath(fastafile);      }
209
210                         //check for optional parameter and set defaults
211                         // ...at some point should added some additional type checking...
212                         string temp;
213                         temp = validParameter.validFile(parameters, "keepprimer", false);  if (temp == "not found")    {        temp = "f";     }
214                         keepprimer = m->isTrue(temp);   
215             
216             temp = validParameter.validFile(parameters, "keepdots", false);  if (temp == "not found")    {      temp = "t";     }
217                         keepdots = m->isTrue(temp);     
218             
219                         temp = validParameter.validFile(parameters, "oligos", true);
220                         if (temp == "not found"){       oligosfile = "";                }
221                         else if(temp == "not open"){    oligosfile = ""; abort = true;  } 
222                         else                                    {       oligosfile = temp; m->setOligosFile(oligosfile);                }
223                         
224             ecolifile = validParameter.validFile(parameters, "ecoli", true);
225                         if (ecolifile == "not found"){  ecolifile = "";         }
226                         else if(ecolifile == "not open"){       ecolifile = ""; abort = true;   } 
227                         
228             namefile = validParameter.validFile(parameters, "name", true);
229                         if (namefile == "not found"){   namefile = "";          }
230                         else if(namefile == "not open"){        namefile = ""; abort = true;    } 
231             else { m->setNameFile(namefile); }
232             
233             groupfile = validParameter.validFile(parameters, "group", true);
234                         if (groupfile == "not found"){  groupfile = "";         }
235                         else if(groupfile == "not open"){       groupfile = ""; abort = true;   } 
236             else { m->setGroupFile(groupfile); }
237             
238             countfile = validParameter.validFile(parameters, "count", true);
239                         if (countfile == "not open") { countfile = ""; abort = true; }
240                         else if (countfile == "not found") { countfile = "";  } 
241                         else { m->setCountTableFile(countfile); }
242             
243             if ((namefile != "") && (countfile != "")) {
244                 m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
245             }
246                         
247             if ((groupfile != "") && (countfile != "")) {
248                 m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
249             }
250             
251             taxfile = validParameter.validFile(parameters, "taxonomy", true);
252                         if (taxfile == "not found"){    taxfile = "";           }
253                         else if(taxfile == "not open"){ taxfile = ""; abort = true;     } 
254             else { m->setTaxonomyFile(taxfile); }
255                                                 
256                         temp = validParameter.validFile(parameters, "start", false);    if (temp == "not found") { temp = "-1"; }
257                         m->mothurConvert(temp, start);
258             
259             temp = validParameter.validFile(parameters, "end", false);  if (temp == "not found") { temp = "-1"; }
260                         m->mothurConvert(temp, end);
261                         
262                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
263                         m->setProcessors(temp);
264                         m->mothurConvert(temp, processors); 
265                         
266             nomatch = validParameter.validFile(parameters, "nomatch", false);   if (nomatch == "not found") { nomatch = "reject"; }
267                         
268             if ((nomatch != "reject") && (nomatch != "keep")) { m->mothurOut("[ERROR]: " + nomatch + " is not a valid entry for nomatch. Choices are reject and keep.\n");  abort = true; }
269             
270             //didnt set anything
271                         if ((oligosfile == "") && (ecolifile == "") && (start == -1) && (end == -1)) {
272                 m->mothurOut("[ERROR]: You did not set any options. Please provide an oligos or ecoli file, or set start or end.\n"); abort = true;
273             }
274             
275             if ((oligosfile == "") && (ecolifile == "") && (start < 0) && (end == -1)) { m->mothurOut("[ERROR]: Invalid start value.\n"); abort = true; }
276             
277             if ((ecolifile != "") && (start != -1) && (end != -1)) {
278                 m->mothurOut("[ERROR]: You provided an ecoli file , but set the start or end parameters. Unsure what you intend.  When you provide the ecoli file, mothur thinks you want to use the start and end of the sequence in the ecoli file.\n"); abort = true;
279             }
280
281             
282             if ((oligosfile != "") && (ecolifile != "")) {
283                  m->mothurOut("[ERROR]: You can not use an ecoli file at the same time as an oligos file.\n"); abort = true;
284             }
285                         
286                         //check to make sure you didn't forget the name file by mistake                 
287                         if (countfile == "") { 
288                 if (namefile == "") {
289                     vector<string> files; files.push_back(fastafile);
290                     parser.getNameFile(files);
291                 }
292             }
293                 }
294         
295         }
296         catch(exception& e) {
297                 m->errorOut(e, "PcrSeqsCommand", "PcrSeqsCommand");
298                 exit(1);
299         }
300 }
301 //***************************************************************************************************************
302
303 int PcrSeqsCommand::execute(){
304         try{
305         
306                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
307                 
308         int start = time(NULL);
309         
310         string thisOutputDir = outputDir;
311                 if (outputDir == "") {  thisOutputDir += m->hasPath(fastafile);  }
312         map<string, string> variables; 
313         variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(fastafile));
314         variables["[extension]"] = m->getExtension(fastafile);
315                 string trimSeqFile = getOutputFileName("fasta",variables);
316                 outputNames.push_back(trimSeqFile); outputTypes["fasta"].push_back(trimSeqFile);
317         variables["[tag]"] = "scrap";
318         string badSeqFile = getOutputFileName("fasta",variables);
319                 
320                 
321         length = 0;
322                 if(oligosfile != ""){    readOligos();     }  if (m->control_pressed) {  return 0; }
323         if(ecolifile != "") {    readEcoli();      }  if (m->control_pressed) {  return 0; }
324         
325         vector<unsigned long long> positions; 
326         int numFastaSeqs = 0;
327 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
328         positions = m->divideFile(fastafile, processors);
329         for (int i = 0; i < (positions.size()-1); i++) {        lines.push_back(linePair(positions[i], positions[(i+1)]));      }
330 #else
331         if (processors == 1) {
332             lines.push_back(linePair(0, 1000));
333         }else {
334             positions = m->setFilePosFasta(fastafile, numFastaSeqs); 
335             if (positions.size() < processors) { processors = positions.size(); }
336             
337             //figure out how many sequences you have to process
338             int numSeqsPerProcessor = numFastaSeqs / processors;
339             for (int i = 0; i < processors; i++) {
340                 int startIndex =  i * numSeqsPerProcessor;
341                 if(i == (processors - 1)){      numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor;   }
342                 lines.push_back(linePair(positions[startIndex], numSeqsPerProcessor));
343             }
344         }
345 #endif
346         if (m->control_pressed) {  return 0; }
347
348         set<string> badNames;
349         if(processors == 1) {    numFastaSeqs = driverPcr(fastafile, trimSeqFile, badSeqFile, badNames, lines[0]);   }
350         else                {    numFastaSeqs = createProcesses(fastafile, trimSeqFile, badSeqFile, badNames);       }  
351                 
352                 if (m->control_pressed) {  return 0; }          
353         
354         //don't write or keep if blank
355         if (badNames.size() != 0)   { writeAccnos(badNames);        }   
356         if (m->isBlank(badSeqFile)) { m->mothurRemove(badSeqFile);  }
357         else { outputNames.push_back(badSeqFile); outputTypes["fasta"].push_back(badSeqFile); }
358         
359         if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]); } return 0; }
360         if (namefile != "")                     {               readName(badNames);             }   
361         if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]); } return 0; }
362         if (groupfile != "")            {               readGroup(badNames);    }
363         if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]); } return 0; }
364                 if (taxfile != "")                      {               readTax(badNames);              }
365                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]); } return 0; }
366                 if (countfile != "")                    {               readCount(badNames);            }
367                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]); } return 0; }
368       
369         m->mothurOutEndLine();
370                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
371                 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
372                 m->mothurOutEndLine();
373                 m->mothurOutEndLine();
374                 
375                 //set fasta file as new current fastafile
376                 string current = "";
377                 itTypes = outputTypes.find("fasta");
378                 if (itTypes != outputTypes.end()) {
379                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
380                 }
381                 
382                 itTypes = outputTypes.find("name");
383                 if (itTypes != outputTypes.end()) {
384                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
385                 }
386                 
387                 itTypes = outputTypes.find("group");
388                 if (itTypes != outputTypes.end()) {
389                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
390                 }
391                 
392                 itTypes = outputTypes.find("accnos");
393                 if (itTypes != outputTypes.end()) {
394                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); }
395                 }
396                 
397                 itTypes = outputTypes.find("taxonomy");
398                 if (itTypes != outputTypes.end()) {
399                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setTaxonomyFile(current); }
400                 }
401         
402         itTypes = outputTypes.find("count");
403                 if (itTypes != outputTypes.end()) {
404                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
405                 }
406         
407                 m->mothurOut("It took " + toString(time(NULL) - start) + " secs to screen " + toString(numFastaSeqs) + " sequences.");
408                 m->mothurOutEndLine();
409
410                 
411                 return 0;       
412         
413         }
414         catch(exception& e) {
415                 m->errorOut(e, "PcrSeqsCommand", "execute");
416                 exit(1);
417         }
418 }
419 /**************************************************************************************************/
420 int PcrSeqsCommand::createProcesses(string filename, string goodFileName, string badFileName, set<string>& badSeqNames) {
421         try {
422         
423         vector<int> processIDS;   
424         int process = 1;
425                 int num = 0;
426         
427 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
428         
429                 //loop through and create all the processes you want
430                 while (process != processors) {
431                         int pid = fork();
432                         
433                         if (pid > 0) {
434                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
435                                 process++;
436                         }else if (pid == 0){
437                                 num = driverPcr(filename, goodFileName + toString(getpid()) + ".temp", badFileName + toString(getpid()) + ".temp", badSeqNames, lines[process]);
438                                 
439                                 //pass numSeqs to parent
440                                 ofstream out;
441                                 string tempFile = filename + toString(getpid()) + ".num.temp";
442                                 m->openOutputFile(tempFile, out);
443                                 out << num << '\t' << badSeqNames.size() << endl;
444                 for (set<string>::iterator it = badSeqNames.begin(); it != badSeqNames.end(); it++) {
445                     out << (*it) << endl;
446                 }
447                                 out.close();
448                                 
449                                 exit(0);
450                         }else { 
451                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
452                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
453                                 exit(0);
454                         }
455                 }
456                 
457         num = driverPcr(filename, goodFileName, badFileName, badSeqNames, lines[0]);
458         
459                 //force parent to wait until all the processes are done
460                 for (int i=0;i<processIDS.size();i++) { 
461                         int temp = processIDS[i];
462                         wait(&temp);
463                 }
464                 
465                 for (int i = 0; i < processIDS.size(); i++) {
466                         ifstream in;
467                         string tempFile =  filename + toString(processIDS[i]) + ".num.temp";
468                         m->openInputFile(tempFile, in);
469             int numBadNames = 0; string name = "";
470                         if (!in.eof()) { int tempNum = 0; in >> tempNum >> numBadNames; num += tempNum; m->gobble(in); }
471             for (int j = 0; j < numBadNames; j++) {
472                 in >> name; m->gobble(in);
473                 badSeqNames.insert(name);
474             }
475                         in.close(); m->mothurRemove(tempFile);
476             
477             m->appendFiles((goodFileName + toString(processIDS[i]) + ".temp"), goodFileName);
478             m->mothurRemove((goodFileName + toString(processIDS[i]) + ".temp"));
479             
480             m->appendFiles((badFileName + toString(processIDS[i]) + ".temp"), badFileName);
481             m->mothurRemove((badFileName + toString(processIDS[i]) + ".temp"));
482                 }
483     #else
484         
485         //////////////////////////////////////////////////////////////////////////////////////////////////////
486                 //Windows version shared memory, so be careful when passing variables through the sumScreenData struct. 
487                 //Above fork() will clone, so memory is separate, but that's not the case with windows, 
488                 //Taking advantage of shared memory to allow both threads to add info to badSeqNames.
489                 //////////////////////////////////////////////////////////////////////////////////////////////////////
490                 
491                 vector<pcrData*> pDataArray; 
492                 DWORD   dwThreadIdArray[processors-1];
493                 HANDLE  hThreadArray[processors-1]; 
494                 
495                 //Create processor worker threads.
496                 for( int i=0; i<processors-1; i++ ){
497             
498             string extension = "";
499             if (i!=0) {extension += toString(i) + ".temp"; processIDS.push_back(i); }
500             
501                         // Allocate memory for thread data.
502                         pcrData* tempPcr = new pcrData(filename, goodFileName+extension, badFileName+extension, m, oligosfile, ecolifile, primers, revPrimer, nomatch, keepprimer, keepdots, start, end, length, lines[i].start, lines[i].end);
503                         pDataArray.push_back(tempPcr);
504                         
505                         //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
506                         hThreadArray[i] = CreateThread(NULL, 0, MyPcrThreadFunction, pDataArray[i], 0, &dwThreadIdArray[i]);   
507                 }
508                 
509         //do your part
510         num = driverPcr(filename, (goodFileName+toString(processors-1)+".temp"), (badFileName+toString(processors-1)+".temp"),badSeqNames, lines[processors-1]);
511         processIDS.push_back(processors-1);
512         
513                 //Wait until all threads have terminated.
514                 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
515                 
516                 //Close all thread handles and free memory allocations.
517                 for(int i=0; i < pDataArray.size(); i++){
518                         num += pDataArray[i]->count;
519             for (set<string>::iterator it = pDataArray[i]->badSeqNames.begin(); it != pDataArray[i]->badSeqNames.end(); it++) { badSeqNames.insert(*it);       }
520                         CloseHandle(hThreadArray[i]);
521                         delete pDataArray[i];
522                 }
523         
524         for (int i = 0; i < processIDS.size(); i++) {
525             m->appendFiles((goodFileName + toString(processIDS[i]) + ".temp"), goodFileName);
526             m->mothurRemove((goodFileName + toString(processIDS[i]) + ".temp"));
527             
528             m->appendFiles((badFileName + toString(processIDS[i]) + ".temp"), badFileName);
529             m->mothurRemove((badFileName + toString(processIDS[i]) + ".temp"));
530                 }
531         
532 #endif  
533         
534         return num;
535         
536         }
537         catch(exception& e) {
538                 m->errorOut(e, "PcrSeqsCommand", "createProcesses");
539                 exit(1);
540         }
541 }
542
543 //**********************************************************************************************************************
544 int PcrSeqsCommand::driverPcr(string filename, string goodFasta, string badFasta, set<string>& badSeqNames, linePair filePos){
545         try {
546                 ofstream goodFile;
547                 m->openOutputFile(goodFasta, goodFile);
548         
549         ofstream badFile;
550                 m->openOutputFile(badFasta, badFile);
551                 
552                 ifstream inFASTA;
553                 m->openInputFile(filename, inFASTA);
554         
555                 inFASTA.seekg(filePos.start);
556         
557                 bool done = false;
558                 int count = 0;
559         set<int> lengths;
560         
561                 while (!done) {
562             
563                         if (m->control_pressed) {  break; }
564                         
565                         Sequence currSeq(inFASTA); m->gobble(inFASTA);
566             
567             string trashCode = "";
568                         if (currSeq.getName() != "") {
569                 
570                 bool goodSeq = true;
571                 if (oligosfile != "") {
572                     map<int, int> mapAligned;
573                     bool aligned = isAligned(currSeq.getAligned(), mapAligned);
574                     
575                     //process primers
576                     if (primers.size() != 0) {
577                         int primerStart = 0; int primerEnd = 0;
578                         bool good = findForward(currSeq, primerStart, primerEnd);
579                         
580                         if(!good){      if (nomatch == "reject") { goodSeq = false; } trashCode += "f"; }
581                         else{
582                             //are you aligned
583                             if (aligned) { 
584                                 if (!keepprimer)    {  
585                                     if (keepdots)   { currSeq.filterToPos(mapAligned[primerEnd]);   }
586                                     else            { currSeq.setAligned(currSeq.getAligned().substr(mapAligned[primerEnd]));                                              }
587                                 } 
588                                 else                {  
589                                     if (keepdots)   { currSeq.filterToPos(mapAligned[primerStart]);  }
590                                     else            { currSeq.setAligned(currSeq.getAligned().substr(mapAligned[primerStart]));                                              }
591                                 }
592                             }else { 
593                                 if (!keepprimer)    { currSeq.setAligned(currSeq.getUnaligned().substr(primerEnd)); } 
594                                 else                { currSeq.setAligned(currSeq.getUnaligned().substr(primerStart)); } 
595                             }
596                         }
597                     }
598                     
599                     //process reverse primers
600                     if (revPrimer.size() != 0) {
601                         int primerStart = 0; int primerEnd = 0;
602                         bool good = findReverse(currSeq, primerStart, primerEnd);
603                         if(!good){      if (nomatch == "reject") { goodSeq = false; } trashCode += "r"; }
604                         else{ 
605                             //are you aligned
606                             if (aligned) { 
607                                 if (!keepprimer)    {  
608                                     if (keepdots)   { currSeq.filterFromPos(mapAligned[primerStart]); }
609                                     else            { currSeq.setAligned(currSeq.getAligned().substr(0, mapAligned[primerStart]));   }
610                                 } 
611                                 else                {  
612                                     if (keepdots)   { currSeq.filterFromPos(mapAligned[primerEnd]); }
613                                     else            { currSeq.setAligned(currSeq.getAligned().substr(0, mapAligned[primerEnd]));   }
614                                 } 
615                             }
616                             else { 
617                                 if (!keepprimer)    { currSeq.setAligned(currSeq.getUnaligned().substr(0, primerStart));   } 
618                                 else                { currSeq.setAligned(currSeq.getUnaligned().substr(0, primerEnd));     }
619                             }
620                         }
621                     }
622                 }else if (ecolifile != "") {
623                     //make sure the seqs are aligned
624                     lengths.insert(currSeq.getAligned().length());
625                     if (lengths.size() > 1) { m->mothurOut("[ERROR]: seqs are not aligned. When using start and end your sequences must be aligned.\n"); m->control_pressed = true; break; }
626                     else if (currSeq.getAligned().length() != length) {
627                         m->mothurOut("[ERROR]: seqs are not the same length as ecoli seq. When using ecoli option your sequences must be aligned and the same length as the ecoli sequence.\n"); m->control_pressed = true; break; 
628                     }else {
629                         if (keepdots)   { 
630                             currSeq.filterToPos(start); 
631                             currSeq.filterFromPos(end);
632                         }else {
633                             string seqString = currSeq.getAligned().substr(0, end);
634                             seqString = seqString.substr(start);
635                             currSeq.setAligned(seqString); 
636                         }
637                     }
638                 }else{ //using start and end to trim
639                     //make sure the seqs are aligned
640                     lengths.insert(currSeq.getAligned().length());
641                     if (lengths.size() > 1) { m->mothurOut("[ERROR]: seqs are not aligned. When using start and end your sequences must be aligned.\n"); m->control_pressed = true; break; }
642                     else {
643                         if (end != -1) {
644                             if (end > currSeq.getAligned().length()) {  m->mothurOut("[ERROR]: end is longer than your sequence length, aborting.\n"); m->control_pressed = true; break; }
645                             else {
646                                 if (keepdots)   { currSeq.filterFromPos(end); }
647                                 else {
648                                     string seqString = currSeq.getAligned().substr(0, end);
649                                     currSeq.setAligned(seqString); 
650                                 }
651                             }
652                         }
653                         if (start != -1) { 
654                             if (keepdots)   {  currSeq.filterToPos(start);  }
655                             else {
656                                 string seqString = currSeq.getAligned().substr(start);
657                                 currSeq.setAligned(seqString); 
658                             }
659                         }
660                     }
661                 }
662                 
663                 //trimming removed all bases
664                 if (currSeq.getUnaligned() == "") { goodSeq = false; }
665                 
666                                 if(goodSeq == 1)    {   currSeq.printSequence(goodFile);        }
667                                 else {  
668                     badSeqNames.insert(currSeq.getName()); 
669                     currSeq.setName(currSeq.getName() + '|' + trashCode);
670                     currSeq.printSequence(badFile); 
671                 }
672                 count++;
673                         }
674                         
675 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
676             unsigned long long pos = inFASTA.tellg();
677             if ((pos == -1) || (pos >= filePos.end)) { break; }
678 #else
679             if (inFASTA.eof()) { break; }
680 #endif
681                         
682                         //report progress
683                         if((count) % 100 == 0){ m->mothurOut("Processing sequence: " + toString(count)); m->mothurOutEndLine();         }
684                 }
685                 //report progress
686                 if((count) % 100 != 0){ m->mothurOut("Processing sequence: " + toString(count)); m->mothurOutEndLine();         }
687                 
688         badFile.close();
689                 goodFile.close();
690                 inFASTA.close();
691                 
692                 return count;
693         }
694         catch(exception& e) {
695                 m->errorOut(e, "PcrSeqsCommand", "driverPcr");
696                 exit(1);
697         }
698 }
699 //********************************************************************/
700 bool PcrSeqsCommand::findForward(Sequence& seq, int& primerStart, int& primerEnd){
701         try {
702                 
703                 string rawSequence = seq.getUnaligned();
704                 
705                 for(int j=0;j<primers.size();j++){
706                         string oligo = primers[j];
707                         
708                         if(rawSequence.length() < oligo.length()) {  break;  }
709                         
710                         //search for primer
711             int olength = oligo.length();
712             for (int j = 0; j < rawSequence.length()-olength; j++){
713                 if (m->control_pressed) {  primerStart = 0; primerEnd = 0; return false; }
714                 string rawChunk = rawSequence.substr(j, olength);
715                 if(compareDNASeq(oligo, rawChunk)) {
716                     primerStart = j;
717                     primerEnd = primerStart + olength;
718                     return true;
719                 }
720                 
721             }
722         }       
723                 
724         primerStart = 0; primerEnd = 0;
725                 return false;
726                 
727         }
728         catch(exception& e) {
729                 m->errorOut(e, "TrimOligos", "stripForward");
730                 exit(1);
731         }
732 }
733 //******************************************************************/
734 bool PcrSeqsCommand::findReverse(Sequence& seq, int& primerStart, int& primerEnd){
735         try {
736                 
737                 string rawSequence = seq.getUnaligned();
738                 
739                 for(int i=0;i<revPrimer.size();i++){
740                         string oligo = revPrimer[i];
741                         if(rawSequence.length() < oligo.length()) {  break;  }
742                         
743                         //search for primer
744             int olength = oligo.length();
745             for (int j = rawSequence.length()-olength; j >= 0; j--){
746                  if (m->control_pressed) {  primerStart = 0; primerEnd = 0; return false; }
747                 string rawChunk = rawSequence.substr(j, olength);
748             
749                 if(compareDNASeq(oligo, rawChunk)) {
750                     primerStart = j;
751                     primerEnd = primerStart + olength;
752                     return true;
753                 }
754                 
755             }
756                 }       
757                 
758         primerStart = 0; primerEnd = 0;
759                 return false;
760         }
761         catch(exception& e) {
762                 m->errorOut(e, "PcrSeqsCommand", "findReverse");
763                 exit(1);
764         }
765 }
766 //********************************************************************/
767 bool PcrSeqsCommand::isAligned(string seq, map<int, int>& aligned){
768         try {
769         bool isAligned = false;
770         
771         int countBases = 0;
772         for (int i = 0; i < seq.length(); i++) {
773             if (!isalpha(seq[i])) { isAligned = true; }
774             else { aligned[countBases] = i; countBases++; } //maps location in unaligned -> location in aligned.
775         }                                                   //ie. the 3rd base may be at spot 10 in the alignment
776                                                             //later when we trim we want to trim from spot 10.
777         return isAligned;
778     }
779         catch(exception& e) {
780                 m->errorOut(e, "PcrSeqsCommand", "isAligned");
781                 exit(1);
782         }
783 }
784 //********************************************************************/
785 string PcrSeqsCommand::reverseOligo(string oligo){
786         try {
787         string reverse = "";
788        
789         for(int i=oligo.length()-1;i>=0;i--){
790             
791             if(oligo[i] == 'A')         {       reverse += 'T'; }
792             else if(oligo[i] == 'T'){   reverse += 'A'; }
793             else if(oligo[i] == 'U'){   reverse += 'A'; }
794             
795             else if(oligo[i] == 'G'){   reverse += 'C'; }
796             else if(oligo[i] == 'C'){   reverse += 'G'; }
797             
798             else if(oligo[i] == 'R'){   reverse += 'Y'; }
799             else if(oligo[i] == 'Y'){   reverse += 'R'; }
800             
801             else if(oligo[i] == 'M'){   reverse += 'K'; }
802             else if(oligo[i] == 'K'){   reverse += 'M'; }
803             
804             else if(oligo[i] == 'W'){   reverse += 'W'; }
805             else if(oligo[i] == 'S'){   reverse += 'S'; }
806             
807             else if(oligo[i] == 'B'){   reverse += 'V'; }
808             else if(oligo[i] == 'V'){   reverse += 'B'; }
809             
810             else if(oligo[i] == 'D'){   reverse += 'H'; }
811             else if(oligo[i] == 'H'){   reverse += 'D'; }
812
813             else                                                {       reverse += 'N'; }
814         }
815
816         
817         return reverse;
818     }
819         catch(exception& e) {
820                 m->errorOut(e, "PcrSeqsCommand", "reverseOligo");
821                 exit(1);
822         }
823 }
824
825 //***************************************************************************************************************
826 bool PcrSeqsCommand::readOligos(){
827         try {
828                 ifstream inOligos;
829                 m->openInputFile(oligosfile, inOligos);
830                 
831                 string type, oligo, group;
832                 
833                 while(!inOligos.eof()){
834             
835                         inOligos >> type; 
836             
837                         if(type[0] == '#'){ //ignore
838                                 while (!inOligos.eof()) {       char c = inOligos.get();  if (c == 10 || c == 13){      break;  }       } // get rest of line if there's any crap there
839                                 m->gobble(inOligos);
840                         }else{
841                                 m->gobble(inOligos);
842                                 //make type case insensitive
843                                 for(int i=0;i<type.length();i++){       type[i] = toupper(type[i]);  }
844                                 
845                                 inOligos >> oligo;
846                                 
847                                 for(int i=0;i<oligo.length();i++){
848                                         oligo[i] = toupper(oligo[i]);
849                                         if(oligo[i] == 'U')     {       oligo[i] = 'T'; }
850                                 }
851                                 
852                                 if(type == "FORWARD"){
853                                         // get rest of line in case there is a primer name
854                                         while (!inOligos.eof()) {       
855                         char c = inOligos.get(); 
856                         if (c == 10 || c == 13){        break;  } 
857                         else if (c == 32 || c == 9){;} //space or tab
858                                         } 
859                                         primers.push_back(oligo);
860                 }else if(type == "REVERSE"){
861                     string oligoRC = reverseOligo(oligo);
862                     revPrimer.push_back(oligoRC);
863                     //cout << "oligo = " << oligo << " reverse = " << oligoRC << endl;
864                                 }else if(type == "BARCODE"){
865                                         inOligos >> group;
866                                 }else if((type == "LINKER")||(type == "SPACER")) {;}
867                                 else{   m->mothurOut(type + " is not recognized as a valid type. Choices are forward, reverse, linker, spacer and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine(); m->control_pressed = true; }
868                         }
869                         m->gobble(inOligos);
870                 }       
871                 inOligos.close();
872                 
873                 if ((primers.size() == 0) && (revPrimer.size() == 0)) {
874                         m->mothurOut("[ERROR]: your oligos file does not contain valid primers or reverse primers.  Please correct."); m->mothurOutEndLine();
875             m->control_pressed = true;
876                         return false;
877                 }
878         
879         return true;
880         
881     }catch(exception& e) {
882                 m->errorOut(e, "PcrSeqsCommand", "readOligos");
883                 exit(1);
884         }
885 }
886 //***************************************************************************************************************
887 bool PcrSeqsCommand::readEcoli(){
888         try {
889                 ifstream in;
890                 m->openInputFile(ecolifile, in);
891                 
892         //read seq
893         if (!in.eof()){ 
894             Sequence ecoli(in); 
895             length = ecoli.getAligned().length();
896             start = ecoli.getStartPos();
897             end = ecoli.getEndPos();
898         }else { in.close(); m->control_pressed = true; return false; }
899         in.close();    
900                         
901         return true;
902     }
903         catch(exception& e) {
904                 m->errorOut(e, "PcrSeqsCommand", "readEcoli");
905                 exit(1);
906         }
907     
908 }
909 //***************************************************************************************************************
910 int PcrSeqsCommand::writeAccnos(set<string> badNames){
911         try {
912                 string thisOutputDir = outputDir;
913                 if (outputDir == "") {  thisOutputDir += m->hasPath(fastafile);  }
914         map<string, string> variables; 
915                 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(fastafile));
916                 string outputFileName = getOutputFileName("accnos",variables);
917         outputNames.push_back(outputFileName); outputTypes["accnos"].push_back(outputFileName);
918         
919         ofstream out;
920         m->openOutputFile(outputFileName, out);
921         
922         for (set<string>::iterator it = badNames.begin(); it != badNames.end(); it++) {
923             if (m->control_pressed) { break; }
924             out << (*it) << endl;
925         }
926         
927         out.close();
928         return 0;
929     }
930         catch(exception& e) {
931                 m->errorOut(e, "PcrSeqsCommand", "writeAccnos");
932                 exit(1);
933         }
934     
935 }
936 //******************************************************************/
937 bool PcrSeqsCommand::compareDNASeq(string oligo, string seq){
938         try {
939                 bool success = 1;
940                 int length = oligo.length();
941                 
942                 for(int i=0;i<length;i++){
943                         
944                         if(oligo[i] != seq[i]){
945                                 if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C')    {       success = 0;    }
946                                 else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N'))                                {       success = 0;    }
947                                 else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G'))                                    {       success = 0;    }
948                                 else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T'))                                    {       success = 0;    }
949                                 else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A'))                                    {       success = 0;    }
950                                 else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G'))                                    {       success = 0;    }
951                                 else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A'))                                    {       success = 0;    }
952                                 else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G'))                                    {       success = 0;    }
953                                 else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G'))   {       success = 0;    }
954                                 else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G'))   {       success = 0;    }
955                                 else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C'))   {       success = 0;    }
956                                 else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G'))   {       success = 0;    }                       
957                                 
958                                 if(success == 0)        {       break;   }
959                         }
960                         else{
961                                 success = 1;
962                         }
963                 }
964                 
965                 return success;
966         }
967         catch(exception& e) {
968                 m->errorOut(e, "PcrSeqsCommand", "compareDNASeq");
969                 exit(1);
970         }
971         
972 }
973 //***************************************************************************************************************
974 int PcrSeqsCommand::readName(set<string>& names){
975         try {
976                 string thisOutputDir = outputDir;
977                 if (outputDir == "") {  thisOutputDir += m->hasPath(namefile);  }
978                 map<string, string> variables; 
979                 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(namefile));
980         variables["[extension]"] = m->getExtension(namefile);
981                 string outputFileName = getOutputFileName("name", variables);
982         
983                 ofstream out;
984                 m->openOutputFile(outputFileName, out);
985         
986                 ifstream in;
987                 m->openInputFile(namefile, in);
988                 string name, firstCol, secondCol;
989                 
990                 bool wroteSomething = false;
991                 int removedCount = 0;
992                 
993                 while(!in.eof()){
994                         if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
995                         
996                         in >> firstCol;         m->gobble(in);          
997                         in >> secondCol;                        
998                         
999             string savedSecond = secondCol;
1000                         vector<string> parsedNames;
1001                         m->splitAtComma(secondCol, parsedNames);
1002                         
1003                         vector<string> validSecond;  validSecond.clear();
1004                         for (int i = 0; i < parsedNames.size(); i++) {
1005                                 if (names.count(parsedNames[i]) == 0) {
1006                                         validSecond.push_back(parsedNames[i]);
1007                                 }
1008                         }
1009                         
1010                         if (validSecond.size() != parsedNames.size()) {  //we want to get rid of someone, so get rid of everyone
1011                                 for (int i = 0; i < parsedNames.size(); i++) {  names.insert(parsedNames[i]);  }
1012                                 removedCount += parsedNames.size();
1013                         }else {
1014                 out << firstCol << '\t' << savedSecond << endl;
1015                 wroteSomething = true;
1016             }
1017                         m->gobble(in);
1018                 }
1019                 in.close();
1020                 out.close();
1021                 
1022                 if (wroteSomething == false) {  m->mothurOut("Your file contains only sequences from the .accnos file."); m->mothurOutEndLine();  }
1023                 outputTypes["name"].push_back(outputFileName); outputNames.push_back(outputFileName);
1024                 
1025                 m->mothurOut("Removed " + toString(removedCount) + " sequences from your name file."); m->mothurOutEndLine();
1026                 
1027                 return 0;
1028         }
1029         catch(exception& e) {
1030                 m->errorOut(e, "PcrSeqsCommand", "readName");
1031                 exit(1);
1032         }
1033 }
1034 //**********************************************************************************************************************
1035 int PcrSeqsCommand::readGroup(set<string> names){
1036         try {
1037                 string thisOutputDir = outputDir;
1038                 if (outputDir == "") {  thisOutputDir += m->hasPath(groupfile);  }
1039                 map<string, string> variables; 
1040                 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(groupfile));
1041         variables["[extension]"] = m->getExtension(groupfile);
1042                 string outputFileName = getOutputFileName("group", variables);
1043                 
1044                 ofstream out;
1045                 m->openOutputFile(outputFileName, out);
1046         
1047                 ifstream in;
1048                 m->openInputFile(groupfile, in);
1049                 string name, group;
1050                 
1051                 bool wroteSomething = false;
1052                 int removedCount = 0;
1053                 
1054                 while(!in.eof()){
1055                         if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
1056                         
1057                         in >> name;                             //read from first column
1058                         in >> group;                    //read from second column
1059                         
1060                         //if this name is in the accnos file
1061                         if (names.count(name) == 0) {
1062                                 wroteSomething = true;
1063                                 out << name << '\t' << group << endl;
1064                         }else {  removedCount++;  }
1065             
1066                         m->gobble(in);
1067                 }
1068                 in.close();
1069                 out.close();
1070                 
1071                 if (wroteSomething == false) {  m->mothurOut("Your file contains only sequences from the .accnos file."); m->mothurOutEndLine();  }
1072                 outputTypes["group"].push_back(outputFileName); outputNames.push_back(outputFileName);
1073                 
1074                 m->mothurOut("Removed " + toString(removedCount) + " sequences from your group file."); m->mothurOutEndLine();
1075         
1076                 
1077                 return 0;
1078         }
1079         catch(exception& e) {
1080                 m->errorOut(e, "PcrSeqsCommand", "readGroup");
1081                 exit(1);
1082         }
1083 }
1084 //**********************************************************************************************************************
1085 int PcrSeqsCommand::readTax(set<string> names){
1086         try {
1087                 string thisOutputDir = outputDir;
1088                 if (outputDir == "") {  thisOutputDir += m->hasPath(taxfile);  }
1089                 map<string, string> variables; 
1090                 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(taxfile));
1091         variables["[extension]"] = m->getExtension(taxfile);
1092                 string outputFileName = getOutputFileName("taxonomy", variables);
1093
1094                 ofstream out;
1095                 m->openOutputFile(outputFileName, out);
1096         
1097                 ifstream in;
1098                 m->openInputFile(taxfile, in);
1099                 string name, tax;
1100                 
1101                 bool wroteSomething = false;
1102                 int removedCount = 0;
1103                 
1104                 while(!in.eof()){
1105                         if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
1106                         
1107                         in >> name;                             //read from first column
1108                         in >> tax;                      //read from second column
1109                         
1110                         //if this name is in the accnos file
1111                         if (names.count(name) == 0) {
1112                                 wroteSomething = true;
1113                                 out << name << '\t' << tax << endl;
1114                         }else {  removedCount++;  }
1115             
1116                         m->gobble(in);
1117                 }
1118                 in.close();
1119                 out.close();
1120                 
1121                 if (wroteSomething == false) {  m->mothurOut("Your file contains only sequences from the .accnos file."); m->mothurOutEndLine();  }
1122                 outputTypes["taxonomy"].push_back(outputFileName); outputNames.push_back(outputFileName);
1123                 
1124                 m->mothurOut("Removed " + toString(removedCount) + " sequences from your taxonomy file."); m->mothurOutEndLine();
1125                 
1126                 return 0;
1127         }
1128         catch(exception& e) {
1129                 m->errorOut(e, "PcrSeqsCommand", "readTax");
1130                 exit(1);
1131         }
1132 }
1133 //***************************************************************************************************************
1134 int PcrSeqsCommand::readCount(set<string> badSeqNames){
1135         try {
1136                 ifstream in;
1137                 m->openInputFile(countfile, in);
1138                 set<string>::iterator it;
1139                 
1140                 map<string, string> variables; 
1141                 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(countfile));
1142         variables["[extension]"] = m->getExtension(countfile);
1143                 string goodCountFile = getOutputFileName("count", variables);
1144
1145         outputNames.push_back(goodCountFile);  outputTypes["count"].push_back(goodCountFile);
1146                 ofstream goodCountOut;  m->openOutputFile(goodCountFile, goodCountOut);
1147                 
1148         string headers = m->getline(in); m->gobble(in);
1149         goodCountOut << headers << endl;
1150         
1151         string name, rest; int thisTotal, removedCount; removedCount = 0;
1152         bool wroteSomething = false;
1153         while (!in.eof()) {
1154             
1155                         if (m->control_pressed) { goodCountOut.close(); in.close(); m->mothurRemove(goodCountFile); return 0; }
1156             
1157                         in >> name; m->gobble(in); 
1158             in >> thisTotal; m->gobble(in);
1159             rest = m->getline(in); m->gobble(in);
1160             
1161                         if (badSeqNames.count(name) != 0) { removedCount+=thisTotal; }
1162                         else{
1163                 wroteSomething = true;
1164                                 goodCountOut << name << '\t' << thisTotal << '\t' << rest << endl;
1165                         }
1166                 }
1167                 in.close();
1168                 goodCountOut.close();
1169         
1170         if (m->control_pressed) { m->mothurRemove(goodCountFile);   }
1171         
1172         if (wroteSomething == false) {  m->mothurOut("Your count file contains only sequences from the .accnos file."); m->mothurOutEndLine(); }
1173         
1174         //check for groups that have been eliminated
1175         CountTable ct;
1176         if (ct.testGroups(goodCountFile)) {
1177             ct.readTable(goodCountFile);
1178             ct.printTable(goodCountFile);
1179         }
1180                 
1181                 if (m->control_pressed) { m->mothurRemove(goodCountFile);   }
1182         
1183         m->mothurOut("Removed " + toString(removedCount) + " sequences from your count file."); m->mothurOutEndLine();
1184
1185                 
1186                 return 0;
1187         
1188         }
1189         catch(exception& e) {
1190                 m->errorOut(e, "PcrSeqsCommand", "readCOunt");
1191                 exit(1);
1192         }
1193 }
1194 /**************************************************************************************/
1195
1196