]> git.donarmstrong.com Git - mothur.git/blob - prcseqscommand.cpp
added classify.shared command and random forest files. added count file to pcr.seqs...
[mothur.git] / prcseqscommand.cpp
1 //
2 //  prcseqscommand.cpp
3 //  Mothur
4 //
5 //  Created by Sarah Westcott on 3/14/12.
6 //  Copyright (c) 2012 Schloss Lab. All rights reserved.
7 //
8
9 #include "pcrseqscommand.h"
10
11 //**********************************************************************************************************************
12 vector<string> PcrSeqsCommand::setParameters(){ 
13         try {
14                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
15                 CommandParameter poligos("oligos", "InputTypes", "", "", "ecolioligos", "none", "none",false,false); parameters.push_back(poligos);
16         CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none",false,false); parameters.push_back(pname);
17         CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none",false,false); parameters.push_back(pcount);
18                 CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none",false,false); parameters.push_back(pgroup);
19         CommandParameter ptax("taxonomy", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(ptax);
20         CommandParameter pecoli("ecoli", "InputTypes", "", "", "ecolioligos", "none", "none",false,false); parameters.push_back(pecoli);
21                 CommandParameter pstart("start", "Number", "", "-1", "", "", "",false,false); parameters.push_back(pstart);
22                 CommandParameter pend("end", "Number", "", "-1", "", "", "",false,false); parameters.push_back(pend);
23                 CommandParameter pnomatch("nomatch", "Multiple", "reject-keep", "reject", "", "", "",false,false); parameters.push_back(pnomatch);
24                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
25                 CommandParameter pkeepprimer("keepprimer", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pkeepprimer);
26         CommandParameter pkeepdots("keepdots", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pkeepdots);
27         CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
28                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
29         
30                 vector<string> myArray;
31                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
32                 return myArray;
33         }
34         catch(exception& e) {
35                 m->errorOut(e, "PcrSeqsCommand", "setParameters");
36                 exit(1);
37         }
38 }
39 //**********************************************************************************************************************
40 string PcrSeqsCommand::getHelpString(){ 
41         try {
42                 string helpString = "";
43                 helpString += "The pcr.seqs command reads a fasta file.\n";
44         helpString += "The pcr.seqs command parameters are fasta, oligos, name, group, count, taxonomy, ecoli, start, end, nomatch, processors, keepprimer and keepdots.\n";
45                 helpString += "The ecoli parameter is used to provide a fasta file containing a single reference sequence (e.g. for e. coli) this must be aligned. Mothur will trim to the start and end positions of the reference sequence.\n";
46         helpString += "The start parameter allows you to provide a starting position to trim to.\n";
47         helpString += "The end parameter allows you to provide a ending position to trim from.\n";
48         helpString += "The nomatch parameter allows you to decide what to do with sequences where the primer is not found. Default=reject, meaning remove from fasta file.  if nomatch=true, then do nothing to sequence.\n";
49         helpString += "The processors parameter allows you to use multiple processors.\n";
50         helpString += "The keepprimer parameter allows you to keep the primer, default=false.\n";
51         helpString += "The keepdots parameter allows you to keep the leading and trailing .'s, default=true.\n";
52                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
53                 helpString += "For more details please check out the wiki http://www.mothur.org/wiki/Pcr.seqs .\n";
54                 return helpString;
55         }
56         catch(exception& e) {
57                 m->errorOut(e, "PcrSeqsCommand", "getHelpString");
58                 exit(1);
59         }
60 }
61
62 //**********************************************************************************************************************
63 string PcrSeqsCommand::getOutputFileNameTag(string type, string inputName=""){  
64         try {
65         string outputFileName = "";
66                 map<string, vector<string> >::iterator it;
67         
68         //is this a type this command creates
69         it = outputTypes.find(type);
70         if (it == outputTypes.end()) {  m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
71         else {
72             if (type == "fasta") {  outputFileName =  "pcr.fasta"; }
73             else if (type == "taxonomy") {  outputFileName =  "pcr" + m->getExtension(inputName); }
74             else if (type == "group") {  outputFileName =  "pcr" + m->getExtension(inputName); }
75             else if (type == "name") {  outputFileName =  "pcr" + m->getExtension(inputName); }
76             else if (type == "count") {  outputFileName =  "pcr" + m->getExtension(inputName); }
77             else if (type == "accnos") {  outputFileName =  "bad.accnos"; }
78             else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
79         }
80         return outputFileName;
81         }
82         catch(exception& e) {
83                 m->errorOut(e, "PcrSeqsCommand", "getOutputFileNameTag");
84                 exit(1);
85         }
86 }
87 //**********************************************************************************************************************
88
89 PcrSeqsCommand::PcrSeqsCommand(){       
90         try {
91                 abort = true; calledHelp = true; 
92                 setParameters();
93                 vector<string> tempOutNames;
94                 outputTypes["fasta"] = tempOutNames;
95                 outputTypes["taxonomy"] = tempOutNames;
96                 outputTypes["group"] = tempOutNames;
97                 outputTypes["name"] = tempOutNames;
98         outputTypes["count"] = tempOutNames;
99         outputTypes["accnos"] = tempOutNames;
100         }
101         catch(exception& e) {
102                 m->errorOut(e, "PcrSeqsCommand", "PcrSeqsCommand");
103                 exit(1);
104         }
105 }
106 //***************************************************************************************************************
107
108 PcrSeqsCommand::PcrSeqsCommand(string option)  {
109         try {
110                 
111                 abort = false; calledHelp = false;   
112                 
113                 //allow user to run help
114                 if(option == "help") { help(); abort = true; calledHelp = true; }
115                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
116                 
117                 else {
118                         vector<string> myArray = setParameters();
119                         
120                         OptionParser parser(option);
121                         map<string,string> parameters = parser.getParameters();
122                         
123                         ValidParameters validParameter;
124                         map<string,string>::iterator it;
125                         
126                         //check to make sure all parameters are valid for command
127                         for (it = parameters.begin(); it != parameters.end(); it++) { 
128                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
129                         }
130                         
131                         //initialize outputTypes
132                         vector<string> tempOutNames;
133                         outputTypes["fasta"] = tempOutNames;
134                         outputTypes["taxonomy"] = tempOutNames;
135                         outputTypes["group"] = tempOutNames;
136                         outputTypes["name"] = tempOutNames;
137             outputTypes["accnos"] = tempOutNames;
138             outputTypes["count"] = tempOutNames;
139                         
140                         //if the user changes the input directory command factory will send this info to us in the output parameter 
141                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
142                         if (inputDir == "not found"){   inputDir = "";          }
143                         else {
144                                 string path;
145                                 it = parameters.find("fasta");
146                                 //user has given a template file
147                                 if(it != parameters.end()){ 
148                                         path = m->hasPath(it->second);
149                                         //if the user has not given a path then, add inputdir. else leave path alone.
150                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
151                                 }
152                                 
153                                 it = parameters.find("oligos");
154                                 //user has given a template file
155                                 if(it != parameters.end()){ 
156                                         path = m->hasPath(it->second);
157                                         //if the user has not given a path then, add inputdir. else leave path alone.
158                                         if (path == "") {       parameters["oligos"] = inputDir + it->second;           }
159                                 }
160                 
161                 it = parameters.find("ecoli");
162                                 //user has given a template file
163                                 if(it != parameters.end()){ 
164                                         path = m->hasPath(it->second);
165                                         //if the user has not given a path then, add inputdir. else leave path alone.
166                                         if (path == "") {       parameters["ecoli"] = inputDir + it->second;            }
167                                 }
168                                 
169                                 it = parameters.find("taxonomy");
170                                 //user has given a template file
171                                 if(it != parameters.end()){ 
172                                         path = m->hasPath(it->second);
173                                         //if the user has not given a path then, add inputdir. else leave path alone.
174                                         if (path == "") {       parameters["taxonomy"] = inputDir + it->second;         }
175                                 }
176                                 
177                                 it = parameters.find("name");
178                                 //user has given a template file
179                                 if(it != parameters.end()){ 
180                                         path = m->hasPath(it->second);
181                                         //if the user has not given a path then, add inputdir. else leave path alone.
182                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
183                                 }
184                 
185                 it = parameters.find("group");
186                                 //user has given a template file
187                                 if(it != parameters.end()){ 
188                                         path = m->hasPath(it->second);
189                                         //if the user has not given a path then, add inputdir. else leave path alone.
190                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
191                                 }
192                 
193                 it = parameters.find("count");
194                                 //user has given a template file
195                                 if(it != parameters.end()){ 
196                                         path = m->hasPath(it->second);
197                                         //if the user has not given a path then, add inputdir. else leave path alone.
198                                         if (path == "") {       parameters["count"] = inputDir + it->second;            }
199                                 }
200                                 
201                         }
202             
203                         
204                         //check for required parameters
205                         fastafile = validParameter.validFile(parameters, "fasta", true);
206                         if (fastafile == "not found") {                                 
207                                 fastafile = m->getFastaFile(); 
208                                 if (fastafile != "") { m->mothurOut("Using " + fastafile + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
209                                 else {  m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
210                         }else if (fastafile == "not open") { fastafile = ""; abort = true; }    
211                         else { m->setFastaFile(fastafile); }
212                         
213             //if the user changes the output directory command factory will send this info to us in the output parameter 
214                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = m->hasPath(fastafile);      }
215
216                         //check for optional parameter and set defaults
217                         // ...at some point should added some additional type checking...
218                         string temp;
219                         temp = validParameter.validFile(parameters, "keepprimer", false);  if (temp == "not found")    {        temp = "f";     }
220                         keepprimer = m->isTrue(temp);   
221             
222             temp = validParameter.validFile(parameters, "keepdots", false);  if (temp == "not found")    {      temp = "t";     }
223                         keepdots = m->isTrue(temp);     
224             
225                         temp = validParameter.validFile(parameters, "oligos", true);
226                         if (temp == "not found"){       oligosfile = "";                }
227                         else if(temp == "not open"){    oligosfile = ""; abort = true;  } 
228                         else                                    {       oligosfile = temp; m->setOligosFile(oligosfile);                }
229                         
230             ecolifile = validParameter.validFile(parameters, "ecoli", true);
231                         if (ecolifile == "not found"){  ecolifile = "";         }
232                         else if(ecolifile == "not open"){       ecolifile = ""; abort = true;   } 
233                         
234             namefile = validParameter.validFile(parameters, "name", true);
235                         if (namefile == "not found"){   namefile = "";          }
236                         else if(namefile == "not open"){        namefile = ""; abort = true;    } 
237             else { m->setNameFile(namefile); }
238             
239             groupfile = validParameter.validFile(parameters, "group", true);
240                         if (groupfile == "not found"){  groupfile = "";         }
241                         else if(groupfile == "not open"){       groupfile = ""; abort = true;   } 
242             else { m->setGroupFile(groupfile); }
243             
244             countfile = validParameter.validFile(parameters, "count", true);
245                         if (countfile == "not open") { countfile = ""; abort = true; }
246                         else if (countfile == "not found") { countfile = "";  } 
247                         else { m->setCountTableFile(countfile); }
248             
249             if ((namefile != "") && (countfile != "")) {
250                 m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
251             }
252                         
253             if ((groupfile != "") && (countfile != "")) {
254                 m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
255             }
256             
257             taxfile = validParameter.validFile(parameters, "taxonomy", true);
258                         if (taxfile == "not found"){    taxfile = "";           }
259                         else if(taxfile == "not open"){ taxfile = ""; abort = true;     } 
260             else { m->setTaxonomyFile(taxfile); }
261                                                 
262                         temp = validParameter.validFile(parameters, "start", false);    if (temp == "not found") { temp = "-1"; }
263                         m->mothurConvert(temp, start);
264             
265             temp = validParameter.validFile(parameters, "end", false);  if (temp == "not found") { temp = "-1"; }
266                         m->mothurConvert(temp, end);
267                         
268                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
269                         m->setProcessors(temp);
270                         m->mothurConvert(temp, processors); 
271                         
272             nomatch = validParameter.validFile(parameters, "nomatch", false);   if (nomatch == "not found") { nomatch = "reject"; }
273                         
274             if ((nomatch != "reject") && (nomatch != "keep")) { m->mothurOut("[ERROR]: " + nomatch + " is not a valid entry for nomatch. Choices are reject and keep.\n");  abort = true; }
275             
276             //didnt set anything
277                         if ((oligosfile == "") && (ecolifile == "") && (start == -1) && (end == -1)) {
278                 m->mothurOut("[ERROR]: You did not set any options. Please provide an oligos or ecoli file, or set start or end.\n"); abort = true;
279             }
280             
281             if ((oligosfile == "") && (ecolifile == "") && (start < 0) && (end == -1)) { m->mothurOut("[ERROR]: Invalid start value.\n"); abort = true; }
282             
283             if ((ecolifile != "") && (start != -1) && (end != -1)) {
284                 m->mothurOut("[ERROR]: You provided an ecoli file , but set the start or end parameters. Unsure what you intend.  When you provide the ecoli file, mothur thinks you want to use the start and end of the sequence in the ecoli file.\n"); abort = true;
285             }
286
287             
288             if ((oligosfile != "") && (ecolifile != "")) {
289                  m->mothurOut("[ERROR]: You can not use an ecoli file at the same time as an oligos file.\n"); abort = true;
290             }
291                         
292                         //check to make sure you didn't forget the name file by mistake                 
293                         if (countfile == "") { 
294                 if (namefile == "") {
295                     vector<string> files; files.push_back(fastafile);
296                     parser.getNameFile(files);
297                 }
298             }
299                 }
300         
301         }
302         catch(exception& e) {
303                 m->errorOut(e, "PcrSeqsCommand", "PcrSeqsCommand");
304                 exit(1);
305         }
306 }
307 //***************************************************************************************************************
308
309 int PcrSeqsCommand::execute(){
310         try{
311         
312                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
313                 
314         int start = time(NULL);
315         
316         string thisOutputDir = outputDir;
317                 if (outputDir == "") {  thisOutputDir += m->hasPath(fastafile);  }
318                 string trimSeqFile = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + getOutputFileNameTag("fasta");
319                 outputNames.push_back(trimSeqFile); outputTypes["fasta"].push_back(trimSeqFile);
320         
321         string badSeqFile = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + "scrap." + getOutputFileNameTag("fasta");
322                 
323                 
324         length = 0;
325                 if(oligosfile != ""){    readOligos();     }  if (m->control_pressed) {  return 0; }
326         if(ecolifile != "") {    readEcoli();      }  if (m->control_pressed) {  return 0; }
327         
328         vector<unsigned long long> positions; 
329         int numFastaSeqs = 0;
330 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
331         positions = m->divideFile(fastafile, processors);
332         for (int i = 0; i < (positions.size()-1); i++) {        lines.push_back(linePair(positions[i], positions[(i+1)]));      }
333 #else
334         if (processors == 1) {
335             lines.push_back(linePair(0, 1000));
336         }else {
337             positions = m->setFilePosFasta(fastafile, numFastaSeqs); 
338             if (positions.size() < processors) { processors = positions.size(); }
339             
340             //figure out how many sequences you have to process
341             int numSeqsPerProcessor = numFastaSeqs / processors;
342             for (int i = 0; i < processors; i++) {
343                 int startIndex =  i * numSeqsPerProcessor;
344                 if(i == (processors - 1)){      numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor;   }
345                 lines.push_back(linePair(positions[startIndex], numSeqsPerProcessor));
346             }
347         }
348 #endif
349         if (m->control_pressed) {  return 0; }
350
351         set<string> badNames;
352         if(processors == 1) {    numFastaSeqs = driverPcr(fastafile, trimSeqFile, badSeqFile, badNames, lines[0]);   }
353         else                {    numFastaSeqs = createProcesses(fastafile, trimSeqFile, badSeqFile, badNames);       }  
354                 
355                 if (m->control_pressed) {  return 0; }          
356         
357         //don't write or keep if blank
358         if (badNames.size() != 0)   { writeAccnos(badNames);        }   
359         if (m->isBlank(badSeqFile)) { m->mothurRemove(badSeqFile);  }
360         else { outputNames.push_back(badSeqFile); outputTypes["fasta"].push_back(badSeqFile); }
361         
362         if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]); } return 0; }
363         if (namefile != "")                     {               readName(badNames);             }   
364         if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]); } return 0; }
365         if (groupfile != "")            {               readGroup(badNames);    }
366         if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]); } return 0; }
367                 if (taxfile != "")                      {               readTax(badNames);              }
368                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]); } return 0; }
369                 if (countfile != "")                    {               readCount(badNames);            }
370                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]); } return 0; }
371       
372         m->mothurOutEndLine();
373                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
374                 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
375                 m->mothurOutEndLine();
376                 m->mothurOutEndLine();
377                 
378                 //set fasta file as new current fastafile
379                 string current = "";
380                 itTypes = outputTypes.find("fasta");
381                 if (itTypes != outputTypes.end()) {
382                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
383                 }
384                 
385                 itTypes = outputTypes.find("name");
386                 if (itTypes != outputTypes.end()) {
387                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
388                 }
389                 
390                 itTypes = outputTypes.find("group");
391                 if (itTypes != outputTypes.end()) {
392                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
393                 }
394                 
395                 itTypes = outputTypes.find("accnos");
396                 if (itTypes != outputTypes.end()) {
397                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); }
398                 }
399                 
400                 itTypes = outputTypes.find("taxonomy");
401                 if (itTypes != outputTypes.end()) {
402                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setTaxonomyFile(current); }
403                 }
404         
405         itTypes = outputTypes.find("count");
406                 if (itTypes != outputTypes.end()) {
407                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
408                 }
409         
410                 m->mothurOut("It took " + toString(time(NULL) - start) + " secs to screen " + toString(numFastaSeqs) + " sequences.");
411                 m->mothurOutEndLine();
412
413                 
414                 return 0;       
415         
416         }
417         catch(exception& e) {
418                 m->errorOut(e, "PcrSeqsCommand", "execute");
419                 exit(1);
420         }
421 }
422 /**************************************************************************************************/
423 int PcrSeqsCommand::createProcesses(string filename, string goodFileName, string badFileName, set<string>& badSeqNames) {
424         try {
425         
426         vector<int> processIDS;   
427         int process = 1;
428                 int num = 0;
429         
430 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
431         
432                 //loop through and create all the processes you want
433                 while (process != processors) {
434                         int pid = fork();
435                         
436                         if (pid > 0) {
437                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
438                                 process++;
439                         }else if (pid == 0){
440                                 num = driverPcr(filename, goodFileName + toString(getpid()) + ".temp", badFileName + toString(getpid()) + ".temp", badSeqNames, lines[process]);
441                                 
442                                 //pass numSeqs to parent
443                                 ofstream out;
444                                 string tempFile = filename + toString(getpid()) + ".num.temp";
445                                 m->openOutputFile(tempFile, out);
446                                 out << num << '\t' << badSeqNames.size() << endl;
447                 for (set<string>::iterator it = badSeqNames.begin(); it != badSeqNames.end(); it++) {
448                     out << (*it) << endl;
449                 }
450                                 out.close();
451                                 
452                                 exit(0);
453                         }else { 
454                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
455                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
456                                 exit(0);
457                         }
458                 }
459                 
460         num = driverPcr(filename, goodFileName, badFileName, badSeqNames, lines[0]);
461         
462                 //force parent to wait until all the processes are done
463                 for (int i=0;i<processIDS.size();i++) { 
464                         int temp = processIDS[i];
465                         wait(&temp);
466                 }
467                 
468                 for (int i = 0; i < processIDS.size(); i++) {
469                         ifstream in;
470                         string tempFile =  filename + toString(processIDS[i]) + ".num.temp";
471                         m->openInputFile(tempFile, in);
472             int numBadNames = 0; string name = "";
473                         if (!in.eof()) { int tempNum = 0; in >> tempNum >> numBadNames; num += tempNum; m->gobble(in); }
474             for (int j = 0; j < numBadNames; j++) {
475                 in >> name; m->gobble(in);
476                 badSeqNames.insert(name);
477             }
478                         in.close(); m->mothurRemove(tempFile);
479             
480             m->appendFiles((goodFileName + toString(processIDS[i]) + ".temp"), goodFileName);
481             m->mothurRemove((goodFileName + toString(processIDS[i]) + ".temp"));
482             
483             m->appendFiles((badFileName + toString(processIDS[i]) + ".temp"), badFileName);
484             m->mothurRemove((badFileName + toString(processIDS[i]) + ".temp"));
485                 }
486     #else
487         
488         //////////////////////////////////////////////////////////////////////////////////////////////////////
489                 //Windows version shared memory, so be careful when passing variables through the sumScreenData struct. 
490                 //Above fork() will clone, so memory is separate, but that's not the case with windows, 
491                 //Taking advantage of shared memory to allow both threads to add info to badSeqNames.
492                 //////////////////////////////////////////////////////////////////////////////////////////////////////
493                 
494                 vector<pcrData*> pDataArray; 
495                 DWORD   dwThreadIdArray[processors-1];
496                 HANDLE  hThreadArray[processors-1]; 
497                 
498                 //Create processor worker threads.
499                 for( int i=0; i<processors-1; i++ ){
500             
501             string extension = "";
502             if (i!=0) {extension += toString(i) + ".temp"; processIDS.push_back(i); }
503             
504                         // Allocate memory for thread data.
505                         pcrData* tempPcr = new pcrData(filename, goodFileName+extension, badFileName+extension, m, oligosfile, ecolifile, primers, revPrimer, nomatch, keepprimer, keepdots, start, end, length, lines[i].start, lines[i].end);
506                         pDataArray.push_back(tempPcr);
507                         
508                         //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
509                         hThreadArray[i] = CreateThread(NULL, 0, MyPcrThreadFunction, pDataArray[i], 0, &dwThreadIdArray[i]);   
510                 }
511                 
512         //do your part
513         num = driverPcr(filename, (goodFileName+toString(processors-1)+".temp"), (badFileName+toString(processors-1)+".temp"),badSeqNames, lines[processors-1]);
514         processIDS.push_back(processors-1);
515         
516                 //Wait until all threads have terminated.
517                 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
518                 
519                 //Close all thread handles and free memory allocations.
520                 for(int i=0; i < pDataArray.size(); i++){
521                         num += pDataArray[i]->count;
522             for (set<string>::iterator it = pDataArray[i]->badSeqNames.begin(); it != pDataArray[i]->badSeqNames.end(); it++) { badSeqNames.insert(*it);       }
523                         CloseHandle(hThreadArray[i]);
524                         delete pDataArray[i];
525                 }
526         
527         for (int i = 0; i < processIDS.size(); i++) {
528             m->appendFiles((goodFileName + toString(processIDS[i]) + ".temp"), goodFileName);
529             m->mothurRemove((goodFileName + toString(processIDS[i]) + ".temp"));
530             
531             m->appendFiles((badFileName + toString(processIDS[i]) + ".temp"), badFileName);
532             m->mothurRemove((badFileName + toString(processIDS[i]) + ".temp"));
533                 }
534         
535 #endif  
536         
537         return num;
538         
539         }
540         catch(exception& e) {
541                 m->errorOut(e, "PcrSeqsCommand", "createProcesses");
542                 exit(1);
543         }
544 }
545
546 //**********************************************************************************************************************
547 int PcrSeqsCommand::driverPcr(string filename, string goodFasta, string badFasta, set<string>& badSeqNames, linePair filePos){
548         try {
549                 ofstream goodFile;
550                 m->openOutputFile(goodFasta, goodFile);
551         
552         ofstream badFile;
553                 m->openOutputFile(badFasta, badFile);
554                 
555                 ifstream inFASTA;
556                 m->openInputFile(filename, inFASTA);
557         
558                 inFASTA.seekg(filePos.start);
559         
560                 bool done = false;
561                 int count = 0;
562         set<int> lengths;
563         
564                 while (!done) {
565             
566                         if (m->control_pressed) {  break; }
567                         
568                         Sequence currSeq(inFASTA); m->gobble(inFASTA);
569             
570             string trashCode = "";
571                         if (currSeq.getName() != "") {
572                 
573                 bool goodSeq = true;
574                 if (oligosfile != "") {
575                     map<int, int> mapAligned;
576                     bool aligned = isAligned(currSeq.getAligned(), mapAligned);
577                     
578                     //process primers
579                     if (primers.size() != 0) {
580                         int primerStart = 0; int primerEnd = 0;
581                         bool good = findForward(currSeq, primerStart, primerEnd);
582                         
583                         if(!good){      if (nomatch == "reject") { goodSeq = false; } trashCode += "f"; }
584                         else{
585                             //are you aligned
586                             if (aligned) { 
587                                 if (!keepprimer)    {  
588                                     if (keepdots)   { currSeq.filterToPos(mapAligned[primerEnd]);   }
589                                     else            { currSeq.setAligned(currSeq.getAligned().substr(mapAligned[primerEnd]));                                              }
590                                 } 
591                                 else                {  
592                                     if (keepdots)   { currSeq.filterToPos(mapAligned[primerStart]);  }
593                                     else            { currSeq.setAligned(currSeq.getAligned().substr(mapAligned[primerStart]));                                              }
594                                 }
595                             }else { 
596                                 if (!keepprimer)    { currSeq.setAligned(currSeq.getUnaligned().substr(primerEnd)); } 
597                                 else                { currSeq.setAligned(currSeq.getUnaligned().substr(primerStart)); } 
598                             }
599                         }
600                     }
601                     
602                     //process reverse primers
603                     if (revPrimer.size() != 0) {
604                         int primerStart = 0; int primerEnd = 0;
605                         bool good = findReverse(currSeq, primerStart, primerEnd);
606                         if(!good){      if (nomatch == "reject") { goodSeq = false; } trashCode += "r"; }
607                         else{ 
608                             //are you aligned
609                             if (aligned) { 
610                                 if (!keepprimer)    {  
611                                     if (keepdots)   { currSeq.filterFromPos(mapAligned[primerStart]); }
612                                     else            { currSeq.setAligned(currSeq.getAligned().substr(0, mapAligned[primerStart]));   }
613                                 } 
614                                 else                {  
615                                     if (keepdots)   { currSeq.filterFromPos(mapAligned[primerEnd]); }
616                                     else            { currSeq.setAligned(currSeq.getAligned().substr(0, mapAligned[primerEnd]));   }
617                                 } 
618                             }
619                             else { 
620                                 if (!keepprimer)    { currSeq.setAligned(currSeq.getUnaligned().substr(0, primerStart));   } 
621                                 else                { currSeq.setAligned(currSeq.getUnaligned().substr(0, primerEnd));     }
622                             }
623                         }
624                     }
625                 }else if (ecolifile != "") {
626                     //make sure the seqs are aligned
627                     lengths.insert(currSeq.getAligned().length());
628                     if (lengths.size() > 1) { m->mothurOut("[ERROR]: seqs are not aligned. When using start and end your sequences must be aligned.\n"); m->control_pressed = true; break; }
629                     else if (currSeq.getAligned().length() != length) {
630                         m->mothurOut("[ERROR]: seqs are not the same length as ecoli seq. When using ecoli option your sequences must be aligned and the same length as the ecoli sequence.\n"); m->control_pressed = true; break; 
631                     }else {
632                         if (keepdots)   { 
633                             currSeq.filterToPos(start); 
634                             currSeq.filterFromPos(end);
635                         }else {
636                             string seqString = currSeq.getAligned().substr(0, end);
637                             seqString = seqString.substr(start);
638                             currSeq.setAligned(seqString); 
639                         }
640                     }
641                 }else{ //using start and end to trim
642                     //make sure the seqs are aligned
643                     lengths.insert(currSeq.getAligned().length());
644                     if (lengths.size() > 1) { m->mothurOut("[ERROR]: seqs are not aligned. When using start and end your sequences must be aligned.\n"); m->control_pressed = true; break; }
645                     else {
646                         if (end != -1) {
647                             if (end > currSeq.getAligned().length()) {  m->mothurOut("[ERROR]: end is longer than your sequence length, aborting.\n"); m->control_pressed = true; break; }
648                             else {
649                                 if (keepdots)   { currSeq.filterFromPos(end); }
650                                 else {
651                                     string seqString = currSeq.getAligned().substr(0, end);
652                                     currSeq.setAligned(seqString); 
653                                 }
654                             }
655                         }
656                         if (start != -1) { 
657                             if (keepdots)   {  currSeq.filterToPos(start);  }
658                             else {
659                                 string seqString = currSeq.getAligned().substr(start);
660                                 currSeq.setAligned(seqString); 
661                             }
662                         }
663                     }
664                 }
665                 
666                 //trimming removed all bases
667                 if (currSeq.getUnaligned() == "") { goodSeq = false; }
668                 
669                                 if(goodSeq == 1)    {   currSeq.printSequence(goodFile);        }
670                                 else {  
671                     badSeqNames.insert(currSeq.getName()); 
672                     currSeq.setName(currSeq.getName() + '|' + trashCode);
673                     currSeq.printSequence(badFile); 
674                 }
675                 count++;
676                         }
677                         
678 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
679             unsigned long long pos = inFASTA.tellg();
680             if ((pos == -1) || (pos >= filePos.end)) { break; }
681 #else
682             if (inFASTA.eof()) { break; }
683 #endif
684                         
685                         //report progress
686                         if((count) % 100 == 0){ m->mothurOut("Processing sequence: " + toString(count)); m->mothurOutEndLine();         }
687                 }
688                 //report progress
689                 if((count) % 100 != 0){ m->mothurOut("Processing sequence: " + toString(count)); m->mothurOutEndLine();         }
690                 
691         badFile.close();
692                 goodFile.close();
693                 inFASTA.close();
694                 
695                 return count;
696         }
697         catch(exception& e) {
698                 m->errorOut(e, "PcrSeqsCommand", "driverPcr");
699                 exit(1);
700         }
701 }
702 //********************************************************************/
703 bool PcrSeqsCommand::findForward(Sequence& seq, int& primerStart, int& primerEnd){
704         try {
705                 
706                 string rawSequence = seq.getUnaligned();
707                 
708                 for(int j=0;j<primers.size();j++){
709                         string oligo = primers[j];
710                         
711                         if(rawSequence.length() < oligo.length()) {  break;  }
712                         
713                         //search for primer
714             int olength = oligo.length();
715             for (int j = 0; j < rawSequence.length()-olength; j++){
716                 if (m->control_pressed) {  primerStart = 0; primerEnd = 0; return false; }
717                 string rawChunk = rawSequence.substr(j, olength);
718                 if(compareDNASeq(oligo, rawChunk)) {
719                     primerStart = j;
720                     primerEnd = primerStart + olength;
721                     return true;
722                 }
723                 
724             }
725         }       
726                 
727         primerStart = 0; primerEnd = 0;
728                 return false;
729                 
730         }
731         catch(exception& e) {
732                 m->errorOut(e, "TrimOligos", "stripForward");
733                 exit(1);
734         }
735 }
736 //******************************************************************/
737 bool PcrSeqsCommand::findReverse(Sequence& seq, int& primerStart, int& primerEnd){
738         try {
739                 
740                 string rawSequence = seq.getUnaligned();
741                 
742                 for(int i=0;i<revPrimer.size();i++){
743                         string oligo = revPrimer[i];
744                         if(rawSequence.length() < oligo.length()) {  break;  }
745                         
746                         //search for primer
747             int olength = oligo.length();
748             for (int j = rawSequence.length()-olength; j >= 0; j--){
749                  if (m->control_pressed) {  primerStart = 0; primerEnd = 0; return false; }
750                 string rawChunk = rawSequence.substr(j, olength);
751             
752                 if(compareDNASeq(oligo, rawChunk)) {
753                     primerStart = j;
754                     primerEnd = primerStart + olength;
755                     return true;
756                 }
757                 
758             }
759                 }       
760                 
761         primerStart = 0; primerEnd = 0;
762                 return false;
763         }
764         catch(exception& e) {
765                 m->errorOut(e, "PcrSeqsCommand", "findReverse");
766                 exit(1);
767         }
768 }
769 //********************************************************************/
770 bool PcrSeqsCommand::isAligned(string seq, map<int, int>& aligned){
771         try {
772         bool isAligned = false;
773         
774         int countBases = 0;
775         for (int i = 0; i < seq.length(); i++) {
776             if (!isalpha(seq[i])) { isAligned = true; }
777             else { aligned[countBases] = i; countBases++; } //maps location in unaligned -> location in aligned.
778         }                                                   //ie. the 3rd base may be at spot 10 in the alignment
779                                                             //later when we trim we want to trim from spot 10.
780         return isAligned;
781     }
782         catch(exception& e) {
783                 m->errorOut(e, "PcrSeqsCommand", "isAligned");
784                 exit(1);
785         }
786 }
787 //********************************************************************/
788 string PcrSeqsCommand::reverseOligo(string oligo){
789         try {
790         string reverse = "";
791        
792         for(int i=oligo.length()-1;i>=0;i--){
793             
794             if(oligo[i] == 'A')         {       reverse += 'T'; }
795             else if(oligo[i] == 'T'){   reverse += 'A'; }
796             else if(oligo[i] == 'U'){   reverse += 'A'; }
797             
798             else if(oligo[i] == 'G'){   reverse += 'C'; }
799             else if(oligo[i] == 'C'){   reverse += 'G'; }
800             
801             else if(oligo[i] == 'R'){   reverse += 'Y'; }
802             else if(oligo[i] == 'Y'){   reverse += 'R'; }
803             
804             else if(oligo[i] == 'M'){   reverse += 'K'; }
805             else if(oligo[i] == 'K'){   reverse += 'M'; }
806             
807             else if(oligo[i] == 'W'){   reverse += 'W'; }
808             else if(oligo[i] == 'S'){   reverse += 'S'; }
809             
810             else if(oligo[i] == 'B'){   reverse += 'V'; }
811             else if(oligo[i] == 'V'){   reverse += 'B'; }
812             
813             else if(oligo[i] == 'D'){   reverse += 'H'; }
814             else if(oligo[i] == 'H'){   reverse += 'D'; }
815
816             else                                                {       reverse += 'N'; }
817         }
818
819         
820         return reverse;
821     }
822         catch(exception& e) {
823                 m->errorOut(e, "PcrSeqsCommand", "reverseOligo");
824                 exit(1);
825         }
826 }
827
828 //***************************************************************************************************************
829 bool PcrSeqsCommand::readOligos(){
830         try {
831                 ifstream inOligos;
832                 m->openInputFile(oligosfile, inOligos);
833                 
834                 string type, oligo, group;
835                 
836                 while(!inOligos.eof()){
837             
838                         inOligos >> type; 
839             
840                         if(type[0] == '#'){ //ignore
841                                 while (!inOligos.eof()) {       char c = inOligos.get();  if (c == 10 || c == 13){      break;  }       } // get rest of line if there's any crap there
842                                 m->gobble(inOligos);
843                         }else{
844                                 m->gobble(inOligos);
845                                 //make type case insensitive
846                                 for(int i=0;i<type.length();i++){       type[i] = toupper(type[i]);  }
847                                 
848                                 inOligos >> oligo;
849                                 
850                                 for(int i=0;i<oligo.length();i++){
851                                         oligo[i] = toupper(oligo[i]);
852                                         if(oligo[i] == 'U')     {       oligo[i] = 'T'; }
853                                 }
854                                 
855                                 if(type == "FORWARD"){
856                                         // get rest of line in case there is a primer name
857                                         while (!inOligos.eof()) {       
858                         char c = inOligos.get(); 
859                         if (c == 10 || c == 13){        break;  } 
860                         else if (c == 32 || c == 9){;} //space or tab
861                                         } 
862                                         primers.push_back(oligo);
863                 }else if(type == "REVERSE"){
864                     string oligoRC = reverseOligo(oligo);
865                     revPrimer.push_back(oligoRC);
866                     //cout << "oligo = " << oligo << " reverse = " << oligoRC << endl;
867                                 }else if(type == "BARCODE"){
868                                         inOligos >> group;
869                                 }else if((type == "LINKER")||(type == "SPACER")) {;}
870                                 else{   m->mothurOut(type + " is not recognized as a valid type. Choices are forward, reverse, linker, spacer and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine(); m->control_pressed = true; }
871                         }
872                         m->gobble(inOligos);
873                 }       
874                 inOligos.close();
875                 
876                 if ((primers.size() == 0) && (revPrimer.size() == 0)) {
877                         m->mothurOut("[ERROR]: your oligos file does not contain valid primers or reverse primers.  Please correct."); m->mothurOutEndLine();
878             m->control_pressed = true;
879                         return false;
880                 }
881         
882         return true;
883         
884     }catch(exception& e) {
885                 m->errorOut(e, "PcrSeqsCommand", "readOligos");
886                 exit(1);
887         }
888 }
889 //***************************************************************************************************************
890 bool PcrSeqsCommand::readEcoli(){
891         try {
892                 ifstream in;
893                 m->openInputFile(ecolifile, in);
894                 
895         //read seq
896         if (!in.eof()){ 
897             Sequence ecoli(in); 
898             length = ecoli.getAligned().length();
899             start = ecoli.getStartPos();
900             end = ecoli.getEndPos();
901         }else { in.close(); m->control_pressed = true; return false; }
902         in.close();    
903                         
904         return true;
905     }
906         catch(exception& e) {
907                 m->errorOut(e, "PcrSeqsCommand", "readEcoli");
908                 exit(1);
909         }
910     
911 }
912 //***************************************************************************************************************
913 int PcrSeqsCommand::writeAccnos(set<string> badNames){
914         try {
915                 string thisOutputDir = outputDir;
916                 if (outputDir == "") {  thisOutputDir += m->hasPath(fastafile);  }
917                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + getOutputFileNameTag("accnos");
918         outputNames.push_back(outputFileName); outputTypes["accnos"].push_back(outputFileName);
919         
920         ofstream out;
921         m->openOutputFile(outputFileName, out);
922         
923         for (set<string>::iterator it = badNames.begin(); it != badNames.end(); it++) {
924             if (m->control_pressed) { break; }
925             out << (*it) << endl;
926         }
927         
928         out.close();
929         return 0;
930     }
931         catch(exception& e) {
932                 m->errorOut(e, "PcrSeqsCommand", "writeAccnos");
933                 exit(1);
934         }
935     
936 }
937 //******************************************************************/
938 bool PcrSeqsCommand::compareDNASeq(string oligo, string seq){
939         try {
940                 bool success = 1;
941                 int length = oligo.length();
942                 
943                 for(int i=0;i<length;i++){
944                         
945                         if(oligo[i] != seq[i]){
946                                 if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C')    {       success = 0;    }
947                                 else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N'))                                {       success = 0;    }
948                                 else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G'))                                    {       success = 0;    }
949                                 else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T'))                                    {       success = 0;    }
950                                 else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A'))                                    {       success = 0;    }
951                                 else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G'))                                    {       success = 0;    }
952                                 else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A'))                                    {       success = 0;    }
953                                 else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G'))                                    {       success = 0;    }
954                                 else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G'))   {       success = 0;    }
955                                 else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G'))   {       success = 0;    }
956                                 else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C'))   {       success = 0;    }
957                                 else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G'))   {       success = 0;    }                       
958                                 
959                                 if(success == 0)        {       break;   }
960                         }
961                         else{
962                                 success = 1;
963                         }
964                 }
965                 
966                 return success;
967         }
968         catch(exception& e) {
969                 m->errorOut(e, "PcrSeqsCommand", "compareDNASeq");
970                 exit(1);
971         }
972         
973 }
974 //***************************************************************************************************************
975 int PcrSeqsCommand::readName(set<string>& names){
976         try {
977                 string thisOutputDir = outputDir;
978                 if (outputDir == "") {  thisOutputDir += m->hasPath(namefile);  }
979                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(namefile)) + getOutputFileNameTag("name", namefile);
980         
981                 ofstream out;
982                 m->openOutputFile(outputFileName, out);
983         
984                 ifstream in;
985                 m->openInputFile(namefile, in);
986                 string name, firstCol, secondCol;
987                 
988                 bool wroteSomething = false;
989                 int removedCount = 0;
990                 
991                 while(!in.eof()){
992                         if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
993                         
994                         in >> firstCol;         m->gobble(in);          
995                         in >> secondCol;                        
996                         
997             string savedSecond = secondCol;
998                         vector<string> parsedNames;
999                         m->splitAtComma(secondCol, parsedNames);
1000                         
1001                         vector<string> validSecond;  validSecond.clear();
1002                         for (int i = 0; i < parsedNames.size(); i++) {
1003                                 if (names.count(parsedNames[i]) == 0) {
1004                                         validSecond.push_back(parsedNames[i]);
1005                                 }
1006                         }
1007                         
1008                         if (validSecond.size() != parsedNames.size()) {  //we want to get rid of someone, so get rid of everyone
1009                                 for (int i = 0; i < parsedNames.size(); i++) {  names.insert(parsedNames[i]);  }
1010                                 removedCount += parsedNames.size();
1011                         }else {
1012                 out << firstCol << '\t' << savedSecond << endl;
1013                 wroteSomething = true;
1014             }
1015                         m->gobble(in);
1016                 }
1017                 in.close();
1018                 out.close();
1019                 
1020                 if (wroteSomething == false) {  m->mothurOut("Your file contains only sequences from the .accnos file."); m->mothurOutEndLine();  }
1021                 outputTypes["name"].push_back(outputFileName); outputNames.push_back(outputFileName);
1022                 
1023                 m->mothurOut("Removed " + toString(removedCount) + " sequences from your name file."); m->mothurOutEndLine();
1024                 
1025                 return 0;
1026         }
1027         catch(exception& e) {
1028                 m->errorOut(e, "PcrSeqsCommand", "readName");
1029                 exit(1);
1030         }
1031 }
1032 //**********************************************************************************************************************
1033 int PcrSeqsCommand::readGroup(set<string> names){
1034         try {
1035                 string thisOutputDir = outputDir;
1036                 if (outputDir == "") {  thisOutputDir += m->hasPath(groupfile);  }
1037                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(groupfile)) + getOutputFileNameTag("group", groupfile);
1038                 
1039                 ofstream out;
1040                 m->openOutputFile(outputFileName, out);
1041         
1042                 ifstream in;
1043                 m->openInputFile(groupfile, in);
1044                 string name, group;
1045                 
1046                 bool wroteSomething = false;
1047                 int removedCount = 0;
1048                 
1049                 while(!in.eof()){
1050                         if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
1051                         
1052                         in >> name;                             //read from first column
1053                         in >> group;                    //read from second column
1054                         
1055                         //if this name is in the accnos file
1056                         if (names.count(name) == 0) {
1057                                 wroteSomething = true;
1058                                 out << name << '\t' << group << endl;
1059                         }else {  removedCount++;  }
1060             
1061                         m->gobble(in);
1062                 }
1063                 in.close();
1064                 out.close();
1065                 
1066                 if (wroteSomething == false) {  m->mothurOut("Your file contains only sequences from the .accnos file."); m->mothurOutEndLine();  }
1067                 outputTypes["group"].push_back(outputFileName); outputNames.push_back(outputFileName);
1068                 
1069                 m->mothurOut("Removed " + toString(removedCount) + " sequences from your group file."); m->mothurOutEndLine();
1070         
1071                 
1072                 return 0;
1073         }
1074         catch(exception& e) {
1075                 m->errorOut(e, "PcrSeqsCommand", "readGroup");
1076                 exit(1);
1077         }
1078 }
1079 //**********************************************************************************************************************
1080 int PcrSeqsCommand::readTax(set<string> names){
1081         try {
1082                 string thisOutputDir = outputDir;
1083                 if (outputDir == "") {  thisOutputDir += m->hasPath(taxfile);  }
1084                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(taxfile)) + getOutputFileNameTag("taxonomy", taxfile);
1085                 ofstream out;
1086                 m->openOutputFile(outputFileName, out);
1087         
1088                 ifstream in;
1089                 m->openInputFile(taxfile, in);
1090                 string name, tax;
1091                 
1092                 bool wroteSomething = false;
1093                 int removedCount = 0;
1094                 
1095                 while(!in.eof()){
1096                         if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
1097                         
1098                         in >> name;                             //read from first column
1099                         in >> tax;                      //read from second column
1100                         
1101                         //if this name is in the accnos file
1102                         if (names.count(name) == 0) {
1103                                 wroteSomething = true;
1104                                 out << name << '\t' << tax << endl;
1105                         }else {  removedCount++;  }
1106             
1107                         m->gobble(in);
1108                 }
1109                 in.close();
1110                 out.close();
1111                 
1112                 if (wroteSomething == false) {  m->mothurOut("Your file contains only sequences from the .accnos file."); m->mothurOutEndLine();  }
1113                 outputTypes["taxonomy"].push_back(outputFileName); outputNames.push_back(outputFileName);
1114                 
1115                 m->mothurOut("Removed " + toString(removedCount) + " sequences from your taxonomy file."); m->mothurOutEndLine();
1116                 
1117                 return 0;
1118         }
1119         catch(exception& e) {
1120                 m->errorOut(e, "PcrSeqsCommand", "readTax");
1121                 exit(1);
1122         }
1123 }
1124 //***************************************************************************************************************
1125 int PcrSeqsCommand::readCount(set<string> badSeqNames){
1126         try {
1127                 ifstream in;
1128                 m->openInputFile(countfile, in);
1129                 set<string>::iterator it;
1130                 
1131                 string goodCountFile = outputDir + m->getRootName(m->getSimpleName(countfile)) + getOutputFileNameTag("count", countfile);
1132         outputNames.push_back(goodCountFile);  outputTypes["count"].push_back(goodCountFile);
1133                 ofstream goodCountOut;  m->openOutputFile(goodCountFile, goodCountOut);
1134                 
1135         string headers = m->getline(in); m->gobble(in);
1136         goodCountOut << headers << endl;
1137         
1138         string name, rest; int thisTotal, removedCount; removedCount = 0;
1139         bool wroteSomething = false;
1140         while (!in.eof()) {
1141             
1142                         if (m->control_pressed) { goodCountOut.close(); in.close(); m->mothurRemove(goodCountFile); return 0; }
1143             
1144                         in >> name; m->gobble(in); 
1145             in >> thisTotal; m->gobble(in);
1146             rest = m->getline(in); m->gobble(in);
1147             
1148                         if (badSeqNames.count(name) != 0) { removedCount+=thisTotal; }
1149                         else{
1150                 wroteSomething = true;
1151                                 goodCountOut << name << '\t' << thisTotal << '\t' << rest << endl;
1152                         }
1153                 }
1154                 in.close();
1155                 goodCountOut.close();
1156         
1157         if (m->control_pressed) { m->mothurRemove(goodCountFile);   }
1158         
1159         if (wroteSomething == false) {  m->mothurOut("Your count file contains only sequences from the .accnos file."); m->mothurOutEndLine(); }
1160         
1161         //check for groups that have been eliminated
1162         CountTable ct;
1163         if (ct.testGroups(goodCountFile)) {
1164             ct.readTable(goodCountFile);
1165             ct.printTable(goodCountFile);
1166         }
1167                 
1168                 if (m->control_pressed) { m->mothurRemove(goodCountFile);   }
1169         
1170         m->mothurOut("Removed " + toString(removedCount) + " sequences from your count file."); m->mothurOutEndLine();
1171
1172                 
1173                 return 0;
1174         
1175         }
1176         catch(exception& e) {
1177                 m->errorOut(e, "PcrSeqsCommand", "readCOunt");
1178                 exit(1);
1179         }
1180 }
1181 /**************************************************************************************/
1182
1183