]> git.donarmstrong.com Git - mothur.git/blob - prcseqscommand.cpp
Merge remote-tracking branch 'mothur/master'
[mothur.git] / prcseqscommand.cpp
1 //
2 //  prcseqscommand.cpp
3 //  Mothur
4 //
5 //  Created by Sarah Westcott on 3/14/12.
6 //  Copyright (c) 2012 Schloss Lab. All rights reserved.
7 //
8
9 #include "pcrseqscommand.h"
10
11 //**********************************************************************************************************************
12 vector<string> PcrSeqsCommand::setParameters(){ 
13         try {
14                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","fasta",false,true,true); parameters.push_back(pfasta);
15                 CommandParameter poligos("oligos", "InputTypes", "", "", "ecolioligos", "none", "none","",false,false,true); parameters.push_back(poligos);
16         CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none","name",false,false,true); parameters.push_back(pname);
17         CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none","count",false,false,true); parameters.push_back(pcount);
18                 CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none","group",false,false,true); parameters.push_back(pgroup);
19         CommandParameter ptax("taxonomy", "InputTypes", "", "", "none", "none", "none","taxonomy",false,false,true); parameters.push_back(ptax);
20         CommandParameter pecoli("ecoli", "InputTypes", "", "", "ecolioligos", "none", "none","",false,false); parameters.push_back(pecoli);
21                 CommandParameter pstart("start", "Number", "", "-1", "", "", "","",false,false); parameters.push_back(pstart);
22                 CommandParameter pend("end", "Number", "", "-1", "", "", "","",false,false); parameters.push_back(pend);
23                 CommandParameter pnomatch("nomatch", "Multiple", "reject-keep", "reject", "", "", "","",false,false); parameters.push_back(pnomatch);
24                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
25                 CommandParameter pkeepprimer("keepprimer", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pkeepprimer);
26         CommandParameter pkeepdots("keepdots", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pkeepdots);
27         CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
28                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
29         
30                 vector<string> myArray;
31                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
32                 return myArray;
33         }
34         catch(exception& e) {
35                 m->errorOut(e, "PcrSeqsCommand", "setParameters");
36                 exit(1);
37         }
38 }
39 //**********************************************************************************************************************
40 string PcrSeqsCommand::getHelpString(){ 
41         try {
42                 string helpString = "";
43                 helpString += "The pcr.seqs command reads a fasta file.\n";
44         helpString += "The pcr.seqs command parameters are fasta, oligos, name, group, count, taxonomy, ecoli, start, end, nomatch, processors, keepprimer and keepdots.\n";
45                 helpString += "The ecoli parameter is used to provide a fasta file containing a single reference sequence (e.g. for e. coli) this must be aligned. Mothur will trim to the start and end positions of the reference sequence.\n";
46         helpString += "The start parameter allows you to provide a starting position to trim to.\n";
47         helpString += "The end parameter allows you to provide a ending position to trim from.\n";
48         helpString += "The nomatch parameter allows you to decide what to do with sequences where the primer is not found. Default=reject, meaning remove from fasta file.  if nomatch=true, then do nothing to sequence.\n";
49         helpString += "The processors parameter allows you to use multiple processors.\n";
50         helpString += "The keepprimer parameter allows you to keep the primer, default=false.\n";
51         helpString += "The keepdots parameter allows you to keep the leading and trailing .'s, default=true.\n";
52                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
53                 helpString += "For more details please check out the wiki http://www.mothur.org/wiki/Pcr.seqs .\n";
54                 return helpString;
55         }
56         catch(exception& e) {
57                 m->errorOut(e, "PcrSeqsCommand", "getHelpString");
58                 exit(1);
59         }
60 }
61 //**********************************************************************************************************************
62 string PcrSeqsCommand::getOutputPattern(string type) {
63     try {
64         string pattern = "";
65         
66         if (type == "fasta")            {   pattern = "[filename],pcr,[extension]-[filename],[tag],pcr,[extension]";    }
67         else if (type == "taxonomy")    {   pattern = "[filename],pcr,[extension]";    }
68         else if (type == "name")        {   pattern = "[filename],pcr,[extension]";    }
69         else if (type == "group")       {   pattern = "[filename],pcr,[extension]";    }
70         else if (type == "count")       {   pattern = "[filename],pcr,[extension]";    }
71         else if (type == "accnos")      {   pattern = "[filename],bad.accnos";    }
72         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
73         
74         return pattern;
75     }
76     catch(exception& e) {
77         m->errorOut(e, "PcrSeqsCommand", "getOutputPattern");
78         exit(1);
79     }
80 }
81 //**********************************************************************************************************************
82
83 PcrSeqsCommand::PcrSeqsCommand(){       
84         try {
85                 abort = true; calledHelp = true; 
86                 setParameters();
87                 vector<string> tempOutNames;
88                 outputTypes["fasta"] = tempOutNames;
89                 outputTypes["taxonomy"] = tempOutNames;
90                 outputTypes["group"] = tempOutNames;
91                 outputTypes["name"] = tempOutNames;
92         outputTypes["count"] = tempOutNames;
93         outputTypes["accnos"] = tempOutNames;
94         }
95         catch(exception& e) {
96                 m->errorOut(e, "PcrSeqsCommand", "PcrSeqsCommand");
97                 exit(1);
98         }
99 }
100 //***************************************************************************************************************
101
102 PcrSeqsCommand::PcrSeqsCommand(string option)  {
103         try {
104                 
105                 abort = false; calledHelp = false;   
106                 
107                 //allow user to run help
108                 if(option == "help") { help(); abort = true; calledHelp = true; }
109                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
110                 
111                 else {
112                         vector<string> myArray = setParameters();
113                         
114                         OptionParser parser(option);
115                         map<string,string> parameters = parser.getParameters();
116                         
117                         ValidParameters validParameter;
118                         map<string,string>::iterator it;
119                         
120                         //check to make sure all parameters are valid for command
121                         for (it = parameters.begin(); it != parameters.end(); it++) { 
122                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
123                         }
124                         
125                         //initialize outputTypes
126                         vector<string> tempOutNames;
127                         outputTypes["fasta"] = tempOutNames;
128                         outputTypes["taxonomy"] = tempOutNames;
129                         outputTypes["group"] = tempOutNames;
130                         outputTypes["name"] = tempOutNames;
131             outputTypes["accnos"] = tempOutNames;
132             outputTypes["count"] = tempOutNames;
133                         
134                         //if the user changes the input directory command factory will send this info to us in the output parameter 
135                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
136                         if (inputDir == "not found"){   inputDir = "";          }
137                         else {
138                                 string path;
139                                 it = parameters.find("fasta");
140                                 //user has given a template file
141                                 if(it != parameters.end()){ 
142                                         path = m->hasPath(it->second);
143                                         //if the user has not given a path then, add inputdir. else leave path alone.
144                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
145                                 }
146                                 
147                                 it = parameters.find("oligos");
148                                 //user has given a template file
149                                 if(it != parameters.end()){ 
150                                         path = m->hasPath(it->second);
151                                         //if the user has not given a path then, add inputdir. else leave path alone.
152                                         if (path == "") {       parameters["oligos"] = inputDir + it->second;           }
153                                 }
154                 
155                 it = parameters.find("ecoli");
156                                 //user has given a template file
157                                 if(it != parameters.end()){ 
158                                         path = m->hasPath(it->second);
159                                         //if the user has not given a path then, add inputdir. else leave path alone.
160                                         if (path == "") {       parameters["ecoli"] = inputDir + it->second;            }
161                                 }
162                                 
163                                 it = parameters.find("taxonomy");
164                                 //user has given a template file
165                                 if(it != parameters.end()){ 
166                                         path = m->hasPath(it->second);
167                                         //if the user has not given a path then, add inputdir. else leave path alone.
168                                         if (path == "") {       parameters["taxonomy"] = inputDir + it->second;         }
169                                 }
170                                 
171                                 it = parameters.find("name");
172                                 //user has given a template file
173                                 if(it != parameters.end()){ 
174                                         path = m->hasPath(it->second);
175                                         //if the user has not given a path then, add inputdir. else leave path alone.
176                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
177                                 }
178                 
179                 it = parameters.find("group");
180                                 //user has given a template file
181                                 if(it != parameters.end()){ 
182                                         path = m->hasPath(it->second);
183                                         //if the user has not given a path then, add inputdir. else leave path alone.
184                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
185                                 }
186                 
187                 it = parameters.find("count");
188                                 //user has given a template file
189                                 if(it != parameters.end()){ 
190                                         path = m->hasPath(it->second);
191                                         //if the user has not given a path then, add inputdir. else leave path alone.
192                                         if (path == "") {       parameters["count"] = inputDir + it->second;            }
193                                 }
194                                 
195                         }
196             
197                         
198                         //check for required parameters
199                         fastafile = validParameter.validFile(parameters, "fasta", true);
200                         if (fastafile == "not found") {                                 
201                                 fastafile = m->getFastaFile(); 
202                                 if (fastafile != "") { m->mothurOut("Using " + fastafile + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
203                                 else {  m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
204                         }else if (fastafile == "not open") { fastafile = ""; abort = true; }    
205                         else { m->setFastaFile(fastafile); }
206                         
207             //if the user changes the output directory command factory will send this info to us in the output parameter 
208                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = m->hasPath(fastafile);      }
209
210                         //check for optional parameter and set defaults
211                         // ...at some point should added some additional type checking...
212                         string temp;
213                         temp = validParameter.validFile(parameters, "keepprimer", false);  if (temp == "not found")    {        temp = "f";     }
214                         keepprimer = m->isTrue(temp);   
215             
216             temp = validParameter.validFile(parameters, "keepdots", false);  if (temp == "not found")    {      temp = "t";     }
217                         keepdots = m->isTrue(temp);     
218             
219                         temp = validParameter.validFile(parameters, "oligos", true);
220                         if (temp == "not found"){       oligosfile = "";                }
221                         else if(temp == "not open"){    oligosfile = ""; abort = true;  } 
222                         else                                    {       oligosfile = temp; m->setOligosFile(oligosfile);                }
223                         
224             ecolifile = validParameter.validFile(parameters, "ecoli", true);
225                         if (ecolifile == "not found"){  ecolifile = "";         }
226                         else if(ecolifile == "not open"){       ecolifile = ""; abort = true;   } 
227                         
228             namefile = validParameter.validFile(parameters, "name", true);
229                         if (namefile == "not found"){   namefile = "";          }
230                         else if(namefile == "not open"){        namefile = ""; abort = true;    } 
231             else { m->setNameFile(namefile); }
232             
233             groupfile = validParameter.validFile(parameters, "group", true);
234                         if (groupfile == "not found"){  groupfile = "";         }
235                         else if(groupfile == "not open"){       groupfile = ""; abort = true;   } 
236             else { m->setGroupFile(groupfile); }
237             
238             countfile = validParameter.validFile(parameters, "count", true);
239                         if (countfile == "not open") { countfile = ""; abort = true; }
240                         else if (countfile == "not found") { countfile = "";  } 
241                         else { m->setCountTableFile(countfile); }
242             
243             if ((namefile != "") && (countfile != "")) {
244                 m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
245             }
246                         
247             if ((groupfile != "") && (countfile != "")) {
248                 m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
249             }
250             
251             taxfile = validParameter.validFile(parameters, "taxonomy", true);
252                         if (taxfile == "not found"){    taxfile = "";           }
253                         else if(taxfile == "not open"){ taxfile = ""; abort = true;     } 
254             else { m->setTaxonomyFile(taxfile); }
255                                                 
256                         temp = validParameter.validFile(parameters, "start", false);    if (temp == "not found") { temp = "-1"; }
257                         m->mothurConvert(temp, start);
258             
259             temp = validParameter.validFile(parameters, "end", false);  if (temp == "not found") { temp = "-1"; }
260                         m->mothurConvert(temp, end);
261                         
262                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
263                         m->setProcessors(temp);
264                         m->mothurConvert(temp, processors); 
265                         
266             nomatch = validParameter.validFile(parameters, "nomatch", false);   if (nomatch == "not found") { nomatch = "reject"; }
267                         
268             if ((nomatch != "reject") && (nomatch != "keep")) { m->mothurOut("[ERROR]: " + nomatch + " is not a valid entry for nomatch. Choices are reject and keep.\n");  abort = true; }
269             
270             //didnt set anything
271                         if ((oligosfile == "") && (ecolifile == "") && (start == -1) && (end == -1)) {
272                 m->mothurOut("[ERROR]: You did not set any options. Please provide an oligos or ecoli file, or set start or end.\n"); abort = true;
273             }
274             
275             if ((oligosfile == "") && (ecolifile == "") && (start < 0) && (end == -1)) { m->mothurOut("[ERROR]: Invalid start value.\n"); abort = true; }
276             
277             if ((ecolifile != "") && (start != -1) && (end != -1)) {
278                 m->mothurOut("[ERROR]: You provided an ecoli file , but set the start or end parameters. Unsure what you intend.  When you provide the ecoli file, mothur thinks you want to use the start and end of the sequence in the ecoli file.\n"); abort = true;
279             }
280
281             
282             if ((oligosfile != "") && (ecolifile != "")) {
283                  m->mothurOut("[ERROR]: You can not use an ecoli file at the same time as an oligos file.\n"); abort = true;
284             }
285                         
286                         //check to make sure you didn't forget the name file by mistake                 
287                         if (countfile == "") { 
288                 if (namefile == "") {
289                     vector<string> files; files.push_back(fastafile);
290                     parser.getNameFile(files);
291                 }
292             }
293                 }
294         
295         }
296         catch(exception& e) {
297                 m->errorOut(e, "PcrSeqsCommand", "PcrSeqsCommand");
298                 exit(1);
299         }
300 }
301 //***************************************************************************************************************
302
303 int PcrSeqsCommand::execute(){
304         try{
305         
306                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
307                 
308         int start = time(NULL);
309         
310         string thisOutputDir = outputDir;
311                 if (outputDir == "") {  thisOutputDir += m->hasPath(fastafile);  }
312         map<string, string> variables; 
313         variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(fastafile));
314         variables["[extension]"] = m->getExtension(fastafile);
315                 string trimSeqFile = getOutputFileName("fasta",variables);
316                 outputNames.push_back(trimSeqFile); outputTypes["fasta"].push_back(trimSeqFile);
317         variables["[tag]"] = "scrap";
318         string badSeqFile = getOutputFileName("fasta",variables);
319                 
320                 
321         length = 0;
322                 if(oligosfile != ""){    readOligos();     }  if (m->control_pressed) {  return 0; }
323         if(ecolifile != "") {    readEcoli();      }  if (m->control_pressed) {  return 0; }
324         
325         vector<unsigned long long> positions; 
326         int numFastaSeqs = 0;
327 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
328         positions = m->divideFile(fastafile, processors);
329         for (int i = 0; i < (positions.size()-1); i++) {        lines.push_back(linePair(positions[i], positions[(i+1)]));      }
330 #else
331         if (processors == 1) {
332             lines.push_back(linePair(0, 1000));
333         }else {
334             positions = m->setFilePosFasta(fastafile, numFastaSeqs); 
335             if (positions.size() < processors) { processors = positions.size(); }
336             
337             //figure out how many sequences you have to process
338             int numSeqsPerProcessor = numFastaSeqs / processors;
339             for (int i = 0; i < processors; i++) {
340                 int startIndex =  i * numSeqsPerProcessor;
341                 if(i == (processors - 1)){      numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor;   }
342                 lines.push_back(linePair(positions[startIndex], numSeqsPerProcessor));
343             }
344         }
345 #endif
346         if (m->control_pressed) {  return 0; }
347
348         set<string> badNames;
349         if(processors == 1) {    numFastaSeqs = driverPcr(fastafile, trimSeqFile, badSeqFile, badNames, lines[0]);   }
350         else                {    numFastaSeqs = createProcesses(fastafile, trimSeqFile, badSeqFile, badNames);       }  
351                 
352                 if (m->control_pressed) {  return 0; }          
353         
354         //don't write or keep if blank
355         if (badNames.size() != 0)   { writeAccnos(badNames);        }   
356         if (m->isBlank(badSeqFile)) { m->mothurRemove(badSeqFile);  }
357         else { outputNames.push_back(badSeqFile); outputTypes["fasta"].push_back(badSeqFile); }
358         
359         if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]); } return 0; }
360         if (namefile != "")                     {               readName(badNames);             }   
361         if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]); } return 0; }
362         if (groupfile != "")            {               readGroup(badNames);    }
363         if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]); } return 0; }
364                 if (taxfile != "")                      {               readTax(badNames);              }
365                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]); } return 0; }
366                 if (countfile != "")                    {               readCount(badNames);            }
367                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]); } return 0; }
368       
369         m->mothurOutEndLine();
370                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
371                 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
372                 m->mothurOutEndLine();
373                 m->mothurOutEndLine();
374                 
375                 //set fasta file as new current fastafile
376                 string current = "";
377                 itTypes = outputTypes.find("fasta");
378                 if (itTypes != outputTypes.end()) {
379                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
380                 }
381                 
382                 itTypes = outputTypes.find("name");
383                 if (itTypes != outputTypes.end()) {
384                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
385                 }
386                 
387                 itTypes = outputTypes.find("group");
388                 if (itTypes != outputTypes.end()) {
389                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
390                 }
391                 
392                 itTypes = outputTypes.find("accnos");
393                 if (itTypes != outputTypes.end()) {
394                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); }
395                 }
396                 
397                 itTypes = outputTypes.find("taxonomy");
398                 if (itTypes != outputTypes.end()) {
399                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setTaxonomyFile(current); }
400                 }
401         
402         itTypes = outputTypes.find("count");
403                 if (itTypes != outputTypes.end()) {
404                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
405                 }
406         
407                 m->mothurOut("It took " + toString(time(NULL) - start) + " secs to screen " + toString(numFastaSeqs) + " sequences.");
408                 m->mothurOutEndLine();
409
410                 
411                 return 0;       
412         
413         }
414         catch(exception& e) {
415                 m->errorOut(e, "PcrSeqsCommand", "execute");
416                 exit(1);
417         }
418 }
419 /**************************************************************************************************/
420 int PcrSeqsCommand::createProcesses(string filename, string goodFileName, string badFileName, set<string>& badSeqNames) {
421         try {
422         
423         vector<int> processIDS;   
424         int process = 1;
425                 int num = 0;
426         
427 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
428         
429                 //loop through and create all the processes you want
430                 while (process != processors) {
431                         int pid = fork();
432                         
433                         if (pid > 0) {
434                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
435                                 process++;
436                         }else if (pid == 0){
437                                 num = driverPcr(filename, goodFileName + toString(getpid()) + ".temp", badFileName + toString(getpid()) + ".temp", badSeqNames, lines[process]);
438                                 
439                                 //pass numSeqs to parent
440                                 ofstream out;
441                                 string tempFile = filename + toString(getpid()) + ".num.temp";
442                                 m->openOutputFile(tempFile, out);
443                                 out << num << '\t' << badSeqNames.size() << endl;
444                 for (set<string>::iterator it = badSeqNames.begin(); it != badSeqNames.end(); it++) {
445                     out << (*it) << endl;
446                 }
447                                 out.close();
448                                 
449                                 exit(0);
450                         }else { 
451                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
452                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
453                                 exit(0);
454                         }
455                 }
456                 
457         num = driverPcr(filename, goodFileName, badFileName, badSeqNames, lines[0]);
458         
459                 //force parent to wait until all the processes are done
460                 for (int i=0;i<processIDS.size();i++) { 
461                         int temp = processIDS[i];
462                         wait(&temp);
463                 }
464                 
465                 for (int i = 0; i < processIDS.size(); i++) {
466                         ifstream in;
467                         string tempFile =  filename + toString(processIDS[i]) + ".num.temp";
468                         m->openInputFile(tempFile, in);
469             int numBadNames = 0; string name = "";
470                         if (!in.eof()) { int tempNum = 0; in >> tempNum >> numBadNames; num += tempNum; m->gobble(in); }
471             for (int j = 0; j < numBadNames; j++) {
472                 in >> name; m->gobble(in);
473                 badSeqNames.insert(name);
474             }
475                         in.close(); m->mothurRemove(tempFile);
476             
477             m->appendFiles((goodFileName + toString(processIDS[i]) + ".temp"), goodFileName);
478             m->mothurRemove((goodFileName + toString(processIDS[i]) + ".temp"));
479             
480             m->appendFiles((badFileName + toString(processIDS[i]) + ".temp"), badFileName);
481             m->mothurRemove((badFileName + toString(processIDS[i]) + ".temp"));
482                 }
483     #else
484         
485         //////////////////////////////////////////////////////////////////////////////////////////////////////
486                 //Windows version shared memory, so be careful when passing variables through the sumScreenData struct. 
487                 //Above fork() will clone, so memory is separate, but that's not the case with windows, 
488                 //Taking advantage of shared memory to allow both threads to add info to badSeqNames.
489                 //////////////////////////////////////////////////////////////////////////////////////////////////////
490                 
491                 vector<pcrData*> pDataArray; 
492                 DWORD   dwThreadIdArray[processors-1];
493                 HANDLE  hThreadArray[processors-1]; 
494                 
495                 //Create processor worker threads.
496                 for( int i=0; i<processors-1; i++ ){
497             
498             string extension = "";
499             if (i!=0) {extension += toString(i) + ".temp"; processIDS.push_back(i); }
500             
501                         // Allocate memory for thread data.
502                         pcrData* tempPcr = new pcrData(filename, goodFileName+extension, badFileName+extension, m, oligosfile, ecolifile, primers, revPrimer, nomatch, keepprimer, keepdots, start, end, length, lines[i].start, lines[i].end);
503                         pDataArray.push_back(tempPcr);
504                         
505                         //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
506                         hThreadArray[i] = CreateThread(NULL, 0, MyPcrThreadFunction, pDataArray[i], 0, &dwThreadIdArray[i]);   
507                 }
508                 
509         //do your part
510         num = driverPcr(filename, (goodFileName+toString(processors-1)+".temp"), (badFileName+toString(processors-1)+".temp"),badSeqNames, lines[processors-1]);
511         processIDS.push_back(processors-1);
512         
513                 //Wait until all threads have terminated.
514                 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
515                 
516                 //Close all thread handles and free memory allocations.
517                 for(int i=0; i < pDataArray.size(); i++){
518                         num += pDataArray[i]->count;
519             if (pDataArray[i]->count != pDataArray[i]->fend) {
520                 m->mothurOut("[ERROR]: process " + toString(i) + " only processed " + toString(pDataArray[i]->count) + " of " + toString(pDataArray[i]->fend) + " sequences assigned to it, quitting. \n"); m->control_pressed = true; 
521             }
522             for (set<string>::iterator it = pDataArray[i]->badSeqNames.begin(); it != pDataArray[i]->badSeqNames.end(); it++) { badSeqNames.insert(*it);       }
523                         CloseHandle(hThreadArray[i]);
524                         delete pDataArray[i];
525                 }
526         
527         for (int i = 0; i < processIDS.size(); i++) {
528             m->appendFiles((goodFileName + toString(processIDS[i]) + ".temp"), goodFileName);
529             m->mothurRemove((goodFileName + toString(processIDS[i]) + ".temp"));
530             
531             m->appendFiles((badFileName + toString(processIDS[i]) + ".temp"), badFileName);
532             m->mothurRemove((badFileName + toString(processIDS[i]) + ".temp"));
533                 }
534         
535 #endif  
536         
537         return num;
538         
539         }
540         catch(exception& e) {
541                 m->errorOut(e, "PcrSeqsCommand", "createProcesses");
542                 exit(1);
543         }
544 }
545
546 //**********************************************************************************************************************
547 int PcrSeqsCommand::driverPcr(string filename, string goodFasta, string badFasta, set<string>& badSeqNames, linePair filePos){
548         try {
549                 ofstream goodFile;
550                 m->openOutputFile(goodFasta, goodFile);
551         
552         ofstream badFile;
553                 m->openOutputFile(badFasta, badFile);
554                 
555                 ifstream inFASTA;
556                 m->openInputFile(filename, inFASTA);
557         
558                 inFASTA.seekg(filePos.start);
559         
560                 bool done = false;
561                 int count = 0;
562         set<int> lengths;
563         
564                 while (!done) {
565             
566                         if (m->control_pressed) {  break; }
567                         
568                         Sequence currSeq(inFASTA); m->gobble(inFASTA);
569             
570             string trashCode = "";
571                         if (currSeq.getName() != "") {
572                 
573                 bool goodSeq = true;
574                 if (oligosfile != "") {
575                     map<int, int> mapAligned;
576                     bool aligned = isAligned(currSeq.getAligned(), mapAligned);
577                     
578                     //process primers
579                     if (primers.size() != 0) {
580                         int primerStart = 0; int primerEnd = 0;
581                         bool good = findForward(currSeq, primerStart, primerEnd);
582                         
583                         if(!good){      if (nomatch == "reject") { goodSeq = false; } trashCode += "f"; }
584                         else{
585                             //are you aligned
586                             if (aligned) { 
587                                 if (!keepprimer)    {  
588                                     if (keepdots)   { currSeq.filterToPos(mapAligned[primerEnd]);   }
589                                     else            { currSeq.setAligned(currSeq.getAligned().substr(mapAligned[primerEnd]));                                              }
590                                 } 
591                                 else                {  
592                                     if (keepdots)   { currSeq.filterToPos(mapAligned[primerStart]);  }
593                                     else            { currSeq.setAligned(currSeq.getAligned().substr(mapAligned[primerStart]));                                              }
594                                 }
595                             }else { 
596                                 if (!keepprimer)    { currSeq.setAligned(currSeq.getUnaligned().substr(primerEnd)); } 
597                                 else                { currSeq.setAligned(currSeq.getUnaligned().substr(primerStart)); } 
598                             }
599                         }
600                     }
601                     
602                     //process reverse primers
603                     if (revPrimer.size() != 0) {
604                         int primerStart = 0; int primerEnd = 0;
605                         bool good = findReverse(currSeq, primerStart, primerEnd);
606                         if(!good){      if (nomatch == "reject") { goodSeq = false; } trashCode += "r"; }
607                         else{ 
608                             //are you aligned
609                             if (aligned) { 
610                                 if (!keepprimer)    {  
611                                     if (keepdots)   { currSeq.filterFromPos(mapAligned[primerStart]); }
612                                     else            { currSeq.setAligned(currSeq.getAligned().substr(0, mapAligned[primerStart]));   }
613                                 } 
614                                 else                {  
615                                     if (keepdots)   { currSeq.filterFromPos(mapAligned[primerEnd]); }
616                                     else            { currSeq.setAligned(currSeq.getAligned().substr(0, mapAligned[primerEnd]));   }
617                                 } 
618                             }
619                             else { 
620                                 if (!keepprimer)    { currSeq.setAligned(currSeq.getUnaligned().substr(0, primerStart));   } 
621                                 else                { currSeq.setAligned(currSeq.getUnaligned().substr(0, primerEnd));     }
622                             }
623                         }
624                     }
625                 }else if (ecolifile != "") {
626                     //make sure the seqs are aligned
627                     lengths.insert(currSeq.getAligned().length());
628                     if (lengths.size() > 1) { m->mothurOut("[ERROR]: seqs are not aligned. When using start and end your sequences must be aligned.\n"); m->control_pressed = true; break; }
629                     else if (currSeq.getAligned().length() != length) {
630                         m->mothurOut("[ERROR]: seqs are not the same length as ecoli seq. When using ecoli option your sequences must be aligned and the same length as the ecoli sequence.\n"); m->control_pressed = true; break; 
631                     }else {
632                         if (keepdots)   { 
633                             currSeq.filterToPos(start); 
634                             currSeq.filterFromPos(end);
635                         }else {
636                             string seqString = currSeq.getAligned().substr(0, end);
637                             seqString = seqString.substr(start);
638                             currSeq.setAligned(seqString); 
639                         }
640                     }
641                 }else{ //using start and end to trim
642                     //make sure the seqs are aligned
643                     lengths.insert(currSeq.getAligned().length());
644                     if (lengths.size() > 1) { m->mothurOut("[ERROR]: seqs are not aligned. When using start and end your sequences must be aligned.\n"); m->control_pressed = true; break; }
645                     else {
646                         if (end != -1) {
647                             if (end > currSeq.getAligned().length()) {  m->mothurOut("[ERROR]: end is longer than your sequence length, aborting.\n"); m->control_pressed = true; break; }
648                             else {
649                                 if (keepdots)   { currSeq.filterFromPos(end); }
650                                 else {
651                                     string seqString = currSeq.getAligned().substr(0, end);
652                                     currSeq.setAligned(seqString); 
653                                 }
654                             }
655                         }
656                         if (start != -1) { 
657                             if (keepdots)   {  currSeq.filterToPos(start);  }
658                             else {
659                                 string seqString = currSeq.getAligned().substr(start);
660                                 currSeq.setAligned(seqString); 
661                             }
662                         }
663                     }
664                 }
665                 
666                 //trimming removed all bases
667                 if (currSeq.getUnaligned() == "") { goodSeq = false; }
668                 
669                                 if(goodSeq == 1)    {   currSeq.printSequence(goodFile);        }
670                                 else {  
671                     badSeqNames.insert(currSeq.getName()); 
672                     currSeq.setName(currSeq.getName() + '|' + trashCode);
673                     currSeq.printSequence(badFile); 
674                 }
675                 count++;
676                         }
677                         
678 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
679             unsigned long long pos = inFASTA.tellg();
680             if ((pos == -1) || (pos >= filePos.end)) { break; }
681 #else
682             if (inFASTA.eof()) { break; }
683 #endif
684                         
685                         //report progress
686                         if((count) % 100 == 0){ m->mothurOut("Processing sequence: " + toString(count)); m->mothurOutEndLine();         }
687                 }
688                 //report progress
689                 if((count) % 100 != 0){ m->mothurOut("Processing sequence: " + toString(count)); m->mothurOutEndLine();         }
690                 
691         badFile.close();
692                 goodFile.close();
693                 inFASTA.close();
694                 
695                 return count;
696         }
697         catch(exception& e) {
698                 m->errorOut(e, "PcrSeqsCommand", "driverPcr");
699                 exit(1);
700         }
701 }
702 //********************************************************************/
703 bool PcrSeqsCommand::findForward(Sequence& seq, int& primerStart, int& primerEnd){
704         try {
705                 
706                 string rawSequence = seq.getUnaligned();
707                 
708                 for(int j=0;j<primers.size();j++){
709                         string oligo = primers[j];
710                         
711                         if(rawSequence.length() < oligo.length()) {  break;  }
712                         
713                         //search for primer
714             int olength = oligo.length();
715             for (int j = 0; j < rawSequence.length()-olength; j++){
716                 if (m->control_pressed) {  primerStart = 0; primerEnd = 0; return false; }
717                 string rawChunk = rawSequence.substr(j, olength);
718                 if(compareDNASeq(oligo, rawChunk)) {
719                     primerStart = j;
720                     primerEnd = primerStart + olength;
721                     return true;
722                 }
723                 
724             }
725         }       
726                 
727         primerStart = 0; primerEnd = 0;
728                 return false;
729                 
730         }
731         catch(exception& e) {
732                 m->errorOut(e, "TrimOligos", "stripForward");
733                 exit(1);
734         }
735 }
736 //******************************************************************/
737 bool PcrSeqsCommand::findReverse(Sequence& seq, int& primerStart, int& primerEnd){
738         try {
739                 
740                 string rawSequence = seq.getUnaligned();
741                 
742                 for(int i=0;i<revPrimer.size();i++){
743                         string oligo = revPrimer[i];
744                         if(rawSequence.length() < oligo.length()) {  break;  }
745                         
746                         //search for primer
747             int olength = oligo.length();
748             for (int j = rawSequence.length()-olength; j >= 0; j--){
749                  if (m->control_pressed) {  primerStart = 0; primerEnd = 0; return false; }
750                 string rawChunk = rawSequence.substr(j, olength);
751             
752                 if(compareDNASeq(oligo, rawChunk)) {
753                     primerStart = j;
754                     primerEnd = primerStart + olength;
755                     return true;
756                 }
757                 
758             }
759                 }       
760                 
761         primerStart = 0; primerEnd = 0;
762                 return false;
763         }
764         catch(exception& e) {
765                 m->errorOut(e, "PcrSeqsCommand", "findReverse");
766                 exit(1);
767         }
768 }
769 //********************************************************************/
770 bool PcrSeqsCommand::isAligned(string seq, map<int, int>& aligned){
771         try {
772         bool isAligned = false;
773         
774         int countBases = 0;
775         for (int i = 0; i < seq.length(); i++) {
776             if (!isalpha(seq[i])) { isAligned = true; }
777             else { aligned[countBases] = i; countBases++; } //maps location in unaligned -> location in aligned.
778         }                                                   //ie. the 3rd base may be at spot 10 in the alignment
779                                                             //later when we trim we want to trim from spot 10.
780         return isAligned;
781     }
782         catch(exception& e) {
783                 m->errorOut(e, "PcrSeqsCommand", "isAligned");
784                 exit(1);
785         }
786 }
787 //********************************************************************/
788 string PcrSeqsCommand::reverseOligo(string oligo){
789         try {
790         string reverse = "";
791        
792         for(int i=oligo.length()-1;i>=0;i--){
793             
794             if(oligo[i] == 'A')         {       reverse += 'T'; }
795             else if(oligo[i] == 'T'){   reverse += 'A'; }
796             else if(oligo[i] == 'U'){   reverse += 'A'; }
797             
798             else if(oligo[i] == 'G'){   reverse += 'C'; }
799             else if(oligo[i] == 'C'){   reverse += 'G'; }
800             
801             else if(oligo[i] == 'R'){   reverse += 'Y'; }
802             else if(oligo[i] == 'Y'){   reverse += 'R'; }
803             
804             else if(oligo[i] == 'M'){   reverse += 'K'; }
805             else if(oligo[i] == 'K'){   reverse += 'M'; }
806             
807             else if(oligo[i] == 'W'){   reverse += 'W'; }
808             else if(oligo[i] == 'S'){   reverse += 'S'; }
809             
810             else if(oligo[i] == 'B'){   reverse += 'V'; }
811             else if(oligo[i] == 'V'){   reverse += 'B'; }
812             
813             else if(oligo[i] == 'D'){   reverse += 'H'; }
814             else if(oligo[i] == 'H'){   reverse += 'D'; }
815
816             else                                                {       reverse += 'N'; }
817         }
818
819         
820         return reverse;
821     }
822         catch(exception& e) {
823                 m->errorOut(e, "PcrSeqsCommand", "reverseOligo");
824                 exit(1);
825         }
826 }
827
828 //***************************************************************************************************************
829 bool PcrSeqsCommand::readOligos(){
830         try {
831                 ifstream inOligos;
832                 m->openInputFile(oligosfile, inOligos);
833                 
834                 string type, oligo, group;
835                 
836                 while(!inOligos.eof()){
837             
838                         inOligos >> type; 
839             
840                         if(type[0] == '#'){ //ignore
841                                 while (!inOligos.eof()) {       char c = inOligos.get();  if (c == 10 || c == 13){      break;  }       } // get rest of line if there's any crap there
842                                 m->gobble(inOligos);
843                         }else{
844                                 m->gobble(inOligos);
845                                 //make type case insensitive
846                                 for(int i=0;i<type.length();i++){       type[i] = toupper(type[i]);  }
847                                 
848                                 inOligos >> oligo;
849                                 
850                                 for(int i=0;i<oligo.length();i++){
851                                         oligo[i] = toupper(oligo[i]);
852                                         if(oligo[i] == 'U')     {       oligo[i] = 'T'; }
853                                 }
854                                 
855                                 if(type == "FORWARD"){
856                                         // get rest of line in case there is a primer name
857                                         while (!inOligos.eof()) {       
858                         char c = inOligos.get(); 
859                         if (c == 10 || c == 13){        break;  } 
860                         else if (c == 32 || c == 9){;} //space or tab
861                                         } 
862                                         primers.push_back(oligo);
863                 }else if(type == "REVERSE"){
864                     string oligoRC = reverseOligo(oligo);
865                     revPrimer.push_back(oligoRC);
866                     //cout << "oligo = " << oligo << " reverse = " << oligoRC << endl;
867                                 }else if(type == "BARCODE"){
868                                         inOligos >> group;
869                                 }else if((type == "LINKER")||(type == "SPACER")) {;}
870                                 else{   m->mothurOut(type + " is not recognized as a valid type. Choices are forward, reverse, linker, spacer and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine(); m->control_pressed = true; }
871                         }
872                         m->gobble(inOligos);
873                 }       
874                 inOligos.close();
875                 
876                 if ((primers.size() == 0) && (revPrimer.size() == 0)) {
877                         m->mothurOut("[ERROR]: your oligos file does not contain valid primers or reverse primers.  Please correct."); m->mothurOutEndLine();
878             m->control_pressed = true;
879                         return false;
880                 }
881         
882         return true;
883         
884     }catch(exception& e) {
885                 m->errorOut(e, "PcrSeqsCommand", "readOligos");
886                 exit(1);
887         }
888 }
889 //***************************************************************************************************************
890 bool PcrSeqsCommand::readEcoli(){
891         try {
892                 ifstream in;
893                 m->openInputFile(ecolifile, in);
894                 
895         //read seq
896         if (!in.eof()){ 
897             Sequence ecoli(in); 
898             length = ecoli.getAligned().length();
899             start = ecoli.getStartPos();
900             end = ecoli.getEndPos();
901         }else { in.close(); m->control_pressed = true; return false; }
902         in.close();    
903                         
904         return true;
905     }
906         catch(exception& e) {
907                 m->errorOut(e, "PcrSeqsCommand", "readEcoli");
908                 exit(1);
909         }
910     
911 }
912 //***************************************************************************************************************
913 int PcrSeqsCommand::writeAccnos(set<string> badNames){
914         try {
915                 string thisOutputDir = outputDir;
916                 if (outputDir == "") {  thisOutputDir += m->hasPath(fastafile);  }
917         map<string, string> variables; 
918                 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(fastafile));
919                 string outputFileName = getOutputFileName("accnos",variables);
920         outputNames.push_back(outputFileName); outputTypes["accnos"].push_back(outputFileName);
921         
922         ofstream out;
923         m->openOutputFile(outputFileName, out);
924         
925         for (set<string>::iterator it = badNames.begin(); it != badNames.end(); it++) {
926             if (m->control_pressed) { break; }
927             out << (*it) << endl;
928         }
929         
930         out.close();
931         return 0;
932     }
933         catch(exception& e) {
934                 m->errorOut(e, "PcrSeqsCommand", "writeAccnos");
935                 exit(1);
936         }
937     
938 }
939 //******************************************************************/
940 bool PcrSeqsCommand::compareDNASeq(string oligo, string seq){
941         try {
942                 bool success = 1;
943                 int length = oligo.length();
944                 
945                 for(int i=0;i<length;i++){
946                         
947                         if(oligo[i] != seq[i]){
948                                 if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C')    {       success = 0;    }
949                                 else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N'))                                {       success = 0;    }
950                                 else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G'))                                    {       success = 0;    }
951                                 else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T'))                                    {       success = 0;    }
952                                 else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A'))                                    {       success = 0;    }
953                                 else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G'))                                    {       success = 0;    }
954                                 else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A'))                                    {       success = 0;    }
955                                 else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G'))                                    {       success = 0;    }
956                                 else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G'))   {       success = 0;    }
957                                 else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G'))   {       success = 0;    }
958                                 else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C'))   {       success = 0;    }
959                                 else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G'))   {       success = 0;    }                       
960                                 
961                                 if(success == 0)        {       break;   }
962                         }
963                         else{
964                                 success = 1;
965                         }
966                 }
967                 
968                 return success;
969         }
970         catch(exception& e) {
971                 m->errorOut(e, "PcrSeqsCommand", "compareDNASeq");
972                 exit(1);
973         }
974         
975 }
976 //***************************************************************************************************************
977 int PcrSeqsCommand::readName(set<string>& names){
978         try {
979                 string thisOutputDir = outputDir;
980                 if (outputDir == "") {  thisOutputDir += m->hasPath(namefile);  }
981                 map<string, string> variables; 
982                 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(namefile));
983         variables["[extension]"] = m->getExtension(namefile);
984                 string outputFileName = getOutputFileName("name", variables);
985         
986                 ofstream out;
987                 m->openOutputFile(outputFileName, out);
988         
989                 ifstream in;
990                 m->openInputFile(namefile, in);
991                 string name, firstCol, secondCol;
992                 
993                 bool wroteSomething = false;
994                 int removedCount = 0;
995                 
996                 while(!in.eof()){
997                         if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
998                         
999                         in >> firstCol;         m->gobble(in);          
1000                         in >> secondCol;                        
1001                         
1002             string savedSecond = secondCol;
1003                         vector<string> parsedNames;
1004                         m->splitAtComma(secondCol, parsedNames);
1005                         
1006                         vector<string> validSecond;  validSecond.clear();
1007                         for (int i = 0; i < parsedNames.size(); i++) {
1008                                 if (names.count(parsedNames[i]) == 0) {
1009                                         validSecond.push_back(parsedNames[i]);
1010                                 }
1011                         }
1012                         
1013                         if (validSecond.size() != parsedNames.size()) {  //we want to get rid of someone, so get rid of everyone
1014                                 for (int i = 0; i < parsedNames.size(); i++) {  names.insert(parsedNames[i]);  }
1015                                 removedCount += parsedNames.size();
1016                         }else {
1017                 out << firstCol << '\t' << savedSecond << endl;
1018                 wroteSomething = true;
1019             }
1020                         m->gobble(in);
1021                 }
1022                 in.close();
1023                 out.close();
1024                 
1025                 if (wroteSomething == false) {  m->mothurOut("Your file contains only sequences from the .accnos file."); m->mothurOutEndLine();  }
1026                 outputTypes["name"].push_back(outputFileName); outputNames.push_back(outputFileName);
1027                 
1028                 m->mothurOut("Removed " + toString(removedCount) + " sequences from your name file."); m->mothurOutEndLine();
1029                 
1030                 return 0;
1031         }
1032         catch(exception& e) {
1033                 m->errorOut(e, "PcrSeqsCommand", "readName");
1034                 exit(1);
1035         }
1036 }
1037 //**********************************************************************************************************************
1038 int PcrSeqsCommand::readGroup(set<string> names){
1039         try {
1040                 string thisOutputDir = outputDir;
1041                 if (outputDir == "") {  thisOutputDir += m->hasPath(groupfile);  }
1042                 map<string, string> variables; 
1043                 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(groupfile));
1044         variables["[extension]"] = m->getExtension(groupfile);
1045                 string outputFileName = getOutputFileName("group", variables);
1046                 
1047                 ofstream out;
1048                 m->openOutputFile(outputFileName, out);
1049         
1050                 ifstream in;
1051                 m->openInputFile(groupfile, in);
1052                 string name, group;
1053                 
1054                 bool wroteSomething = false;
1055                 int removedCount = 0;
1056                 
1057                 while(!in.eof()){
1058                         if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
1059                         
1060                         in >> name;                             //read from first column
1061                         in >> group;                    //read from second column
1062                         
1063                         //if this name is in the accnos file
1064                         if (names.count(name) == 0) {
1065                                 wroteSomething = true;
1066                                 out << name << '\t' << group << endl;
1067                         }else {  removedCount++;  }
1068             
1069                         m->gobble(in);
1070                 }
1071                 in.close();
1072                 out.close();
1073                 
1074                 if (wroteSomething == false) {  m->mothurOut("Your file contains only sequences from the .accnos file."); m->mothurOutEndLine();  }
1075                 outputTypes["group"].push_back(outputFileName); outputNames.push_back(outputFileName);
1076                 
1077                 m->mothurOut("Removed " + toString(removedCount) + " sequences from your group file."); m->mothurOutEndLine();
1078         
1079                 
1080                 return 0;
1081         }
1082         catch(exception& e) {
1083                 m->errorOut(e, "PcrSeqsCommand", "readGroup");
1084                 exit(1);
1085         }
1086 }
1087 //**********************************************************************************************************************
1088 int PcrSeqsCommand::readTax(set<string> names){
1089         try {
1090                 string thisOutputDir = outputDir;
1091                 if (outputDir == "") {  thisOutputDir += m->hasPath(taxfile);  }
1092                 map<string, string> variables; 
1093                 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(taxfile));
1094         variables["[extension]"] = m->getExtension(taxfile);
1095                 string outputFileName = getOutputFileName("taxonomy", variables);
1096
1097                 ofstream out;
1098                 m->openOutputFile(outputFileName, out);
1099         
1100                 ifstream in;
1101                 m->openInputFile(taxfile, in);
1102                 string name, tax;
1103                 
1104                 bool wroteSomething = false;
1105                 int removedCount = 0;
1106                 
1107                 while(!in.eof()){
1108                         if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
1109                         
1110                         in >> name;                             //read from first column
1111                         in >> tax;                      //read from second column
1112                         
1113                         //if this name is in the accnos file
1114                         if (names.count(name) == 0) {
1115                                 wroteSomething = true;
1116                                 out << name << '\t' << tax << endl;
1117                         }else {  removedCount++;  }
1118             
1119                         m->gobble(in);
1120                 }
1121                 in.close();
1122                 out.close();
1123                 
1124                 if (wroteSomething == false) {  m->mothurOut("Your file contains only sequences from the .accnos file."); m->mothurOutEndLine();  }
1125                 outputTypes["taxonomy"].push_back(outputFileName); outputNames.push_back(outputFileName);
1126                 
1127                 m->mothurOut("Removed " + toString(removedCount) + " sequences from your taxonomy file."); m->mothurOutEndLine();
1128                 
1129                 return 0;
1130         }
1131         catch(exception& e) {
1132                 m->errorOut(e, "PcrSeqsCommand", "readTax");
1133                 exit(1);
1134         }
1135 }
1136 //***************************************************************************************************************
1137 int PcrSeqsCommand::readCount(set<string> badSeqNames){
1138         try {
1139                 ifstream in;
1140                 m->openInputFile(countfile, in);
1141                 set<string>::iterator it;
1142                 
1143                 map<string, string> variables; 
1144                 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(countfile));
1145         variables["[extension]"] = m->getExtension(countfile);
1146                 string goodCountFile = getOutputFileName("count", variables);
1147
1148         outputNames.push_back(goodCountFile);  outputTypes["count"].push_back(goodCountFile);
1149                 ofstream goodCountOut;  m->openOutputFile(goodCountFile, goodCountOut);
1150                 
1151         string headers = m->getline(in); m->gobble(in);
1152         goodCountOut << headers << endl;
1153         
1154         string name, rest; int thisTotal, removedCount; removedCount = 0;
1155         bool wroteSomething = false;
1156         while (!in.eof()) {
1157             
1158                         if (m->control_pressed) { goodCountOut.close(); in.close(); m->mothurRemove(goodCountFile); return 0; }
1159             
1160                         in >> name; m->gobble(in); 
1161             in >> thisTotal; m->gobble(in);
1162             rest = m->getline(in); m->gobble(in);
1163             
1164                         if (badSeqNames.count(name) != 0) { removedCount+=thisTotal; }
1165                         else{
1166                 wroteSomething = true;
1167                                 goodCountOut << name << '\t' << thisTotal << '\t' << rest << endl;
1168                         }
1169                 }
1170                 in.close();
1171                 goodCountOut.close();
1172         
1173         if (m->control_pressed) { m->mothurRemove(goodCountFile);   }
1174         
1175         if (wroteSomething == false) {  m->mothurOut("Your count file contains only sequences from the .accnos file."); m->mothurOutEndLine(); }
1176         
1177         //check for groups that have been eliminated
1178         CountTable ct;
1179         if (ct.testGroups(goodCountFile)) {
1180             ct.readTable(goodCountFile);
1181             ct.printTable(goodCountFile);
1182         }
1183                 
1184                 if (m->control_pressed) { m->mothurRemove(goodCountFile);   }
1185         
1186         m->mothurOut("Removed " + toString(removedCount) + " sequences from your count file."); m->mothurOutEndLine();
1187
1188                 
1189                 return 0;
1190         
1191         }
1192         catch(exception& e) {
1193                 m->errorOut(e, "PcrSeqsCommand", "readCOunt");
1194                 exit(1);
1195         }
1196 }
1197 /**************************************************************************************/
1198
1199