]> git.donarmstrong.com Git - mothur.git/blob - prcseqscommand.cpp
changes while testing
[mothur.git] / prcseqscommand.cpp
1 //
2 //  prcseqscommand.cpp
3 //  Mothur
4 //
5 //  Created by Sarah Westcott on 3/14/12.
6 //  Copyright (c) 2012 Schloss Lab. All rights reserved.
7 //
8
9 #include "pcrseqscommand.h"
10
11 //**********************************************************************************************************************
12 vector<string> PcrSeqsCommand::setParameters(){ 
13         try {
14                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","fasta",false,true,true); parameters.push_back(pfasta);
15                 CommandParameter poligos("oligos", "InputTypes", "", "", "ecolioligos", "none", "none","",false,false,true); parameters.push_back(poligos);
16         CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none","name",false,false,true); parameters.push_back(pname);
17         CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none","count",false,false,true); parameters.push_back(pcount);
18                 CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none","group",false,false,true); parameters.push_back(pgroup);
19         CommandParameter ptax("taxonomy", "InputTypes", "", "", "none", "none", "none","taxonomy",false,false,true); parameters.push_back(ptax);
20         CommandParameter pecoli("ecoli", "InputTypes", "", "", "ecolioligos", "none", "none","",false,false); parameters.push_back(pecoli);
21                 CommandParameter pstart("start", "Number", "", "-1", "", "", "","",false,false); parameters.push_back(pstart);
22                 CommandParameter pend("end", "Number", "", "-1", "", "", "","",false,false); parameters.push_back(pend);
23                 CommandParameter pnomatch("nomatch", "Multiple", "reject-keep", "reject", "", "", "","",false,false); parameters.push_back(pnomatch);
24         CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "","",false,false,true); parameters.push_back(ppdiffs);
25
26                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
27                 CommandParameter pkeepprimer("keepprimer", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pkeepprimer);
28         CommandParameter pkeepdots("keepdots", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pkeepdots);
29         CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
30                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
31         
32                 vector<string> myArray;
33                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
34                 return myArray;
35         }
36         catch(exception& e) {
37                 m->errorOut(e, "PcrSeqsCommand", "setParameters");
38                 exit(1);
39         }
40 }
41 //**********************************************************************************************************************
42 string PcrSeqsCommand::getHelpString(){ 
43         try {
44                 string helpString = "";
45                 helpString += "The pcr.seqs command reads a fasta file.\n";
46         helpString += "The pcr.seqs command parameters are fasta, oligos, name, group, count, taxonomy, ecoli, start, end, nomatch, pdiffs, processors, keepprimer and keepdots.\n";
47                 helpString += "The ecoli parameter is used to provide a fasta file containing a single reference sequence (e.g. for e. coli) this must be aligned. Mothur will trim to the start and end positions of the reference sequence.\n";
48         helpString += "The start parameter allows you to provide a starting position to trim to.\n";
49         helpString += "The end parameter allows you to provide a ending position to trim from.\n";
50         helpString += "The nomatch parameter allows you to decide what to do with sequences where the primer is not found. Default=reject, meaning remove from fasta file.  if nomatch=true, then do nothing to sequence.\n";
51         helpString += "The processors parameter allows you to use multiple processors.\n";
52         helpString += "The keepprimer parameter allows you to keep the primer, default=false.\n";
53         helpString += "The keepdots parameter allows you to keep the leading and trailing .'s, default=true.\n";
54         helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";
55                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
56                 helpString += "For more details please check out the wiki http://www.mothur.org/wiki/Pcr.seqs .\n";
57                 return helpString;
58         }
59         catch(exception& e) {
60                 m->errorOut(e, "PcrSeqsCommand", "getHelpString");
61                 exit(1);
62         }
63 }
64 //**********************************************************************************************************************
65 string PcrSeqsCommand::getOutputPattern(string type) {
66     try {
67         string pattern = "";
68         
69         if (type == "fasta")            {   pattern = "[filename],pcr,[extension]-[filename],[tag],pcr,[extension]";    }
70         else if (type == "taxonomy")    {   pattern = "[filename],pcr,[extension]";    }
71         else if (type == "name")        {   pattern = "[filename],pcr,[extension]";    }
72         else if (type == "group")       {   pattern = "[filename],pcr,[extension]";    }
73         else if (type == "count")       {   pattern = "[filename],pcr,[extension]";    }
74         else if (type == "accnos")      {   pattern = "[filename],bad.accnos";    }
75         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
76         
77         return pattern;
78     }
79     catch(exception& e) {
80         m->errorOut(e, "PcrSeqsCommand", "getOutputPattern");
81         exit(1);
82     }
83 }
84 //**********************************************************************************************************************
85
86 PcrSeqsCommand::PcrSeqsCommand(){       
87         try {
88                 abort = true; calledHelp = true; 
89                 setParameters();
90                 vector<string> tempOutNames;
91                 outputTypes["fasta"] = tempOutNames;
92                 outputTypes["taxonomy"] = tempOutNames;
93                 outputTypes["group"] = tempOutNames;
94                 outputTypes["name"] = tempOutNames;
95         outputTypes["count"] = tempOutNames;
96         outputTypes["accnos"] = tempOutNames;
97         }
98         catch(exception& e) {
99                 m->errorOut(e, "PcrSeqsCommand", "PcrSeqsCommand");
100                 exit(1);
101         }
102 }
103 //***************************************************************************************************************
104
105 PcrSeqsCommand::PcrSeqsCommand(string option)  {
106         try {
107                 
108                 abort = false; calledHelp = false;   
109                 
110                 //allow user to run help
111                 if(option == "help") { help(); abort = true; calledHelp = true; }
112                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
113                 
114                 else {
115                         vector<string> myArray = setParameters();
116                         
117                         OptionParser parser(option);
118                         map<string,string> parameters = parser.getParameters();
119                         
120                         ValidParameters validParameter;
121                         map<string,string>::iterator it;
122                         
123                         //check to make sure all parameters are valid for command
124                         for (it = parameters.begin(); it != parameters.end(); it++) { 
125                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
126                         }
127                         
128                         //initialize outputTypes
129                         vector<string> tempOutNames;
130                         outputTypes["fasta"] = tempOutNames;
131                         outputTypes["taxonomy"] = tempOutNames;
132                         outputTypes["group"] = tempOutNames;
133                         outputTypes["name"] = tempOutNames;
134             outputTypes["accnos"] = tempOutNames;
135             outputTypes["count"] = tempOutNames;
136                         
137                         //if the user changes the input directory command factory will send this info to us in the output parameter 
138                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
139                         if (inputDir == "not found"){   inputDir = "";          }
140                         else {
141                                 string path;
142                                 it = parameters.find("fasta");
143                                 //user has given a template file
144                                 if(it != parameters.end()){ 
145                                         path = m->hasPath(it->second);
146                                         //if the user has not given a path then, add inputdir. else leave path alone.
147                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
148                                 }
149                                 
150                                 it = parameters.find("oligos");
151                                 //user has given a template file
152                                 if(it != parameters.end()){ 
153                                         path = m->hasPath(it->second);
154                                         //if the user has not given a path then, add inputdir. else leave path alone.
155                                         if (path == "") {       parameters["oligos"] = inputDir + it->second;           }
156                                 }
157                 
158                 it = parameters.find("ecoli");
159                                 //user has given a template file
160                                 if(it != parameters.end()){ 
161                                         path = m->hasPath(it->second);
162                                         //if the user has not given a path then, add inputdir. else leave path alone.
163                                         if (path == "") {       parameters["ecoli"] = inputDir + it->second;            }
164                                 }
165                                 
166                                 it = parameters.find("taxonomy");
167                                 //user has given a template file
168                                 if(it != parameters.end()){ 
169                                         path = m->hasPath(it->second);
170                                         //if the user has not given a path then, add inputdir. else leave path alone.
171                                         if (path == "") {       parameters["taxonomy"] = inputDir + it->second;         }
172                                 }
173                                 
174                                 it = parameters.find("name");
175                                 //user has given a template file
176                                 if(it != parameters.end()){ 
177                                         path = m->hasPath(it->second);
178                                         //if the user has not given a path then, add inputdir. else leave path alone.
179                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
180                                 }
181                 
182                 it = parameters.find("group");
183                                 //user has given a template file
184                                 if(it != parameters.end()){ 
185                                         path = m->hasPath(it->second);
186                                         //if the user has not given a path then, add inputdir. else leave path alone.
187                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
188                                 }
189                 
190                 it = parameters.find("count");
191                                 //user has given a template file
192                                 if(it != parameters.end()){ 
193                                         path = m->hasPath(it->second);
194                                         //if the user has not given a path then, add inputdir. else leave path alone.
195                                         if (path == "") {       parameters["count"] = inputDir + it->second;            }
196                                 }
197                                 
198                         }
199             
200                         
201                         //check for required parameters
202                         fastafile = validParameter.validFile(parameters, "fasta", true);
203                         if (fastafile == "not found") {                                 
204                                 fastafile = m->getFastaFile(); 
205                                 if (fastafile != "") { m->mothurOut("Using " + fastafile + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
206                                 else {  m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
207                         }else if (fastafile == "not open") { fastafile = ""; abort = true; }    
208                         else { m->setFastaFile(fastafile); }
209                         
210             //if the user changes the output directory command factory will send this info to us in the output parameter 
211                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = m->hasPath(fastafile);      }
212
213                         //check for optional parameter and set defaults
214                         // ...at some point should added some additional type checking...
215                         string temp;
216                         temp = validParameter.validFile(parameters, "keepprimer", false);  if (temp == "not found")    {        temp = "f";     }
217                         keepprimer = m->isTrue(temp);   
218             
219             temp = validParameter.validFile(parameters, "keepdots", false);  if (temp == "not found")    {      temp = "t";     }
220                         keepdots = m->isTrue(temp);     
221             
222                         temp = validParameter.validFile(parameters, "oligos", true);
223                         if (temp == "not found"){       oligosfile = "";                }
224                         else if(temp == "not open"){    oligosfile = ""; abort = true;  } 
225                         else                                    {       oligosfile = temp; m->setOligosFile(oligosfile);                }
226                         
227             ecolifile = validParameter.validFile(parameters, "ecoli", true);
228                         if (ecolifile == "not found"){  ecolifile = "";         }
229                         else if(ecolifile == "not open"){       ecolifile = ""; abort = true;   } 
230                         
231             namefile = validParameter.validFile(parameters, "name", true);
232                         if (namefile == "not found"){   namefile = "";          }
233                         else if(namefile == "not open"){        namefile = ""; abort = true;    } 
234             else { m->setNameFile(namefile); }
235             
236             groupfile = validParameter.validFile(parameters, "group", true);
237                         if (groupfile == "not found"){  groupfile = "";         }
238                         else if(groupfile == "not open"){       groupfile = ""; abort = true;   } 
239             else { m->setGroupFile(groupfile); }
240             
241             countfile = validParameter.validFile(parameters, "count", true);
242                         if (countfile == "not open") { countfile = ""; abort = true; }
243                         else if (countfile == "not found") { countfile = "";  } 
244                         else { m->setCountTableFile(countfile); }
245             
246             if ((namefile != "") && (countfile != "")) {
247                 m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
248             }
249                         
250             if ((groupfile != "") && (countfile != "")) {
251                 m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
252             }
253             
254             taxfile = validParameter.validFile(parameters, "taxonomy", true);
255                         if (taxfile == "not found"){    taxfile = "";           }
256                         else if(taxfile == "not open"){ taxfile = ""; abort = true;     } 
257             else { m->setTaxonomyFile(taxfile); }
258                                                 
259                         temp = validParameter.validFile(parameters, "start", false);    if (temp == "not found") { temp = "-1"; }
260                         m->mothurConvert(temp, start);
261             
262             temp = validParameter.validFile(parameters, "end", false);  if (temp == "not found") { temp = "-1"; }
263                         m->mothurConvert(temp, end);
264                         
265                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
266                         m->setProcessors(temp);
267                         m->mothurConvert(temp, processors); 
268             
269             temp = validParameter.validFile(parameters, "pdiffs", false);               if (temp == "not found") { temp = "0"; }
270                         m->mothurConvert(temp, pdiffs);
271                         
272             nomatch = validParameter.validFile(parameters, "nomatch", false);   if (nomatch == "not found") { nomatch = "reject"; }
273                         
274             if ((nomatch != "reject") && (nomatch != "keep")) { m->mothurOut("[ERROR]: " + nomatch + " is not a valid entry for nomatch. Choices are reject and keep.\n");  abort = true; }
275             
276             //didnt set anything
277                         if ((oligosfile == "") && (ecolifile == "") && (start == -1) && (end == -1)) {
278                 m->mothurOut("[ERROR]: You did not set any options. Please provide an oligos or ecoli file, or set start or end.\n"); abort = true;
279             }
280             
281             if ((oligosfile == "") && (ecolifile == "") && (start < 0) && (end == -1)) { m->mothurOut("[ERROR]: Invalid start value.\n"); abort = true; }
282             
283             if ((ecolifile != "") && (start != -1) && (end != -1)) {
284                 m->mothurOut("[ERROR]: You provided an ecoli file , but set the start or end parameters. Unsure what you intend.  When you provide the ecoli file, mothur thinks you want to use the start and end of the sequence in the ecoli file.\n"); abort = true;
285             }
286
287             
288             if ((oligosfile != "") && (ecolifile != "")) {
289                  m->mothurOut("[ERROR]: You can not use an ecoli file at the same time as an oligos file.\n"); abort = true;
290             }
291                         
292                         //check to make sure you didn't forget the name file by mistake                 
293                         if (countfile == "") { 
294                 if (namefile == "") {
295                     vector<string> files; files.push_back(fastafile);
296                     parser.getNameFile(files);
297                 }
298             }
299                 }
300         
301         }
302         catch(exception& e) {
303                 m->errorOut(e, "PcrSeqsCommand", "PcrSeqsCommand");
304                 exit(1);
305         }
306 }
307 //***************************************************************************************************************
308
309 int PcrSeqsCommand::execute(){
310         try{
311         
312                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
313                 
314         int start = time(NULL);
315         fileAligned = true;
316         
317         string thisOutputDir = outputDir;
318                 if (outputDir == "") {  thisOutputDir += m->hasPath(fastafile);  }
319         map<string, string> variables; 
320         variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(fastafile));
321         variables["[extension]"] = m->getExtension(fastafile);
322                 string trimSeqFile = getOutputFileName("fasta",variables);
323                 outputNames.push_back(trimSeqFile); outputTypes["fasta"].push_back(trimSeqFile);
324         variables["[tag]"] = "scrap";
325         string badSeqFile = getOutputFileName("fasta",variables);
326                 
327                 
328         length = 0;
329                 if(oligosfile != ""){    readOligos();     if (m->debug) { m->mothurOut("[DEBUG]: read oligos file. numprimers = " + toString(primers.size()) + ", revprimers = " + toString(revPrimer.size()) + ".\n"); } }  if (m->control_pressed) {  return 0; }
330         if(ecolifile != "") {    readEcoli();      }  if (m->control_pressed) {  return 0; }
331         
332         vector<unsigned long long> positions; 
333         int numFastaSeqs = 0;
334 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
335         positions = m->divideFile(fastafile, processors);
336         for (int i = 0; i < (positions.size()-1); i++) {        lines.push_back(linePair(positions[i], positions[(i+1)]));      }
337 #else
338         if (processors == 1) {
339             lines.push_back(linePair(0, 1000));
340         }else {
341             positions = m->setFilePosFasta(fastafile, numFastaSeqs); 
342             if (positions.size() < processors) { processors = positions.size(); }
343             
344             //figure out how many sequences you have to process
345             int numSeqsPerProcessor = numFastaSeqs / processors;
346             for (int i = 0; i < processors; i++) {
347                 int startIndex =  i * numSeqsPerProcessor;
348                 if(i == (processors - 1)){      numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor;   }
349                 lines.push_back(linePair(positions[startIndex], numSeqsPerProcessor));
350             }
351         }
352 #endif
353         if (m->control_pressed) {  return 0; }
354
355         set<string> badNames;
356         numFastaSeqs = createProcesses(fastafile, trimSeqFile, badSeqFile, badNames);   
357                 
358                 if (m->control_pressed) {  return 0; }          
359         
360         //don't write or keep if blank
361         if (badNames.size() != 0)   { writeAccnos(badNames);        }   
362         if (m->isBlank(badSeqFile)) { m->mothurRemove(badSeqFile);  }
363         else { outputNames.push_back(badSeqFile); outputTypes["fasta"].push_back(badSeqFile); }
364         
365         if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]); } return 0; }
366         if (namefile != "")                     {               readName(badNames);             }   
367         if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]); } return 0; }
368         if (groupfile != "")            {               readGroup(badNames);    }
369         if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]); } return 0; }
370                 if (taxfile != "")                      {               readTax(badNames);              }
371                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]); } return 0; }
372                 if (countfile != "")                    {               readCount(badNames);            }
373                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]); } return 0; }
374       
375         m->mothurOutEndLine();
376                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
377                 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
378                 m->mothurOutEndLine();
379                 m->mothurOutEndLine();
380                 
381                 //set fasta file as new current fastafile
382                 string current = "";
383                 itTypes = outputTypes.find("fasta");
384                 if (itTypes != outputTypes.end()) {
385                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
386                 }
387                 
388                 itTypes = outputTypes.find("name");
389                 if (itTypes != outputTypes.end()) {
390                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
391                 }
392                 
393                 itTypes = outputTypes.find("group");
394                 if (itTypes != outputTypes.end()) {
395                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
396                 }
397                 
398                 itTypes = outputTypes.find("accnos");
399                 if (itTypes != outputTypes.end()) {
400                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); }
401                 }
402                 
403                 itTypes = outputTypes.find("taxonomy");
404                 if (itTypes != outputTypes.end()) {
405                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setTaxonomyFile(current); }
406                 }
407         
408         itTypes = outputTypes.find("count");
409                 if (itTypes != outputTypes.end()) {
410                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
411                 }
412         
413                 m->mothurOut("It took " + toString(time(NULL) - start) + " secs to screen " + toString(numFastaSeqs) + " sequences.");
414                 m->mothurOutEndLine();
415
416                 
417                 return 0;       
418         
419         }
420         catch(exception& e) {
421                 m->errorOut(e, "PcrSeqsCommand", "execute");
422                 exit(1);
423         }
424 }
425 /**************************************************************************************************/
426 int PcrSeqsCommand::createProcesses(string filename, string goodFileName, string badFileName, set<string>& badSeqNames) {
427         try {
428         
429         vector<int> processIDS;   
430         int process = 1;
431                 int num = 0;
432         int pstart = -1; int pend = -1;
433         bool adjustNeeded = false;
434         
435 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
436         
437                 //loop through and create all the processes you want
438                 while (process != processors) {
439                         int pid = fork();
440                         
441                         if (pid > 0) {
442                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
443                                 process++;
444                         }else if (pid == 0){
445                 string locationsFile = toString(getpid()) + ".temp";
446                                 num = driverPcr(filename, goodFileName + toString(getpid()) + ".temp", badFileName + toString(getpid()) + ".temp", locationsFile, badSeqNames, lines[process], pstart, pend, adjustNeeded);
447                                 
448                                 //pass numSeqs to parent
449                                 ofstream out;
450                                 string tempFile = filename + toString(getpid()) + ".num.temp";
451                                 m->openOutputFile(tempFile, out);
452                 out << pstart << '\t' << pend << '\t' << adjustNeeded << endl;
453                                 out << num << '\t' << badSeqNames.size() << endl;
454                 for (set<string>::iterator it = badSeqNames.begin(); it != badSeqNames.end(); it++) {
455                     out << (*it) << endl;
456                 }
457                                 out.close();
458                                 
459                                 exit(0);
460                         }else { 
461                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
462                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
463                                 exit(0);
464                         }
465                 }
466                 
467         string locationsFile = toString(getpid()) + ".temp";
468         num = driverPcr(filename, goodFileName, badFileName, locationsFile, badSeqNames, lines[0], pstart, pend, adjustNeeded);
469         
470                 //force parent to wait until all the processes are done
471                 for (int i=0;i<processIDS.size();i++) { 
472                         int temp = processIDS[i];
473                         wait(&temp);
474                 }
475                 
476                 for (int i = 0; i < processIDS.size(); i++) {
477                         ifstream in;
478                         string tempFile =  filename + toString(processIDS[i]) + ".num.temp";
479                         m->openInputFile(tempFile, in);
480             int numBadNames = 0; string name = "";
481             int tpstart = -1; int tpend = -1; bool tempAdjust = false;
482             
483                         if (!in.eof()) {
484                 in >> tpstart >> tpend >> tempAdjust; m->gobble(in);
485                 
486                 if (tempAdjust) { adjustNeeded = true; }
487                 if (tpstart != -1)   {
488                     if (tpstart != pstart) { adjustNeeded = true; }
489                     if (tpstart < pstart) { pstart = tpstart; } //smallest start
490                 } 
491                 if (tpend != -1)     {
492                     if (tpend != pend) { adjustNeeded = true; }
493                     if (tpend > pend) { pend = tpend; } //largest end
494                 } 
495                 int tempNum = 0; in >> tempNum >> numBadNames; num += tempNum; m->gobble(in);
496             }
497             for (int j = 0; j < numBadNames; j++) {
498                 in >> name; m->gobble(in);
499                 badSeqNames.insert(name);
500             }
501                         in.close(); m->mothurRemove(tempFile);
502             
503             m->appendFiles((goodFileName + toString(processIDS[i]) + ".temp"), goodFileName);
504             m->mothurRemove((goodFileName + toString(processIDS[i]) + ".temp"));
505             
506             m->appendFiles((badFileName + toString(processIDS[i]) + ".temp"), badFileName);
507             m->mothurRemove((badFileName + toString(processIDS[i]) + ".temp"));
508             
509             m->appendFiles((toString(processIDS[i]) + ".temp"), locationsFile);
510             m->mothurRemove((toString(processIDS[i]) + ".temp"));
511                 }
512     #else
513         
514         //////////////////////////////////////////////////////////////////////////////////////////////////////
515                 //Windows version shared memory, so be careful when passing variables through the sumScreenData struct. 
516                 //Above fork() will clone, so memory is separate, but that's not the case with windows, 
517                 //Taking advantage of shared memory to allow both threads to add info to badSeqNames.
518                 //////////////////////////////////////////////////////////////////////////////////////////////////////
519                 
520                 vector<pcrData*> pDataArray; 
521                 DWORD   dwThreadIdArray[processors-1];
522                 HANDLE  hThreadArray[processors-1]; 
523                 
524         string locationsFile = "locationsFile.txt";
525         m->mothurRemove(locationsFile);
526         m->mothurRemove(goodFileName);
527         m->mothurRemove(badFileName);
528         
529                 //Create processor worker threads.
530                 for( int i=0; i<processors-1; i++ ){
531             
532             string extension = "";
533             if (i!=0) {extension += toString(i) + ".temp"; processIDS.push_back(i); }
534             
535                         // Allocate memory for thread data.
536                         pcrData* tempPcr = new pcrData(filename, goodFileName+extension, badFileName+extension, locationsFile+extension, m, oligosfile, ecolifile, primers, revPrimer, nomatch, keepprimer, keepdots, start, end, length, pdiffs, lines[i].start, lines[i].end);
537                         pDataArray.push_back(tempPcr);
538                         
539                         //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
540                         hThreadArray[i] = CreateThread(NULL, 0, MyPcrThreadFunction, pDataArray[i], 0, &dwThreadIdArray[i]);   
541                 }
542                 
543         //do your part
544         num = driverPcr(filename, (goodFileName+toString(processors-1)+".temp"), (badFileName+toString(processors-1)+".temp"), (locationsFile+toString(processors-1)+".temp"), badSeqNames, lines[processors-1], pstart, pend, adjustNeeded);
545         processIDS.push_back(processors-1);
546         
547                 //Wait until all threads have terminated.
548                 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
549                 
550                 //Close all thread handles and free memory allocations.
551                 for(int i=0; i < pDataArray.size(); i++){
552                         num += pDataArray[i]->count;
553             if (pDataArray[i]->count != pDataArray[i]->fend) {
554                 m->mothurOut("[ERROR]: process " + toString(i) + " only processed " + toString(pDataArray[i]->count) + " of " + toString(pDataArray[i]->fend) + " sequences assigned to it, quitting. \n"); m->control_pressed = true; 
555             }
556             if (pDataArray[i]->adjustNeeded) { adjustNeeded = true; }
557             if (pDataArray[i]->pstart != -1)   {
558                 if (pDataArray[i]->pstart != pstart) { adjustNeeded = true; }
559                 if (pDataArray[i]->pstart < pstart) { pstart = pDataArray[i]->pstart; }
560             } //smallest start
561             if (pDataArray[i]->pend != -1)     {
562                 if (pDataArray[i]->pend != pend) { adjustNeeded = true; }
563                 if (pDataArray[i]->pend > pend) { pend = pDataArray[i]->pend; }
564             } //largest end
565
566             for (set<string>::iterator it = pDataArray[i]->badSeqNames.begin(); it != pDataArray[i]->badSeqNames.end(); it++) { badSeqNames.insert(*it);       }
567                         CloseHandle(hThreadArray[i]);
568                         delete pDataArray[i];
569                 }
570         
571         for (int i = 0; i < processIDS.size(); i++) {
572             m->appendFiles((goodFileName + toString(processIDS[i]) + ".temp"), goodFileName);
573             m->mothurRemove((goodFileName + toString(processIDS[i]) + ".temp"));
574             
575             m->appendFiles((badFileName + toString(processIDS[i]) + ".temp"), badFileName);
576             m->mothurRemove((badFileName + toString(processIDS[i]) + ".temp"));
577             
578             m->appendFiles((locationsFile+toString(processIDS[i]) + ".temp"), locationsFile);
579             m->mothurRemove((locationsFile+toString(processIDS[i]) + ".temp"));
580                 }
581         
582 #endif  
583         if (fileAligned && adjustNeeded) { adjustDots(goodFileName, locationsFile, pstart, pend); }
584         
585         return num;
586         
587         }
588         catch(exception& e) {
589                 m->errorOut(e, "PcrSeqsCommand", "createProcesses");
590                 exit(1);
591         }
592 }
593
594 //**********************************************************************************************************************
595 int PcrSeqsCommand::driverPcr(string filename, string goodFasta, string badFasta, string locationsName, set<string>& badSeqNames, linePair filePos, int& pstart, int& pend, bool& adjustNeeded){
596         try {
597                 ofstream goodFile;
598                 m->openOutputFile(goodFasta, goodFile);
599         
600         ofstream badFile;
601                 m->openOutputFile(badFasta, badFile);
602         
603         ofstream locationsFile;
604                 m->openOutputFile(locationsName, locationsFile);
605                 
606                 ifstream inFASTA;
607                 m->openInputFile(filename, inFASTA);
608         
609                 inFASTA.seekg(filePos.start);
610         
611                 bool done = false;
612                 int count = 0;
613         set<int> lengths;
614         vector< set<int> > locations; //locations[0] = beginning locations, locations[1] = ending locations
615         locations.resize(2);
616         
617         //pdiffs, bdiffs, primers, barcodes, revPrimers
618         map<string, int> faked;
619         TrimOligos trim(pdiffs, 0, primers, faked, revPrimer);
620         
621                 while (!done) {
622             
623                         if (m->control_pressed) {  break; }
624                         
625                         Sequence currSeq(inFASTA); m->gobble(inFASTA);
626             
627             if (fileAligned) { //assume aligned until proven otherwise
628                 lengths.insert(currSeq.getAligned().length());
629                 if (lengths.size() > 1) { fileAligned = false; }
630             }
631             
632             string trashCode = "";
633             string locationsString = "";
634             int thisPStart = -1;
635             int thisPEnd = -1;
636                         if (currSeq.getName() != "") {
637                 
638                 if (m->debug) { m->mothurOut("[DEBUG]: seq name = " + currSeq.getName() + ".\n"); } 
639                 
640                 bool goodSeq = true;
641                 if (oligosfile != "") {
642                     map<int, int> mapAligned;
643                     bool aligned = isAligned(currSeq.getAligned(), mapAligned);
644                     
645                     //process primers
646                     if (primers.size() != 0) {
647                         int primerStart = 0; int primerEnd = 0;
648                         bool good = trim.findForward(currSeq, primerStart, primerEnd);
649                         
650                         if(!good){      if (nomatch == "reject") { goodSeq = false; } trashCode += "f"; }
651                         else{
652                             //are you aligned
653                             if (aligned) { 
654                                 if (!keepprimer)    {
655                                     if (keepdots)   { currSeq.filterToPos(mapAligned[primerEnd-1]+1);   } //mapAligned[primerEnd-1] is the location of the last base in the primer. we want to trim to the space just after that.  The -1 & +1 ensures if the primer is followed by gaps they are not trimmed causing an aligned sequence dataset to become unaligned.
656                                     else            {
657                                         currSeq.setAligned(currSeq.getAligned().substr(mapAligned[primerEnd-1]+1));
658                                         if (fileAligned) {
659                                             thisPStart = mapAligned[primerEnd-1]+1; //locations[0].insert(mapAligned[primerEnd-1]+1);
660                                             locationsString += currSeq.getName() + "\t" + toString(mapAligned[primerEnd-1]+1) + "\n";
661                                         }
662                                     }
663                                 } 
664                                 else                {  
665                                     if (keepdots)   { currSeq.filterToPos(mapAligned[primerStart]);  }
666                                     else            {
667                                         currSeq.setAligned(currSeq.getAligned().substr(mapAligned[primerStart]));
668                                         if (fileAligned) {
669                                             thisPStart = mapAligned[primerStart]; //locations[0].insert(mapAligned[primerStart]);
670                                             locationsString += currSeq.getName() + "\t" + toString(mapAligned[primerStart]) + "\n";
671                                         }
672                                     }
673                                 }
674                                 isAligned(currSeq.getAligned(), mapAligned);
675                             }else { 
676                                 if (!keepprimer)    { currSeq.setAligned(currSeq.getUnaligned().substr(primerEnd)); } 
677                                 else                { currSeq.setAligned(currSeq.getUnaligned().substr(primerStart)); } 
678                             }
679                         }
680                     }
681                     
682                     //process reverse primers
683                     if (revPrimer.size() != 0) {
684                         int primerStart = 0; int primerEnd = 0;
685                         bool good = trim.findReverse(currSeq, primerStart, primerEnd);
686                         if(!good){      if (nomatch == "reject") { goodSeq = false; } trashCode += "r"; }
687                         else{
688                             //are you aligned
689                             if (aligned) { 
690                                 if (!keepprimer)    {  
691                                     if (keepdots)   { currSeq.filterFromPos(mapAligned[primerStart]); }
692                                     else            {
693                                         currSeq.setAligned(currSeq.getAligned().substr(0, mapAligned[primerStart]));
694                                         if (fileAligned) {
695                                             thisPEnd = mapAligned[primerStart]; //locations[1].insert(mapAligned[primerStart]);
696                                             locationsString += currSeq.getName() + "\t" + toString(mapAligned[primerStart]) + "\n";
697                                         }
698                                     }
699                                 } 
700                                 else                {  
701                                     if (keepdots)   { currSeq.filterFromPos(mapAligned[primerEnd-1]+1); }
702                                     else            {
703                                         currSeq.setAligned(currSeq.getAligned().substr(0, mapAligned[primerEnd-1]+1));
704                                         if (fileAligned) {
705                                             thisPEnd = mapAligned[primerEnd-1]+1; //locations[1].insert(mapAligned[primerEnd-1]+1);
706                                             locationsString += currSeq.getName() + "\t" + toString(mapAligned[primerEnd-1]+1) + "\n";
707                                         }
708                                     }
709                                 } 
710                             }
711                             else { 
712                                 if (!keepprimer)    { currSeq.setAligned(currSeq.getUnaligned().substr(0, primerStart));   } 
713                                 else                { currSeq.setAligned(currSeq.getUnaligned().substr(0, primerEnd));     }
714                             }
715                         }
716                     }
717                 }else if (ecolifile != "") {
718                     //make sure the seqs are aligned
719                     if (!fileAligned) { m->mothurOut("[ERROR]: seqs are not aligned. When using start and end your sequences must be aligned.\n"); m->control_pressed = true; break; }
720                     else if (currSeq.getAligned().length() != length) {
721                         m->mothurOut("[ERROR]: seqs are not the same length as ecoli seq. When using ecoli option your sequences must be aligned and the same length as the ecoli sequence.\n"); m->control_pressed = true; break; 
722                     }else {
723                         if (keepdots)   { 
724                             currSeq.filterToPos(start); 
725                             currSeq.filterFromPos(end);
726                         }else {
727                             string seqString = currSeq.getAligned().substr(0, end);
728                             seqString = seqString.substr(start);
729                             currSeq.setAligned(seqString); 
730                         }
731                     }
732                 }else{ //using start and end to trim
733                     //make sure the seqs are aligned
734                     if (!fileAligned) { m->mothurOut("[ERROR]: seqs are not aligned. When using start and end your sequences must be aligned.\n"); m->control_pressed = true; break; }
735                     else {
736                         if (end != -1) {
737                             if (end > currSeq.getAligned().length()) {  m->mothurOut("[ERROR]: end is longer than your sequence length, aborting.\n"); m->control_pressed = true; break; }
738                             else {
739                                 if (keepdots)   { currSeq.filterFromPos(end); }
740                                 else {
741                                     string seqString = currSeq.getAligned().substr(0, end);
742                                     currSeq.setAligned(seqString); 
743                                 }
744                             }
745                         }
746                         if (start != -1) { 
747                             if (keepdots)   {  currSeq.filterToPos(start);  }
748                             else {
749                                 string seqString = currSeq.getAligned().substr(start);
750                                 currSeq.setAligned(seqString); 
751                             }
752                         }
753                     }
754                 }
755                 
756                 //trimming removed all bases
757                 if (currSeq.getUnaligned() == "") { goodSeq = false; }
758                 
759                                 if(goodSeq == 1)    {
760                     currSeq.printSequence(goodFile);
761                     if (m->debug) { m->mothurOut("[DEBUG]: " + locationsString + "\n"); }
762                     if (locationsString != "") { locationsFile << locationsString; }
763                     if (thisPStart != -1)   { locations[0].insert(thisPStart);  }
764                     if (thisPEnd != -1)     { locations[1].insert(thisPEnd);    }
765                 }
766                                 else {  
767                     badSeqNames.insert(currSeq.getName()); 
768                     currSeq.setName(currSeq.getName() + '|' + trashCode);
769                     currSeq.printSequence(badFile); 
770                 }
771                 count++;
772                         }
773                         
774 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
775             unsigned long long pos = inFASTA.tellg();
776             if ((pos == -1) || (pos >= filePos.end)) { break; }
777 #else
778             if (inFASTA.eof()) { break; }
779 #endif
780                         
781                         //report progress
782                         if((count) % 100 == 0){ m->mothurOutJustToScreen("Processing sequence: " + toString(count)+"\n");               }
783                 }
784                 //report progress
785                 if((count) % 100 != 0){ m->mothurOutJustToScreen("Processing sequence: " + toString(count)+"\n");       }
786                 
787         badFile.close();
788                 goodFile.close();
789                 inFASTA.close();
790         locationsFile.close();
791         
792         if (m->debug) { m->mothurOut("[DEBUG]: fileAligned = " + toString(fileAligned) +'\n'); }
793     
794         if (fileAligned && !keepdots) { //print out smallest start value and largest end value
795             if ((locations[0].size() > 1) || (locations[1].size() > 1)) { adjustNeeded = true; }
796             if (primers.size() != 0)    {   set<int>::iterator it = locations[0].begin();  pstart = *it;  }
797             if (revPrimer.size() != 0)  {   set<int>::reverse_iterator it2 = locations[1].rbegin();  pend = *it2; }
798         }
799
800                 return count;
801         }
802         catch(exception& e) {
803                 m->errorOut(e, "PcrSeqsCommand", "driverPcr");
804                 exit(1);
805         }
806 }
807 //********************************************************************/
808 bool PcrSeqsCommand::isAligned(string seq, map<int, int>& aligned){
809         try {
810         aligned.clear();
811         bool isAligned = false;
812         
813         int countBases = 0;
814         for (int i = 0; i < seq.length(); i++) {
815             if (!isalpha(seq[i])) { isAligned = true; }
816             else { aligned[countBases] = i; countBases++; } //maps location in unaligned -> location in aligned.
817         }                                                   //ie. the 3rd base may be at spot 10 in the alignment
818                                                             //later when we trim we want to trim from spot 10.
819         return isAligned;
820     }
821         catch(exception& e) {
822                 m->errorOut(e, "PcrSeqsCommand", "isAligned");
823                 exit(1);
824         }
825 }
826 //**********************************************************************************************************************
827 int PcrSeqsCommand::adjustDots(string goodFasta, string locations, int pstart, int pend){
828     try {
829         ifstream inFasta;
830         m->openInputFile(goodFasta, inFasta);
831         
832         ifstream inLocations;
833         m->openInputFile(locations, inLocations);
834         
835         ofstream out;
836         m->openOutputFile(goodFasta+".temp", out);
837         
838         set<int> lengths;
839         //cout << pstart << '\t' << pend << endl;
840         
841         while(!inFasta.eof()) {
842             if(m->control_pressed) { break; }
843             
844             Sequence seq(inFasta); m->gobble(inFasta);
845             
846             string name = "";
847             int thisStart = -1; int thisEnd = -1;
848             if (primers.size() != 0)    { inLocations >> name >> thisStart; m->gobble(inLocations); }
849             if (revPrimer.size() != 0)  { inLocations >> name >> thisEnd;   m->gobble(inLocations); }
850             //cout << seq.getName() << '\t' << thisStart << '\t' << thisEnd << '\t' << seq.getAligned().length() << endl;
851             //cout << seq.getName() << '\t' << pstart << '\t' << pend << endl;
852             
853             if (name != seq.getName()) { m->mothurOut("[ERROR]: name mismatch in pcr.seqs.\n"); }
854             else {
855                 if (pstart != -1) {
856                     if (thisStart != -1) {
857                         if (thisStart != pstart) {
858                             string dots = "";
859                             for (int i = pstart; i < thisStart; i++) { dots += "."; }
860                             thisEnd += dots.length();
861                             dots += seq.getAligned();
862                             seq.setAligned(dots);
863                         }
864                     }
865                 }
866                 
867                 if (pend != -1) {
868                     if (thisEnd != -1) {
869                         if (thisEnd != pend) {
870                             string dots = seq.getAligned();
871                             for (int i = thisEnd; i < pend; i++) { dots += "."; }
872                             seq.setAligned(dots);
873                         }
874                     }
875                 }
876                 lengths.insert(seq.getAligned().length());
877             }
878             
879             seq.printSequence(out);
880         }
881         inFasta.close();
882         inLocations.close();
883         out.close();
884         m->mothurRemove(locations);
885         m->mothurRemove(goodFasta);
886         m->renameFile(goodFasta+".temp", goodFasta);
887         
888         //cout << "final lengths = \n";
889         //for (set<int>::iterator it = lengths.begin(); it != lengths.end(); it++) {
890            // cout << *it << endl;
891        // }
892         
893         return 0;
894     }
895     catch(exception& e) {
896         m->errorOut(e, "PcrSeqsCommand", "adjustDots");
897         exit(1);
898     }
899 }
900 //********************************************************************/
901 string PcrSeqsCommand::reverseOligo(string oligo){
902         try {
903         string reverse = "";
904        
905         for(int i=oligo.length()-1;i>=0;i--){
906             
907             if(oligo[i] == 'A')         {       reverse += 'T'; }
908             else if(oligo[i] == 'T'){   reverse += 'A'; }
909             else if(oligo[i] == 'U'){   reverse += 'A'; }
910             
911             else if(oligo[i] == 'G'){   reverse += 'C'; }
912             else if(oligo[i] == 'C'){   reverse += 'G'; }
913             
914             else if(oligo[i] == 'R'){   reverse += 'Y'; }
915             else if(oligo[i] == 'Y'){   reverse += 'R'; }
916             
917             else if(oligo[i] == 'M'){   reverse += 'K'; }
918             else if(oligo[i] == 'K'){   reverse += 'M'; }
919             
920             else if(oligo[i] == 'W'){   reverse += 'W'; }
921             else if(oligo[i] == 'S'){   reverse += 'S'; }
922             
923             else if(oligo[i] == 'B'){   reverse += 'V'; }
924             else if(oligo[i] == 'V'){   reverse += 'B'; }
925             
926             else if(oligo[i] == 'D'){   reverse += 'H'; }
927             else if(oligo[i] == 'H'){   reverse += 'D'; }
928
929             else                                                {       reverse += 'N'; }
930         }
931
932         
933         return reverse;
934     }
935         catch(exception& e) {
936                 m->errorOut(e, "PcrSeqsCommand", "reverseOligo");
937                 exit(1);
938         }
939 }
940
941 //***************************************************************************************************************
942 bool PcrSeqsCommand::readOligos(){
943         try {
944                 ifstream inOligos;
945                 m->openInputFile(oligosfile, inOligos);
946                 
947                 string type, oligo, group;
948         int primerCount = 0;
949                 
950                 while(!inOligos.eof()){
951             
952                         inOligos >> type; 
953             
954                         if(type[0] == '#'){ //ignore
955                                 while (!inOligos.eof()) {       char c = inOligos.get();  if (c == 10 || c == 13){      break;  }       } // get rest of line if there's any crap there
956                                 m->gobble(inOligos);
957                         }else{
958                                 m->gobble(inOligos);
959                                 //make type case insensitive
960                                 for(int i=0;i<type.length();i++){       type[i] = toupper(type[i]);  }
961                                 
962                                 inOligos >> oligo;
963                                 
964                                 for(int i=0;i<oligo.length();i++){
965                                         oligo[i] = toupper(oligo[i]);
966                                         if(oligo[i] == 'U')     {       oligo[i] = 'T'; }
967                                 }
968                                 
969                                 if(type == "FORWARD"){
970                                         // get rest of line in case there is a primer name
971                                         while (!inOligos.eof()) {       
972                         char c = inOligos.get(); 
973                         if (c == 10 || c == 13 || c == -1){     break;  }
974                         else if (c == 32 || c == 9){;} //space or tab
975                                         } 
976                                         primers[oligo] = primerCount; primerCount++;
977                     //cout << "for oligo = " << oligo  << endl;
978                 }else if(type == "REVERSE"){
979                     string oligoRC = reverseOligo(oligo);
980                     revPrimer.push_back(oligoRC);
981                     //cout << "rev oligo = " << oligo << " reverse = " << oligoRC << endl;
982                                 }else if(type == "BARCODE"){
983                     inOligos >> group;
984                 }else if(type == "PRIMER"){
985                                         m->gobble(inOligos);
986                     primers[oligo] = primerCount; primerCount++;
987                                         
988                     string roligo="";
989                     inOligos >> roligo;
990                     
991                     for(int i=0;i<roligo.length();i++){
992                         roligo[i] = toupper(roligo[i]);
993                         if(roligo[i] == 'U')    {       roligo[i] = 'T';        }
994                     }
995                     revPrimer.push_back(reverseOligo(roligo));
996                     
997                     // get rest of line in case there is a primer name
998                                         while (!inOligos.eof()) {
999                         char c = inOligos.get();
1000                         if (c == 10 || c == 13 || c == -1){     break;  }
1001                         else if (c == 32 || c == 9){;} //space or tab
1002                                         }
1003                     //cout << "prim oligo = " << oligo << " reverse = " << roligo << endl;
1004                                 }else if((type == "LINKER")||(type == "SPACER")) {;}
1005                                 else{   m->mothurOut(type + " is not recognized as a valid type. Choices are primer, forward, reverse, linker, spacer and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine(); m->control_pressed = true; }
1006                         }
1007                         m->gobble(inOligos);
1008                 }       
1009                 inOligos.close();
1010                 
1011                 if ((primers.size() == 0) && (revPrimer.size() == 0)) {
1012                         m->mothurOut("[ERROR]: your oligos file does not contain valid primers or reverse primers.  Please correct."); m->mothurOutEndLine();
1013             m->control_pressed = true;
1014                         return false;
1015                 }
1016         
1017         return true;
1018         
1019     }catch(exception& e) {
1020                 m->errorOut(e, "PcrSeqsCommand", "readOligos");
1021                 exit(1);
1022         }
1023 }
1024 //***************************************************************************************************************
1025 bool PcrSeqsCommand::readEcoli(){
1026         try {
1027                 ifstream in;
1028                 m->openInputFile(ecolifile, in);
1029                 
1030         //read seq
1031         if (!in.eof()){ 
1032             Sequence ecoli(in); 
1033             length = ecoli.getAligned().length();
1034             start = ecoli.getStartPos();
1035             end = ecoli.getEndPos();
1036         }else { in.close(); m->control_pressed = true; return false; }
1037         in.close();    
1038                         
1039         return true;
1040     }
1041         catch(exception& e) {
1042                 m->errorOut(e, "PcrSeqsCommand", "readEcoli");
1043                 exit(1);
1044         }
1045     
1046 }
1047 //***************************************************************************************************************
1048 int PcrSeqsCommand::writeAccnos(set<string> badNames){
1049         try {
1050                 string thisOutputDir = outputDir;
1051                 if (outputDir == "") {  thisOutputDir += m->hasPath(fastafile);  }
1052         map<string, string> variables; 
1053                 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(fastafile));
1054                 string outputFileName = getOutputFileName("accnos",variables);
1055         outputNames.push_back(outputFileName); outputTypes["accnos"].push_back(outputFileName);
1056         
1057         ofstream out;
1058         m->openOutputFile(outputFileName, out);
1059         
1060         for (set<string>::iterator it = badNames.begin(); it != badNames.end(); it++) {
1061             if (m->control_pressed) { break; }
1062             out << (*it) << endl;
1063         }
1064         
1065         out.close();
1066         return 0;
1067     }
1068         catch(exception& e) {
1069                 m->errorOut(e, "PcrSeqsCommand", "writeAccnos");
1070                 exit(1);
1071         }
1072     
1073 }
1074 //***************************************************************************************************************
1075 int PcrSeqsCommand::readName(set<string>& names){
1076         try {
1077                 string thisOutputDir = outputDir;
1078                 if (outputDir == "") {  thisOutputDir += m->hasPath(namefile);  }
1079                 map<string, string> variables; 
1080                 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(namefile));
1081         variables["[extension]"] = m->getExtension(namefile);
1082                 string outputFileName = getOutputFileName("name", variables);
1083         
1084                 ofstream out;
1085                 m->openOutputFile(outputFileName, out);
1086         
1087                 ifstream in;
1088                 m->openInputFile(namefile, in);
1089                 string name, firstCol, secondCol;
1090                 
1091                 bool wroteSomething = false;
1092                 int removedCount = 0;
1093                 
1094                 while(!in.eof()){
1095                         if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
1096                         
1097                         in >> firstCol;         m->gobble(in);          
1098                         in >> secondCol;                        
1099                         
1100             string savedSecond = secondCol;
1101                         vector<string> parsedNames;
1102                         m->splitAtComma(secondCol, parsedNames);
1103                         
1104                         vector<string> validSecond;  validSecond.clear();
1105                         for (int i = 0; i < parsedNames.size(); i++) {
1106                                 if (names.count(parsedNames[i]) == 0) {
1107                                         validSecond.push_back(parsedNames[i]);
1108                                 }
1109                         }
1110                         
1111                         if (validSecond.size() != parsedNames.size()) {  //we want to get rid of someone, so get rid of everyone
1112                                 for (int i = 0; i < parsedNames.size(); i++) {  names.insert(parsedNames[i]);  }
1113                                 removedCount += parsedNames.size();
1114                         }else {
1115                 out << firstCol << '\t' << savedSecond << endl;
1116                 wroteSomething = true;
1117             }
1118                         m->gobble(in);
1119                 }
1120                 in.close();
1121                 out.close();
1122                 
1123                 if (wroteSomething == false) {  m->mothurOut("Your file contains only sequences from the .accnos file."); m->mothurOutEndLine();  }
1124                 outputTypes["name"].push_back(outputFileName); outputNames.push_back(outputFileName);
1125                 
1126                 m->mothurOut("Removed " + toString(removedCount) + " sequences from your name file."); m->mothurOutEndLine();
1127                 
1128                 return 0;
1129         }
1130         catch(exception& e) {
1131                 m->errorOut(e, "PcrSeqsCommand", "readName");
1132                 exit(1);
1133         }
1134 }
1135 //**********************************************************************************************************************
1136 int PcrSeqsCommand::readGroup(set<string> names){
1137         try {
1138                 string thisOutputDir = outputDir;
1139                 if (outputDir == "") {  thisOutputDir += m->hasPath(groupfile);  }
1140                 map<string, string> variables; 
1141                 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(groupfile));
1142         variables["[extension]"] = m->getExtension(groupfile);
1143                 string outputFileName = getOutputFileName("group", variables);
1144                 
1145                 ofstream out;
1146                 m->openOutputFile(outputFileName, out);
1147         
1148                 ifstream in;
1149                 m->openInputFile(groupfile, in);
1150                 string name, group;
1151                 
1152                 bool wroteSomething = false;
1153                 int removedCount = 0;
1154                 
1155                 while(!in.eof()){
1156                         if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
1157                         
1158                         in >> name;                             //read from first column
1159                         in >> group;                    //read from second column
1160                         
1161                         //if this name is in the accnos file
1162                         if (names.count(name) == 0) {
1163                                 wroteSomething = true;
1164                                 out << name << '\t' << group << endl;
1165                         }else {  removedCount++;  }
1166             
1167                         m->gobble(in);
1168                 }
1169                 in.close();
1170                 out.close();
1171                 
1172                 if (wroteSomething == false) {  m->mothurOut("Your file contains only sequences from the .accnos file."); m->mothurOutEndLine();  }
1173                 outputTypes["group"].push_back(outputFileName); outputNames.push_back(outputFileName);
1174                 
1175                 m->mothurOut("Removed " + toString(removedCount) + " sequences from your group file."); m->mothurOutEndLine();
1176         
1177                 
1178                 return 0;
1179         }
1180         catch(exception& e) {
1181                 m->errorOut(e, "PcrSeqsCommand", "readGroup");
1182                 exit(1);
1183         }
1184 }
1185 //**********************************************************************************************************************
1186 int PcrSeqsCommand::readTax(set<string> names){
1187         try {
1188                 string thisOutputDir = outputDir;
1189                 if (outputDir == "") {  thisOutputDir += m->hasPath(taxfile);  }
1190                 map<string, string> variables; 
1191                 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(taxfile));
1192         variables["[extension]"] = m->getExtension(taxfile);
1193                 string outputFileName = getOutputFileName("taxonomy", variables);
1194
1195                 ofstream out;
1196                 m->openOutputFile(outputFileName, out);
1197         
1198                 ifstream in;
1199                 m->openInputFile(taxfile, in);
1200                 string name, tax;
1201                 
1202                 bool wroteSomething = false;
1203                 int removedCount = 0;
1204                 
1205                 while(!in.eof()){
1206                         if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
1207                         
1208                         in >> name;                             //read from first column
1209                         in >> tax;                      //read from second column
1210                         
1211                         //if this name is in the accnos file
1212                         if (names.count(name) == 0) {
1213                                 wroteSomething = true;
1214                                 out << name << '\t' << tax << endl;
1215                         }else {  removedCount++;  }
1216             
1217                         m->gobble(in);
1218                 }
1219                 in.close();
1220                 out.close();
1221                 
1222                 if (wroteSomething == false) {  m->mothurOut("Your file contains only sequences from the .accnos file."); m->mothurOutEndLine();  }
1223                 outputTypes["taxonomy"].push_back(outputFileName); outputNames.push_back(outputFileName);
1224                 
1225                 m->mothurOut("Removed " + toString(removedCount) + " sequences from your taxonomy file."); m->mothurOutEndLine();
1226                 
1227                 return 0;
1228         }
1229         catch(exception& e) {
1230                 m->errorOut(e, "PcrSeqsCommand", "readTax");
1231                 exit(1);
1232         }
1233 }
1234 //***************************************************************************************************************
1235 int PcrSeqsCommand::readCount(set<string> badSeqNames){
1236         try {
1237                 ifstream in;
1238                 m->openInputFile(countfile, in);
1239                 set<string>::iterator it;
1240                 
1241                 map<string, string> variables; 
1242                 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(countfile));
1243         variables["[extension]"] = m->getExtension(countfile);
1244                 string goodCountFile = getOutputFileName("count", variables);
1245
1246         outputNames.push_back(goodCountFile);  outputTypes["count"].push_back(goodCountFile);
1247                 ofstream goodCountOut;  m->openOutputFile(goodCountFile, goodCountOut);
1248                 
1249         string headers = m->getline(in); m->gobble(in);
1250         goodCountOut << headers << endl;
1251         
1252         string name, rest; int thisTotal, removedCount; removedCount = 0;
1253         bool wroteSomething = false;
1254         while (!in.eof()) {
1255             
1256                         if (m->control_pressed) { goodCountOut.close(); in.close(); m->mothurRemove(goodCountFile); return 0; }
1257             
1258                         in >> name; m->gobble(in); 
1259             in >> thisTotal; m->gobble(in);
1260             rest = m->getline(in); m->gobble(in);
1261             
1262                         if (badSeqNames.count(name) != 0) { removedCount+=thisTotal; }
1263                         else{
1264                 wroteSomething = true;
1265                                 goodCountOut << name << '\t' << thisTotal << '\t' << rest << endl;
1266                         }
1267                 }
1268                 in.close();
1269                 goodCountOut.close();
1270         
1271         if (m->control_pressed) { m->mothurRemove(goodCountFile);   }
1272         
1273         if (wroteSomething == false) {  m->mothurOut("Your count file contains only sequences from the .accnos file."); m->mothurOutEndLine(); }
1274         
1275         //check for groups that have been eliminated
1276         CountTable ct;
1277         if (ct.testGroups(goodCountFile)) {
1278             ct.readTable(goodCountFile, true);
1279             ct.printTable(goodCountFile);
1280         }
1281                 
1282                 if (m->control_pressed) { m->mothurRemove(goodCountFile);   }
1283         
1284         m->mothurOut("Removed " + toString(removedCount) + " sequences from your count file."); m->mothurOutEndLine();
1285
1286                 
1287                 return 0;
1288         
1289         }
1290         catch(exception& e) {
1291                 m->errorOut(e, "PcrSeqsCommand", "readCOunt");
1292                 exit(1);
1293         }
1294 }
1295 /**************************************************************************************/
1296
1297