]> git.donarmstrong.com Git - mothur.git/blob - prcseqscommand.cpp
added oligos class. added check orient parameter to trim.flows, sffinfo, fastq.info...
[mothur.git] / prcseqscommand.cpp
1 //
2 //  prcseqscommand.cpp
3 //  Mothur
4 //
5 //  Created by Sarah Westcott on 3/14/12.
6 //  Copyright (c) 2012 Schloss Lab. All rights reserved.
7 //
8
9 #include "pcrseqscommand.h"
10
11 //**********************************************************************************************************************
12 vector<string> PcrSeqsCommand::setParameters(){ 
13         try {
14                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","fasta",false,true,true); parameters.push_back(pfasta);
15                 CommandParameter poligos("oligos", "InputTypes", "", "", "ecolioligos", "none", "none","",false,false,true); parameters.push_back(poligos);
16         CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none","name",false,false,true); parameters.push_back(pname);
17         CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none","count",false,false,true); parameters.push_back(pcount);
18                 CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none","group",false,false,true); parameters.push_back(pgroup);
19         CommandParameter ptax("taxonomy", "InputTypes", "", "", "none", "none", "none","taxonomy",false,false,true); parameters.push_back(ptax);
20         CommandParameter pecoli("ecoli", "InputTypes", "", "", "ecolioligos", "none", "none","",false,false); parameters.push_back(pecoli);
21                 CommandParameter pstart("start", "Number", "", "-1", "", "", "","",false,false); parameters.push_back(pstart);
22                 CommandParameter pend("end", "Number", "", "-1", "", "", "","",false,false); parameters.push_back(pend);
23                 CommandParameter pnomatch("nomatch", "Multiple", "reject-keep", "reject", "", "", "","",false,false); parameters.push_back(pnomatch);
24         CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "","",false,false,true); parameters.push_back(ppdiffs);
25
26                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
27                 CommandParameter pkeepprimer("keepprimer", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pkeepprimer);
28         CommandParameter pkeepdots("keepdots", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pkeepdots);
29         CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
30                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
31         
32                 vector<string> myArray;
33                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
34                 return myArray;
35         }
36         catch(exception& e) {
37                 m->errorOut(e, "PcrSeqsCommand", "setParameters");
38                 exit(1);
39         }
40 }
41 //**********************************************************************************************************************
42 string PcrSeqsCommand::getHelpString(){ 
43         try {
44                 string helpString = "";
45                 helpString += "The pcr.seqs command reads a fasta file.\n";
46         helpString += "The pcr.seqs command parameters are fasta, oligos, name, group, count, taxonomy, ecoli, start, end, nomatch, pdiffs, processors, keepprimer and keepdots.\n";
47                 helpString += "The ecoli parameter is used to provide a fasta file containing a single reference sequence (e.g. for e. coli) this must be aligned. Mothur will trim to the start and end positions of the reference sequence.\n";
48         helpString += "The start parameter allows you to provide a starting position to trim to.\n";
49         helpString += "The end parameter allows you to provide a ending position to trim from.\n";
50         helpString += "The nomatch parameter allows you to decide what to do with sequences where the primer is not found. Default=reject, meaning remove from fasta file.  if nomatch=true, then do nothing to sequence.\n";
51         helpString += "The processors parameter allows you to use multiple processors.\n";
52         helpString += "The keepprimer parameter allows you to keep the primer, default=false.\n";
53         helpString += "The keepdots parameter allows you to keep the leading and trailing .'s, default=true.\n";
54         helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";
55                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
56                 helpString += "For more details please check out the wiki http://www.mothur.org/wiki/Pcr.seqs .\n";
57                 return helpString;
58         }
59         catch(exception& e) {
60                 m->errorOut(e, "PcrSeqsCommand", "getHelpString");
61                 exit(1);
62         }
63 }
64 //**********************************************************************************************************************
65 string PcrSeqsCommand::getOutputPattern(string type) {
66     try {
67         string pattern = "";
68         
69         if (type == "fasta")            {   pattern = "[filename],pcr,[extension]-[filename],[tag],pcr,[extension]";    }
70         else if (type == "taxonomy")    {   pattern = "[filename],pcr,[extension]";    }
71         else if (type == "name")        {   pattern = "[filename],pcr,[extension]";    }
72         else if (type == "group")       {   pattern = "[filename],pcr,[extension]";    }
73         else if (type == "count")       {   pattern = "[filename],pcr,[extension]";    }
74         else if (type == "accnos")      {   pattern = "[filename],bad.accnos";    }
75         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
76         
77         return pattern;
78     }
79     catch(exception& e) {
80         m->errorOut(e, "PcrSeqsCommand", "getOutputPattern");
81         exit(1);
82     }
83 }
84 //**********************************************************************************************************************
85
86 PcrSeqsCommand::PcrSeqsCommand(){       
87         try {
88                 abort = true; calledHelp = true; 
89                 setParameters();
90                 vector<string> tempOutNames;
91                 outputTypes["fasta"] = tempOutNames;
92                 outputTypes["taxonomy"] = tempOutNames;
93                 outputTypes["group"] = tempOutNames;
94                 outputTypes["name"] = tempOutNames;
95         outputTypes["count"] = tempOutNames;
96         outputTypes["accnos"] = tempOutNames;
97         }
98         catch(exception& e) {
99                 m->errorOut(e, "PcrSeqsCommand", "PcrSeqsCommand");
100                 exit(1);
101         }
102 }
103 //***************************************************************************************************************
104
105 PcrSeqsCommand::PcrSeqsCommand(string option)  {
106         try {
107                 
108                 abort = false; calledHelp = false;   
109                 
110                 //allow user to run help
111                 if(option == "help") { help(); abort = true; calledHelp = true; }
112                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
113                 
114                 else {
115                         vector<string> myArray = setParameters();
116                         
117                         OptionParser parser(option);
118                         map<string,string> parameters = parser.getParameters();
119                         
120                         ValidParameters validParameter;
121                         map<string,string>::iterator it;
122                         
123                         //check to make sure all parameters are valid for command
124                         for (it = parameters.begin(); it != parameters.end(); it++) { 
125                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
126                         }
127                         
128                         //initialize outputTypes
129                         vector<string> tempOutNames;
130                         outputTypes["fasta"] = tempOutNames;
131                         outputTypes["taxonomy"] = tempOutNames;
132                         outputTypes["group"] = tempOutNames;
133                         outputTypes["name"] = tempOutNames;
134             outputTypes["accnos"] = tempOutNames;
135             outputTypes["count"] = tempOutNames;
136                         
137                         //if the user changes the input directory command factory will send this info to us in the output parameter 
138                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
139                         if (inputDir == "not found"){   inputDir = "";          }
140                         else {
141                                 string path;
142                                 it = parameters.find("fasta");
143                                 //user has given a template file
144                                 if(it != parameters.end()){ 
145                                         path = m->hasPath(it->second);
146                                         //if the user has not given a path then, add inputdir. else leave path alone.
147                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
148                                 }
149                                 
150                                 it = parameters.find("oligos");
151                                 //user has given a template file
152                                 if(it != parameters.end()){ 
153                                         path = m->hasPath(it->second);
154                                         //if the user has not given a path then, add inputdir. else leave path alone.
155                                         if (path == "") {       parameters["oligos"] = inputDir + it->second;           }
156                                 }
157                 
158                 it = parameters.find("ecoli");
159                                 //user has given a template file
160                                 if(it != parameters.end()){ 
161                                         path = m->hasPath(it->second);
162                                         //if the user has not given a path then, add inputdir. else leave path alone.
163                                         if (path == "") {       parameters["ecoli"] = inputDir + it->second;            }
164                                 }
165                                 
166                                 it = parameters.find("taxonomy");
167                                 //user has given a template file
168                                 if(it != parameters.end()){ 
169                                         path = m->hasPath(it->second);
170                                         //if the user has not given a path then, add inputdir. else leave path alone.
171                                         if (path == "") {       parameters["taxonomy"] = inputDir + it->second;         }
172                                 }
173                                 
174                                 it = parameters.find("name");
175                                 //user has given a template file
176                                 if(it != parameters.end()){ 
177                                         path = m->hasPath(it->second);
178                                         //if the user has not given a path then, add inputdir. else leave path alone.
179                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
180                                 }
181                 
182                 it = parameters.find("group");
183                                 //user has given a template file
184                                 if(it != parameters.end()){ 
185                                         path = m->hasPath(it->second);
186                                         //if the user has not given a path then, add inputdir. else leave path alone.
187                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
188                                 }
189                 
190                 it = parameters.find("count");
191                                 //user has given a template file
192                                 if(it != parameters.end()){ 
193                                         path = m->hasPath(it->second);
194                                         //if the user has not given a path then, add inputdir. else leave path alone.
195                                         if (path == "") {       parameters["count"] = inputDir + it->second;            }
196                                 }
197                                 
198                         }
199             
200                         
201                         //check for required parameters
202                         fastafile = validParameter.validFile(parameters, "fasta", true);
203                         if (fastafile == "not found") {                                 
204                                 fastafile = m->getFastaFile(); 
205                                 if (fastafile != "") { m->mothurOut("Using " + fastafile + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
206                                 else {  m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
207                         }else if (fastafile == "not open") { fastafile = ""; abort = true; }    
208                         else { m->setFastaFile(fastafile); }
209                         
210             //if the user changes the output directory command factory will send this info to us in the output parameter 
211                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = m->hasPath(fastafile);      }
212
213                         //check for optional parameter and set defaults
214                         // ...at some point should added some additional type checking...
215                         string temp;
216                         temp = validParameter.validFile(parameters, "keepprimer", false);  if (temp == "not found")    {        temp = "f";     }
217                         keepprimer = m->isTrue(temp);   
218             
219             temp = validParameter.validFile(parameters, "keepdots", false);  if (temp == "not found")    {      temp = "t";     }
220                         keepdots = m->isTrue(temp);     
221             
222                         temp = validParameter.validFile(parameters, "oligos", true);
223                         if (temp == "not found"){       oligosfile = "";                }
224                         else if(temp == "not open"){    oligosfile = ""; abort = true;  } 
225                         else                                    {       oligosfile = temp; m->setOligosFile(oligosfile);                }
226                         
227             ecolifile = validParameter.validFile(parameters, "ecoli", true);
228                         if (ecolifile == "not found"){  ecolifile = "";         }
229                         else if(ecolifile == "not open"){       ecolifile = ""; abort = true;   } 
230                         
231             namefile = validParameter.validFile(parameters, "name", true);
232                         if (namefile == "not found"){   namefile = "";          }
233                         else if(namefile == "not open"){        namefile = ""; abort = true;    } 
234             else { m->setNameFile(namefile); }
235             
236             groupfile = validParameter.validFile(parameters, "group", true);
237                         if (groupfile == "not found"){  groupfile = "";         }
238                         else if(groupfile == "not open"){       groupfile = ""; abort = true;   } 
239             else { m->setGroupFile(groupfile); }
240             
241             countfile = validParameter.validFile(parameters, "count", true);
242                         if (countfile == "not open") { countfile = ""; abort = true; }
243                         else if (countfile == "not found") { countfile = "";  } 
244                         else { m->setCountTableFile(countfile); }
245             
246             if ((namefile != "") && (countfile != "")) {
247                 m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
248             }
249                         
250             if ((groupfile != "") && (countfile != "")) {
251                 m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
252             }
253             
254             taxfile = validParameter.validFile(parameters, "taxonomy", true);
255                         if (taxfile == "not found"){    taxfile = "";           }
256                         else if(taxfile == "not open"){ taxfile = ""; abort = true;     } 
257             else { m->setTaxonomyFile(taxfile); }
258                                                 
259                         temp = validParameter.validFile(parameters, "start", false);    if (temp == "not found") { temp = "-1"; }
260                         m->mothurConvert(temp, start);
261             
262             temp = validParameter.validFile(parameters, "end", false);  if (temp == "not found") { temp = "-1"; }
263                         m->mothurConvert(temp, end);
264                         
265                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
266                         m->setProcessors(temp);
267                         m->mothurConvert(temp, processors); 
268             
269             temp = validParameter.validFile(parameters, "pdiffs", false);               if (temp == "not found") { temp = "0"; }
270                         m->mothurConvert(temp, pdiffs);
271                         
272             nomatch = validParameter.validFile(parameters, "nomatch", false);   if (nomatch == "not found") { nomatch = "reject"; }
273                         
274             if ((nomatch != "reject") && (nomatch != "keep")) { m->mothurOut("[ERROR]: " + nomatch + " is not a valid entry for nomatch. Choices are reject and keep.\n");  abort = true; }
275             
276             //didnt set anything
277                         if ((oligosfile == "") && (ecolifile == "") && (start == -1) && (end == -1)) {
278                 m->mothurOut("[ERROR]: You did not set any options. Please provide an oligos or ecoli file, or set start or end.\n"); abort = true;
279             }
280             
281             if ((oligosfile == "") && (ecolifile == "") && (start < 0) && (end == -1)) { m->mothurOut("[ERROR]: Invalid start value.\n"); abort = true; }
282             
283             if ((ecolifile != "") && (start != -1) && (end != -1)) {
284                 m->mothurOut("[ERROR]: You provided an ecoli file , but set the start or end parameters. Unsure what you intend.  When you provide the ecoli file, mothur thinks you want to use the start and end of the sequence in the ecoli file.\n"); abort = true;
285             }
286
287             
288             if ((oligosfile != "") && (ecolifile != "")) {
289                  m->mothurOut("[ERROR]: You can not use an ecoli file at the same time as an oligos file.\n"); abort = true;
290             }
291                         
292                         //check to make sure you didn't forget the name file by mistake                 
293                         if (countfile == "") { 
294                 if (namefile == "") {
295                     vector<string> files; files.push_back(fastafile);
296                     parser.getNameFile(files);
297                 }
298             }
299                 }
300         
301         }
302         catch(exception& e) {
303                 m->errorOut(e, "PcrSeqsCommand", "PcrSeqsCommand");
304                 exit(1);
305         }
306 }
307 //***************************************************************************************************************
308
309 int PcrSeqsCommand::execute(){
310         try{
311         
312                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
313                 
314         int start = time(NULL);
315         fileAligned = true; pairedOligos = false;
316         
317         string thisOutputDir = outputDir;
318                 if (outputDir == "") {  thisOutputDir += m->hasPath(fastafile);  }
319         map<string, string> variables; 
320         variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(fastafile));
321         variables["[extension]"] = m->getExtension(fastafile);
322                 string trimSeqFile = getOutputFileName("fasta",variables);
323                 outputNames.push_back(trimSeqFile); outputTypes["fasta"].push_back(trimSeqFile);
324         variables["[tag]"] = "scrap";
325         string badSeqFile = getOutputFileName("fasta",variables);
326                 
327                 
328         length = 0;
329                 if(oligosfile != ""){    readOligos();     if (m->debug) { m->mothurOut("[DEBUG]: read oligos file. numprimers = " + toString(numFPrimers) + ", revprimers = " + toString(numRPrimers) + ".\n"); } }  if (m->control_pressed) {  return 0; }
330         if(ecolifile != "") {    readEcoli();      }  if (m->control_pressed) {  return 0; }
331         
332         vector<unsigned long long> positions; 
333         int numFastaSeqs = 0;
334 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
335         positions = m->divideFile(fastafile, processors);
336         for (int i = 0; i < (positions.size()-1); i++) {        lines.push_back(linePair(positions[i], positions[(i+1)]));      }
337 #else
338         if (processors == 1) {
339             lines.push_back(linePair(0, 1000));
340         }else {
341             positions = m->setFilePosFasta(fastafile, numFastaSeqs); 
342             if (positions.size() < processors) { processors = positions.size(); }
343             
344             //figure out how many sequences you have to process
345             int numSeqsPerProcessor = numFastaSeqs / processors;
346             for (int i = 0; i < processors; i++) {
347                 int startIndex =  i * numSeqsPerProcessor;
348                 if(i == (processors - 1)){      numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor;   }
349                 lines.push_back(linePair(positions[startIndex], numSeqsPerProcessor));
350             }
351         }
352 #endif
353         if (m->control_pressed) {  return 0; }
354
355         set<string> badNames;
356         numFastaSeqs = createProcesses(fastafile, trimSeqFile, badSeqFile, badNames);   
357                 
358                 if (m->control_pressed) {  return 0; }          
359         
360         //don't write or keep if blank
361         if (badNames.size() != 0)   { writeAccnos(badNames);        }   
362         if (m->isBlank(badSeqFile)) { m->mothurRemove(badSeqFile);  }
363         else { outputNames.push_back(badSeqFile); outputTypes["fasta"].push_back(badSeqFile); }
364         
365         if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]); } return 0; }
366         if (namefile != "")                     {               readName(badNames);             }   
367         if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]); } return 0; }
368         if (groupfile != "")            {               readGroup(badNames);    }
369         if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]); } return 0; }
370                 if (taxfile != "")                      {               readTax(badNames);              }
371                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]); } return 0; }
372                 if (countfile != "")                    {               readCount(badNames);            }
373                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]); } return 0; }
374       
375         m->mothurOutEndLine();
376                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
377                 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
378                 m->mothurOutEndLine();
379                 m->mothurOutEndLine();
380                 
381                 //set fasta file as new current fastafile
382                 string current = "";
383                 itTypes = outputTypes.find("fasta");
384                 if (itTypes != outputTypes.end()) {
385                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
386                 }
387                 
388                 itTypes = outputTypes.find("name");
389                 if (itTypes != outputTypes.end()) {
390                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
391                 }
392                 
393                 itTypes = outputTypes.find("group");
394                 if (itTypes != outputTypes.end()) {
395                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
396                 }
397                 
398                 itTypes = outputTypes.find("accnos");
399                 if (itTypes != outputTypes.end()) {
400                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); }
401                 }
402                 
403                 itTypes = outputTypes.find("taxonomy");
404                 if (itTypes != outputTypes.end()) {
405                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setTaxonomyFile(current); }
406                 }
407         
408         itTypes = outputTypes.find("count");
409                 if (itTypes != outputTypes.end()) {
410                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
411                 }
412         
413                 m->mothurOut("It took " + toString(time(NULL) - start) + " secs to screen " + toString(numFastaSeqs) + " sequences.");
414                 m->mothurOutEndLine();
415
416                 
417                 return 0;       
418         
419         }
420         catch(exception& e) {
421                 m->errorOut(e, "PcrSeqsCommand", "execute");
422                 exit(1);
423         }
424 }
425 /**************************************************************************************************/
426 int PcrSeqsCommand::createProcesses(string filename, string goodFileName, string badFileName, set<string>& badSeqNames) {
427         try {
428         
429         vector<int> processIDS;   
430         int process = 1;
431                 int num = 0;
432         int pstart = -1; int pend = -1;
433         bool adjustNeeded = false;
434         
435 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
436         
437                 //loop through and create all the processes you want
438                 while (process != processors) {
439                         pid_t pid = fork();
440                         
441                         if (pid > 0) {
442                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
443                                 process++;
444                         }else if (pid == 0){
445                 string locationsFile = m->mothurGetpid(process) + ".temp";
446                                 num = driverPcr(filename, goodFileName + m->mothurGetpid(process) + ".temp", badFileName + m->mothurGetpid(process) + ".temp", locationsFile, badSeqNames, lines[process], pstart, adjustNeeded);
447                                 
448                                 //pass numSeqs to parent
449                                 ofstream out;
450                                 string tempFile = filename + m->mothurGetpid(process) + ".num.temp";
451                                 m->openOutputFile(tempFile, out);
452                 out << pstart << '\t' << adjustNeeded << endl;
453                                 out << num << '\t' << badSeqNames.size() << endl;
454                 for (set<string>::iterator it = badSeqNames.begin(); it != badSeqNames.end(); it++) {
455                     out << (*it) << endl;
456                 }
457                                 out.close();
458                                 
459                                 exit(0);
460                         }else { 
461                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
462                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
463                                 exit(0);
464                         }
465                 }
466                 
467         string locationsFile = m->mothurGetpid(process) + ".temp";
468         num = driverPcr(filename, goodFileName, badFileName, locationsFile, badSeqNames, lines[0], pstart, adjustNeeded);
469         
470                 //force parent to wait until all the processes are done
471                 for (int i=0;i<processIDS.size();i++) { 
472                         int temp = processIDS[i];
473                         wait(&temp);
474                 }
475                 
476                 for (int i = 0; i < processIDS.size(); i++) {
477                         ifstream in;
478                         string tempFile =  filename + toString(processIDS[i]) + ".num.temp";
479                         m->openInputFile(tempFile, in);
480             int numBadNames = 0; string name = "";
481             int tpstart = -1; bool tempAdjust = false;
482             
483                         if (!in.eof()) {
484                 in >> tpstart >> tempAdjust; m->gobble(in);
485                 
486                 if (tempAdjust) { adjustNeeded = true; }
487                 if (tpstart != -1)   {
488                     if (tpstart != pstart) { adjustNeeded = true; }
489                     if (tpstart < pstart) { pstart = tpstart; } //smallest start
490                 } 
491                 int tempNum = 0; in >> tempNum >> numBadNames; num += tempNum; m->gobble(in);
492             }
493             for (int j = 0; j < numBadNames; j++) {
494                 in >> name; m->gobble(in);
495                 badSeqNames.insert(name);
496             }
497                         in.close(); m->mothurRemove(tempFile);
498             
499             m->appendFiles((goodFileName + toString(processIDS[i]) + ".temp"), goodFileName);
500             m->mothurRemove((goodFileName + toString(processIDS[i]) + ".temp"));
501             
502             m->appendFiles((badFileName + toString(processIDS[i]) + ".temp"), badFileName);
503             m->mothurRemove((badFileName + toString(processIDS[i]) + ".temp"));
504             
505             m->appendFiles((toString(processIDS[i]) + ".temp"), locationsFile);
506             m->mothurRemove((toString(processIDS[i]) + ".temp"));
507                 }
508     #else
509         
510         //////////////////////////////////////////////////////////////////////////////////////////////////////
511                 //Windows version shared memory, so be careful when passing variables through the sumScreenData struct. 
512                 //Above fork() will clone, so memory is separate, but that's not the case with windows, 
513                 //Taking advantage of shared memory to allow both threads to add info to badSeqNames.
514                 //////////////////////////////////////////////////////////////////////////////////////////////////////
515                 
516                 vector<pcrData*> pDataArray; 
517                 DWORD   dwThreadIdArray[processors-1];
518                 HANDLE  hThreadArray[processors-1]; 
519                 
520         string locationsFile = "locationsFile.txt";
521         m->mothurRemove(locationsFile);
522         m->mothurRemove(goodFileName);
523         m->mothurRemove(badFileName);
524         
525                 //Create processor worker threads.
526                 for( int i=0; i<processors-1; i++ ){
527             
528             string extension = "";
529             if (i!=0) {extension += toString(i) + ".temp"; processIDS.push_back(i); }
530             
531                         // Allocate memory for thread data.
532                         pcrData* tempPcr = new pcrData(filename, goodFileName+extension, badFileName+extension, locationsFile+extension, m, oligosfile, ecolifile, nomatch, keepprimer, keepdots, start, end, length, pdiffs, lines[i].start, lines[i].end);
533                         pDataArray.push_back(tempPcr);
534                         
535                         //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
536                         hThreadArray[i] = CreateThread(NULL, 0, MyPcrThreadFunction, pDataArray[i], 0, &dwThreadIdArray[i]);   
537                 }
538                 
539         //do your part
540         num = driverPcr(filename, (goodFileName+toString(processors-1)+".temp"), (badFileName+toString(processors-1)+".temp"), (locationsFile+toString(processors-1)+".temp"), badSeqNames, lines[processors-1], pstart, adjustNeeded);
541         processIDS.push_back(processors-1);
542         
543                 //Wait until all threads have terminated.
544                 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
545                 
546                 //Close all thread handles and free memory allocations.
547                 for(int i=0; i < pDataArray.size(); i++){
548                         num += pDataArray[i]->count;
549             if (pDataArray[i]->count != pDataArray[i]->fend) {
550                 m->mothurOut("[ERROR]: process " + toString(i) + " only processed " + toString(pDataArray[i]->count) + " of " + toString(pDataArray[i]->fend) + " sequences assigned to it, quitting. \n"); m->control_pressed = true; 
551             }
552             if (pDataArray[i]->adjustNeeded) { adjustNeeded = true; }
553             if (pDataArray[i]->pstart != -1)   {
554                 if (pDataArray[i]->pstart != pstart) { adjustNeeded = true; }
555                 if (pDataArray[i]->pstart < pstart) { pstart = pDataArray[i]->pstart; }
556             } //smallest start
557             
558             for (set<string>::iterator it = pDataArray[i]->badSeqNames.begin(); it != pDataArray[i]->badSeqNames.end(); it++) { badSeqNames.insert(*it);       }
559                         CloseHandle(hThreadArray[i]);
560                         delete pDataArray[i];
561                 }
562         
563         for (int i = 0; i < processIDS.size(); i++) {
564             m->appendFiles((goodFileName + toString(processIDS[i]) + ".temp"), goodFileName);
565             m->mothurRemove((goodFileName + toString(processIDS[i]) + ".temp"));
566             
567             m->appendFiles((badFileName + toString(processIDS[i]) + ".temp"), badFileName);
568             m->mothurRemove((badFileName + toString(processIDS[i]) + ".temp"));
569             
570             m->appendFiles((locationsFile+toString(processIDS[i]) + ".temp"), locationsFile);
571             m->mothurRemove((locationsFile+toString(processIDS[i]) + ".temp"));
572                 }
573         
574 #endif  
575         
576         
577
578         if (fileAligned && adjustNeeded) {
579             //find pend - pend is the biggest ending value, but we must account for when we adjust the start.  That adjustment may make the "new" end larger then the largest end. So lets find out what that "new" end will be.
580             ifstream inLocations;
581             m->openInputFile(locationsFile, inLocations);
582             
583             while(!inLocations.eof()) {
584                 
585                 if (m->control_pressed) { break; }
586                 
587                 string name = "";
588                 int thisStart = -1; int thisEnd = -1;
589                 if (numFPrimers != 0)    { inLocations >> name >> thisStart; m->gobble(inLocations); }
590                 if (numRPrimers != 0)  { inLocations >> name >> thisEnd;   m->gobble(inLocations); }
591                 else { pend = -1; break; }
592                 
593                 int myDiff = 0;
594                 if (pstart != -1) {
595                     if (thisStart != -1) {
596                         if (thisStart != pstart) { myDiff += (thisStart - pstart); }
597                     }
598                 }
599                 
600                 int myEnd = thisEnd + myDiff;
601                 //cout << name << '\t' << thisStart << '\t' << thisEnd << " diff = " << myDiff << '\t' << myEnd << endl;
602                 
603                 if (thisEnd != -1) {
604                     if (myEnd > pend) { pend = myEnd; }
605                 }
606                 
607             }
608             inLocations.close();
609             
610             adjustDots(goodFileName, locationsFile, pstart, pend);
611         }else { m->mothurRemove(locationsFile); }
612         
613         return num;
614         
615         }
616         catch(exception& e) {
617                 m->errorOut(e, "PcrSeqsCommand", "createProcesses");
618                 exit(1);
619         }
620 }
621
622 //**********************************************************************************************************************
623 int PcrSeqsCommand::driverPcr(string filename, string goodFasta, string badFasta, string locationsName, set<string>& badSeqNames, linePair filePos, int& pstart, bool& adjustNeeded){
624         try {
625                 ofstream goodFile;
626                 m->openOutputFile(goodFasta, goodFile);
627         
628         ofstream badFile;
629                 m->openOutputFile(badFasta, badFile);
630         
631         ofstream locationsFile;
632                 m->openOutputFile(locationsName, locationsFile);
633                 
634                 ifstream inFASTA;
635                 m->openInputFile(filename, inFASTA);
636         
637                 inFASTA.seekg(filePos.start);
638         
639                 bool done = false;
640                 int count = 0;
641         set<int> lengths;
642         set<int> locations; //locations[0] = beginning locations, 
643         
644         //pdiffs, bdiffs, primers, barcodes, revPrimers
645         map<string, int> primers;
646         map<string, int> barcodes; //not used
647         vector<string> revPrimer;
648         if (pairedOligos) {
649             map<int, oligosPair> primerPairs = oligos.getPairedPrimers();
650             for (map<int, oligosPair>::iterator it = primerPairs.begin(); it != primerPairs.end(); it++) {
651                 primers[(it->second).forward] = it->first;
652                 revPrimer.push_back((it->second).reverse);
653             }
654             if (pdiffs != 0) { m->mothurOut("[WARNING]: Pcr.seqs is only designed to allow diffs in forward primers. Reverse primers must be an exact match.\n"); }
655         }else{
656             primers = oligos.getPrimers();
657             revPrimer = oligos.getReversePrimers();
658         }
659         
660         TrimOligos trim(pdiffs, 0, primers, barcodes, revPrimer);
661         
662                 while (!done) {
663             
664                         if (m->control_pressed) {  break; }
665                         
666                         Sequence currSeq(inFASTA); m->gobble(inFASTA);
667             
668             if (fileAligned) { //assume aligned until proven otherwise
669                 lengths.insert(currSeq.getAligned().length());
670                 if (lengths.size() > 1) { fileAligned = false; }
671             }
672             
673             string trashCode = "";
674             string locationsString = "";
675             int thisPStart = -1;
676             int thisPEnd = -1;
677                         if (currSeq.getName() != "") {
678                 
679                 if (m->debug) { m->mothurOut("[DEBUG]: seq name = " + currSeq.getName() + ".\n"); } 
680                 
681                 bool goodSeq = true;
682                 if (oligosfile != "") {
683                     map<int, int> mapAligned;
684                     bool aligned = isAligned(currSeq.getAligned(), mapAligned);
685                     
686                     //process primers
687                     if (primers.size() != 0) {
688                         int primerStart = 0; int primerEnd = 0;
689                         bool good = trim.findForward(currSeq, primerStart, primerEnd);
690                         
691                         if(!good){      if (nomatch == "reject") { goodSeq = false; } trashCode += "f"; }
692                         else{
693                             //are you aligned
694                             if (aligned) { 
695                                 if (!keepprimer)    {
696                                     if (keepdots)   { currSeq.filterToPos(mapAligned[primerEnd-1]+1);   } //mapAligned[primerEnd-1] is the location of the last base in the primer. we want to trim to the space just after that.  The -1 & +1 ensures if the primer is followed by gaps they are not trimmed causing an aligned sequence dataset to become unaligned.
697                                     else            {
698                                         currSeq.setAligned(currSeq.getAligned().substr(mapAligned[primerEnd-1]+1));
699                                         if (fileAligned) {
700                                             thisPStart = mapAligned[primerEnd-1]+1; //locations[0].insert(mapAligned[primerEnd-1]+1);
701                                             locationsString += currSeq.getName() + "\t" + toString(mapAligned[primerEnd-1]+1) + "\n";
702                                         }
703                                     }
704                                 } 
705                                 else                {  
706                                     if (keepdots)   { currSeq.filterToPos(mapAligned[primerStart]);  }
707                                     else            {
708                                         currSeq.setAligned(currSeq.getAligned().substr(mapAligned[primerStart]));
709                                         if (fileAligned) {
710                                             thisPStart = mapAligned[primerStart]; //locations[0].insert(mapAligned[primerStart]);
711                                             locationsString += currSeq.getName() + "\t" + toString(mapAligned[primerStart]) + "\n";
712                                         }
713                                     }
714                                 }
715                                 isAligned(currSeq.getAligned(), mapAligned);
716                             }else { 
717                                 if (!keepprimer)    { currSeq.setAligned(currSeq.getUnaligned().substr(primerEnd)); } 
718                                 else                { currSeq.setAligned(currSeq.getUnaligned().substr(primerStart)); } 
719                             }
720                         }
721                     }
722                     
723                     //process reverse primers
724                     if (revPrimer.size() != 0) {
725                         int primerStart = 0; int primerEnd = 0;
726                         bool good = trim.findReverse(currSeq, primerStart, primerEnd);
727                         if(!good){      if (nomatch == "reject") { goodSeq = false; } trashCode += "r"; }
728                         else{
729                             //are you aligned
730                             if (aligned) { 
731                                 if (!keepprimer)    {  
732                                     if (keepdots)   { currSeq.filterFromPos(mapAligned[primerStart]); }
733                                     else            {
734                                         currSeq.setAligned(currSeq.getAligned().substr(0, mapAligned[primerStart]));
735                                         if (fileAligned) {
736                                             thisPEnd = mapAligned[primerStart]; //locations[1].insert(mapAligned[primerStart]);
737                                             locationsString += currSeq.getName() + "\t" + toString(mapAligned[primerStart]) + "\n";
738                                         }
739                                     }
740                                 } 
741                                 else                {  
742                                     if (keepdots)   { currSeq.filterFromPos(mapAligned[primerEnd-1]+1); }
743                                     else            {
744                                         currSeq.setAligned(currSeq.getAligned().substr(0, mapAligned[primerEnd-1]+1));
745                                         if (fileAligned) {
746                                             thisPEnd = mapAligned[primerEnd-1]+1; //locations[1].insert(mapAligned[primerEnd-1]+1);
747                                             locationsString += currSeq.getName() + "\t" + toString(mapAligned[primerEnd-1]+1) + "\n";
748                                         }
749                                     }
750                                 } 
751                             }
752                             else { 
753                                 if (!keepprimer)    { currSeq.setAligned(currSeq.getUnaligned().substr(0, primerStart));   } 
754                                 else                { currSeq.setAligned(currSeq.getUnaligned().substr(0, primerEnd));     }
755                             }
756                         }
757                     }
758                 }else if (ecolifile != "") {
759                     //make sure the seqs are aligned
760                     if (!fileAligned) { m->mothurOut("[ERROR]: seqs are not aligned. When using start and end your sequences must be aligned.\n"); m->control_pressed = true; break; }
761                     else if (currSeq.getAligned().length() != length) {
762                         m->mothurOut("[ERROR]: seqs are not the same length as ecoli seq. When using ecoli option your sequences must be aligned and the same length as the ecoli sequence.\n"); m->control_pressed = true; break; 
763                     }else {
764                         if (keepdots)   { 
765                             currSeq.filterToPos(start); 
766                             currSeq.filterFromPos(end);
767                         }else {
768                             string seqString = currSeq.getAligned().substr(0, end);
769                             seqString = seqString.substr(start);
770                             currSeq.setAligned(seqString); 
771                         }
772                     }
773                 }else{ //using start and end to trim
774                     //make sure the seqs are aligned
775                     if (!fileAligned) { m->mothurOut("[ERROR]: seqs are not aligned. When using start and end your sequences must be aligned.\n"); m->control_pressed = true; break; }
776                     else {
777                         if (end != -1) {
778                             if (end > currSeq.getAligned().length()) {  m->mothurOut("[ERROR]: end is longer than your sequence length, aborting.\n"); m->control_pressed = true; break; }
779                             else {
780                                 if (keepdots)   { currSeq.filterFromPos(end); }
781                                 else {
782                                     string seqString = currSeq.getAligned().substr(0, end);
783                                     currSeq.setAligned(seqString); 
784                                 }
785                             }
786                         }
787                         if (start != -1) { 
788                             if (keepdots)   {  currSeq.filterToPos(start);  }
789                             else {
790                                 string seqString = currSeq.getAligned().substr(start);
791                                 currSeq.setAligned(seqString); 
792                             }
793                         }
794                     }
795                 }
796                 
797                 //trimming removed all bases
798                 if (currSeq.getUnaligned() == "") { goodSeq = false; }
799                 
800                                 if(goodSeq == 1)    {
801                     currSeq.printSequence(goodFile);
802                     if (m->debug) { m->mothurOut("[DEBUG]: " + locationsString + "\n"); }
803                     if (thisPStart != -1)   { locations.insert(thisPStart);  }
804                     if (locationsString != "") { locationsFile << locationsString; }
805                 }
806                                 else {  
807                     badSeqNames.insert(currSeq.getName()); 
808                     currSeq.setName(currSeq.getName() + '|' + trashCode);
809                     currSeq.printSequence(badFile); 
810                 }
811                 count++;
812                         }
813                         
814 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
815             unsigned long long pos = inFASTA.tellg();
816             if ((pos == -1) || (pos >= filePos.end)) { break; }
817 #else
818             if (inFASTA.eof()) { break; }
819 #endif
820                         
821                         //report progress
822                         if((count) % 100 == 0){ m->mothurOutJustToScreen("Processing sequence: " + toString(count)+"\n");               }
823                 }
824                 //report progress
825                 if((count) % 100 != 0){ m->mothurOutJustToScreen("Processing sequence: " + toString(count)+"\n");       }
826                 
827         badFile.close();
828                 goodFile.close();
829                 inFASTA.close();
830         locationsFile.close();
831         
832         if (m->debug) { m->mothurOut("[DEBUG]: fileAligned = " + toString(fileAligned) +'\n'); }
833     
834         if (fileAligned && !keepdots) { //print out smallest start value and largest end value
835             if (locations.size() > 1) { adjustNeeded = true; }
836             if (primers.size() != 0)    {   set<int>::iterator it = locations.begin();  pstart = *it;  }
837         }
838
839                 return count;
840         }
841         catch(exception& e) {
842                 m->errorOut(e, "PcrSeqsCommand", "driverPcr");
843                 exit(1);
844         }
845 }
846 //********************************************************************/
847 bool PcrSeqsCommand::isAligned(string seq, map<int, int>& aligned){
848         try {
849         aligned.clear();
850         bool isAligned = false;
851         
852         int countBases = 0;
853         for (int i = 0; i < seq.length(); i++) {
854             if (!isalpha(seq[i])) { isAligned = true; }
855             else { aligned[countBases] = i; countBases++; } //maps location in unaligned -> location in aligned.
856         }                                                   //ie. the 3rd base may be at spot 10 in the alignment
857                                                             //later when we trim we want to trim from spot 10.
858         return isAligned;
859     }
860         catch(exception& e) {
861                 m->errorOut(e, "PcrSeqsCommand", "isAligned");
862                 exit(1);
863         }
864 }
865 //**********************************************************************************************************************
866 int PcrSeqsCommand::adjustDots(string goodFasta, string locations, int pstart, int pend){
867     try {
868         ifstream inFasta;
869         m->openInputFile(goodFasta, inFasta);
870         
871         ifstream inLocations;
872         m->openInputFile(locations, inLocations);
873         
874         ofstream out;
875         m->openOutputFile(goodFasta+".temp", out);
876         
877         set<int> lengths;
878         //cout << pstart << '\t' << pend << endl;
879         //if (pstart > pend) { //swap them
880         
881         while(!inFasta.eof()) {
882             if(m->control_pressed) { break; }
883             
884             Sequence seq(inFasta); m->gobble(inFasta);
885             
886             string name = "";
887             int thisStart = -1; int thisEnd = -1;
888             if (numFPrimers != 0)    { inLocations >> name >> thisStart; m->gobble(inLocations); }
889             if (numRPrimers != 0)  { inLocations >> name >> thisEnd;   m->gobble(inLocations); }
890             
891             
892             //cout << seq.getName() << '\t' << thisStart << '\t' << thisEnd << '\t' << seq.getAligned().length() << endl;
893             //cout << seq.getName() << '\t' << pstart << '\t' << pend << endl;
894             
895             if (name != seq.getName()) { m->mothurOut("[ERROR]: name mismatch in pcr.seqs.\n"); }
896             else {
897                 if (pstart != -1) {
898                     if (thisStart != -1) {
899                         if (thisStart != pstart) {
900                             string dots = "";
901                             for (int i = pstart; i < thisStart; i++) { dots += "."; }
902                             thisEnd += dots.length();
903                             dots += seq.getAligned();
904                             seq.setAligned(dots);
905                         }
906                     }
907                 }
908                 
909                 if (pend != -1) {
910                     if (thisEnd != -1) {
911                         if (thisEnd != pend) {
912                             string dots = seq.getAligned();
913                             for (int i = thisEnd; i < pend; i++) { dots += "."; }
914                             seq.setAligned(dots);
915                         }
916                     }
917                 }
918                 lengths.insert(seq.getAligned().length());
919             }
920             
921             seq.printSequence(out);
922         }
923         inFasta.close();
924         inLocations.close();
925         out.close();
926         m->mothurRemove(locations);
927         m->mothurRemove(goodFasta);
928         m->renameFile(goodFasta+".temp", goodFasta);
929         
930         //cout << "final lengths = \n";
931         //for (set<int>::iterator it = lengths.begin(); it != lengths.end(); it++) {
932            //cout << *it << endl;
933            // cout << lengths.count(*it) << endl;
934        // }
935         
936         return 0;
937     }
938     catch(exception& e) {
939         m->errorOut(e, "PcrSeqsCommand", "adjustDots");
940         exit(1);
941     }
942 }
943 //***************************************************************************************************************
944 bool PcrSeqsCommand::readEcoli(){
945         try {
946                 ifstream in;
947                 m->openInputFile(ecolifile, in);
948                 
949         //read seq
950         if (!in.eof()){ 
951             Sequence ecoli(in); 
952             length = ecoli.getAligned().length();
953             start = ecoli.getStartPos();
954             end = ecoli.getEndPos();
955         }else { in.close(); m->control_pressed = true; return false; }
956         in.close();    
957                         
958         return true;
959     }
960         catch(exception& e) {
961                 m->errorOut(e, "PcrSeqsCommand", "readEcoli");
962                 exit(1);
963         }
964     
965 }
966 //***************************************************************************************************************
967 int PcrSeqsCommand::writeAccnos(set<string> badNames){
968         try {
969                 string thisOutputDir = outputDir;
970                 if (outputDir == "") {  thisOutputDir += m->hasPath(fastafile);  }
971         map<string, string> variables; 
972                 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(fastafile));
973                 string outputFileName = getOutputFileName("accnos",variables);
974         outputNames.push_back(outputFileName); outputTypes["accnos"].push_back(outputFileName);
975         
976         ofstream out;
977         m->openOutputFile(outputFileName, out);
978         
979         for (set<string>::iterator it = badNames.begin(); it != badNames.end(); it++) {
980             if (m->control_pressed) { break; }
981             out << (*it) << endl;
982         }
983         
984         out.close();
985         return 0;
986     }
987         catch(exception& e) {
988                 m->errorOut(e, "PcrSeqsCommand", "writeAccnos");
989                 exit(1);
990         }
991     
992 }
993 //***************************************************************************************************************
994 int PcrSeqsCommand::readName(set<string>& names){
995         try {
996                 string thisOutputDir = outputDir;
997                 if (outputDir == "") {  thisOutputDir += m->hasPath(namefile);  }
998                 map<string, string> variables; 
999                 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(namefile));
1000         variables["[extension]"] = m->getExtension(namefile);
1001                 string outputFileName = getOutputFileName("name", variables);
1002         
1003                 ofstream out;
1004                 m->openOutputFile(outputFileName, out);
1005         
1006                 ifstream in;
1007                 m->openInputFile(namefile, in);
1008                 string name, firstCol, secondCol;
1009                 
1010                 bool wroteSomething = false;
1011                 int removedCount = 0;
1012                 
1013                 while(!in.eof()){
1014                         if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
1015                         
1016                         in >> firstCol;         m->gobble(in);          
1017                         in >> secondCol;                        
1018                         
1019             string savedSecond = secondCol;
1020                         vector<string> parsedNames;
1021                         m->splitAtComma(secondCol, parsedNames);
1022                         
1023                         vector<string> validSecond;  validSecond.clear();
1024                         for (int i = 0; i < parsedNames.size(); i++) {
1025                                 if (names.count(parsedNames[i]) == 0) {
1026                                         validSecond.push_back(parsedNames[i]);
1027                                 }
1028                         }
1029                         
1030                         if (validSecond.size() != parsedNames.size()) {  //we want to get rid of someone, so get rid of everyone
1031                                 for (int i = 0; i < parsedNames.size(); i++) {  names.insert(parsedNames[i]);  }
1032                                 removedCount += parsedNames.size();
1033                         }else {
1034                 out << firstCol << '\t' << savedSecond << endl;
1035                 wroteSomething = true;
1036             }
1037                         m->gobble(in);
1038                 }
1039                 in.close();
1040                 out.close();
1041                 
1042                 if (wroteSomething == false) {  m->mothurOut("Your file contains only sequences from the .accnos file."); m->mothurOutEndLine();  }
1043                 outputTypes["name"].push_back(outputFileName); outputNames.push_back(outputFileName);
1044                 
1045                 m->mothurOut("Removed " + toString(removedCount) + " sequences from your name file."); m->mothurOutEndLine();
1046                 
1047                 return 0;
1048         }
1049         catch(exception& e) {
1050                 m->errorOut(e, "PcrSeqsCommand", "readName");
1051                 exit(1);
1052         }
1053 }
1054 //**********************************************************************************************************************
1055 int PcrSeqsCommand::readGroup(set<string> names){
1056         try {
1057                 string thisOutputDir = outputDir;
1058                 if (outputDir == "") {  thisOutputDir += m->hasPath(groupfile);  }
1059                 map<string, string> variables; 
1060                 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(groupfile));
1061         variables["[extension]"] = m->getExtension(groupfile);
1062                 string outputFileName = getOutputFileName("group", variables);
1063                 
1064                 ofstream out;
1065                 m->openOutputFile(outputFileName, out);
1066         
1067                 ifstream in;
1068                 m->openInputFile(groupfile, in);
1069                 string name, group;
1070                 
1071                 bool wroteSomething = false;
1072                 int removedCount = 0;
1073                 
1074                 while(!in.eof()){
1075                         if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
1076                         
1077                         in >> name;                             //read from first column
1078                         in >> group;                    //read from second column
1079                         
1080                         //if this name is in the accnos file
1081                         if (names.count(name) == 0) {
1082                                 wroteSomething = true;
1083                                 out << name << '\t' << group << endl;
1084                         }else {  removedCount++;  }
1085             
1086                         m->gobble(in);
1087                 }
1088                 in.close();
1089                 out.close();
1090                 
1091                 if (wroteSomething == false) {  m->mothurOut("Your file contains only sequences from the .accnos file."); m->mothurOutEndLine();  }
1092                 outputTypes["group"].push_back(outputFileName); outputNames.push_back(outputFileName);
1093                 
1094                 m->mothurOut("Removed " + toString(removedCount) + " sequences from your group file."); m->mothurOutEndLine();
1095         
1096                 
1097                 return 0;
1098         }
1099         catch(exception& e) {
1100                 m->errorOut(e, "PcrSeqsCommand", "readGroup");
1101                 exit(1);
1102         }
1103 }
1104 //**********************************************************************************************************************
1105 int PcrSeqsCommand::readTax(set<string> names){
1106         try {
1107                 string thisOutputDir = outputDir;
1108                 if (outputDir == "") {  thisOutputDir += m->hasPath(taxfile);  }
1109                 map<string, string> variables; 
1110                 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(taxfile));
1111         variables["[extension]"] = m->getExtension(taxfile);
1112                 string outputFileName = getOutputFileName("taxonomy", variables);
1113
1114                 ofstream out;
1115                 m->openOutputFile(outputFileName, out);
1116         
1117                 ifstream in;
1118                 m->openInputFile(taxfile, in);
1119                 string name, tax;
1120                 
1121                 bool wroteSomething = false;
1122                 int removedCount = 0;
1123                 
1124                 while(!in.eof()){
1125                         if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
1126                         
1127                         in >> name;                             //read from first column
1128                         in >> tax;                      //read from second column
1129                         
1130                         //if this name is in the accnos file
1131                         if (names.count(name) == 0) {
1132                                 wroteSomething = true;
1133                                 out << name << '\t' << tax << endl;
1134                         }else {  removedCount++;  }
1135             
1136                         m->gobble(in);
1137                 }
1138                 in.close();
1139                 out.close();
1140                 
1141                 if (wroteSomething == false) {  m->mothurOut("Your file contains only sequences from the .accnos file."); m->mothurOutEndLine();  }
1142                 outputTypes["taxonomy"].push_back(outputFileName); outputNames.push_back(outputFileName);
1143                 
1144                 m->mothurOut("Removed " + toString(removedCount) + " sequences from your taxonomy file."); m->mothurOutEndLine();
1145                 
1146                 return 0;
1147         }
1148         catch(exception& e) {
1149                 m->errorOut(e, "PcrSeqsCommand", "readTax");
1150                 exit(1);
1151         }
1152 }
1153 //***************************************************************************************************************
1154 int PcrSeqsCommand::readCount(set<string> badSeqNames){
1155         try {
1156                 ifstream in;
1157                 m->openInputFile(countfile, in);
1158                 set<string>::iterator it;
1159                 
1160                 map<string, string> variables; 
1161                 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(countfile));
1162         variables["[extension]"] = m->getExtension(countfile);
1163                 string goodCountFile = getOutputFileName("count", variables);
1164
1165         outputNames.push_back(goodCountFile);  outputTypes["count"].push_back(goodCountFile);
1166                 ofstream goodCountOut;  m->openOutputFile(goodCountFile, goodCountOut);
1167                 
1168         string headers = m->getline(in); m->gobble(in);
1169         goodCountOut << headers << endl;
1170         
1171         string name, rest; int thisTotal, removedCount; removedCount = 0;
1172         bool wroteSomething = false;
1173         while (!in.eof()) {
1174             
1175                         if (m->control_pressed) { goodCountOut.close(); in.close(); m->mothurRemove(goodCountFile); return 0; }
1176             
1177                         in >> name; m->gobble(in); 
1178             in >> thisTotal; m->gobble(in);
1179             rest = m->getline(in); m->gobble(in);
1180             
1181                         if (badSeqNames.count(name) != 0) { removedCount+=thisTotal; }
1182                         else{
1183                 wroteSomething = true;
1184                                 goodCountOut << name << '\t' << thisTotal << '\t' << rest << endl;
1185                         }
1186                 }
1187                 in.close();
1188                 goodCountOut.close();
1189         
1190         if (m->control_pressed) { m->mothurRemove(goodCountFile);   }
1191         
1192         if (wroteSomething == false) {  m->mothurOut("Your count file contains only sequences from the .accnos file."); m->mothurOutEndLine(); }
1193         
1194         //check for groups that have been eliminated
1195         CountTable ct;
1196         if (ct.testGroups(goodCountFile)) {
1197             ct.readTable(goodCountFile, true, false);
1198             ct.printTable(goodCountFile);
1199         }
1200                 
1201                 if (m->control_pressed) { m->mothurRemove(goodCountFile);   }
1202         
1203         m->mothurOut("Removed " + toString(removedCount) + " sequences from your count file."); m->mothurOutEndLine();
1204
1205                 
1206                 return 0;
1207         
1208         }
1209         catch(exception& e) {
1210                 m->errorOut(e, "PcrSeqsCommand", "readCOunt");
1211                 exit(1);
1212         }
1213 }
1214 //***************************************************************************************************************
1215
1216 int PcrSeqsCommand::readOligos(){
1217         try {
1218         oligos.read(oligosfile);
1219         
1220         if (m->control_pressed) { return false; } //error in reading oligos
1221         
1222         if (oligos.hasPairedBarcodes()) {
1223             pairedOligos = true;
1224             numFPrimers = oligos.getPairedPrimers().size();
1225         }else {
1226             pairedOligos = false;
1227             numFPrimers = oligos.getPrimers().size();
1228         }
1229         numRPrimers = oligos.getReversePrimers().size();
1230         
1231         if (oligos.getLinkers().size() != 0) { m->mothurOut("[WARNING]: pcr.seqs is not setup to remove linkers, ignoring.\n"); }
1232         if (oligos.getSpacers().size() != 0) { m->mothurOut("[WARNING]: pcr.seqs is not setup to remove spacers, ignoring.\n"); }
1233
1234         return true;
1235                 
1236         }
1237         catch(exception& e) {
1238                 m->errorOut(e, "PcrSeqsCommand", "readOligos");
1239                 exit(1);
1240         }
1241 }
1242
1243 /**************************************************************************************/
1244
1245