]> git.donarmstrong.com Git - mothur.git/blob - prcseqscommand.cpp
Merge remote-tracking branch 'mothur/master'
[mothur.git] / prcseqscommand.cpp
1 //
2 //  prcseqscommand.cpp
3 //  Mothur
4 //
5 //  Created by Sarah Westcott on 3/14/12.
6 //  Copyright (c) 2012 Schloss Lab. All rights reserved.
7 //
8
9 #include "pcrseqscommand.h"
10
11 //**********************************************************************************************************************
12 vector<string> PcrSeqsCommand::setParameters(){ 
13         try {
14                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","fasta",false,true,true); parameters.push_back(pfasta);
15                 CommandParameter poligos("oligos", "InputTypes", "", "", "ecolioligos", "none", "none","",false,false,true); parameters.push_back(poligos);
16         CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none","name",false,false,true); parameters.push_back(pname);
17         CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none","count",false,false,true); parameters.push_back(pcount);
18                 CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none","group",false,false,true); parameters.push_back(pgroup);
19         CommandParameter ptax("taxonomy", "InputTypes", "", "", "none", "none", "none","taxonomy",false,false,true); parameters.push_back(ptax);
20         CommandParameter pecoli("ecoli", "InputTypes", "", "", "ecolioligos", "none", "none","",false,false); parameters.push_back(pecoli);
21                 CommandParameter pstart("start", "Number", "", "-1", "", "", "","",false,false); parameters.push_back(pstart);
22                 CommandParameter pend("end", "Number", "", "-1", "", "", "","",false,false); parameters.push_back(pend);
23                 CommandParameter pnomatch("nomatch", "Multiple", "reject-keep", "reject", "", "", "","",false,false); parameters.push_back(pnomatch);
24         CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "","",false,false,true); parameters.push_back(ppdiffs);
25
26                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
27                 CommandParameter pkeepprimer("keepprimer", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pkeepprimer);
28         CommandParameter pkeepdots("keepdots", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pkeepdots);
29         CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
30                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
31         
32                 vector<string> myArray;
33                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
34                 return myArray;
35         }
36         catch(exception& e) {
37                 m->errorOut(e, "PcrSeqsCommand", "setParameters");
38                 exit(1);
39         }
40 }
41 //**********************************************************************************************************************
42 string PcrSeqsCommand::getHelpString(){ 
43         try {
44                 string helpString = "";
45                 helpString += "The pcr.seqs command reads a fasta file.\n";
46         helpString += "The pcr.seqs command parameters are fasta, oligos, name, group, count, taxonomy, ecoli, start, end, nomatch, pdiffs, processors, keepprimer and keepdots.\n";
47                 helpString += "The ecoli parameter is used to provide a fasta file containing a single reference sequence (e.g. for e. coli) this must be aligned. Mothur will trim to the start and end positions of the reference sequence.\n";
48         helpString += "The start parameter allows you to provide a starting position to trim to.\n";
49         helpString += "The end parameter allows you to provide a ending position to trim from.\n";
50         helpString += "The nomatch parameter allows you to decide what to do with sequences where the primer is not found. Default=reject, meaning remove from fasta file.  if nomatch=true, then do nothing to sequence.\n";
51         helpString += "The processors parameter allows you to use multiple processors.\n";
52         helpString += "The keepprimer parameter allows you to keep the primer, default=false.\n";
53         helpString += "The keepdots parameter allows you to keep the leading and trailing .'s, default=true.\n";
54         helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";
55                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
56                 helpString += "For more details please check out the wiki http://www.mothur.org/wiki/Pcr.seqs .\n";
57                 return helpString;
58         }
59         catch(exception& e) {
60                 m->errorOut(e, "PcrSeqsCommand", "getHelpString");
61                 exit(1);
62         }
63 }
64 //**********************************************************************************************************************
65 string PcrSeqsCommand::getOutputPattern(string type) {
66     try {
67         string pattern = "";
68         
69         if (type == "fasta")            {   pattern = "[filename],pcr,[extension]-[filename],[tag],pcr,[extension]";    }
70         else if (type == "taxonomy")    {   pattern = "[filename],pcr,[extension]";    }
71         else if (type == "name")        {   pattern = "[filename],pcr,[extension]";    }
72         else if (type == "group")       {   pattern = "[filename],pcr,[extension]";    }
73         else if (type == "count")       {   pattern = "[filename],pcr,[extension]";    }
74         else if (type == "accnos")      {   pattern = "[filename],bad.accnos";    }
75         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
76         
77         return pattern;
78     }
79     catch(exception& e) {
80         m->errorOut(e, "PcrSeqsCommand", "getOutputPattern");
81         exit(1);
82     }
83 }
84 //**********************************************************************************************************************
85
86 PcrSeqsCommand::PcrSeqsCommand(){       
87         try {
88                 abort = true; calledHelp = true; 
89                 setParameters();
90                 vector<string> tempOutNames;
91                 outputTypes["fasta"] = tempOutNames;
92                 outputTypes["taxonomy"] = tempOutNames;
93                 outputTypes["group"] = tempOutNames;
94                 outputTypes["name"] = tempOutNames;
95         outputTypes["count"] = tempOutNames;
96         outputTypes["accnos"] = tempOutNames;
97         }
98         catch(exception& e) {
99                 m->errorOut(e, "PcrSeqsCommand", "PcrSeqsCommand");
100                 exit(1);
101         }
102 }
103 //***************************************************************************************************************
104
105 PcrSeqsCommand::PcrSeqsCommand(string option)  {
106         try {
107                 
108                 abort = false; calledHelp = false;   
109                 
110                 //allow user to run help
111                 if(option == "help") { help(); abort = true; calledHelp = true; }
112                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
113                 
114                 else {
115                         vector<string> myArray = setParameters();
116                         
117                         OptionParser parser(option);
118                         map<string,string> parameters = parser.getParameters();
119                         
120                         ValidParameters validParameter;
121                         map<string,string>::iterator it;
122                         
123                         //check to make sure all parameters are valid for command
124                         for (it = parameters.begin(); it != parameters.end(); it++) { 
125                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
126                         }
127                         
128                         //initialize outputTypes
129                         vector<string> tempOutNames;
130                         outputTypes["fasta"] = tempOutNames;
131                         outputTypes["taxonomy"] = tempOutNames;
132                         outputTypes["group"] = tempOutNames;
133                         outputTypes["name"] = tempOutNames;
134             outputTypes["accnos"] = tempOutNames;
135             outputTypes["count"] = tempOutNames;
136                         
137                         //if the user changes the input directory command factory will send this info to us in the output parameter 
138                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
139                         if (inputDir == "not found"){   inputDir = "";          }
140                         else {
141                                 string path;
142                                 it = parameters.find("fasta");
143                                 //user has given a template file
144                                 if(it != parameters.end()){ 
145                                         path = m->hasPath(it->second);
146                                         //if the user has not given a path then, add inputdir. else leave path alone.
147                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
148                                 }
149                                 
150                                 it = parameters.find("oligos");
151                                 //user has given a template file
152                                 if(it != parameters.end()){ 
153                                         path = m->hasPath(it->second);
154                                         //if the user has not given a path then, add inputdir. else leave path alone.
155                                         if (path == "") {       parameters["oligos"] = inputDir + it->second;           }
156                                 }
157                 
158                 it = parameters.find("ecoli");
159                                 //user has given a template file
160                                 if(it != parameters.end()){ 
161                                         path = m->hasPath(it->second);
162                                         //if the user has not given a path then, add inputdir. else leave path alone.
163                                         if (path == "") {       parameters["ecoli"] = inputDir + it->second;            }
164                                 }
165                                 
166                                 it = parameters.find("taxonomy");
167                                 //user has given a template file
168                                 if(it != parameters.end()){ 
169                                         path = m->hasPath(it->second);
170                                         //if the user has not given a path then, add inputdir. else leave path alone.
171                                         if (path == "") {       parameters["taxonomy"] = inputDir + it->second;         }
172                                 }
173                                 
174                                 it = parameters.find("name");
175                                 //user has given a template file
176                                 if(it != parameters.end()){ 
177                                         path = m->hasPath(it->second);
178                                         //if the user has not given a path then, add inputdir. else leave path alone.
179                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
180                                 }
181                 
182                 it = parameters.find("group");
183                                 //user has given a template file
184                                 if(it != parameters.end()){ 
185                                         path = m->hasPath(it->second);
186                                         //if the user has not given a path then, add inputdir. else leave path alone.
187                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
188                                 }
189                 
190                 it = parameters.find("count");
191                                 //user has given a template file
192                                 if(it != parameters.end()){ 
193                                         path = m->hasPath(it->second);
194                                         //if the user has not given a path then, add inputdir. else leave path alone.
195                                         if (path == "") {       parameters["count"] = inputDir + it->second;            }
196                                 }
197                                 
198                         }
199             
200                         
201                         //check for required parameters
202                         fastafile = validParameter.validFile(parameters, "fasta", true);
203                         if (fastafile == "not found") {                                 
204                                 fastafile = m->getFastaFile(); 
205                                 if (fastafile != "") { m->mothurOut("Using " + fastafile + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
206                                 else {  m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
207                         }else if (fastafile == "not open") { fastafile = ""; abort = true; }    
208                         else { m->setFastaFile(fastafile); }
209                         
210             //if the user changes the output directory command factory will send this info to us in the output parameter 
211                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = m->hasPath(fastafile);      }
212
213                         //check for optional parameter and set defaults
214                         // ...at some point should added some additional type checking...
215                         string temp;
216                         temp = validParameter.validFile(parameters, "keepprimer", false);  if (temp == "not found")    {        temp = "f";     }
217                         keepprimer = m->isTrue(temp);   
218             
219             temp = validParameter.validFile(parameters, "keepdots", false);  if (temp == "not found")    {      temp = "t";     }
220                         keepdots = m->isTrue(temp);     
221             
222                         temp = validParameter.validFile(parameters, "oligos", true);
223                         if (temp == "not found"){       oligosfile = "";                }
224                         else if(temp == "not open"){    oligosfile = ""; abort = true;  } 
225                         else                                    {       oligosfile = temp; m->setOligosFile(oligosfile);                }
226                         
227             ecolifile = validParameter.validFile(parameters, "ecoli", true);
228                         if (ecolifile == "not found"){  ecolifile = "";         }
229                         else if(ecolifile == "not open"){       ecolifile = ""; abort = true;   } 
230                         
231             namefile = validParameter.validFile(parameters, "name", true);
232                         if (namefile == "not found"){   namefile = "";          }
233                         else if(namefile == "not open"){        namefile = ""; abort = true;    } 
234             else { m->setNameFile(namefile); }
235             
236             groupfile = validParameter.validFile(parameters, "group", true);
237                         if (groupfile == "not found"){  groupfile = "";         }
238                         else if(groupfile == "not open"){       groupfile = ""; abort = true;   } 
239             else { m->setGroupFile(groupfile); }
240             
241             countfile = validParameter.validFile(parameters, "count", true);
242                         if (countfile == "not open") { countfile = ""; abort = true; }
243                         else if (countfile == "not found") { countfile = "";  } 
244                         else { m->setCountTableFile(countfile); }
245             
246             if ((namefile != "") && (countfile != "")) {
247                 m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
248             }
249                         
250             if ((groupfile != "") && (countfile != "")) {
251                 m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
252             }
253             
254             taxfile = validParameter.validFile(parameters, "taxonomy", true);
255                         if (taxfile == "not found"){    taxfile = "";           }
256                         else if(taxfile == "not open"){ taxfile = ""; abort = true;     } 
257             else { m->setTaxonomyFile(taxfile); }
258                                                 
259                         temp = validParameter.validFile(parameters, "start", false);    if (temp == "not found") { temp = "-1"; }
260                         m->mothurConvert(temp, start);
261             
262             temp = validParameter.validFile(parameters, "end", false);  if (temp == "not found") { temp = "-1"; }
263                         m->mothurConvert(temp, end);
264                         
265                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
266                         m->setProcessors(temp);
267                         m->mothurConvert(temp, processors); 
268             
269             temp = validParameter.validFile(parameters, "pdiffs", false);               if (temp == "not found") { temp = "0"; }
270                         m->mothurConvert(temp, pdiffs);
271                         
272             nomatch = validParameter.validFile(parameters, "nomatch", false);   if (nomatch == "not found") { nomatch = "reject"; }
273                         
274             if ((nomatch != "reject") && (nomatch != "keep")) { m->mothurOut("[ERROR]: " + nomatch + " is not a valid entry for nomatch. Choices are reject and keep.\n");  abort = true; }
275             
276             //didnt set anything
277                         if ((oligosfile == "") && (ecolifile == "") && (start == -1) && (end == -1)) {
278                 m->mothurOut("[ERROR]: You did not set any options. Please provide an oligos or ecoli file, or set start or end.\n"); abort = true;
279             }
280             
281             if ((oligosfile == "") && (ecolifile == "") && (start < 0) && (end == -1)) { m->mothurOut("[ERROR]: Invalid start value.\n"); abort = true; }
282             
283             if ((ecolifile != "") && (start != -1) && (end != -1)) {
284                 m->mothurOut("[ERROR]: You provided an ecoli file , but set the start or end parameters. Unsure what you intend.  When you provide the ecoli file, mothur thinks you want to use the start and end of the sequence in the ecoli file.\n"); abort = true;
285             }
286
287             
288             if ((oligosfile != "") && (ecolifile != "")) {
289                  m->mothurOut("[ERROR]: You can not use an ecoli file at the same time as an oligos file.\n"); abort = true;
290             }
291                         
292                         //check to make sure you didn't forget the name file by mistake                 
293                         if (countfile == "") { 
294                 if (namefile == "") {
295                     vector<string> files; files.push_back(fastafile);
296                     parser.getNameFile(files);
297                 }
298             }
299                 }
300         
301         }
302         catch(exception& e) {
303                 m->errorOut(e, "PcrSeqsCommand", "PcrSeqsCommand");
304                 exit(1);
305         }
306 }
307 //***************************************************************************************************************
308
309 int PcrSeqsCommand::execute(){
310         try{
311         
312                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
313                 
314         int start = time(NULL);
315         fileAligned = true;
316         
317         string thisOutputDir = outputDir;
318                 if (outputDir == "") {  thisOutputDir += m->hasPath(fastafile);  }
319         map<string, string> variables; 
320         variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(fastafile));
321         variables["[extension]"] = m->getExtension(fastafile);
322                 string trimSeqFile = getOutputFileName("fasta",variables);
323                 outputNames.push_back(trimSeqFile); outputTypes["fasta"].push_back(trimSeqFile);
324         variables["[tag]"] = "scrap";
325         string badSeqFile = getOutputFileName("fasta",variables);
326                 
327                 
328         length = 0;
329                 if(oligosfile != ""){    readOligos();     if (m->debug) { m->mothurOut("[DEBUG]: read oligos file. numprimers = " + toString(primers.size()) + ", revprimers = " + toString(revPrimer.size()) + ".\n"); } }  if (m->control_pressed) {  return 0; }
330         if(ecolifile != "") {    readEcoli();      }  if (m->control_pressed) {  return 0; }
331         
332         vector<unsigned long long> positions; 
333         int numFastaSeqs = 0;
334 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
335         positions = m->divideFile(fastafile, processors);
336         for (int i = 0; i < (positions.size()-1); i++) {        lines.push_back(linePair(positions[i], positions[(i+1)]));      }
337 #else
338         if (processors == 1) {
339             lines.push_back(linePair(0, 1000));
340         }else {
341             positions = m->setFilePosFasta(fastafile, numFastaSeqs); 
342             if (positions.size() < processors) { processors = positions.size(); }
343             
344             //figure out how many sequences you have to process
345             int numSeqsPerProcessor = numFastaSeqs / processors;
346             for (int i = 0; i < processors; i++) {
347                 int startIndex =  i * numSeqsPerProcessor;
348                 if(i == (processors - 1)){      numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor;   }
349                 lines.push_back(linePair(positions[startIndex], numSeqsPerProcessor));
350             }
351         }
352 #endif
353         if (m->control_pressed) {  return 0; }
354
355         set<string> badNames;
356         numFastaSeqs = createProcesses(fastafile, trimSeqFile, badSeqFile, badNames);   
357                 
358                 if (m->control_pressed) {  return 0; }          
359         
360         //don't write or keep if blank
361         if (badNames.size() != 0)   { writeAccnos(badNames);        }   
362         if (m->isBlank(badSeqFile)) { m->mothurRemove(badSeqFile);  }
363         else { outputNames.push_back(badSeqFile); outputTypes["fasta"].push_back(badSeqFile); }
364         
365         if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]); } return 0; }
366         if (namefile != "")                     {               readName(badNames);             }   
367         if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]); } return 0; }
368         if (groupfile != "")            {               readGroup(badNames);    }
369         if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]); } return 0; }
370                 if (taxfile != "")                      {               readTax(badNames);              }
371                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]); } return 0; }
372                 if (countfile != "")                    {               readCount(badNames);            }
373                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]); } return 0; }
374       
375         m->mothurOutEndLine();
376                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
377                 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
378                 m->mothurOutEndLine();
379                 m->mothurOutEndLine();
380                 
381                 //set fasta file as new current fastafile
382                 string current = "";
383                 itTypes = outputTypes.find("fasta");
384                 if (itTypes != outputTypes.end()) {
385                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
386                 }
387                 
388                 itTypes = outputTypes.find("name");
389                 if (itTypes != outputTypes.end()) {
390                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
391                 }
392                 
393                 itTypes = outputTypes.find("group");
394                 if (itTypes != outputTypes.end()) {
395                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
396                 }
397                 
398                 itTypes = outputTypes.find("accnos");
399                 if (itTypes != outputTypes.end()) {
400                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); }
401                 }
402                 
403                 itTypes = outputTypes.find("taxonomy");
404                 if (itTypes != outputTypes.end()) {
405                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setTaxonomyFile(current); }
406                 }
407         
408         itTypes = outputTypes.find("count");
409                 if (itTypes != outputTypes.end()) {
410                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
411                 }
412         
413                 m->mothurOut("It took " + toString(time(NULL) - start) + " secs to screen " + toString(numFastaSeqs) + " sequences.");
414                 m->mothurOutEndLine();
415
416                 
417                 return 0;       
418         
419         }
420         catch(exception& e) {
421                 m->errorOut(e, "PcrSeqsCommand", "execute");
422                 exit(1);
423         }
424 }
425 /**************************************************************************************************/
426 int PcrSeqsCommand::createProcesses(string filename, string goodFileName, string badFileName, set<string>& badSeqNames) {
427         try {
428         
429         vector<int> processIDS;   
430         int process = 1;
431                 int num = 0;
432         int pstart = -1; int pend = -1;
433         bool adjustNeeded = false;
434         
435 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
436         
437                 //loop through and create all the processes you want
438                 while (process != processors) {
439                         int pid = fork();
440                         
441                         if (pid > 0) {
442                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
443                                 process++;
444                         }else if (pid == 0){
445                 string locationsFile = toString(getpid()) + ".temp";
446                                 num = driverPcr(filename, goodFileName + toString(getpid()) + ".temp", badFileName + toString(getpid()) + ".temp", locationsFile, badSeqNames, lines[process], pstart, adjustNeeded);
447                                 
448                                 //pass numSeqs to parent
449                                 ofstream out;
450                                 string tempFile = filename + toString(getpid()) + ".num.temp";
451                                 m->openOutputFile(tempFile, out);
452                 out << pstart << '\t' << adjustNeeded << endl;
453                                 out << num << '\t' << badSeqNames.size() << endl;
454                 for (set<string>::iterator it = badSeqNames.begin(); it != badSeqNames.end(); it++) {
455                     out << (*it) << endl;
456                 }
457                                 out.close();
458                                 
459                                 exit(0);
460                         }else { 
461                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
462                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
463                                 exit(0);
464                         }
465                 }
466                 
467         string locationsFile = toString(getpid()) + ".temp";
468         num = driverPcr(filename, goodFileName, badFileName, locationsFile, badSeqNames, lines[0], pstart, adjustNeeded);
469         
470                 //force parent to wait until all the processes are done
471                 for (int i=0;i<processIDS.size();i++) { 
472                         int temp = processIDS[i];
473                         wait(&temp);
474                 }
475                 
476                 for (int i = 0; i < processIDS.size(); i++) {
477                         ifstream in;
478                         string tempFile =  filename + toString(processIDS[i]) + ".num.temp";
479                         m->openInputFile(tempFile, in);
480             int numBadNames = 0; string name = "";
481             int tpstart = -1; bool tempAdjust = false;
482             
483                         if (!in.eof()) {
484                 in >> tpstart >> tempAdjust; m->gobble(in);
485                 
486                 if (tempAdjust) { adjustNeeded = true; }
487                 if (tpstart != -1)   {
488                     if (tpstart != pstart) { adjustNeeded = true; }
489                     if (tpstart < pstart) { pstart = tpstart; } //smallest start
490                 } 
491                 int tempNum = 0; in >> tempNum >> numBadNames; num += tempNum; m->gobble(in);
492             }
493             for (int j = 0; j < numBadNames; j++) {
494                 in >> name; m->gobble(in);
495                 badSeqNames.insert(name);
496             }
497                         in.close(); m->mothurRemove(tempFile);
498             
499             m->appendFiles((goodFileName + toString(processIDS[i]) + ".temp"), goodFileName);
500             m->mothurRemove((goodFileName + toString(processIDS[i]) + ".temp"));
501             
502             m->appendFiles((badFileName + toString(processIDS[i]) + ".temp"), badFileName);
503             m->mothurRemove((badFileName + toString(processIDS[i]) + ".temp"));
504             
505             m->appendFiles((toString(processIDS[i]) + ".temp"), locationsFile);
506             m->mothurRemove((toString(processIDS[i]) + ".temp"));
507                 }
508     #else
509         
510         //////////////////////////////////////////////////////////////////////////////////////////////////////
511                 //Windows version shared memory, so be careful when passing variables through the sumScreenData struct. 
512                 //Above fork() will clone, so memory is separate, but that's not the case with windows, 
513                 //Taking advantage of shared memory to allow both threads to add info to badSeqNames.
514                 //////////////////////////////////////////////////////////////////////////////////////////////////////
515                 
516                 vector<pcrData*> pDataArray; 
517                 DWORD   dwThreadIdArray[processors-1];
518                 HANDLE  hThreadArray[processors-1]; 
519                 
520         string locationsFile = "locationsFile.txt";
521         m->mothurRemove(locationsFile);
522         m->mothurRemove(goodFileName);
523         m->mothurRemove(badFileName);
524         
525                 //Create processor worker threads.
526                 for( int i=0; i<processors-1; i++ ){
527             
528             string extension = "";
529             if (i!=0) {extension += toString(i) + ".temp"; processIDS.push_back(i); }
530             
531                         // Allocate memory for thread data.
532                         pcrData* tempPcr = new pcrData(filename, goodFileName+extension, badFileName+extension, locationsFile+extension, m, oligosfile, ecolifile, primers, revPrimer, nomatch, keepprimer, keepdots, start, end, length, pdiffs, lines[i].start, lines[i].end);
533                         pDataArray.push_back(tempPcr);
534                         
535                         //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
536                         hThreadArray[i] = CreateThread(NULL, 0, MyPcrThreadFunction, pDataArray[i], 0, &dwThreadIdArray[i]);   
537                 }
538                 
539         //do your part
540         num = driverPcr(filename, (goodFileName+toString(processors-1)+".temp"), (badFileName+toString(processors-1)+".temp"), (locationsFile+toString(processors-1)+".temp"), badSeqNames, lines[processors-1], pstart, adjustNeeded);
541         processIDS.push_back(processors-1);
542         
543                 //Wait until all threads have terminated.
544                 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
545                 
546                 //Close all thread handles and free memory allocations.
547                 for(int i=0; i < pDataArray.size(); i++){
548                         num += pDataArray[i]->count;
549             if (pDataArray[i]->count != pDataArray[i]->fend) {
550                 m->mothurOut("[ERROR]: process " + toString(i) + " only processed " + toString(pDataArray[i]->count) + " of " + toString(pDataArray[i]->fend) + " sequences assigned to it, quitting. \n"); m->control_pressed = true; 
551             }
552             if (pDataArray[i]->adjustNeeded) { adjustNeeded = true; }
553             if (pDataArray[i]->pstart != -1)   {
554                 if (pDataArray[i]->pstart != pstart) { adjustNeeded = true; }
555                 if (pDataArray[i]->pstart < pstart) { pstart = pDataArray[i]->pstart; }
556             } //smallest start
557             
558             for (set<string>::iterator it = pDataArray[i]->badSeqNames.begin(); it != pDataArray[i]->badSeqNames.end(); it++) { badSeqNames.insert(*it);       }
559                         CloseHandle(hThreadArray[i]);
560                         delete pDataArray[i];
561                 }
562         
563         for (int i = 0; i < processIDS.size(); i++) {
564             m->appendFiles((goodFileName + toString(processIDS[i]) + ".temp"), goodFileName);
565             m->mothurRemove((goodFileName + toString(processIDS[i]) + ".temp"));
566             
567             m->appendFiles((badFileName + toString(processIDS[i]) + ".temp"), badFileName);
568             m->mothurRemove((badFileName + toString(processIDS[i]) + ".temp"));
569             
570             m->appendFiles((locationsFile+toString(processIDS[i]) + ".temp"), locationsFile);
571             m->mothurRemove((locationsFile+toString(processIDS[i]) + ".temp"));
572                 }
573         
574 #endif  
575         
576         
577
578         if (fileAligned) {
579             //find pend - pend is the biggest ending value, but we must account for when we adjust the start.  That adjustment may make the "new" end larger then the largest end. So lets find out what that "new" end will be.
580             ifstream inLocations;
581             m->openInputFile(locationsFile, inLocations);
582             
583             while(!inLocations.eof()) {
584                 
585                 if (m->control_pressed) { break; }
586                 
587                 string name = "";
588                 int thisStart = -1; int thisEnd = -1;
589                 if (primers.size() != 0)    { inLocations >> name >> thisStart; m->gobble(inLocations); }
590                 if (revPrimer.size() != 0)  { inLocations >> name >> thisEnd;   m->gobble(inLocations); }
591                 else { pend = -1; break; }
592                 
593                 int myDiff = 0;
594                 if (pstart != -1) {
595                     if (thisStart != -1) {
596                         if (thisStart != pstart) { myDiff += (thisStart - pstart); }
597                     }
598                 }
599                 
600                 int myEnd = thisEnd + myDiff;
601                 //cout << name << '\t' << thisStart << '\t' << thisEnd << " diff = " << myDiff << '\t' << myEnd << endl;
602                 
603                 if (thisEnd != -1) {
604                     if (myEnd > pend) { pend = myEnd; }
605                 }
606                 
607             }
608             inLocations.close();
609             
610             adjustDots(goodFileName, locationsFile, pstart, pend);
611         }
612         
613         return num;
614         
615         }
616         catch(exception& e) {
617                 m->errorOut(e, "PcrSeqsCommand", "createProcesses");
618                 exit(1);
619         }
620 }
621
622 //**********************************************************************************************************************
623 int PcrSeqsCommand::driverPcr(string filename, string goodFasta, string badFasta, string locationsName, set<string>& badSeqNames, linePair filePos, int& pstart, bool& adjustNeeded){
624         try {
625                 ofstream goodFile;
626                 m->openOutputFile(goodFasta, goodFile);
627         
628         ofstream badFile;
629                 m->openOutputFile(badFasta, badFile);
630         
631         ofstream locationsFile;
632                 m->openOutputFile(locationsName, locationsFile);
633                 
634                 ifstream inFASTA;
635                 m->openInputFile(filename, inFASTA);
636         
637                 inFASTA.seekg(filePos.start);
638         
639                 bool done = false;
640                 int count = 0;
641         set<int> lengths;
642         set<int> locations; //locations[0] = beginning locations, 
643         
644         //pdiffs, bdiffs, primers, barcodes, revPrimers
645         map<string, int> faked;
646         TrimOligos trim(pdiffs, 0, primers, faked, revPrimer);
647         
648                 while (!done) {
649             
650                         if (m->control_pressed) {  break; }
651                         
652                         Sequence currSeq(inFASTA); m->gobble(inFASTA);
653             
654             if (fileAligned) { //assume aligned until proven otherwise
655                 lengths.insert(currSeq.getAligned().length());
656                 if (lengths.size() > 1) { fileAligned = false; }
657             }
658             
659             string trashCode = "";
660             string locationsString = "";
661             int thisPStart = -1;
662             int thisPEnd = -1;
663                         if (currSeq.getName() != "") {
664                 
665                 if (m->debug) { m->mothurOut("[DEBUG]: seq name = " + currSeq.getName() + ".\n"); } 
666                 
667                 bool goodSeq = true;
668                 if (oligosfile != "") {
669                     map<int, int> mapAligned;
670                     bool aligned = isAligned(currSeq.getAligned(), mapAligned);
671                     
672                     //process primers
673                     if (primers.size() != 0) {
674                         int primerStart = 0; int primerEnd = 0;
675                         bool good = trim.findForward(currSeq, primerStart, primerEnd);
676                         
677                         if(!good){      if (nomatch == "reject") { goodSeq = false; } trashCode += "f"; }
678                         else{
679                             //are you aligned
680                             if (aligned) { 
681                                 if (!keepprimer)    {
682                                     if (keepdots)   { currSeq.filterToPos(mapAligned[primerEnd-1]+1);   } //mapAligned[primerEnd-1] is the location of the last base in the primer. we want to trim to the space just after that.  The -1 & +1 ensures if the primer is followed by gaps they are not trimmed causing an aligned sequence dataset to become unaligned.
683                                     else            {
684                                         currSeq.setAligned(currSeq.getAligned().substr(mapAligned[primerEnd-1]+1));
685                                         if (fileAligned) {
686                                             thisPStart = mapAligned[primerEnd-1]+1; //locations[0].insert(mapAligned[primerEnd-1]+1);
687                                             locationsString += currSeq.getName() + "\t" + toString(mapAligned[primerEnd-1]+1) + "\n";
688                                         }
689                                     }
690                                 } 
691                                 else                {  
692                                     if (keepdots)   { currSeq.filterToPos(mapAligned[primerStart]);  }
693                                     else            {
694                                         currSeq.setAligned(currSeq.getAligned().substr(mapAligned[primerStart]));
695                                         if (fileAligned) {
696                                             thisPStart = mapAligned[primerStart]; //locations[0].insert(mapAligned[primerStart]);
697                                             locationsString += currSeq.getName() + "\t" + toString(mapAligned[primerStart]) + "\n";
698                                         }
699                                     }
700                                 }
701                                 isAligned(currSeq.getAligned(), mapAligned);
702                             }else { 
703                                 if (!keepprimer)    { currSeq.setAligned(currSeq.getUnaligned().substr(primerEnd)); } 
704                                 else                { currSeq.setAligned(currSeq.getUnaligned().substr(primerStart)); } 
705                             }
706                         }
707                     }
708                     
709                     //process reverse primers
710                     if (revPrimer.size() != 0) {
711                         int primerStart = 0; int primerEnd = 0;
712                         bool good = trim.findReverse(currSeq, primerStart, primerEnd);
713                         if(!good){      if (nomatch == "reject") { goodSeq = false; } trashCode += "r"; }
714                         else{
715                             //are you aligned
716                             if (aligned) { 
717                                 if (!keepprimer)    {  
718                                     if (keepdots)   { currSeq.filterFromPos(mapAligned[primerStart]); }
719                                     else            {
720                                         currSeq.setAligned(currSeq.getAligned().substr(0, mapAligned[primerStart]));
721                                         if (fileAligned) {
722                                             thisPEnd = mapAligned[primerStart]; //locations[1].insert(mapAligned[primerStart]);
723                                             locationsString += currSeq.getName() + "\t" + toString(mapAligned[primerStart]) + "\n";
724                                         }
725                                     }
726                                 } 
727                                 else                {  
728                                     if (keepdots)   { currSeq.filterFromPos(mapAligned[primerEnd-1]+1); }
729                                     else            {
730                                         currSeq.setAligned(currSeq.getAligned().substr(0, mapAligned[primerEnd-1]+1));
731                                         if (fileAligned) {
732                                             thisPEnd = mapAligned[primerEnd-1]+1; //locations[1].insert(mapAligned[primerEnd-1]+1);
733                                             locationsString += currSeq.getName() + "\t" + toString(mapAligned[primerEnd-1]+1) + "\n";
734                                         }
735                                     }
736                                 } 
737                             }
738                             else { 
739                                 if (!keepprimer)    { currSeq.setAligned(currSeq.getUnaligned().substr(0, primerStart));   } 
740                                 else                { currSeq.setAligned(currSeq.getUnaligned().substr(0, primerEnd));     }
741                             }
742                         }
743                     }
744                 }else if (ecolifile != "") {
745                     //make sure the seqs are aligned
746                     if (!fileAligned) { m->mothurOut("[ERROR]: seqs are not aligned. When using start and end your sequences must be aligned.\n"); m->control_pressed = true; break; }
747                     else if (currSeq.getAligned().length() != length) {
748                         m->mothurOut("[ERROR]: seqs are not the same length as ecoli seq. When using ecoli option your sequences must be aligned and the same length as the ecoli sequence.\n"); m->control_pressed = true; break; 
749                     }else {
750                         if (keepdots)   { 
751                             currSeq.filterToPos(start); 
752                             currSeq.filterFromPos(end);
753                         }else {
754                             string seqString = currSeq.getAligned().substr(0, end);
755                             seqString = seqString.substr(start);
756                             currSeq.setAligned(seqString); 
757                         }
758                     }
759                 }else{ //using start and end to trim
760                     //make sure the seqs are aligned
761                     if (!fileAligned) { m->mothurOut("[ERROR]: seqs are not aligned. When using start and end your sequences must be aligned.\n"); m->control_pressed = true; break; }
762                     else {
763                         if (end != -1) {
764                             if (end > currSeq.getAligned().length()) {  m->mothurOut("[ERROR]: end is longer than your sequence length, aborting.\n"); m->control_pressed = true; break; }
765                             else {
766                                 if (keepdots)   { currSeq.filterFromPos(end); }
767                                 else {
768                                     string seqString = currSeq.getAligned().substr(0, end);
769                                     currSeq.setAligned(seqString); 
770                                 }
771                             }
772                         }
773                         if (start != -1) { 
774                             if (keepdots)   {  currSeq.filterToPos(start);  }
775                             else {
776                                 string seqString = currSeq.getAligned().substr(start);
777                                 currSeq.setAligned(seqString); 
778                             }
779                         }
780                     }
781                 }
782                 
783                 //trimming removed all bases
784                 if (currSeq.getUnaligned() == "") { goodSeq = false; }
785                 
786                                 if(goodSeq == 1)    {
787                     currSeq.printSequence(goodFile);
788                     if (m->debug) { m->mothurOut("[DEBUG]: " + locationsString + "\n"); }
789                     if (thisPStart != -1)   { locations.insert(thisPStart);  }
790                     if (locationsString != "") { locationsFile << locationsString; }
791                 }
792                                 else {  
793                     badSeqNames.insert(currSeq.getName()); 
794                     currSeq.setName(currSeq.getName() + '|' + trashCode);
795                     currSeq.printSequence(badFile); 
796                 }
797                 count++;
798                         }
799                         
800 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
801             unsigned long long pos = inFASTA.tellg();
802             if ((pos == -1) || (pos >= filePos.end)) { break; }
803 #else
804             if (inFASTA.eof()) { break; }
805 #endif
806                         
807                         //report progress
808                         if((count) % 100 == 0){ m->mothurOutJustToScreen("Processing sequence: " + toString(count)+"\n");               }
809                 }
810                 //report progress
811                 if((count) % 100 != 0){ m->mothurOutJustToScreen("Processing sequence: " + toString(count)+"\n");       }
812                 
813         badFile.close();
814                 goodFile.close();
815                 inFASTA.close();
816         locationsFile.close();
817         
818         if (m->debug) { m->mothurOut("[DEBUG]: fileAligned = " + toString(fileAligned) +'\n'); }
819     
820         if (fileAligned && !keepdots) { //print out smallest start value and largest end value
821             if (locations.size() > 1) { adjustNeeded = true; }
822             if (primers.size() != 0)    {   set<int>::iterator it = locations.begin();  pstart = *it;  }
823         }
824
825                 return count;
826         }
827         catch(exception& e) {
828                 m->errorOut(e, "PcrSeqsCommand", "driverPcr");
829                 exit(1);
830         }
831 }
832 //********************************************************************/
833 bool PcrSeqsCommand::isAligned(string seq, map<int, int>& aligned){
834         try {
835         aligned.clear();
836         bool isAligned = false;
837         
838         int countBases = 0;
839         for (int i = 0; i < seq.length(); i++) {
840             if (!isalpha(seq[i])) { isAligned = true; }
841             else { aligned[countBases] = i; countBases++; } //maps location in unaligned -> location in aligned.
842         }                                                   //ie. the 3rd base may be at spot 10 in the alignment
843                                                             //later when we trim we want to trim from spot 10.
844         return isAligned;
845     }
846         catch(exception& e) {
847                 m->errorOut(e, "PcrSeqsCommand", "isAligned");
848                 exit(1);
849         }
850 }
851 //**********************************************************************************************************************
852 int PcrSeqsCommand::adjustDots(string goodFasta, string locations, int pstart, int pend){
853     try {
854         ifstream inFasta;
855         m->openInputFile(goodFasta, inFasta);
856         
857         ifstream inLocations;
858         m->openInputFile(locations, inLocations);
859         
860         ofstream out;
861         m->openOutputFile(goodFasta+".temp", out);
862         
863         set<int> lengths;
864         //cout << pstart << '\t' << pend << endl;
865         //if (pstart > pend) { //swap them
866         
867         while(!inFasta.eof()) {
868             if(m->control_pressed) { break; }
869             
870             Sequence seq(inFasta); m->gobble(inFasta);
871             
872             string name = "";
873             int thisStart = -1; int thisEnd = -1;
874             if (primers.size() != 0)    { inLocations >> name >> thisStart; m->gobble(inLocations); }
875             if (revPrimer.size() != 0)  { inLocations >> name >> thisEnd;   m->gobble(inLocations); }
876             
877             
878             //cout << seq.getName() << '\t' << thisStart << '\t' << thisEnd << '\t' << seq.getAligned().length() << endl;
879             //cout << seq.getName() << '\t' << pstart << '\t' << pend << endl;
880             
881             if (name != seq.getName()) { m->mothurOut("[ERROR]: name mismatch in pcr.seqs.\n"); }
882             else {
883                 if (pstart != -1) {
884                     if (thisStart != -1) {
885                         if (thisStart != pstart) {
886                             string dots = "";
887                             for (int i = pstart; i < thisStart; i++) { dots += "."; }
888                             thisEnd += dots.length();
889                             dots += seq.getAligned();
890                             seq.setAligned(dots);
891                         }
892                     }
893                 }
894                 
895                 if (pend != -1) {
896                     if (thisEnd != -1) {
897                         if (thisEnd != pend) {
898                             string dots = seq.getAligned();
899                             for (int i = thisEnd; i < pend; i++) { dots += "."; }
900                             seq.setAligned(dots);
901                         }
902                     }
903                 }
904                 lengths.insert(seq.getAligned().length());
905             }
906             
907             seq.printSequence(out);
908         }
909         inFasta.close();
910         inLocations.close();
911         out.close();
912         m->mothurRemove(locations);
913         m->mothurRemove(goodFasta);
914         m->renameFile(goodFasta+".temp", goodFasta);
915         
916         //cout << "final lengths = \n";
917         //for (set<int>::iterator it = lengths.begin(); it != lengths.end(); it++) {
918            //cout << *it << endl;
919            // cout << lengths.count(*it) << endl;
920        // }
921         
922         return 0;
923     }
924     catch(exception& e) {
925         m->errorOut(e, "PcrSeqsCommand", "adjustDots");
926         exit(1);
927     }
928 }
929 //********************************************************************/
930 string PcrSeqsCommand::reverseOligo(string oligo){
931         try {
932         string reverse = "";
933        
934         for(int i=oligo.length()-1;i>=0;i--){
935             
936             if(oligo[i] == 'A')         {       reverse += 'T'; }
937             else if(oligo[i] == 'T'){   reverse += 'A'; }
938             else if(oligo[i] == 'U'){   reverse += 'A'; }
939             
940             else if(oligo[i] == 'G'){   reverse += 'C'; }
941             else if(oligo[i] == 'C'){   reverse += 'G'; }
942             
943             else if(oligo[i] == 'R'){   reverse += 'Y'; }
944             else if(oligo[i] == 'Y'){   reverse += 'R'; }
945             
946             else if(oligo[i] == 'M'){   reverse += 'K'; }
947             else if(oligo[i] == 'K'){   reverse += 'M'; }
948             
949             else if(oligo[i] == 'W'){   reverse += 'W'; }
950             else if(oligo[i] == 'S'){   reverse += 'S'; }
951             
952             else if(oligo[i] == 'B'){   reverse += 'V'; }
953             else if(oligo[i] == 'V'){   reverse += 'B'; }
954             
955             else if(oligo[i] == 'D'){   reverse += 'H'; }
956             else if(oligo[i] == 'H'){   reverse += 'D'; }
957
958             else                                                {       reverse += 'N'; }
959         }
960
961         
962         return reverse;
963     }
964         catch(exception& e) {
965                 m->errorOut(e, "PcrSeqsCommand", "reverseOligo");
966                 exit(1);
967         }
968 }
969
970 //***************************************************************************************************************
971 bool PcrSeqsCommand::readOligos(){
972         try {
973                 ifstream inOligos;
974                 m->openInputFile(oligosfile, inOligos);
975                 
976                 string type, oligo, group;
977         int primerCount = 0;
978                 
979                 while(!inOligos.eof()){
980             
981                         inOligos >> type; 
982             
983                         if(type[0] == '#'){ //ignore
984                                 while (!inOligos.eof()) {       char c = inOligos.get();  if (c == 10 || c == 13){      break;  }       } // get rest of line if there's any crap there
985                                 m->gobble(inOligos);
986                         }else{
987                                 m->gobble(inOligos);
988                                 //make type case insensitive
989                                 for(int i=0;i<type.length();i++){       type[i] = toupper(type[i]);  }
990                                 
991                                 inOligos >> oligo;
992                                 
993                                 for(int i=0;i<oligo.length();i++){
994                                         oligo[i] = toupper(oligo[i]);
995                                         if(oligo[i] == 'U')     {       oligo[i] = 'T'; }
996                                 }
997                                 
998                                 if(type == "FORWARD"){
999                                         // get rest of line in case there is a primer name
1000                                         while (!inOligos.eof()) {       
1001                         char c = inOligos.get(); 
1002                         if (c == 10 || c == 13 || c == -1){     break;  }
1003                         else if (c == 32 || c == 9){;} //space or tab
1004                                         } 
1005                                         primers[oligo] = primerCount; primerCount++;
1006                     //cout << "for oligo = " << oligo  << endl;
1007                 }else if(type == "REVERSE"){
1008                     string oligoRC = reverseOligo(oligo);
1009                     revPrimer.push_back(oligoRC);
1010                     //cout << "rev oligo = " << oligo << " reverse = " << oligoRC << endl;
1011                                 }else if(type == "BARCODE"){
1012                     inOligos >> group;
1013                 }else if(type == "PRIMER"){
1014                                         m->gobble(inOligos);
1015                     primers[oligo] = primerCount; primerCount++;
1016                                         
1017                     string roligo="";
1018                     inOligos >> roligo;
1019                     
1020                     for(int i=0;i<roligo.length();i++){
1021                         roligo[i] = toupper(roligo[i]);
1022                         if(roligo[i] == 'U')    {       roligo[i] = 'T';        }
1023                     }
1024                     revPrimer.push_back(reverseOligo(roligo));
1025                     
1026                     // get rest of line in case there is a primer name
1027                                         while (!inOligos.eof()) {
1028                         char c = inOligos.get();
1029                         if (c == 10 || c == 13 || c == -1){     break;  }
1030                         else if (c == 32 || c == 9){;} //space or tab
1031                                         }
1032                     //cout << "prim oligo = " << oligo << " reverse = " << roligo << endl;
1033                                 }else if((type == "LINKER")||(type == "SPACER")) {;}
1034                                 else{   m->mothurOut(type + " is not recognized as a valid type. Choices are primer, forward, reverse, linker, spacer and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine(); m->control_pressed = true; }
1035                         }
1036                         m->gobble(inOligos);
1037                 }       
1038                 inOligos.close();
1039                 
1040                 if ((primers.size() == 0) && (revPrimer.size() == 0)) {
1041                         m->mothurOut("[ERROR]: your oligos file does not contain valid primers or reverse primers.  Please correct."); m->mothurOutEndLine();
1042             m->control_pressed = true;
1043                         return false;
1044                 }
1045         
1046         return true;
1047         
1048     }catch(exception& e) {
1049                 m->errorOut(e, "PcrSeqsCommand", "readOligos");
1050                 exit(1);
1051         }
1052 }
1053 //***************************************************************************************************************
1054 bool PcrSeqsCommand::readEcoli(){
1055         try {
1056                 ifstream in;
1057                 m->openInputFile(ecolifile, in);
1058                 
1059         //read seq
1060         if (!in.eof()){ 
1061             Sequence ecoli(in); 
1062             length = ecoli.getAligned().length();
1063             start = ecoli.getStartPos();
1064             end = ecoli.getEndPos();
1065         }else { in.close(); m->control_pressed = true; return false; }
1066         in.close();    
1067                         
1068         return true;
1069     }
1070         catch(exception& e) {
1071                 m->errorOut(e, "PcrSeqsCommand", "readEcoli");
1072                 exit(1);
1073         }
1074     
1075 }
1076 //***************************************************************************************************************
1077 int PcrSeqsCommand::writeAccnos(set<string> badNames){
1078         try {
1079                 string thisOutputDir = outputDir;
1080                 if (outputDir == "") {  thisOutputDir += m->hasPath(fastafile);  }
1081         map<string, string> variables; 
1082                 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(fastafile));
1083                 string outputFileName = getOutputFileName("accnos",variables);
1084         outputNames.push_back(outputFileName); outputTypes["accnos"].push_back(outputFileName);
1085         
1086         ofstream out;
1087         m->openOutputFile(outputFileName, out);
1088         
1089         for (set<string>::iterator it = badNames.begin(); it != badNames.end(); it++) {
1090             if (m->control_pressed) { break; }
1091             out << (*it) << endl;
1092         }
1093         
1094         out.close();
1095         return 0;
1096     }
1097         catch(exception& e) {
1098                 m->errorOut(e, "PcrSeqsCommand", "writeAccnos");
1099                 exit(1);
1100         }
1101     
1102 }
1103 //***************************************************************************************************************
1104 int PcrSeqsCommand::readName(set<string>& names){
1105         try {
1106                 string thisOutputDir = outputDir;
1107                 if (outputDir == "") {  thisOutputDir += m->hasPath(namefile);  }
1108                 map<string, string> variables; 
1109                 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(namefile));
1110         variables["[extension]"] = m->getExtension(namefile);
1111                 string outputFileName = getOutputFileName("name", variables);
1112         
1113                 ofstream out;
1114                 m->openOutputFile(outputFileName, out);
1115         
1116                 ifstream in;
1117                 m->openInputFile(namefile, in);
1118                 string name, firstCol, secondCol;
1119                 
1120                 bool wroteSomething = false;
1121                 int removedCount = 0;
1122                 
1123                 while(!in.eof()){
1124                         if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
1125                         
1126                         in >> firstCol;         m->gobble(in);          
1127                         in >> secondCol;                        
1128                         
1129             string savedSecond = secondCol;
1130                         vector<string> parsedNames;
1131                         m->splitAtComma(secondCol, parsedNames);
1132                         
1133                         vector<string> validSecond;  validSecond.clear();
1134                         for (int i = 0; i < parsedNames.size(); i++) {
1135                                 if (names.count(parsedNames[i]) == 0) {
1136                                         validSecond.push_back(parsedNames[i]);
1137                                 }
1138                         }
1139                         
1140                         if (validSecond.size() != parsedNames.size()) {  //we want to get rid of someone, so get rid of everyone
1141                                 for (int i = 0; i < parsedNames.size(); i++) {  names.insert(parsedNames[i]);  }
1142                                 removedCount += parsedNames.size();
1143                         }else {
1144                 out << firstCol << '\t' << savedSecond << endl;
1145                 wroteSomething = true;
1146             }
1147                         m->gobble(in);
1148                 }
1149                 in.close();
1150                 out.close();
1151                 
1152                 if (wroteSomething == false) {  m->mothurOut("Your file contains only sequences from the .accnos file."); m->mothurOutEndLine();  }
1153                 outputTypes["name"].push_back(outputFileName); outputNames.push_back(outputFileName);
1154                 
1155                 m->mothurOut("Removed " + toString(removedCount) + " sequences from your name file."); m->mothurOutEndLine();
1156                 
1157                 return 0;
1158         }
1159         catch(exception& e) {
1160                 m->errorOut(e, "PcrSeqsCommand", "readName");
1161                 exit(1);
1162         }
1163 }
1164 //**********************************************************************************************************************
1165 int PcrSeqsCommand::readGroup(set<string> names){
1166         try {
1167                 string thisOutputDir = outputDir;
1168                 if (outputDir == "") {  thisOutputDir += m->hasPath(groupfile);  }
1169                 map<string, string> variables; 
1170                 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(groupfile));
1171         variables["[extension]"] = m->getExtension(groupfile);
1172                 string outputFileName = getOutputFileName("group", variables);
1173                 
1174                 ofstream out;
1175                 m->openOutputFile(outputFileName, out);
1176         
1177                 ifstream in;
1178                 m->openInputFile(groupfile, in);
1179                 string name, group;
1180                 
1181                 bool wroteSomething = false;
1182                 int removedCount = 0;
1183                 
1184                 while(!in.eof()){
1185                         if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
1186                         
1187                         in >> name;                             //read from first column
1188                         in >> group;                    //read from second column
1189                         
1190                         //if this name is in the accnos file
1191                         if (names.count(name) == 0) {
1192                                 wroteSomething = true;
1193                                 out << name << '\t' << group << endl;
1194                         }else {  removedCount++;  }
1195             
1196                         m->gobble(in);
1197                 }
1198                 in.close();
1199                 out.close();
1200                 
1201                 if (wroteSomething == false) {  m->mothurOut("Your file contains only sequences from the .accnos file."); m->mothurOutEndLine();  }
1202                 outputTypes["group"].push_back(outputFileName); outputNames.push_back(outputFileName);
1203                 
1204                 m->mothurOut("Removed " + toString(removedCount) + " sequences from your group file."); m->mothurOutEndLine();
1205         
1206                 
1207                 return 0;
1208         }
1209         catch(exception& e) {
1210                 m->errorOut(e, "PcrSeqsCommand", "readGroup");
1211                 exit(1);
1212         }
1213 }
1214 //**********************************************************************************************************************
1215 int PcrSeqsCommand::readTax(set<string> names){
1216         try {
1217                 string thisOutputDir = outputDir;
1218                 if (outputDir == "") {  thisOutputDir += m->hasPath(taxfile);  }
1219                 map<string, string> variables; 
1220                 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(taxfile));
1221         variables["[extension]"] = m->getExtension(taxfile);
1222                 string outputFileName = getOutputFileName("taxonomy", variables);
1223
1224                 ofstream out;
1225                 m->openOutputFile(outputFileName, out);
1226         
1227                 ifstream in;
1228                 m->openInputFile(taxfile, in);
1229                 string name, tax;
1230                 
1231                 bool wroteSomething = false;
1232                 int removedCount = 0;
1233                 
1234                 while(!in.eof()){
1235                         if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
1236                         
1237                         in >> name;                             //read from first column
1238                         in >> tax;                      //read from second column
1239                         
1240                         //if this name is in the accnos file
1241                         if (names.count(name) == 0) {
1242                                 wroteSomething = true;
1243                                 out << name << '\t' << tax << endl;
1244                         }else {  removedCount++;  }
1245             
1246                         m->gobble(in);
1247                 }
1248                 in.close();
1249                 out.close();
1250                 
1251                 if (wroteSomething == false) {  m->mothurOut("Your file contains only sequences from the .accnos file."); m->mothurOutEndLine();  }
1252                 outputTypes["taxonomy"].push_back(outputFileName); outputNames.push_back(outputFileName);
1253                 
1254                 m->mothurOut("Removed " + toString(removedCount) + " sequences from your taxonomy file."); m->mothurOutEndLine();
1255                 
1256                 return 0;
1257         }
1258         catch(exception& e) {
1259                 m->errorOut(e, "PcrSeqsCommand", "readTax");
1260                 exit(1);
1261         }
1262 }
1263 //***************************************************************************************************************
1264 int PcrSeqsCommand::readCount(set<string> badSeqNames){
1265         try {
1266                 ifstream in;
1267                 m->openInputFile(countfile, in);
1268                 set<string>::iterator it;
1269                 
1270                 map<string, string> variables; 
1271                 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(countfile));
1272         variables["[extension]"] = m->getExtension(countfile);
1273                 string goodCountFile = getOutputFileName("count", variables);
1274
1275         outputNames.push_back(goodCountFile);  outputTypes["count"].push_back(goodCountFile);
1276                 ofstream goodCountOut;  m->openOutputFile(goodCountFile, goodCountOut);
1277                 
1278         string headers = m->getline(in); m->gobble(in);
1279         goodCountOut << headers << endl;
1280         
1281         string name, rest; int thisTotal, removedCount; removedCount = 0;
1282         bool wroteSomething = false;
1283         while (!in.eof()) {
1284             
1285                         if (m->control_pressed) { goodCountOut.close(); in.close(); m->mothurRemove(goodCountFile); return 0; }
1286             
1287                         in >> name; m->gobble(in); 
1288             in >> thisTotal; m->gobble(in);
1289             rest = m->getline(in); m->gobble(in);
1290             
1291                         if (badSeqNames.count(name) != 0) { removedCount+=thisTotal; }
1292                         else{
1293                 wroteSomething = true;
1294                                 goodCountOut << name << '\t' << thisTotal << '\t' << rest << endl;
1295                         }
1296                 }
1297                 in.close();
1298                 goodCountOut.close();
1299         
1300         if (m->control_pressed) { m->mothurRemove(goodCountFile);   }
1301         
1302         if (wroteSomething == false) {  m->mothurOut("Your count file contains only sequences from the .accnos file."); m->mothurOutEndLine(); }
1303         
1304         //check for groups that have been eliminated
1305         CountTable ct;
1306         if (ct.testGroups(goodCountFile)) {
1307             ct.readTable(goodCountFile, true, false);
1308             ct.printTable(goodCountFile);
1309         }
1310                 
1311                 if (m->control_pressed) { m->mothurRemove(goodCountFile);   }
1312         
1313         m->mothurOut("Removed " + toString(removedCount) + " sequences from your count file."); m->mothurOutEndLine();
1314
1315                 
1316                 return 0;
1317         
1318         }
1319         catch(exception& e) {
1320                 m->errorOut(e, "PcrSeqsCommand", "readCOunt");
1321                 exit(1);
1322         }
1323 }
1324 /**************************************************************************************/
1325
1326