]> git.donarmstrong.com Git - mothur.git/blob - prcseqscommand.cpp
Merge remote-tracking branch 'mothur/master'
[mothur.git] / prcseqscommand.cpp
1 //
2 //  prcseqscommand.cpp
3 //  Mothur
4 //
5 //  Created by Sarah Westcott on 3/14/12.
6 //  Copyright (c) 2012 Schloss Lab. All rights reserved.
7 //
8
9 #include "pcrseqscommand.h"
10
11 //**********************************************************************************************************************
12 vector<string> PcrSeqsCommand::setParameters(){ 
13         try {
14                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
15                 CommandParameter poligos("oligos", "InputTypes", "", "", "ecolioligos", "none", "none",false,false); parameters.push_back(poligos);
16                 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
17         CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup);
18         CommandParameter ptax("taxonomy", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(ptax);
19         CommandParameter pecoli("ecoli", "InputTypes", "", "", "ecolioligos", "none", "none",false,false); parameters.push_back(pecoli);
20                 CommandParameter pstart("start", "Number", "", "-1", "", "", "",false,false); parameters.push_back(pstart);
21                 CommandParameter pend("end", "Number", "", "-1", "", "", "",false,false); parameters.push_back(pend);
22                 CommandParameter pnomatch("nomatch", "Multiple", "reject-keep", "reject", "", "", "",false,false); parameters.push_back(pnomatch);
23                 CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ppdiffs);
24                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
25                 CommandParameter pkeepprimer("keepprimer", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pkeepprimer);
26         CommandParameter pkeepdots("keepdots", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pkeepdots);
27         CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
28                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
29         
30                 vector<string> myArray;
31                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
32                 return myArray;
33         }
34         catch(exception& e) {
35                 m->errorOut(e, "PcrSeqsCommand", "setParameters");
36                 exit(1);
37         }
38 }
39 //**********************************************************************************************************************
40 string PcrSeqsCommand::getHelpString(){ 
41         try {
42                 string helpString = "";
43                 helpString += "The pcr.seqs command reads a fasta file ...\n";
44                 
45                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
46                 helpString += "For more details please check out the wiki http://www.mothur.org/wiki/Pcr.seqs .\n";
47                 return helpString;
48         }
49         catch(exception& e) {
50                 m->errorOut(e, "PcrSeqsCommand", "getHelpString");
51                 exit(1);
52         }
53 }
54
55
56 //**********************************************************************************************************************
57
58 PcrSeqsCommand::PcrSeqsCommand(){       
59         try {
60                 abort = true; calledHelp = true; 
61                 setParameters();
62                 vector<string> tempOutNames;
63                 outputTypes["fasta"] = tempOutNames;
64                 outputTypes["taxonomy"] = tempOutNames;
65                 outputTypes["group"] = tempOutNames;
66                 outputTypes["name"] = tempOutNames;
67         outputTypes["accnos"] = tempOutNames;
68         }
69         catch(exception& e) {
70                 m->errorOut(e, "PcrSeqsCommand", "PcrSeqsCommand");
71                 exit(1);
72         }
73 }
74 //***************************************************************************************************************
75
76 PcrSeqsCommand::PcrSeqsCommand(string option)  {
77         try {
78                 
79                 abort = false; calledHelp = false;   
80                 
81                 //allow user to run help
82                 if(option == "help") { help(); abort = true; calledHelp = true; }
83                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
84                 
85                 else {
86                         vector<string> myArray = setParameters();
87                         
88                         OptionParser parser(option);
89                         map<string,string> parameters = parser.getParameters();
90                         
91                         ValidParameters validParameter;
92                         map<string,string>::iterator it;
93                         
94                         //check to make sure all parameters are valid for command
95                         for (it = parameters.begin(); it != parameters.end(); it++) { 
96                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
97                         }
98                         
99                         //initialize outputTypes
100                         vector<string> tempOutNames;
101                         outputTypes["fasta"] = tempOutNames;
102                         outputTypes["taxonomy"] = tempOutNames;
103                         outputTypes["group"] = tempOutNames;
104                         outputTypes["name"] = tempOutNames;
105             outputTypes["accnos"] = tempOutNames;
106                         
107                         //if the user changes the input directory command factory will send this info to us in the output parameter 
108                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
109                         if (inputDir == "not found"){   inputDir = "";          }
110                         else {
111                                 string path;
112                                 it = parameters.find("fasta");
113                                 //user has given a template file
114                                 if(it != parameters.end()){ 
115                                         path = m->hasPath(it->second);
116                                         //if the user has not given a path then, add inputdir. else leave path alone.
117                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
118                                 }
119                                 
120                                 it = parameters.find("oligos");
121                                 //user has given a template file
122                                 if(it != parameters.end()){ 
123                                         path = m->hasPath(it->second);
124                                         //if the user has not given a path then, add inputdir. else leave path alone.
125                                         if (path == "") {       parameters["oligos"] = inputDir + it->second;           }
126                                 }
127                 
128                 it = parameters.find("ecoli");
129                                 //user has given a template file
130                                 if(it != parameters.end()){ 
131                                         path = m->hasPath(it->second);
132                                         //if the user has not given a path then, add inputdir. else leave path alone.
133                                         if (path == "") {       parameters["ecoli"] = inputDir + it->second;            }
134                                 }
135                                 
136                                 it = parameters.find("taxonomy");
137                                 //user has given a template file
138                                 if(it != parameters.end()){ 
139                                         path = m->hasPath(it->second);
140                                         //if the user has not given a path then, add inputdir. else leave path alone.
141                                         if (path == "") {       parameters["taxonomy"] = inputDir + it->second;         }
142                                 }
143                                 
144                                 it = parameters.find("name");
145                                 //user has given a template file
146                                 if(it != parameters.end()){ 
147                                         path = m->hasPath(it->second);
148                                         //if the user has not given a path then, add inputdir. else leave path alone.
149                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
150                                 }
151                 
152                 it = parameters.find("group");
153                                 //user has given a template file
154                                 if(it != parameters.end()){ 
155                                         path = m->hasPath(it->second);
156                                         //if the user has not given a path then, add inputdir. else leave path alone.
157                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
158                                 }
159                                 
160                         }
161             
162             //if the user changes the output directory command factory will send this info to us in the output parameter 
163                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
164                         
165                         //check for required parameters
166                         fastafile = validParameter.validFile(parameters, "fasta", true);
167                         if (fastafile == "not found") {                                 
168                                 fastafile = m->getFastaFile(); 
169                                 if (fastafile != "") { m->mothurOut("Using " + fastafile + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
170                                 else {  m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
171                         }else if (fastafile == "not open") { fastafile = ""; abort = true; }    
172                         else { m->setFastaFile(fastafile); }
173                         
174             
175                         //check for optional parameter and set defaults
176                         // ...at some point should added some additional type checking...
177                         string temp;
178                         temp = validParameter.validFile(parameters, "keepprimer", false);  if (temp == "not found")    {        temp = "f";     }
179                         keepprimer = m->isTrue(temp);   
180             
181             temp = validParameter.validFile(parameters, "keepdots", false);  if (temp == "not found")    {      temp = "t";     }
182                         keepdots = m->isTrue(temp);     
183             
184                         temp = validParameter.validFile(parameters, "oligos", true);
185                         if (temp == "not found"){       oligosfile = "";                }
186                         else if(temp == "not open"){    oligosfile = ""; abort = true;  } 
187                         else                                    {       oligosfile = temp; m->setOligosFile(oligosfile);                }
188                         
189             ecolifile = validParameter.validFile(parameters, "ecoli", true);
190                         if (ecolifile == "not found"){  ecolifile = "";         }
191                         else if(ecolifile == "not open"){       ecolifile = ""; abort = true;   } 
192                         
193             namefile = validParameter.validFile(parameters, "name", true);
194                         if (namefile == "not found"){   namefile = "";          }
195                         else if(namefile == "not open"){        namefile = ""; abort = true;    } 
196             else { m->setNameFile(namefile); }
197             
198             groupfile = validParameter.validFile(parameters, "group", true);
199                         if (groupfile == "not found"){  groupfile = "";         }
200                         else if(groupfile == "not open"){       groupfile = ""; abort = true;   } 
201             else { m->setGroupFile(groupfile); }
202             
203             taxfile = validParameter.validFile(parameters, "taxonomy", true);
204                         if (taxfile == "not found"){    taxfile = "";           }
205                         else if(taxfile == "not open"){ taxfile = ""; abort = true;     } 
206             else { m->setTaxonomyFile(taxfile); }
207                         
208                         temp = validParameter.validFile(parameters, "pdiffs", false);           if (temp == "not found") { temp = "0"; }
209                         m->mothurConvert(temp, pdiffs);
210                         
211                         temp = validParameter.validFile(parameters, "start", false);    if (temp == "not found") { temp = "-1"; }
212                         m->mothurConvert(temp, start);
213             
214             temp = validParameter.validFile(parameters, "end", false);  if (temp == "not found") { temp = "-1"; }
215                         m->mothurConvert(temp, end);
216                         
217                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
218                         m->setProcessors(temp);
219                         m->mothurConvert(temp, processors); 
220                         
221             nomatch = validParameter.validFile(parameters, "nomatch", false);   if (nomatch == "not found") { nomatch = "reject"; }
222                         
223             if ((nomatch != "reject") && (nomatch != "keep")) { m->mothurOut("[ERROR]: " + nomatch + " is not a valid entry for nomatch. Choices are reject and keep.\n");  abort = true; }
224             
225             //didnt set anything
226                         if ((oligosfile == "") && (ecolifile == "") && (start == -1) && (end == -1)) {
227                 m->mothurOut("[ERROR]: You did not set any options. Please provide an oligos or ecoli file, or set start or end.\n"); abort = true;
228             }
229             
230             if ((oligosfile == "") && (ecolifile == "") && (start < 0) && (end == -1)) { m->mothurOut("[ERROR]: Invalid start value.\n"); abort = true; }
231             
232             if ((ecolifile != "") && (start != -1) && (end != -1)) {
233                 m->mothurOut("[ERROR]: You provided an ecoli file , but set the start or end parameters. Unsure what you intend.  When you provide the ecoli file, mothur thinks you want to use the start and end of the sequence in the ecoli file.\n"); abort = true;
234             }
235
236             
237             if ((oligosfile != "") && (ecolifile != "")) {
238                  m->mothurOut("[ERROR]: You can not use an ecoli file at the same time as an oligos file.\n"); abort = true;
239             }
240                         
241                         //check to make sure you didn't forget the name file by mistake                 
242                         if (namefile == "") {
243                                 vector<string> files; files.push_back(fastafile);
244                                 parser.getNameFile(files);
245                         }
246                 }
247         
248         }
249         catch(exception& e) {
250                 m->errorOut(e, "PcrSeqsCommand", "PcrSeqsCommand");
251                 exit(1);
252         }
253 }
254 //***************************************************************************************************************
255
256 int PcrSeqsCommand::execute(){
257         try{
258         
259                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
260                 
261         int start = time(NULL);
262         
263         string thisOutputDir = outputDir;
264                 if (outputDir == "") {  thisOutputDir += m->hasPath(fastafile);  }
265                 string trimSeqFile = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + "pcr.fasta";
266                 outputNames.push_back(trimSeqFile); outputTypes["fasta"].push_back(trimSeqFile);
267         
268         string badSeqFile = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + "pcr.scrap.fasta";
269                 
270                 
271         length = 0;
272                 if(oligosfile != ""){    readOligos();     }  if (m->control_pressed) {  return 0; }
273         if(ecolifile != "") {    readEcoli();      }  if (m->control_pressed) {  return 0; }
274         
275         vector<unsigned long long> positions; 
276         int numFastaSeqs = 0;
277 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
278         positions = m->divideFile(fastafile, processors);
279         for (int i = 0; i < (positions.size()-1); i++) {        lines.push_back(linePair(positions[i], positions[(i+1)]));      }
280 #else
281         if (processors == 1) {
282             lines.push_back(linePair(0, 1000));
283         }else {
284             positions = m->setFilePosFasta(fastafile, numFastaSeqs); 
285             if (positions.size() < processors) { processors = positions.size(); }
286             
287             //figure out how many sequences you have to process
288             int numSeqsPerProcessor = numFastaSeqs / processors;
289             for (int i = 0; i < processors; i++) {
290                 int startIndex =  i * numSeqsPerProcessor;
291                 if(i == (processors - 1)){      numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor;   }
292                 lines.push_back(linePair(positions[startIndex], numSeqsPerProcessor));
293             }
294         }
295 #endif
296         if (m->control_pressed) {  return 0; }
297
298         set<string> badNames;
299         if(processors == 1) {    numFastaSeqs = driverPcr(fastafile, trimSeqFile, badSeqFile, badNames, lines[0]);   }
300         else                {    numFastaSeqs = createProcesses(fastafile, trimSeqFile, badSeqFile, badNames);       }  
301                 
302                 if (m->control_pressed) {  return 0; }          
303         
304         //don't write or keep if blank
305         if (badNames.size() != 0)   { writeAccnos(badNames);        }   
306         if (m->isBlank(badSeqFile)) { m->mothurRemove(badSeqFile);  }
307         else { outputNames.push_back(badSeqFile); outputTypes["fasta"].push_back(badSeqFile); }
308         
309         if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]); } return 0; }
310         if (namefile != "")                     {               readName(badNames);             }   
311         if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]); } return 0; }
312         if (groupfile != "")            {               readGroup(badNames);    }
313         if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]); } return 0; }
314                 if (taxfile != "")                      {               readTax(badNames);              }
315                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]); } return 0; }
316         
317         m->mothurOutEndLine();
318                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
319                 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
320                 m->mothurOutEndLine();
321                 m->mothurOutEndLine();
322                 
323                 //set fasta file as new current fastafile
324                 string current = "";
325                 itTypes = outputTypes.find("fasta");
326                 if (itTypes != outputTypes.end()) {
327                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
328                 }
329                 
330                 itTypes = outputTypes.find("name");
331                 if (itTypes != outputTypes.end()) {
332                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
333                 }
334                 
335                 itTypes = outputTypes.find("group");
336                 if (itTypes != outputTypes.end()) {
337                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
338                 }
339                 
340                 itTypes = outputTypes.find("accnos");
341                 if (itTypes != outputTypes.end()) {
342                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); }
343                 }
344                 
345                 itTypes = outputTypes.find("taxonomy");
346                 if (itTypes != outputTypes.end()) {
347                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setTaxonomyFile(current); }
348                 }
349         
350                 m->mothurOut("It took " + toString(time(NULL) - start) + " secs to screen " + toString(numFastaSeqs) + " sequences.");
351                 m->mothurOutEndLine();
352
353                 
354                 return 0;       
355         
356         }
357         catch(exception& e) {
358                 m->errorOut(e, "PcrSeqsCommand", "execute");
359                 exit(1);
360         }
361 }
362 /**************************************************************************************************/
363 int PcrSeqsCommand::createProcesses(string filename, string goodFileName, string badFileName, set<string>& badSeqNames) {
364         try {
365         
366         vector<int> processIDS;   
367         int process = 1;
368                 int num = 0;
369         
370 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
371         
372                 //loop through and create all the processes you want
373                 while (process != processors) {
374                         int pid = fork();
375                         
376                         if (pid > 0) {
377                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
378                                 process++;
379                         }else if (pid == 0){
380                                 num = driverPcr(filename, goodFileName + toString(getpid()) + ".temp", badFileName + toString(getpid()) + ".temp", badSeqNames, lines[process]);
381                                 
382                                 //pass numSeqs to parent
383                                 ofstream out;
384                                 string tempFile = filename + toString(getpid()) + ".num.temp";
385                                 m->openOutputFile(tempFile, out);
386                                 out << num << '\t' << badSeqNames.size() << endl;
387                 for (set<string>::iterator it = badSeqNames.begin(); it != badSeqNames.end(); it++) {
388                     out << (*it) << endl;
389                 }
390                                 out.close();
391                                 
392                                 exit(0);
393                         }else { 
394                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
395                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
396                                 exit(0);
397                         }
398                 }
399                 
400         num = driverPcr(filename, goodFileName, badFileName, badSeqNames, lines[0]);
401         
402                 //force parent to wait until all the processes are done
403                 for (int i=0;i<processIDS.size();i++) { 
404                         int temp = processIDS[i];
405                         wait(&temp);
406                 }
407                 
408                 for (int i = 0; i < processIDS.size(); i++) {
409                         ifstream in;
410                         string tempFile =  filename + toString(processIDS[i]) + ".num.temp";
411                         m->openInputFile(tempFile, in);
412             int numBadNames = 0; string name = "";
413                         if (!in.eof()) { int tempNum = 0; in >> tempNum >> numBadNames; num += tempNum; m->gobble(in); }
414             for (int j = 0; j < numBadNames; j++) {
415                 in >> name; m->gobble(in);
416                 badSeqNames.insert(name);
417             }
418                         in.close(); m->mothurRemove(tempFile);
419             
420             m->appendFiles((goodFileName + toString(processIDS[i]) + ".temp"), goodFileName);
421             m->mothurRemove((goodFileName + toString(processIDS[i]) + ".temp"));
422             
423             m->appendFiles((badFileName + toString(processIDS[i]) + ".temp"), badFileName);
424             m->mothurRemove((badFileName + toString(processIDS[i]) + ".temp"));
425                 }
426     #else
427         
428         //////////////////////////////////////////////////////////////////////////////////////////////////////
429                 //Windows version shared memory, so be careful when passing variables through the sumScreenData struct. 
430                 //Above fork() will clone, so memory is separate, but that's not the case with windows, 
431                 //Taking advantage of shared memory to allow both threads to add info to badSeqNames.
432                 //////////////////////////////////////////////////////////////////////////////////////////////////////
433                 
434                 vector<pcrData*> pDataArray; 
435                 DWORD   dwThreadIdArray[processors-1];
436                 HANDLE  hThreadArray[processors-1]; 
437                 
438                 //Create processor worker threads.
439                 for( int i=0; i<processors-1; i++ ){
440             
441             string extension = "";
442             if (i!=0) {extension += toString(i) + ".temp"; processIDS.push_back(i); }
443             
444                         // Allocate memory for thread data.
445                         pcrData* tempPcr = new pcrData(filename, goodFileName+extension, badFileName+extension, m, oligosfile, ecolifile, primers, revPrimer, nomatch, keepprimer, keepdots, start, end, length, lines[i].start, lines[i].end);
446                         pDataArray.push_back(tempPcr);
447                         
448                         //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
449                         hThreadArray[i] = CreateThread(NULL, 0, MyPcrThreadFunction, pDataArray[i], 0, &dwThreadIdArray[i]);   
450                 }
451                 
452         //do your part
453         num = driverPcr(filename, (goodFileName+toString(processors-1)+".temp"), (badFileName+toString(processors-1)+".temp"),badSeqNames, lines[processors-1]);
454         processIDS.push_back(processors-1);
455         
456                 //Wait until all threads have terminated.
457                 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
458                 
459                 //Close all thread handles and free memory allocations.
460                 for(int i=0; i < pDataArray.size(); i++){
461                         num += pDataArray[i]->count;
462             for (set<string>::iterator it = pDataArray[i]->badSeqNames.begin(); it != pDataArray[i]->badSeqNames.end(); it++) { badSeqNames.insert(*it);       }
463                         CloseHandle(hThreadArray[i]);
464                         delete pDataArray[i];
465                 }
466         
467         for (int i = 0; i < processIDS.size(); i++) {
468             m->appendFiles((goodFileName + toString(processIDS[i]) + ".temp"), goodFileName);
469             m->mothurRemove((goodFileName + toString(processIDS[i]) + ".temp"));
470             
471             m->appendFiles((badFileName + toString(processIDS[i]) + ".temp"), badFileName);
472             m->mothurRemove((badFileName + toString(processIDS[i]) + ".temp"));
473                 }
474         
475 #endif  
476         
477         return num;
478         
479         }
480         catch(exception& e) {
481                 m->errorOut(e, "PcrSeqsCommand", "createProcesses");
482                 exit(1);
483         }
484 }
485
486 //**********************************************************************************************************************
487 int PcrSeqsCommand::driverPcr(string filename, string goodFasta, string badFasta, set<string>& badSeqNames, linePair filePos){
488         try {
489                 ofstream goodFile;
490                 m->openOutputFile(goodFasta, goodFile);
491         
492         ofstream badFile;
493                 m->openOutputFile(badFasta, badFile);
494                 
495                 ifstream inFASTA;
496                 m->openInputFile(filename, inFASTA);
497         
498                 inFASTA.seekg(filePos.start);
499         
500                 bool done = false;
501                 int count = 0;
502         set<int> lengths;
503         
504                 while (!done) {
505             
506                         if (m->control_pressed) {  break; }
507                         
508                         Sequence currSeq(inFASTA); m->gobble(inFASTA);
509             
510             string trashCode = "";
511                         if (currSeq.getName() != "") {
512                 
513                 bool goodSeq = true;
514                 if (oligosfile != "") {
515                     map<int, int> mapAligned;
516                     bool aligned = isAligned(currSeq.getAligned(), mapAligned);
517                     
518                     //process primers
519                     if (primers.size() != 0) {
520                         int primerStart = 0; int primerEnd = 0;
521                         bool good = findForward(currSeq, primerStart, primerEnd);
522                         
523                         if(!good){      if (nomatch == "reject") { goodSeq = false; } trashCode += "f"; }
524                         else{
525                             //are you aligned
526                             if (aligned) { 
527                                 if (!keepprimer)    {  
528                                     if (keepdots)   { currSeq.filterToPos(mapAligned[primerEnd]);   }
529                                     else            { currSeq.setAligned(currSeq.getAligned().substr(mapAligned[primerEnd]));                                              }
530                                 } 
531                                 else                {  
532                                     if (keepdots)   { currSeq.filterToPos(mapAligned[primerStart]);  }
533                                     else            { currSeq.setAligned(currSeq.getAligned().substr(mapAligned[primerStart]));                                              }
534                                 }
535                             }else { 
536                                 if (!keepprimer)    { currSeq.setAligned(currSeq.getUnaligned().substr(primerEnd)); } 
537                                 else                { currSeq.setAligned(currSeq.getUnaligned().substr(primerStart)); } 
538                             }
539                         }
540                     }
541                     
542                     //process reverse primers
543                     if (revPrimer.size() != 0) {
544                         int primerStart = 0; int primerEnd = 0;
545                         bool good = findReverse(currSeq, primerStart, primerEnd);
546                         if(!good){      if (nomatch == "reject") { goodSeq = false; } trashCode += "r"; }
547                         else{ 
548                             //are you aligned
549                             if (aligned) { 
550                                 if (!keepprimer)    {  
551                                     if (keepdots)   { currSeq.filterFromPos(mapAligned[primerStart]); }
552                                     else            { currSeq.setAligned(currSeq.getAligned().substr(0, mapAligned[primerStart]));   }
553                                 } 
554                                 else                {  
555                                     if (keepdots)   { currSeq.filterFromPos(mapAligned[primerEnd]); }
556                                     else            { currSeq.setAligned(currSeq.getAligned().substr(0, mapAligned[primerEnd]));   }
557                                 } 
558                             }
559                             else { 
560                                 if (!keepprimer)    { currSeq.setAligned(currSeq.getUnaligned().substr(0, primerStart));   } 
561                                 else                { currSeq.setAligned(currSeq.getUnaligned().substr(0, primerEnd));     }
562                             }
563                         }
564                     }
565                 }else if (ecolifile != "") {
566                     //make sure the seqs are aligned
567                     lengths.insert(currSeq.getAligned().length());
568                     if (lengths.size() > 1) { m->mothurOut("[ERROR]: seqs are not aligned. When using start and end your sequences must be aligned.\n"); m->control_pressed = true; break; }
569                     else if (currSeq.getAligned().length() != length) {
570                         m->mothurOut("[ERROR]: seqs are not the same length as ecoli seq. When using ecoli option your sequences must be aligned and the same length as the ecoli sequence.\n"); m->control_pressed = true; break; 
571                     }else {
572                         if (keepdots)   { 
573                             currSeq.filterToPos(start); 
574                             currSeq.filterFromPos(end);
575                         }else {
576                             string seqString = currSeq.getAligned().substr(0, end);
577                             seqString = seqString.substr(start);
578                             currSeq.setAligned(seqString); 
579                         }
580                     }
581                 }else{ //using start and end to trim
582                     //make sure the seqs are aligned
583                     lengths.insert(currSeq.getAligned().length());
584                     if (lengths.size() > 1) { m->mothurOut("[ERROR]: seqs are not aligned. When using start and end your sequences must be aligned.\n"); m->control_pressed = true; break; }
585                     else {
586                         if (end != -1) {
587                             if (end > currSeq.getAligned().length()) {  m->mothurOut("[ERROR]: end is longer than your sequence length, aborting.\n"); m->control_pressed = true; break; }
588                             else {
589                                 if (keepdots)   { currSeq.filterFromPos(end); }
590                                 else {
591                                     string seqString = currSeq.getAligned().substr(0, end);
592                                     currSeq.setAligned(seqString); 
593                                 }
594                             }
595                         }
596                         if (start != -1) { 
597                             if (keepdots)   {  currSeq.filterToPos(start);  }
598                             else {
599                                 string seqString = currSeq.getAligned().substr(start);
600                                 currSeq.setAligned(seqString); 
601                             }
602                         }
603                     }
604                 }
605                 
606                 //trimming removed all bases
607                 if (currSeq.getUnaligned() == "") { goodSeq = false; }
608                 
609                                 if(goodSeq == 1)    {   currSeq.printSequence(goodFile);        }
610                                 else {  
611                     badSeqNames.insert(currSeq.getName()); 
612                     currSeq.setName(currSeq.getName() + '|' + trashCode);
613                     currSeq.printSequence(badFile); 
614                 }
615                 count++;
616                         }
617                         
618 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
619             unsigned long long pos = inFASTA.tellg();
620             if ((pos == -1) || (pos >= filePos.end)) { break; }
621 #else
622             if (inFASTA.eof()) { break; }
623 #endif
624                         
625                         //report progress
626                         if((count) % 100 == 0){ m->mothurOut("Processing sequence: " + toString(count)); m->mothurOutEndLine();         }
627                 }
628                 //report progress
629                 if((count) % 100 != 0){ m->mothurOut("Processing sequence: " + toString(count)); m->mothurOutEndLine();         }
630                 
631         badFile.close();
632                 goodFile.close();
633                 inFASTA.close();
634                 
635                 return count;
636         }
637         catch(exception& e) {
638                 m->errorOut(e, "PcrSeqsCommand", "driverPcr");
639                 exit(1);
640         }
641 }
642 //********************************************************************/
643 bool PcrSeqsCommand::findForward(Sequence& seq, int& primerStart, int& primerEnd){
644         try {
645                 
646                 string rawSequence = seq.getUnaligned();
647                 
648                 for(int j=0;j<primers.size();j++){
649                         string oligo = primers[j];
650                         
651                         if(rawSequence.length() < oligo.length()) {  break;  }
652                         
653                         //search for primer
654             int olength = oligo.length();
655             for (int j = 0; j < rawSequence.length()-olength; j++){
656                 if (m->control_pressed) {  primerStart = 0; primerEnd = 0; return false; }
657                 string rawChunk = rawSequence.substr(j, olength);
658                 if(compareDNASeq(oligo, rawChunk)) {
659                     primerStart = j;
660                     primerEnd = primerStart + olength;
661                     return true;
662                 }
663                 
664             }
665         }       
666                 
667         primerStart = 0; primerEnd = 0;
668                 return false;
669                 
670         }
671         catch(exception& e) {
672                 m->errorOut(e, "TrimOligos", "stripForward");
673                 exit(1);
674         }
675 }
676 //******************************************************************/
677 bool PcrSeqsCommand::findReverse(Sequence& seq, int& primerStart, int& primerEnd){
678         try {
679                 
680                 string rawSequence = seq.getUnaligned();
681                 
682                 for(int i=0;i<revPrimer.size();i++){
683                         string oligo = revPrimer[i];
684                         if(rawSequence.length() < oligo.length()) {  break;  }
685                         
686                         //search for primer
687             int olength = oligo.length();
688             for (int j = rawSequence.length()-olength; j >= 0; j--){
689                  if (m->control_pressed) {  primerStart = 0; primerEnd = 0; return false; }
690                 string rawChunk = rawSequence.substr(j, olength);
691             
692                 if(compareDNASeq(oligo, rawChunk)) {
693                     primerStart = j;
694                     primerEnd = primerStart + olength;
695                     return true;
696                 }
697                 
698             }
699                 }       
700                 
701         primerStart = 0; primerEnd = 0;
702                 return false;
703         }
704         catch(exception& e) {
705                 m->errorOut(e, "PcrSeqsCommand", "findReverse");
706                 exit(1);
707         }
708 }
709 //********************************************************************/
710 bool PcrSeqsCommand::isAligned(string seq, map<int, int>& aligned){
711         try {
712         bool isAligned = false;
713         
714         int countBases = 0;
715         for (int i = 0; i < seq.length(); i++) {
716             if (!isalpha(seq[i])) { isAligned = true; }
717             else { aligned[countBases] = i; countBases++; } //maps location in unaligned -> location in aligned.
718         }                                                   //ie. the 3rd base may be at spot 10 in the alignment
719                                                             //later when we trim we want to trim from spot 10.
720         return isAligned;
721     }
722         catch(exception& e) {
723                 m->errorOut(e, "PcrSeqsCommand", "isAligned");
724                 exit(1);
725         }
726 }
727 //********************************************************************/
728 string PcrSeqsCommand::reverseOligo(string oligo){
729         try {
730         string reverse = "";
731        
732         for(int i=oligo.length()-1;i>=0;i--){
733             
734             if(oligo[i] == 'A')         {       reverse += 'T'; }
735             else if(oligo[i] == 'T'){   reverse += 'A'; }
736             else if(oligo[i] == 'U'){   reverse += 'A'; }
737             
738             else if(oligo[i] == 'G'){   reverse += 'C'; }
739             else if(oligo[i] == 'C'){   reverse += 'G'; }
740             
741             else if(oligo[i] == 'R'){   reverse += 'Y'; }
742             else if(oligo[i] == 'Y'){   reverse += 'R'; }
743             
744             else if(oligo[i] == 'M'){   reverse += 'K'; }
745             else if(oligo[i] == 'K'){   reverse += 'M'; }
746             
747             else if(oligo[i] == 'W'){   reverse += 'W'; }
748             else if(oligo[i] == 'S'){   reverse += 'S'; }
749             
750             else if(oligo[i] == 'B'){   reverse += 'V'; }
751             else if(oligo[i] == 'V'){   reverse += 'B'; }
752             
753             else if(oligo[i] == 'D'){   reverse += 'H'; }
754             else if(oligo[i] == 'H'){   reverse += 'D'; }
755
756             else                                                {       reverse += 'N'; }
757         }
758
759         
760         return reverse;
761     }
762         catch(exception& e) {
763                 m->errorOut(e, "PcrSeqsCommand", "reverseOligo");
764                 exit(1);
765         }
766 }
767
768 //***************************************************************************************************************
769 bool PcrSeqsCommand::readOligos(){
770         try {
771                 ifstream inOligos;
772                 m->openInputFile(oligosfile, inOligos);
773                 
774                 string type, oligo, group;
775                 
776                 while(!inOligos.eof()){
777             
778                         inOligos >> type; 
779             
780                         if(type[0] == '#'){ //ignore
781                                 while (!inOligos.eof()) {       char c = inOligos.get();  if (c == 10 || c == 13){      break;  }       } // get rest of line if there's any crap there
782                                 m->gobble(inOligos);
783                         }else{
784                                 m->gobble(inOligos);
785                                 //make type case insensitive
786                                 for(int i=0;i<type.length();i++){       type[i] = toupper(type[i]);  }
787                                 
788                                 inOligos >> oligo;
789                                 
790                                 for(int i=0;i<oligo.length();i++){
791                                         oligo[i] = toupper(oligo[i]);
792                                         if(oligo[i] == 'U')     {       oligo[i] = 'T'; }
793                                 }
794                                 
795                                 if(type == "FORWARD"){
796                                         // get rest of line in case there is a primer name
797                                         while (!inOligos.eof()) {       
798                         char c = inOligos.get(); 
799                         if (c == 10 || c == 13){        break;  } 
800                         else if (c == 32 || c == 9){;} //space or tab
801                                         } 
802                                         primers.push_back(oligo);
803                 }else if(type == "REVERSE"){
804                     string oligoRC = reverseOligo(oligo);
805                     revPrimer.push_back(oligoRC);
806                     //cout << "oligo = " << oligo << " reverse = " << oligoRC << endl;
807                                 }else if(type == "BARCODE"){
808                                         inOligos >> group;
809                                 }else if((type == "LINKER")||(type == "SPACER")) {;}
810                                 else{   m->mothurOut(type + " is not recognized as a valid type. Choices are forward, reverse, linker, spacer and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine(); m->control_pressed = true; }
811                         }
812                         m->gobble(inOligos);
813                 }       
814                 inOligos.close();
815                 
816                 if ((primers.size() == 0) && (revPrimer.size() == 0)) {
817                         m->mothurOut("[ERROR]: your oligos file does not contain valid primers or reverse primers.  Please correct."); m->mothurOutEndLine();
818             m->control_pressed = true;
819                         return false;
820                 }
821         
822         return true;
823         
824     }catch(exception& e) {
825                 m->errorOut(e, "PcrSeqsCommand", "readOligos");
826                 exit(1);
827         }
828 }
829 //***************************************************************************************************************
830 bool PcrSeqsCommand::readEcoli(){
831         try {
832                 ifstream in;
833                 m->openInputFile(ecolifile, in);
834                 
835         //read seq
836         if (!in.eof()){ 
837             Sequence ecoli(in); 
838             length = ecoli.getAligned().length();
839             start = ecoli.getStartPos();
840             end = ecoli.getEndPos();
841         }else { in.close(); m->control_pressed = true; return false; }
842         in.close();    
843                         
844         return true;
845     }
846         catch(exception& e) {
847                 m->errorOut(e, "PcrSeqsCommand", "readEcoli");
848                 exit(1);
849         }
850     
851 }
852 //***************************************************************************************************************
853 int PcrSeqsCommand::writeAccnos(set<string> badNames){
854         try {
855                 string thisOutputDir = outputDir;
856                 if (outputDir == "") {  thisOutputDir += m->hasPath(fastafile);  }
857                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + "bad.accnos";
858         outputNames.push_back(outputFileName); outputTypes["accnos"].push_back(outputFileName);
859         
860         ofstream out;
861         m->openOutputFile(outputFileName, out);
862         
863         for (set<string>::iterator it = badNames.begin(); it != badNames.end(); it++) {
864             if (m->control_pressed) { break; }
865             out << (*it) << endl;
866         }
867         
868         out.close();
869         return 0;
870     }
871         catch(exception& e) {
872                 m->errorOut(e, "PcrSeqsCommand", "writeAccnos");
873                 exit(1);
874         }
875     
876 }
877 //******************************************************************/
878 bool PcrSeqsCommand::compareDNASeq(string oligo, string seq){
879         try {
880                 bool success = 1;
881                 int length = oligo.length();
882                 
883                 for(int i=0;i<length;i++){
884                         
885                         if(oligo[i] != seq[i]){
886                                 if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C')    {       success = 0;    }
887                                 else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N'))                                {       success = 0;    }
888                                 else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G'))                                    {       success = 0;    }
889                                 else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T'))                                    {       success = 0;    }
890                                 else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A'))                                    {       success = 0;    }
891                                 else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G'))                                    {       success = 0;    }
892                                 else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A'))                                    {       success = 0;    }
893                                 else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G'))                                    {       success = 0;    }
894                                 else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G'))   {       success = 0;    }
895                                 else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G'))   {       success = 0;    }
896                                 else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C'))   {       success = 0;    }
897                                 else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G'))   {       success = 0;    }                       
898                                 
899                                 if(success == 0)        {       break;   }
900                         }
901                         else{
902                                 success = 1;
903                         }
904                 }
905                 
906                 return success;
907         }
908         catch(exception& e) {
909                 m->errorOut(e, "PcrSeqsCommand", "compareDNASeq");
910                 exit(1);
911         }
912         
913 }
914 //***************************************************************************************************************
915 int PcrSeqsCommand::readName(set<string>& names){
916         try {
917                 string thisOutputDir = outputDir;
918                 if (outputDir == "") {  thisOutputDir += m->hasPath(namefile);  }
919                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(namefile)) + "pcr" + m->getExtension(namefile);
920         
921                 ofstream out;
922                 m->openOutputFile(outputFileName, out);
923         
924                 ifstream in;
925                 m->openInputFile(namefile, in);
926                 string name, firstCol, secondCol;
927                 
928                 bool wroteSomething = false;
929                 int removedCount = 0;
930                 
931                 while(!in.eof()){
932                         if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
933                         
934                         in >> firstCol;         m->gobble(in);          
935                         in >> secondCol;                        
936                         
937             string savedSecond = secondCol;
938                         vector<string> parsedNames;
939                         m->splitAtComma(secondCol, parsedNames);
940                         
941                         vector<string> validSecond;  validSecond.clear();
942                         for (int i = 0; i < parsedNames.size(); i++) {
943                                 if (names.count(parsedNames[i]) == 0) {
944                                         validSecond.push_back(parsedNames[i]);
945                                 }
946                         }
947                         
948                         if (validSecond.size() != parsedNames.size()) {  //we want to get rid of someone, so get rid of everyone
949                                 for (int i = 0; i < parsedNames.size(); i++) {  names.insert(parsedNames[i]);  }
950                                 removedCount += parsedNames.size();
951                         }else {
952                 out << firstCol << '\t' << savedSecond << endl;
953                 wroteSomething = true;
954             }
955                         m->gobble(in);
956                 }
957                 in.close();
958                 out.close();
959                 
960                 if (wroteSomething == false) {  m->mothurOut("Your file contains only sequences from the .accnos file."); m->mothurOutEndLine();  }
961                 outputTypes["name"].push_back(outputFileName); outputNames.push_back(outputFileName);
962                 
963                 m->mothurOut("Removed " + toString(removedCount) + " sequences from your name file."); m->mothurOutEndLine();
964                 
965                 return 0;
966         }
967         catch(exception& e) {
968                 m->errorOut(e, "PcrSeqsCommand", "readName");
969                 exit(1);
970         }
971 }
972 //**********************************************************************************************************************
973 int PcrSeqsCommand::readGroup(set<string> names){
974         try {
975                 string thisOutputDir = outputDir;
976                 if (outputDir == "") {  thisOutputDir += m->hasPath(groupfile);  }
977                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(groupfile)) + "pcr" + m->getExtension(groupfile);
978                 
979                 ofstream out;
980                 m->openOutputFile(outputFileName, out);
981         
982                 ifstream in;
983                 m->openInputFile(groupfile, in);
984                 string name, group;
985                 
986                 bool wroteSomething = false;
987                 int removedCount = 0;
988                 
989                 while(!in.eof()){
990                         if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
991                         
992                         in >> name;                             //read from first column
993                         in >> group;                    //read from second column
994                         
995                         //if this name is in the accnos file
996                         if (names.count(name) == 0) {
997                                 wroteSomething = true;
998                                 out << name << '\t' << group << endl;
999                         }else {  removedCount++;  }
1000             
1001                         m->gobble(in);
1002                 }
1003                 in.close();
1004                 out.close();
1005                 
1006                 if (wroteSomething == false) {  m->mothurOut("Your file contains only sequences from the .accnos file."); m->mothurOutEndLine();  }
1007                 outputTypes["group"].push_back(outputFileName); outputNames.push_back(outputFileName);
1008                 
1009                 m->mothurOut("Removed " + toString(removedCount) + " sequences from your group file."); m->mothurOutEndLine();
1010         
1011                 
1012                 return 0;
1013         }
1014         catch(exception& e) {
1015                 m->errorOut(e, "PcrSeqsCommand", "readGroup");
1016                 exit(1);
1017         }
1018 }
1019 //**********************************************************************************************************************
1020 int PcrSeqsCommand::readTax(set<string> names){
1021         try {
1022                 string thisOutputDir = outputDir;
1023                 if (outputDir == "") {  thisOutputDir += m->hasPath(taxfile);  }
1024                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(taxfile)) + "pcr" + m->getExtension(taxfile);
1025                 ofstream out;
1026                 m->openOutputFile(outputFileName, out);
1027         
1028                 ifstream in;
1029                 m->openInputFile(taxfile, in);
1030                 string name, tax;
1031                 
1032                 bool wroteSomething = false;
1033                 int removedCount = 0;
1034                 
1035                 while(!in.eof()){
1036                         if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
1037                         
1038                         in >> name;                             //read from first column
1039                         in >> tax;                      //read from second column
1040                         
1041                         //if this name is in the accnos file
1042                         if (names.count(name) == 0) {
1043                                 wroteSomething = true;
1044                                 out << name << '\t' << tax << endl;
1045                         }else {  removedCount++;  }
1046             
1047                         m->gobble(in);
1048                 }
1049                 in.close();
1050                 out.close();
1051                 
1052                 if (wroteSomething == false) {  m->mothurOut("Your file contains only sequences from the .accnos file."); m->mothurOutEndLine();  }
1053                 outputTypes["taxonomy"].push_back(outputFileName); outputNames.push_back(outputFileName);
1054                 
1055                 m->mothurOut("Removed " + toString(removedCount) + " sequences from your taxonomy file."); m->mothurOutEndLine();
1056                 
1057                 return 0;
1058         }
1059         catch(exception& e) {
1060                 m->errorOut(e, "PcrSeqsCommand", "readTax");
1061                 exit(1);
1062         }
1063 }
1064 /**************************************************************************************/
1065
1066