]> git.donarmstrong.com Git - mothur.git/blob - prcseqscommand.cpp
added keep dots to pcr.seqs. fixed pre.cluster name file name when group option is...
[mothur.git] / prcseqscommand.cpp
1 //
2 //  prcseqscommand.cpp
3 //  Mothur
4 //
5 //  Created by Sarah Westcott on 3/14/12.
6 //  Copyright (c) 2012 Schloss Lab. All rights reserved.
7 //
8
9 #include "pcrseqscommand.h"
10
11 //**********************************************************************************************************************
12 vector<string> PcrSeqsCommand::setParameters(){ 
13         try {
14                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
15                 CommandParameter poligos("oligos", "InputTypes", "", "", "ecolioligos", "none", "none",false,false); parameters.push_back(poligos);
16                 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
17         CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup);
18         CommandParameter ptax("taxonomy", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(ptax);
19         CommandParameter pecoli("ecoli", "InputTypes", "", "", "ecolioligos", "none", "none",false,false); parameters.push_back(pecoli);
20                 CommandParameter pstart("start", "Number", "", "-1", "", "", "",false,false); parameters.push_back(pstart);
21                 CommandParameter pend("end", "Number", "", "-1", "", "", "",false,false); parameters.push_back(pend);
22                 CommandParameter pnomatch("nomatch", "Multiple", "reject-keep", "reject", "", "", "",false,false); parameters.push_back(pnomatch);
23                 CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ppdiffs);
24                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
25                 CommandParameter pkeepprimer("keepprimer", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pkeepprimer);
26         CommandParameter pkeepdots("keepdots", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pkeepdots);
27         CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
28                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
29         
30                 vector<string> myArray;
31                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
32                 return myArray;
33         }
34         catch(exception& e) {
35                 m->errorOut(e, "PcrSeqsCommand", "setParameters");
36                 exit(1);
37         }
38 }
39 //**********************************************************************************************************************
40 string PcrSeqsCommand::getHelpString(){ 
41         try {
42                 string helpString = "";
43                 helpString += "The pcr.seqs command reads a fasta file ...\n";
44                 
45                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
46                 helpString += "For more details please check out the wiki http://www.mothur.org/wiki/Pcr.seqs .\n";
47                 return helpString;
48         }
49         catch(exception& e) {
50                 m->errorOut(e, "PcrSeqsCommand", "getHelpString");
51                 exit(1);
52         }
53 }
54
55
56 //**********************************************************************************************************************
57
58 PcrSeqsCommand::PcrSeqsCommand(){       
59         try {
60                 abort = true; calledHelp = true; 
61                 setParameters();
62                 vector<string> tempOutNames;
63                 outputTypes["fasta"] = tempOutNames;
64                 outputTypes["taxonomy"] = tempOutNames;
65                 outputTypes["group"] = tempOutNames;
66                 outputTypes["name"] = tempOutNames;
67         outputTypes["accnos"] = tempOutNames;
68         }
69         catch(exception& e) {
70                 m->errorOut(e, "PcrSeqsCommand", "PcrSeqsCommand");
71                 exit(1);
72         }
73 }
74 //***************************************************************************************************************
75
76 PcrSeqsCommand::PcrSeqsCommand(string option)  {
77         try {
78                 
79                 abort = false; calledHelp = false;   
80                 
81                 //allow user to run help
82                 if(option == "help") { help(); abort = true; calledHelp = true; }
83                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
84                 
85                 else {
86                         vector<string> myArray = setParameters();
87                         
88                         OptionParser parser(option);
89                         map<string,string> parameters = parser.getParameters();
90                         
91                         ValidParameters validParameter;
92                         map<string,string>::iterator it;
93                         
94                         //check to make sure all parameters are valid for command
95                         for (it = parameters.begin(); it != parameters.end(); it++) { 
96                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
97                         }
98                         
99                         //initialize outputTypes
100                         vector<string> tempOutNames;
101                         outputTypes["fasta"] = tempOutNames;
102                         outputTypes["taxonomy"] = tempOutNames;
103                         outputTypes["group"] = tempOutNames;
104                         outputTypes["name"] = tempOutNames;
105             outputTypes["accnos"] = tempOutNames;
106                         
107                         //if the user changes the input directory command factory will send this info to us in the output parameter 
108                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
109                         if (inputDir == "not found"){   inputDir = "";          }
110                         else {
111                                 string path;
112                                 it = parameters.find("fasta");
113                                 //user has given a template file
114                                 if(it != parameters.end()){ 
115                                         path = m->hasPath(it->second);
116                                         //if the user has not given a path then, add inputdir. else leave path alone.
117                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
118                                 }
119                                 
120                                 it = parameters.find("oligos");
121                                 //user has given a template file
122                                 if(it != parameters.end()){ 
123                                         path = m->hasPath(it->second);
124                                         //if the user has not given a path then, add inputdir. else leave path alone.
125                                         if (path == "") {       parameters["oligos"] = inputDir + it->second;           }
126                                 }
127                 
128                 it = parameters.find("ecoli");
129                                 //user has given a template file
130                                 if(it != parameters.end()){ 
131                                         path = m->hasPath(it->second);
132                                         //if the user has not given a path then, add inputdir. else leave path alone.
133                                         if (path == "") {       parameters["ecoli"] = inputDir + it->second;            }
134                                 }
135                                 
136                                 it = parameters.find("taxonomy");
137                                 //user has given a template file
138                                 if(it != parameters.end()){ 
139                                         path = m->hasPath(it->second);
140                                         //if the user has not given a path then, add inputdir. else leave path alone.
141                                         if (path == "") {       parameters["taxonomy"] = inputDir + it->second;         }
142                                 }
143                                 
144                                 it = parameters.find("name");
145                                 //user has given a template file
146                                 if(it != parameters.end()){ 
147                                         path = m->hasPath(it->second);
148                                         //if the user has not given a path then, add inputdir. else leave path alone.
149                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
150                                 }
151                 
152                 it = parameters.find("group");
153                                 //user has given a template file
154                                 if(it != parameters.end()){ 
155                                         path = m->hasPath(it->second);
156                                         //if the user has not given a path then, add inputdir. else leave path alone.
157                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
158                                 }
159                                 
160                         }
161             
162             //if the user changes the output directory command factory will send this info to us in the output parameter 
163                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
164                         
165                         //check for required parameters
166                         fastafile = validParameter.validFile(parameters, "fasta", true);
167                         if (fastafile == "not found") {                                 
168                                 fastafile = m->getFastaFile(); 
169                                 if (fastafile != "") { m->mothurOut("Using " + fastafile + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
170                                 else {  m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
171                         }else if (fastafile == "not open") { fastafile = ""; abort = true; }    
172                         else { m->setFastaFile(fastafile); }
173                         
174             
175                         //check for optional parameter and set defaults
176                         // ...at some point should added some additional type checking...
177                         string temp;
178                         temp = validParameter.validFile(parameters, "keepprimer", false);  if (temp == "not found")    {        temp = "f";     }
179                         keepprimer = m->isTrue(temp);   
180             
181             temp = validParameter.validFile(parameters, "keepdots", false);  if (temp == "not found")    {      temp = "t";     }
182                         keepdots = m->isTrue(temp);     
183             
184                         temp = validParameter.validFile(parameters, "oligos", true);
185                         if (temp == "not found"){       oligosfile = "";                }
186                         else if(temp == "not open"){    oligosfile = ""; abort = true;  } 
187                         else                                    {       oligosfile = temp; m->setOligosFile(oligosfile);                }
188                         
189             ecolifile = validParameter.validFile(parameters, "ecoli", true);
190                         if (ecolifile == "not found"){  ecolifile = "";         }
191                         else if(ecolifile == "not open"){       ecolifile = ""; abort = true;   } 
192                         
193             namefile = validParameter.validFile(parameters, "name", true);
194                         if (namefile == "not found"){   namefile = "";          }
195                         else if(namefile == "not open"){        namefile = ""; abort = true;    } 
196             else { m->setNameFile(namefile); }
197             
198             groupfile = validParameter.validFile(parameters, "group", true);
199                         if (groupfile == "not found"){  groupfile = "";         }
200                         else if(groupfile == "not open"){       groupfile = ""; abort = true;   } 
201             else { m->setGroupFile(groupfile); }
202             
203             taxfile = validParameter.validFile(parameters, "taxonomy", true);
204                         if (taxfile == "not found"){    taxfile = "";           }
205                         else if(taxfile == "not open"){ taxfile = ""; abort = true;     } 
206             else { m->setTaxonomyFile(taxfile); }
207                         
208                         temp = validParameter.validFile(parameters, "pdiffs", false);           if (temp == "not found") { temp = "0"; }
209                         m->mothurConvert(temp, pdiffs);
210                         
211                         temp = validParameter.validFile(parameters, "start", false);    if (temp == "not found") { temp = "-1"; }
212                         m->mothurConvert(temp, start);
213             
214             temp = validParameter.validFile(parameters, "end", false);  if (temp == "not found") { temp = "-1"; }
215                         m->mothurConvert(temp, end);
216                         
217                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
218                         m->setProcessors(temp);
219                         m->mothurConvert(temp, processors); 
220                         
221             nomatch = validParameter.validFile(parameters, "nomatch", false);   if (nomatch == "not found") { nomatch = "reject"; }
222                         
223             if ((nomatch != "reject") && (nomatch != "keep")) { m->mothurOut("[ERROR]: " + nomatch + " is not a valid entry for nomatch. Choices are reject and keep.\n");  abort = true; }
224             
225             //didnt set anything
226                         if ((oligosfile == "") && (ecolifile == "") && (start == -1) && (end == -1)) {
227                 m->mothurOut("[ERROR]: You did not set any options. Please provide an oligos or ecoli file, or set start or end.\n"); abort = true;
228             }
229             
230             if ((oligosfile == "") && (ecolifile == "") && (start < 0) && (end == -1)) { m->mothurOut("[ERROR]: Invalid start value.\n"); abort = true; }
231             
232             if ((ecolifile != "") && (start != -1) && (end != -1)) {
233                 m->mothurOut("[ERROR]: You provided an ecoli file , but set the start or end parameters. Unsure what you intend.  When you provide the ecoli file, mothur thinks you want to use the start and end of the sequence in the ecoli file.\n"); abort = true;
234             }
235
236             
237             if ((oligosfile != "") && (ecolifile != "")) {
238                  m->mothurOut("[ERROR]: You can not use an ecoli file at the same time as an oligos file.\n"); abort = true;
239             }
240                         
241                         //check to make sure you didn't forget the name file by mistake                 
242                         if (namefile == "") {
243                                 vector<string> files; files.push_back(fastafile);
244                                 parser.getNameFile(files);
245                         }
246                 }
247         
248         }
249         catch(exception& e) {
250                 m->errorOut(e, "PcrSeqsCommand", "PcrSeqsCommand");
251                 exit(1);
252         }
253 }
254 //***************************************************************************************************************
255
256 int PcrSeqsCommand::execute(){
257         try{
258         
259                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
260                 
261         int start = time(NULL);
262         
263         string thisOutputDir = outputDir;
264                 if (outputDir == "") {  thisOutputDir += m->hasPath(fastafile);  }
265                 string trimSeqFile = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + "pcr.fasta";
266                 outputNames.push_back(trimSeqFile); outputTypes["fasta"].push_back(trimSeqFile);
267         
268         string badSeqFile = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + "pcr.scrap.fasta";
269                 outputNames.push_back(badSeqFile); outputTypes["fasta"].push_back(badSeqFile);
270                 
271         length = 0;
272                 if(oligosfile != ""){    readOligos();     }  if (m->control_pressed) {  return 0; }
273         if(ecolifile != "") {    readEcoli();      }  if (m->control_pressed) {  return 0; }
274         
275         vector<unsigned long long> positions; 
276         int numFastaSeqs = 0;
277 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
278         positions = m->divideFile(fastafile, processors);
279         for (int i = 0; i < (positions.size()-1); i++) {        lines.push_back(linePair(positions[i], positions[(i+1)]));      }
280 #else
281         if (processors == 1) {
282             lines.push_back(linePair(0, 1000));
283         }else {
284             positions = m->setFilePosFasta(fastafile, numFastaSeqs); 
285             if (positions.size() < processors) { processors = positions.size(); }
286             
287             //figure out how many sequences you have to process
288             int numSeqsPerProcessor = numFastaSeqs / processors;
289             for (int i = 0; i < processors; i++) {
290                 int startIndex =  i * numSeqsPerProcessor;
291                 if(i == (processors - 1)){      numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor;   }
292                 lines.push_back(linePair(positions[startIndex], numSeqsPerProcessor));
293             }
294         }
295 #endif
296         if (m->control_pressed) {  return 0; }
297
298         set<string> badNames;
299         if(processors == 1) {    numFastaSeqs = driverPcr(fastafile, trimSeqFile, badSeqFile, badNames, lines[0]);   }
300         else                {    numFastaSeqs = createProcesses(fastafile, trimSeqFile, badSeqFile, badNames);       }  
301                 
302                 if (m->control_pressed) {  return 0; }          
303         
304         //don't write or keep if blank
305         if (badNames.size() != 0)   { writeAccnos(badNames);        }   
306         if (m->isBlank(badSeqFile)) { m->mothurRemove(badSeqFile);  }
307         
308         if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]); } return 0; }
309         if (namefile != "")                     {               readName(badNames);             }   
310         if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]); } return 0; }
311         if (groupfile != "")            {               readGroup(badNames);    }
312         if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]); } return 0; }
313                 if (taxfile != "")                      {               readTax(badNames);              }
314                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]); } return 0; }
315         
316         m->mothurOutEndLine();
317                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
318                 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
319                 m->mothurOutEndLine();
320                 m->mothurOutEndLine();
321                 
322                 //set fasta file as new current fastafile
323                 string current = "";
324                 itTypes = outputTypes.find("fasta");
325                 if (itTypes != outputTypes.end()) {
326                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
327                 }
328                 
329                 itTypes = outputTypes.find("name");
330                 if (itTypes != outputTypes.end()) {
331                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
332                 }
333                 
334                 itTypes = outputTypes.find("group");
335                 if (itTypes != outputTypes.end()) {
336                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
337                 }
338                 
339                 itTypes = outputTypes.find("accnos");
340                 if (itTypes != outputTypes.end()) {
341                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); }
342                 }
343                 
344                 itTypes = outputTypes.find("taxonomy");
345                 if (itTypes != outputTypes.end()) {
346                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setTaxonomyFile(current); }
347                 }
348         
349                 m->mothurOut("It took " + toString(time(NULL) - start) + " secs to screen " + toString(numFastaSeqs) + " sequences.");
350                 m->mothurOutEndLine();
351
352                 
353                 return 0;       
354         
355         }
356         catch(exception& e) {
357                 m->errorOut(e, "PcrSeqsCommand", "execute");
358                 exit(1);
359         }
360 }
361 /**************************************************************************************************/
362 int PcrSeqsCommand::createProcesses(string filename, string goodFileName, string badFileName, set<string>& badSeqNames) {
363         try {
364         
365         vector<int> processIDS;   
366         int process = 1;
367                 int num = 0;
368         
369 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
370         
371                 //loop through and create all the processes you want
372                 while (process != processors) {
373                         int pid = fork();
374                         
375                         if (pid > 0) {
376                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
377                                 process++;
378                         }else if (pid == 0){
379                                 num = driverPcr(filename, goodFileName + toString(getpid()) + ".temp", badFileName + toString(getpid()) + ".temp", badSeqNames, lines[process]);
380                                 
381                                 //pass numSeqs to parent
382                                 ofstream out;
383                                 string tempFile = filename + toString(getpid()) + ".num.temp";
384                                 m->openOutputFile(tempFile, out);
385                                 out << num << '\t' << badSeqNames.size() << endl;
386                 for (set<string>::iterator it = badSeqNames.begin(); it != badSeqNames.end(); it++) {
387                     out << (*it) << endl;
388                 }
389                                 out.close();
390                                 
391                                 exit(0);
392                         }else { 
393                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
394                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
395                                 exit(0);
396                         }
397                 }
398                 
399         num = driverPcr(filename, goodFileName, badFileName, badSeqNames, lines[0]);
400         
401                 //force parent to wait until all the processes are done
402                 for (int i=0;i<processIDS.size();i++) { 
403                         int temp = processIDS[i];
404                         wait(&temp);
405                 }
406                 
407                 for (int i = 0; i < processIDS.size(); i++) {
408                         ifstream in;
409                         string tempFile =  filename + toString(processIDS[i]) + ".num.temp";
410                         m->openInputFile(tempFile, in);
411             int numBadNames = 0; string name = "";
412                         if (!in.eof()) { int tempNum = 0; in >> tempNum >> numBadNames; num += tempNum; m->gobble(in); }
413             for (int j = 0; j < numBadNames; j++) {
414                 in >> name; m->gobble(in);
415                 badSeqNames.insert(name);
416             }
417                         in.close(); m->mothurRemove(tempFile);
418             
419             m->appendFiles((goodFileName + toString(processIDS[i]) + ".temp"), goodFileName);
420             m->mothurRemove((goodFileName + toString(processIDS[i]) + ".temp"));
421             
422             m->appendFiles((badFileName + toString(processIDS[i]) + ".temp"), badFileName);
423             m->mothurRemove((badFileName + toString(processIDS[i]) + ".temp"));
424                 }
425     #else
426         
427         //////////////////////////////////////////////////////////////////////////////////////////////////////
428                 //Windows version shared memory, so be careful when passing variables through the sumScreenData struct. 
429                 //Above fork() will clone, so memory is separate, but that's not the case with windows, 
430                 //Taking advantage of shared memory to allow both threads to add info to badSeqNames.
431                 //////////////////////////////////////////////////////////////////////////////////////////////////////
432                 
433                 vector<pcrData*> pDataArray; 
434                 DWORD   dwThreadIdArray[processors-1];
435                 HANDLE  hThreadArray[processors-1]; 
436                 
437                 //Create processor worker threads.
438                 for( int i=0; i<processors-1; i++ ){
439             
440             string extension = "";
441             if (i!=0) {extension += toString(i) + ".temp"; processIDS.push_back(i); }
442             
443                         // Allocate memory for thread data.
444                         pcrData* tempPcr = new pcrData(filename, goodFileName+extension, badFileName+extension, m, oligosfile, ecolifile, primers, revPrimer, nomatch, keepprimer, keepdots, start, end, length, lines[i].start, lines[i].end);
445                         pDataArray.push_back(tempPcr);
446                         
447                         //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
448                         hThreadArray[i] = CreateThread(NULL, 0, MyPcrThreadFunction, pDataArray[i], 0, &dwThreadIdArray[i]);   
449                 }
450                 
451         //do your part
452         num = driverPcr(filename, (goodFileName+toString(processors-1)+".temp"), (badFileName+toString(processors-1)+".temp"),badSeqNames, lines[processors-1]);
453         processIDS.push_back(processors-1);
454         
455                 //Wait until all threads have terminated.
456                 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
457                 
458                 //Close all thread handles and free memory allocations.
459                 for(int i=0; i < pDataArray.size(); i++){
460                         num += pDataArray[i]->count;
461             for (set<string>::iterator it = pDataArray[i]->badSeqNames.begin(); it != pDataArray[i]->badSeqNames.end(); it++) { badSeqNames.insert(*it);       }
462                         CloseHandle(hThreadArray[i]);
463                         delete pDataArray[i];
464                 }
465         
466         for (int i = 0; i < processIDS.size(); i++) {
467             m->appendFiles((goodFileName + toString(processIDS[i]) + ".temp"), goodFileName);
468             m->mothurRemove((goodFileName + toString(processIDS[i]) + ".temp"));
469             
470             m->appendFiles((badFileName + toString(processIDS[i]) + ".temp"), badFileName);
471             m->mothurRemove((badFileName + toString(processIDS[i]) + ".temp"));
472                 }
473         
474 #endif  
475         
476         return num;
477         
478         }
479         catch(exception& e) {
480                 m->errorOut(e, "PcrSeqsCommand", "createProcesses");
481                 exit(1);
482         }
483 }
484
485 //**********************************************************************************************************************
486 int PcrSeqsCommand::driverPcr(string filename, string goodFasta, string badFasta, set<string>& badSeqNames, linePair filePos){
487         try {
488                 ofstream goodFile;
489                 m->openOutputFile(goodFasta, goodFile);
490         
491         ofstream badFile;
492                 m->openOutputFile(badFasta, badFile);
493                 
494                 ifstream inFASTA;
495                 m->openInputFile(filename, inFASTA);
496         
497                 inFASTA.seekg(filePos.start);
498         
499                 bool done = false;
500                 int count = 0;
501         set<int> lengths;
502         
503                 while (!done) {
504             
505                         if (m->control_pressed) {  break; }
506                         
507                         Sequence currSeq(inFASTA); m->gobble(inFASTA);
508             
509             string trashCode = "";
510                         if (currSeq.getName() != "") {
511                 
512                 bool goodSeq = true;
513                 if (oligosfile != "") {
514                     map<int, int> mapAligned;
515                     bool aligned = isAligned(currSeq.getAligned(), mapAligned);
516                     
517                     //process primers
518                     if (primers.size() != 0) {
519                         int primerStart = 0; int primerEnd = 0;
520                         bool good = findForward(currSeq, primerStart, primerEnd);
521                         
522                         if(!good){      if (nomatch == "reject") { goodSeq = false; } trashCode += "f"; }
523                         else{
524                             //are you aligned
525                             if (aligned) { 
526                                 if (!keepprimer)    {  
527                                     if (keepdots)   { currSeq.filterToPos(mapAligned[primerEnd]);   }
528                                     else            { currSeq.setAligned(currSeq.getAligned().substr(mapAligned[primerEnd]));                                              }
529                                 } 
530                                 else                {  
531                                     if (keepdots)   { currSeq.filterToPos(mapAligned[primerStart]);  }
532                                     else            { currSeq.setAligned(currSeq.getAligned().substr(mapAligned[primerStart]));                                              }
533                                 }
534                             }else { 
535                                 if (!keepprimer)    { currSeq.setAligned(currSeq.getUnaligned().substr(primerEnd)); } 
536                                 else                { currSeq.setAligned(currSeq.getUnaligned().substr(primerStart)); } 
537                             }
538                         }
539                     }
540                     
541                     //process reverse primers
542                     if (revPrimer.size() != 0) {
543                         int primerStart = 0; int primerEnd = 0;
544                         bool good = findReverse(currSeq, primerStart, primerEnd);
545                         if(!good){      if (nomatch == "reject") { goodSeq = false; } trashCode += "r"; }
546                         else{ 
547                             //are you aligned
548                             if (aligned) { 
549                                 if (!keepprimer)    {  
550                                     if (keepdots)   { currSeq.filterFromPos(mapAligned[primerStart]); }
551                                     else            { currSeq.setAligned(currSeq.getAligned().substr(0, mapAligned[primerStart]));   }
552                                 } 
553                                 else                {  
554                                     if (keepdots)   { currSeq.filterFromPos(mapAligned[primerEnd]); }
555                                     else            { currSeq.setAligned(currSeq.getAligned().substr(0, mapAligned[primerEnd]));   }
556                                 } 
557                             }
558                             else { 
559                                 if (!keepprimer)    { currSeq.setAligned(currSeq.getUnaligned().substr(0, primerStart));   } 
560                                 else                { currSeq.setAligned(currSeq.getUnaligned().substr(0, primerEnd));     }
561                             }
562                         }
563                     }
564                 }else if (ecolifile != "") {
565                     //make sure the seqs are aligned
566                     lengths.insert(currSeq.getAligned().length());
567                     if (lengths.size() > 1) { m->mothurOut("[ERROR]: seqs are not aligned. When using start and end your sequences must be aligned.\n"); m->control_pressed = true; break; }
568                     else if (currSeq.getAligned().length() != length) {
569                         m->mothurOut("[ERROR]: seqs are not the same length as ecoli seq. When using ecoli option your sequences must be aligned and the same length as the ecoli sequence.\n"); m->control_pressed = true; break; 
570                     }else {
571                         if (keepdots)   { 
572                             currSeq.filterToPos(start); 
573                             currSeq.filterFromPos(end);
574                         }else {
575                             string seqString = currSeq.getAligned().substr(0, end);
576                             seqString = seqString.substr(start);
577                             currSeq.setAligned(seqString); 
578                         }
579                     }
580                 }else{ //using start and end to trim
581                     //make sure the seqs are aligned
582                     lengths.insert(currSeq.getAligned().length());
583                     if (lengths.size() > 1) { m->mothurOut("[ERROR]: seqs are not aligned. When using start and end your sequences must be aligned.\n"); m->control_pressed = true; break; }
584                     else {
585                         if (end != -1) {
586                             if (end > currSeq.getAligned().length()) {  m->mothurOut("[ERROR]: end is longer than your sequence length, aborting.\n"); m->control_pressed = true; break; }
587                             else {
588                                 if (keepdots)   { currSeq.filterFromPos(end); }
589                                 else {
590                                     string seqString = currSeq.getAligned().substr(0, end);
591                                     currSeq.setAligned(seqString); 
592                                 }
593                             }
594                         }
595                         if (start != -1) { 
596                             if (keepdots)   {  currSeq.filterToPos(start);  }
597                             else {
598                                 string seqString = currSeq.getAligned().substr(start);
599                                 currSeq.setAligned(seqString); 
600                             }
601                         }
602                     }
603                 }
604                                     
605                                 if(goodSeq == 1)    {   currSeq.printSequence(goodFile);        }
606                                 else {  
607                     badSeqNames.insert(currSeq.getName()); 
608                     currSeq.setName(currSeq.getName() + '|' + trashCode);
609                     currSeq.printSequence(badFile); 
610                 }
611                 count++;
612                         }
613                         
614 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
615             unsigned long long pos = inFASTA.tellg();
616             if ((pos == -1) || (pos >= filePos.end)) { break; }
617 #else
618             if (inFASTA.eof()) { break; }
619 #endif
620                         
621                         //report progress
622                         if((count) % 100 == 0){ m->mothurOut("Processing sequence: " + toString(count)); m->mothurOutEndLine();         }
623                 }
624                 //report progress
625                 if((count) % 100 != 0){ m->mothurOut("Processing sequence: " + toString(count)); m->mothurOutEndLine();         }
626                 
627         badFile.close();
628                 goodFile.close();
629                 inFASTA.close();
630                 
631                 return count;
632         }
633         catch(exception& e) {
634                 m->errorOut(e, "PcrSeqsCommand", "driverPcr");
635                 exit(1);
636         }
637 }
638 //********************************************************************/
639 bool PcrSeqsCommand::findForward(Sequence& seq, int& primerStart, int& primerEnd){
640         try {
641                 
642                 string rawSequence = seq.getUnaligned();
643                 
644                 for(int j=0;j<primers.size();j++){
645                         string oligo = primers[j];
646                         
647                         if(rawSequence.length() < oligo.length()) {  break;  }
648                         
649                         //search for primer
650             int olength = oligo.length();
651             for (int j = 0; j < rawSequence.length()-olength; j++){
652                 if (m->control_pressed) {  primerStart = 0; primerEnd = 0; return false; }
653                 string rawChunk = rawSequence.substr(j, olength);
654                 if(compareDNASeq(oligo, rawChunk)) {
655                     primerStart = j;
656                     primerEnd = primerStart + olength;
657                     return true;
658                 }
659                 
660             }
661         }       
662                 
663         primerStart = 0; primerEnd = 0;
664                 return false;
665                 
666         }
667         catch(exception& e) {
668                 m->errorOut(e, "TrimOligos", "stripForward");
669                 exit(1);
670         }
671 }
672 //******************************************************************/
673 bool PcrSeqsCommand::findReverse(Sequence& seq, int& primerStart, int& primerEnd){
674         try {
675                 
676                 string rawSequence = seq.getUnaligned();
677                 
678                 for(int i=0;i<revPrimer.size();i++){
679                         string oligo = revPrimer[i];
680                         if(rawSequence.length() < oligo.length()) {  break;  }
681                         
682                         //search for primer
683             int olength = oligo.length();
684             for (int j = rawSequence.length()-olength; j >= 0; j--){
685                  if (m->control_pressed) {  primerStart = 0; primerEnd = 0; return false; }
686                 string rawChunk = rawSequence.substr(j, olength);
687             
688                 if(compareDNASeq(oligo, rawChunk)) {
689                     primerStart = j;
690                     primerEnd = primerStart + olength;
691                     return true;
692                 }
693                 
694             }
695                 }       
696                 
697         primerStart = 0; primerEnd = 0;
698                 return false;
699         }
700         catch(exception& e) {
701                 m->errorOut(e, "PcrSeqsCommand", "findReverse");
702                 exit(1);
703         }
704 }
705 //********************************************************************/
706 bool PcrSeqsCommand::isAligned(string seq, map<int, int>& aligned){
707         try {
708         bool isAligned = false;
709         
710         int countBases = 0;
711         for (int i = 0; i < seq.length(); i++) {
712             if (!isalpha(seq[i])) { isAligned = true; }
713             else { aligned[countBases] = i; countBases++; } //maps location in unaligned -> location in aligned.
714         }                                                   //ie. the 3rd base may be at spot 10 in the alignment
715                                                             //later when we trim we want to trim from spot 10.
716         return isAligned;
717     }
718         catch(exception& e) {
719                 m->errorOut(e, "PcrSeqsCommand", "isAligned");
720                 exit(1);
721         }
722 }
723 //********************************************************************/
724 string PcrSeqsCommand::reverseOligo(string oligo){
725         try {
726         string reverse = "";
727        
728         for(int i=oligo.length()-1;i>=0;i--){
729             
730             if(oligo[i] == 'A')         {       reverse += 'T'; }
731             else if(oligo[i] == 'T'){   reverse += 'A'; }
732             else if(oligo[i] == 'U'){   reverse += 'A'; }
733             
734             else if(oligo[i] == 'G'){   reverse += 'C'; }
735             else if(oligo[i] == 'C'){   reverse += 'G'; }
736             
737             else if(oligo[i] == 'R'){   reverse += 'Y'; }
738             else if(oligo[i] == 'Y'){   reverse += 'R'; }
739             
740             else if(oligo[i] == 'M'){   reverse += 'K'; }
741             else if(oligo[i] == 'K'){   reverse += 'M'; }
742             
743             else if(oligo[i] == 'W'){   reverse += 'W'; }
744             else if(oligo[i] == 'S'){   reverse += 'S'; }
745             
746             else if(oligo[i] == 'B'){   reverse += 'V'; }
747             else if(oligo[i] == 'V'){   reverse += 'B'; }
748             
749             else if(oligo[i] == 'D'){   reverse += 'H'; }
750             else if(oligo[i] == 'H'){   reverse += 'D'; }
751
752             else                                                {       reverse += 'N'; }
753         }
754
755         
756         return reverse;
757     }
758         catch(exception& e) {
759                 m->errorOut(e, "PcrSeqsCommand", "reverseOligo");
760                 exit(1);
761         }
762 }
763
764 //***************************************************************************************************************
765 bool PcrSeqsCommand::readOligos(){
766         try {
767                 ifstream inOligos;
768                 m->openInputFile(oligosfile, inOligos);
769                 
770                 string type, oligo, group;
771                 
772                 while(!inOligos.eof()){
773             
774                         inOligos >> type; 
775             
776                         if(type[0] == '#'){ //ignore
777                                 while (!inOligos.eof()) {       char c = inOligos.get();  if (c == 10 || c == 13){      break;  }       } // get rest of line if there's any crap there
778                                 m->gobble(inOligos);
779                         }else{
780                                 m->gobble(inOligos);
781                                 //make type case insensitive
782                                 for(int i=0;i<type.length();i++){       type[i] = toupper(type[i]);  }
783                                 
784                                 inOligos >> oligo;
785                                 
786                                 for(int i=0;i<oligo.length();i++){
787                                         oligo[i] = toupper(oligo[i]);
788                                         if(oligo[i] == 'U')     {       oligo[i] = 'T'; }
789                                 }
790                                 
791                                 if(type == "FORWARD"){
792                                         // get rest of line in case there is a primer name
793                                         while (!inOligos.eof()) {       
794                         char c = inOligos.get(); 
795                         if (c == 10 || c == 13){        break;  } 
796                         else if (c == 32 || c == 9){;} //space or tab
797                                         } 
798                                         primers.push_back(oligo);
799                 }else if(type == "REVERSE"){
800                     string oligoRC = reverseOligo(oligo);
801                     revPrimer.push_back(oligoRC);
802                     //cout << "oligo = " << oligo << " reverse = " << oligoRC << endl;
803                                 }else if(type == "BARCODE"){
804                                         inOligos >> group;
805                                 }else if((type == "LINKER")||(type == "SPACER")) {;}
806                                 else{   m->mothurOut(type + " is not recognized as a valid type. Choices are forward, reverse, linker, spacer and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine(); m->control_pressed = true; }
807                         }
808                         m->gobble(inOligos);
809                 }       
810                 inOligos.close();
811                 
812                 if ((primers.size() == 0) && (revPrimer.size() == 0)) {
813                         m->mothurOut("[ERROR]: your oligos file does not contain valid primers or reverse primers.  Please correct."); m->mothurOutEndLine();
814             m->control_pressed = true;
815                         return false;
816                 }
817         
818         return true;
819         
820     }catch(exception& e) {
821                 m->errorOut(e, "PcrSeqsCommand", "readOligos");
822                 exit(1);
823         }
824 }
825 //***************************************************************************************************************
826 bool PcrSeqsCommand::readEcoli(){
827         try {
828                 ifstream in;
829                 m->openInputFile(ecolifile, in);
830                 
831         //read seq
832         if (!in.eof()){ 
833             Sequence ecoli(in); 
834             length = ecoli.getAligned().length();
835             start = ecoli.getStartPos();
836             end = ecoli.getEndPos();
837         }else { in.close(); m->control_pressed = true; return false; }
838         in.close();    
839                         
840         return true;
841     }
842         catch(exception& e) {
843                 m->errorOut(e, "PcrSeqsCommand", "readEcoli");
844                 exit(1);
845         }
846     
847 }
848 //***************************************************************************************************************
849 int PcrSeqsCommand::writeAccnos(set<string> badNames){
850         try {
851                 string thisOutputDir = outputDir;
852                 if (outputDir == "") {  thisOutputDir += m->hasPath(fastafile);  }
853                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + "bad.accnos";
854         outputNames.push_back(outputFileName); outputTypes["accnos"].push_back(outputFileName);
855         
856         ofstream out;
857         m->openOutputFile(outputFileName, out);
858         
859         for (set<string>::iterator it = badNames.begin(); it != badNames.end(); it++) {
860             if (m->control_pressed) { break; }
861             out << (*it) << endl;
862         }
863         
864         out.close();
865         return 0;
866     }
867         catch(exception& e) {
868                 m->errorOut(e, "PcrSeqsCommand", "writeAccnos");
869                 exit(1);
870         }
871     
872 }
873 //******************************************************************/
874 bool PcrSeqsCommand::compareDNASeq(string oligo, string seq){
875         try {
876                 bool success = 1;
877                 int length = oligo.length();
878                 
879                 for(int i=0;i<length;i++){
880                         
881                         if(oligo[i] != seq[i]){
882                                 if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C')    {       success = 0;    }
883                                 else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N'))                                {       success = 0;    }
884                                 else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G'))                                    {       success = 0;    }
885                                 else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T'))                                    {       success = 0;    }
886                                 else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A'))                                    {       success = 0;    }
887                                 else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G'))                                    {       success = 0;    }
888                                 else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A'))                                    {       success = 0;    }
889                                 else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G'))                                    {       success = 0;    }
890                                 else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G'))   {       success = 0;    }
891                                 else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G'))   {       success = 0;    }
892                                 else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C'))   {       success = 0;    }
893                                 else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G'))   {       success = 0;    }                       
894                                 
895                                 if(success == 0)        {       break;   }
896                         }
897                         else{
898                                 success = 1;
899                         }
900                 }
901                 
902                 return success;
903         }
904         catch(exception& e) {
905                 m->errorOut(e, "PcrSeqsCommand", "compareDNASeq");
906                 exit(1);
907         }
908         
909 }
910 //***************************************************************************************************************
911 int PcrSeqsCommand::readName(set<string>& names){
912         try {
913                 string thisOutputDir = outputDir;
914                 if (outputDir == "") {  thisOutputDir += m->hasPath(namefile);  }
915                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(namefile)) + "pcr" + m->getExtension(namefile);
916         
917                 ofstream out;
918                 m->openOutputFile(outputFileName, out);
919         
920                 ifstream in;
921                 m->openInputFile(namefile, in);
922                 string name, firstCol, secondCol;
923                 
924                 bool wroteSomething = false;
925                 int removedCount = 0;
926                 
927                 while(!in.eof()){
928                         if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
929                         
930                         in >> firstCol;         m->gobble(in);          
931                         in >> secondCol;                        
932                         
933             string savedSecond = secondCol;
934                         vector<string> parsedNames;
935                         m->splitAtComma(secondCol, parsedNames);
936                         
937                         vector<string> validSecond;  validSecond.clear();
938                         for (int i = 0; i < parsedNames.size(); i++) {
939                                 if (names.count(parsedNames[i]) == 0) {
940                                         validSecond.push_back(parsedNames[i]);
941                                 }
942                         }
943                         
944                         if (validSecond.size() != parsedNames.size()) {  //we want to get rid of someone, so get rid of everyone
945                                 for (int i = 0; i < parsedNames.size(); i++) {  names.insert(parsedNames[i]);  }
946                                 removedCount += parsedNames.size();
947                         }else {
948                 out << firstCol << '\t' << savedSecond << endl;
949                 wroteSomething = true;
950             }
951                         m->gobble(in);
952                 }
953                 in.close();
954                 out.close();
955                 
956                 if (wroteSomething == false) {  m->mothurOut("Your file contains only sequences from the .accnos file."); m->mothurOutEndLine();  }
957                 outputTypes["name"].push_back(outputFileName); outputNames.push_back(outputFileName);
958                 
959                 m->mothurOut("Removed " + toString(removedCount) + " sequences from your name file."); m->mothurOutEndLine();
960                 
961                 return 0;
962         }
963         catch(exception& e) {
964                 m->errorOut(e, "PcrSeqsCommand", "readName");
965                 exit(1);
966         }
967 }
968 //**********************************************************************************************************************
969 int PcrSeqsCommand::readGroup(set<string> names){
970         try {
971                 string thisOutputDir = outputDir;
972                 if (outputDir == "") {  thisOutputDir += m->hasPath(groupfile);  }
973                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(groupfile)) + "pcr" + m->getExtension(groupfile);
974                 
975                 ofstream out;
976                 m->openOutputFile(outputFileName, out);
977         
978                 ifstream in;
979                 m->openInputFile(groupfile, in);
980                 string name, group;
981                 
982                 bool wroteSomething = false;
983                 int removedCount = 0;
984                 
985                 while(!in.eof()){
986                         if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
987                         
988                         in >> name;                             //read from first column
989                         in >> group;                    //read from second column
990                         
991                         //if this name is in the accnos file
992                         if (names.count(name) == 0) {
993                                 wroteSomething = true;
994                                 out << name << '\t' << group << endl;
995                         }else {  removedCount++;  }
996             
997                         m->gobble(in);
998                 }
999                 in.close();
1000                 out.close();
1001                 
1002                 if (wroteSomething == false) {  m->mothurOut("Your file contains only sequences from the .accnos file."); m->mothurOutEndLine();  }
1003                 outputTypes["group"].push_back(outputFileName); outputNames.push_back(outputFileName);
1004                 
1005                 m->mothurOut("Removed " + toString(removedCount) + " sequences from your group file."); m->mothurOutEndLine();
1006         
1007                 
1008                 return 0;
1009         }
1010         catch(exception& e) {
1011                 m->errorOut(e, "PcrSeqsCommand", "readGroup");
1012                 exit(1);
1013         }
1014 }
1015 //**********************************************************************************************************************
1016 int PcrSeqsCommand::readTax(set<string> names){
1017         try {
1018                 string thisOutputDir = outputDir;
1019                 if (outputDir == "") {  thisOutputDir += m->hasPath(taxfile);  }
1020                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(taxfile)) + "pcr" + m->getExtension(taxfile);
1021                 ofstream out;
1022                 m->openOutputFile(outputFileName, out);
1023         
1024                 ifstream in;
1025                 m->openInputFile(taxfile, in);
1026                 string name, tax;
1027                 
1028                 bool wroteSomething = false;
1029                 int removedCount = 0;
1030                 
1031                 while(!in.eof()){
1032                         if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
1033                         
1034                         in >> name;                             //read from first column
1035                         in >> tax;                      //read from second column
1036                         
1037                         //if this name is in the accnos file
1038                         if (names.count(name) == 0) {
1039                                 wroteSomething = true;
1040                                 out << name << '\t' << tax << endl;
1041                         }else {  removedCount++;  }
1042             
1043                         m->gobble(in);
1044                 }
1045                 in.close();
1046                 out.close();
1047                 
1048                 if (wroteSomething == false) {  m->mothurOut("Your file contains only sequences from the .accnos file."); m->mothurOutEndLine();  }
1049                 outputTypes["taxonomy"].push_back(outputFileName); outputNames.push_back(outputFileName);
1050                 
1051                 m->mothurOut("Removed " + toString(removedCount) + " sequences from your taxonomy file."); m->mothurOutEndLine();
1052                 
1053                 return 0;
1054         }
1055         catch(exception& e) {
1056                 m->errorOut(e, "PcrSeqsCommand", "readTax");
1057                 exit(1);
1058         }
1059 }
1060 /**************************************************************************************/
1061
1062