]> git.donarmstrong.com Git - mothur.git/blob - trimseqscommand.cpp
bug fixes
[mothur.git] / trimseqscommand.cpp
1 /*
2  *  trimseqscommand.cpp
3  *  Mothur
4  *
5  *  Created by Pat Schloss on 6/6/09.
6  *  Copyright 2009 Patrick D. Schloss. All rights reserved.
7  *
8  */
9
10 #include "trimseqscommand.h"
11 #include "needlemanoverlap.hpp"
12
13 //***************************************************************************************************************
14
15 TrimSeqsCommand::TrimSeqsCommand(string option)  {
16         try {
17                 
18                 abort = false;
19                 comboStarts = 0;
20                 
21                 //allow user to run help
22                 if(option == "help") { help(); abort = true; }
23                 
24                 else {
25                         //valid paramters for this command
26                         string AlignArray[] =  {"fasta", "flip", "oligos", "maxambig", "maxhomop", "minlength", "maxlength", "qfile", 
27                                                                         "qthreshold", "qwindowaverage", "qstepsize", "qwindowsize", "qaverage", "rollaverage", "allfiles", "qtrim","tdiffs", "pdiffs", "bdiffs", "processors", "outputdir","inputdir"};
28                         
29                         vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
30                         
31                         OptionParser parser(option);
32                         map<string,string> parameters = parser.getParameters();
33                         
34                         ValidParameters validParameter;
35                         map<string,string>::iterator it;
36                         
37                         //check to make sure all parameters are valid for command
38                         for (it = parameters.begin(); it != parameters.end(); it++) { 
39                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
40                         }
41                         
42                         //if the user changes the input directory command factory will send this info to us in the output parameter 
43                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
44                         if (inputDir == "not found"){   inputDir = "";          }
45                         else {
46                                 string path;
47                                 it = parameters.find("fasta");
48                                 //user has given a template file
49                                 if(it != parameters.end()){ 
50                                         path = m->hasPath(it->second);
51                                         //if the user has not given a path then, add inputdir. else leave path alone.
52                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
53                                 }
54                                 
55                                 it = parameters.find("oligos");
56                                 //user has given a template file
57                                 if(it != parameters.end()){ 
58                                         path = m->hasPath(it->second);
59                                         //if the user has not given a path then, add inputdir. else leave path alone.
60                                         if (path == "") {       parameters["oligos"] = inputDir + it->second;           }
61                                 }
62                                 
63                                 it = parameters.find("qfile");
64                                 //user has given a template file
65                                 if(it != parameters.end()){ 
66                                         path = m->hasPath(it->second);
67                                         //if the user has not given a path then, add inputdir. else leave path alone.
68                                         if (path == "") {       parameters["qfile"] = inputDir + it->second;            }
69                                 }
70                         }
71
72                         
73                         //check for required parameters
74                         fastaFile = validParameter.validFile(parameters, "fasta", true);
75                         if (fastaFile == "not found") { m->mothurOut("fasta is a required parameter for the screen.seqs command."); m->mothurOutEndLine(); abort = true; }
76                         else if (fastaFile == "not open") { abort = true; }     
77                         
78                         //if the user changes the output directory command factory will send this info to us in the output parameter 
79                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
80                                 outputDir = ""; 
81                                 outputDir += m->hasPath(fastaFile); //if user entered a file with a path then preserve it       
82                         }
83                 
84                         
85                         //check for optional parameter and set defaults
86                         // ...at some point should added some additional type checking...
87                         string temp;
88                         temp = validParameter.validFile(parameters, "flip", false);
89                         if (temp == "not found"){       flip = 0;       }
90                         else if(m->isTrue(temp))        {       flip = 1;       }
91                 
92                         temp = validParameter.validFile(parameters, "oligos", true);
93                         if (temp == "not found"){       oligoFile = "";         }
94                         else if(temp == "not open"){    abort = true;   } 
95                         else                                    {       oligoFile = temp;               }
96                         
97                         temp = validParameter.validFile(parameters, "maxambig", false);         if (temp == "not found") { temp = "-1"; }
98                         convert(temp, maxAmbig);  
99
100                         temp = validParameter.validFile(parameters, "maxhomop", false);         if (temp == "not found") { temp = "0"; }
101                         convert(temp, maxHomoP);  
102
103                         temp = validParameter.validFile(parameters, "minlength", false);        if (temp == "not found") { temp = "0"; }
104                         convert(temp, minLength); 
105                         
106                         temp = validParameter.validFile(parameters, "maxlength", false);        if (temp == "not found") { temp = "0"; }
107                         convert(temp, maxLength);
108                         
109                         temp = validParameter.validFile(parameters, "bdiffs", false);           if (temp == "not found") { temp = "0"; }
110                         convert(temp, bdiffs);
111                         
112                         temp = validParameter.validFile(parameters, "pdiffs", false);           if (temp == "not found") { temp = "0"; }
113                         convert(temp, pdiffs);
114                         
115                         temp = validParameter.validFile(parameters, "tdiffs", false);           if (temp == "not found") { int tempTotal = pdiffs + bdiffs;  temp = toString(tempTotal); }
116                         convert(temp, tdiffs);
117                         
118                         if(tdiffs == 0){        tdiffs = bdiffs + pdiffs;       }
119                         
120                         temp = validParameter.validFile(parameters, "qfile", true);     
121                         if (temp == "not found")        {       qFileName = "";         }
122                         else if(temp == "not open")     {       abort = true;           }
123                         else                                            {       qFileName = temp;       }
124                         
125                         temp = validParameter.validFile(parameters, "qthreshold", false);       if (temp == "not found") { temp = "0"; }
126                         convert(temp, qThreshold);
127                         
128                         temp = validParameter.validFile(parameters, "qtrim", false);            if (temp == "not found") { temp = "F"; }
129                         qtrim = m->isTrue(temp);
130
131                         temp = validParameter.validFile(parameters, "rollaverage", false);      if (temp == "not found") { temp = "0"; }
132                         convert(temp, qRollAverage);
133
134                         temp = validParameter.validFile(parameters, "qwindowaverage", false);if (temp == "not found") { temp = "0"; }
135                         convert(temp, qWindowAverage);
136
137                         temp = validParameter.validFile(parameters, "qwindowsize", false);      if (temp == "not found") { temp = "100"; }
138                         convert(temp, qWindowSize);
139
140                         temp = validParameter.validFile(parameters, "qstepsize", false);                if (temp == "not found") { temp = "10"; }
141                         convert(temp, qWindowStep);
142
143                         temp = validParameter.validFile(parameters, "qaverage", false);         if (temp == "not found") { temp = "0"; }
144                         convert(temp, qAverage);
145                         
146                         temp = validParameter.validFile(parameters, "allfiles", false);         if (temp == "not found") { temp = "F"; }
147                         allFiles = m->isTrue(temp);
148                         
149                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found") { temp = "1"; }
150                         convert(temp, processors); 
151                         
152                         if(allFiles && oligoFile == ""){
153                                 m->mothurOut("You selected allfiles, but didn't enter an oligos file.  Ignoring the allfiles request."); m->mothurOutEndLine();
154                         }
155                         if((qAverage != 0 && qThreshold != 0) && qFileName == ""){
156                                 m->mothurOut("You didn't provide a quality file name, quality criteria will be ignored."); m->mothurOutEndLine();
157                                 qAverage=0;
158                                 qThreshold=0;
159                         }
160                         if(!flip && oligoFile=="" && !maxLength && !minLength && (maxAmbig==-1) && !maxHomoP && qFileName == ""){               
161                                 m->mothurOut("You didn't set any options... quiting command."); m->mothurOutEndLine();
162                                 abort = true;
163                         }
164                 }
165
166         }
167         catch(exception& e) {
168                 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
169                 exit(1);
170         }
171 }
172 //**********************************************************************************************************************
173
174 void TrimSeqsCommand::help(){
175         try {
176                 m->mothurOut("The trim.seqs command reads a fastaFile and creates .....\n");
177                 m->mothurOut("The trim.seqs command parameters are fasta, flip, oligos, maxambig, maxhomop, minlength, maxlength, qfile, qthreshold, qaverage, diffs, qtrim and allfiles.\n");
178                 m->mothurOut("The fasta parameter is required.\n");
179                 m->mothurOut("The flip parameter will output the reverse compliment of your trimmed sequence. The default is false.\n");
180                 m->mothurOut("The oligos parameter .... The default is "".\n");
181                 m->mothurOut("The maxambig parameter .... The default is -1.\n");
182                 m->mothurOut("The maxhomop parameter .... The default is 0.\n");
183                 m->mothurOut("The minlength parameter .... The default is 0.\n");
184                 m->mothurOut("The maxlength parameter .... The default is 0.\n");
185                 m->mothurOut("The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs.\n");
186                 m->mothurOut("The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n");
187                 m->mothurOut("The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n");
188                 m->mothurOut("The qfile parameter .....\n");
189                 m->mothurOut("The qthreshold parameter .... The default is 0.\n");
190                 m->mothurOut("The qaverage parameter .... The default is 0.\n");
191                 m->mothurOut("The allfiles parameter .... The default is F.\n");
192                 m->mothurOut("The qtrim parameter .... The default is F.\n");
193                 m->mothurOut("The trim.seqs command should be in the following format: \n");
194                 m->mothurOut("trim.seqs(fasta=yourFastaFile, flip=yourFlip, oligos=yourOligos, maxambig=yourMaxambig,  \n");
195                 m->mothurOut("maxhomop=yourMaxhomop, minlength=youMinlength, maxlength=yourMaxlength)  \n");    
196                 m->mothurOut("Example trim.seqs(fasta=abrecovery.fasta, flip=..., oligos=..., maxambig=..., maxhomop=..., minlength=..., maxlength=...).\n");
197                 m->mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n");
198                 m->mothurOut("For more details please check out the wiki http://www.mothur.org/wiki/Trim.seqs .\n\n");
199
200         }
201         catch(exception& e) {
202                 m->errorOut(e, "TrimSeqsCommand", "help");
203                 exit(1);
204         }
205 }
206
207
208 //***************************************************************************************************************
209
210 TrimSeqsCommand::~TrimSeqsCommand(){    /*      do nothing      */      }
211
212 //***************************************************************************************************************
213
214 int TrimSeqsCommand::execute(){
215         try{
216         
217                 if (abort == true) { return 0; }
218                 
219                 numFPrimers = 0;  //this needs to be initialized
220                 numRPrimers = 0;
221                 
222                 string trimSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim.fasta";
223                 outputNames.push_back(trimSeqFile);
224                 string scrapSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap.fasta";
225                 outputNames.push_back(scrapSeqFile);
226                 string trimQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim.qual";
227                 string scrapQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap.qual";
228                 if (qFileName != "") {  outputNames.push_back(trimQualFile); outputNames.push_back(scrapQualFile);  }
229                 string groupFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "groups";
230                 
231                 vector<string> fastaFileNames;
232                 vector<string> qualFileNames;
233                 if(oligoFile != ""){
234                         outputNames.push_back(groupFile);
235                         getOligos(fastaFileNames, qualFileNames);
236                 }
237
238                 vector<unsigned long int> fastaFilePos;
239                 vector<unsigned long int> qFilePos;
240                 
241                 setLines(fastaFile, qFileName, fastaFilePos, qFilePos);
242                 
243                 for (int i = 0; i < (fastaFilePos.size()-1); i++) {
244                         lines.push_back(new linePair(fastaFilePos[i], fastaFilePos[(i+1)]));
245                         if (qFileName != "") {  qLines.push_back(new linePair(qFilePos[i], qFilePos[(i+1)]));  }
246                 }       
247                 if(qFileName == "")     {       qLines = lines; } //files with duds
248                 
249                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
250                                 if(processors == 1){
251                                                                                 
252                                         driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, groupFile, fastaFileNames, qualFileNames, lines[0], qLines[0]);
253                                         
254                                         for (int j = 0; j < fastaFileNames.size(); j++) {
255                                                 rename((fastaFileNames[j] + toString(getpid()) + ".temp").c_str(), fastaFileNames[j].c_str());
256                                         }
257                                         if(qFileName != ""){
258                                                 for (int j = 0; j < qualFileNames.size(); j++) {
259                                                         rename((qualFileNames[j] + toString(getpid()) + ".temp").c_str(), qualFileNames[j].c_str());
260                                                 }
261                                         }                                               
262
263                                 }else{
264                                         createProcessesCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, groupFile, fastaFileNames, qualFileNames); 
265                                         
266                                         rename((trimSeqFile + toString(processIDS[0]) + ".temp").c_str(), trimSeqFile.c_str());
267                                         rename((scrapSeqFile + toString(processIDS[0]) + ".temp").c_str(), scrapSeqFile.c_str());
268                                         rename((groupFile + toString(processIDS[0]) + ".temp").c_str(), groupFile.c_str());
269                                         
270                                         if(qFileName != ""){
271                                                 rename((trimQualFile + toString(processIDS[0]) + ".temp").c_str(), trimQualFile.c_str());
272                                                 rename((scrapQualFile + toString(processIDS[0]) + ".temp").c_str(), scrapQualFile.c_str());
273                                         }
274                                         
275                                         
276                                         for (int j = 0; j < fastaFileNames.size(); j++) {
277                                                 rename((fastaFileNames[j] + toString(processIDS[0]) + ".temp").c_str(), fastaFileNames[j].c_str());
278                                         }
279                                         if(qFileName != ""){
280                                                 for (int j = 0; j < qualFileNames.size(); j++) {
281                                                         rename((qualFileNames[j] + toString(getpid()) + ".temp").c_str(), qualFileNames[j].c_str());
282                                                 }
283                                         }                                               
284                                         
285                                         //append files
286                                         for(int i=1;i<processors;i++){
287                                                 m->appendFiles((trimSeqFile + toString(processIDS[i]) + ".temp"), trimSeqFile);
288                                                 remove((trimSeqFile + toString(processIDS[i]) + ".temp").c_str());
289                                                 m->appendFiles((scrapSeqFile + toString(processIDS[i]) + ".temp"), scrapSeqFile);
290                                                 remove((scrapSeqFile + toString(processIDS[i]) + ".temp").c_str());
291
292                                                 m->appendFiles((trimQualFile + toString(processIDS[i]) + ".temp"), trimQualFile);
293                                                 remove((trimQualFile + toString(processIDS[i]) + ".temp").c_str());
294                                                 m->appendFiles((scrapQualFile + toString(processIDS[i]) + ".temp"), scrapQualFile);
295                                                 remove((scrapQualFile + toString(processIDS[i]) + ".temp").c_str());
296                                                 
297                                                 m->appendFiles((groupFile + toString(processIDS[i]) + ".temp"), groupFile);
298                                                 remove((groupFile + toString(processIDS[i]) + ".temp").c_str());
299                                                 for (int j = 0; j < fastaFileNames.size(); j++) {
300                                                         m->appendFiles((fastaFileNames[j] + toString(processIDS[i]) + ".temp"), fastaFileNames[j]);
301                                                         remove((fastaFileNames[j] + toString(processIDS[i]) + ".temp").c_str());
302                                                 }
303                                                 
304                                                 if(qFileName != ""){
305                                                         for (int j = 0; j < qualFileNames.size(); j++) {
306                                                                 m->appendFiles((qualFileNames[j] + toString(processIDS[i]) + ".temp"), qualFileNames[j]);
307                                                                 remove((qualFileNames[j] + toString(processIDS[i]) + ".temp").c_str());
308                                                         }
309                                                 }                                               
310                                                 
311                                                 
312                                         }
313                                 }
314                                 
315                                 if (m->control_pressed) {  return 0; }
316                 #else
317                                 driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, groupFile, fastaFileNames, qualFileNames, lines[0], qLines[0]);
318                                 
319                                 for (int j = 0; j < fastaFileNames.size(); j++) {
320                                         rename((fastaFileNames[j] + toString(j) + ".temp").c_str(), fastaFileNames[j].c_str());
321                                 }
322                                 if(qFileName != ""){
323                                         for (int j = 0; j < qualFileNames.size(); j++) {
324                                                 rename((qualFileNames[j] + toString(j) + ".temp").c_str(), qualFileNames[j].c_str());
325                                         }
326                                 }
327                                                                         
328                                 if (m->control_pressed) {  return 0; }
329                 #endif
330                                                 
331                                                                                 
332                 for(int i=0;i<fastaFileNames.size();i++){
333                         if (m->isBlank(fastaFileNames[i])) { remove(fastaFileNames[i].c_str()); }
334                         else if (filesToRemove.count(fastaFileNames[i]) > 0) { remove(fastaFileNames[i].c_str()); }
335                         else {
336                                 ifstream inFASTA;
337                                 string seqName;
338                                 m->openInputFile(fastaFileNames[i], inFASTA);
339                                 ofstream outGroups;
340                                 string outGroupFilename = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[i])) + "groups";
341                                 m->openOutputFile(outGroupFilename, outGroups);
342                                 outputNames.push_back(outGroupFilename);
343                                 
344                                 string thisGroup = "";
345                                 if (i > comboStarts) {
346                                         map<string, int>::iterator itCombo;
347                                         for(itCombo=combos.begin();itCombo!=combos.end(); itCombo++){
348                                                 if(itCombo->second == i){       thisGroup = itCombo->first;     combos.erase(itCombo);  break;  }
349                                         }
350                                 }else{ thisGroup = groupVector[i]; }
351                                 
352                                 while(!inFASTA.eof()){
353                                         if(inFASTA.get() == '>'){
354                                                 inFASTA >> seqName;
355                                                 outGroups << seqName << '\t' << thisGroup << endl;
356                                         }
357                                         while (!inFASTA.eof())  {       char c = inFASTA.get(); if (c == 10 || c == 13){        break;  }       }
358                                 }
359                                 outGroups.close();
360                                 inFASTA.close();
361                         }
362                 }
363                 
364                 if(qFileName != ""){
365                         for(int i=0;i<qualFileNames.size();i++){
366                                 if (m->isBlank(qualFileNames[i])) { remove(qualFileNames[i].c_str());   }
367                                 else if (filesToRemove.count(qualFileNames[i]) > 0) { remove(qualFileNames[i].c_str()); }
368                                 else {
369                                         ifstream inQual;
370                                         string seqName;
371                                         m->openInputFile(qualFileNames[i], inQual);
372 //                                      ofstream outGroups;
373 //                                      
374 //                                      string thisGroup = "";
375 //                                      if (i > comboStarts) {
376 //                                              map<string, int>::iterator itCombo;
377 //                                              for(itCombo=combos.begin();itCombo!=combos.end(); itCombo++){
378 //                                                      if(itCombo->second == i){       thisGroup = itCombo->first;     combos.erase(itCombo);  break;  }
379 //                                              }
380 //                                      }
381 //                                      else{ thisGroup = groupVector[i]; }
382                                         
383                                         inQual.close();
384                                 }
385                         }
386                 }
387                 
388                 
389                 if (m->control_pressed) { 
390                         for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str()); }
391                         return 0;
392                 }
393
394                 m->mothurOutEndLine();
395                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
396                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
397                 m->mothurOutEndLine();
398                 
399                 return 0;       
400                         
401         }
402         catch(exception& e) {
403                 m->errorOut(e, "TrimSeqsCommand", "execute");
404                 exit(1);
405         }
406 }
407                 
408 /**************************************************************************************/
409
410 int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string trimFile, string scrapFile, string trimQFile, string scrapQFile, string groupFile, vector<string> fastaNames, vector<string> qualNames, linePair* line, linePair* qline) {      
411                 
412         try {
413                 
414                 ofstream outFASTA;
415                 int able = m->openOutputFile(trimFile, outFASTA);
416                 
417                 ofstream scrapFASTA;
418                 m->openOutputFile(scrapFile, scrapFASTA);
419                 
420                 ofstream outQual;
421                 ofstream scrapQual;
422                 if(qFileName != ""){
423                         m->openOutputFile(trimQFile, outQual);
424                         m->openOutputFile(scrapQFile, scrapQual);
425                 }
426                 
427                 ofstream outGroups;
428                 vector<ofstream*> fastaFileNames;
429                 vector<ofstream*> qualFileNames;
430                 
431                 if (oligoFile != "") {          
432                         m->openOutputFile(groupFile, outGroups);   
433                         for (int i = 0; i < fastaNames.size(); i++) {
434
435                         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
436                                 fastaFileNames.push_back(new ofstream((fastaNames[i] + toString(getpid()) + ".temp").c_str(), ios::ate)); 
437                                 if(qFileName != ""){
438                                         qualFileNames.push_back(new ofstream((qualNames[i] + toString(getpid()) + ".temp").c_str(), ios::ate)); 
439                                 }
440                         #else
441                                 fastaFileNames.push_back(new ofstream((fastaNames[i] + toString(i) + ".temp").c_str(), ios::ate));                      
442                                 if(qFileName != ""){
443                                         qualFileNames.push_back(new ofstream((qualNames[i] + toString(i) + ".temp").c_str(), ios::ate));                        
444                                 }
445                         #endif
446                         }
447                 }
448                 
449                 ifstream inFASTA;
450                 m->openInputFile(filename, inFASTA);
451                 inFASTA.seekg(line->start);
452                 
453                 ifstream qFile;
454                 if(qFileName != "")     {       m->openInputFile(qFileName, qFile);     qFile.seekg(qline->start);  }
455                 
456                 bool done = false;
457                 int count = 0;
458         
459                 while (!done) {
460                                 
461                         if (m->control_pressed) { 
462                                 inFASTA.close(); outFASTA.close(); scrapFASTA.close();
463                                 if (oligoFile != "") {   outGroups.close();   }
464                                 
465                                 for(int i=0;i<fastaFileNames.size();i++){  fastaFileNames[i]->close(); delete fastaFileNames[i];  }     
466
467                                 if(qFileName != ""){
468                                         qFile.close();
469                                         for(int i=0;i<qualFileNames.size();i++){  qualFileNames[i]->close(); delete qualFileNames[i];  }        
470                                 }
471                                 for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str()); }
472
473                                 return 0;
474                         }
475                         
476                         int success = 1;
477                         
478
479                         Sequence currSeq(inFASTA); m->gobble(inFASTA);
480
481                         QualityScores currQual;
482                         if(qFileName != ""){
483                                 currQual = QualityScores(qFile, currSeq.getNumBases());  m->gobble(qFile);
484                         }
485                         
486                         string origSeq = currSeq.getUnaligned();
487                         if (origSeq != "") {
488                                 int groupBar, groupPrime;
489                                 string trashCode = "";
490                                 int currentSeqsDiffs = 0;
491
492                                 if(qFileName != ""){
493                                         if(qThreshold != 0)                     {       success = currQual.stripQualThreshold(currSeq, qThreshold);                     }
494                                         else if(qAverage != 0)          {       success = currQual.cullQualAverage(currSeq, qAverage);                          }
495                                         else if(qRollAverage != 0)      {       success = currQual.stripQualRollingAverage(currSeq, qRollAverage);      }
496                                         else if(qWindowAverage != 0){   success = currQual.stripQualWindowAverage(currSeq, qWindowStep, qWindowSize, qWindowAverage);   }
497
498                                         if (qtrim == 1 && (origSeq.length() != currSeq.getUnaligned().length())) { 
499                                                 success = 0; //if you don't want to trim and the sequence does not meet quality requirements, move to scrap
500                                         }
501
502                                         if(!success)                            {       trashCode += 'q';       }
503                                 }
504                         
505                                 if(barcodes.size() != 0){
506                                         success = stripBarcode(currSeq, currQual, groupBar);
507                                         if(success > bdiffs)            {       trashCode += 'b';       }
508                                         else{ currentSeqsDiffs += success;  }
509                                 }
510
511                                 if(numFPrimers != 0){
512                                         success = stripForward(currSeq, currQual, groupPrime);
513                                         if(success > pdiffs)            {       trashCode += 'f';       }
514                                         else{ currentSeqsDiffs += success;  }
515                                 }
516                                 
517                                 if (currentSeqsDiffs > tdiffs)  {       trashCode += 't';   }
518
519                                 if(numRPrimers != 0){
520                                         success = stripReverse(currSeq, currQual);
521                                         if(!success)                            {       trashCode += 'r';       }
522                                 }
523                 
524                                 if(minLength > 0 || maxLength > 0){
525                                         success = cullLength(currSeq);
526                                         if(!success)                            {       trashCode += 'l';       }
527                                 }
528                                 if(maxHomoP > 0){
529                                         success = cullHomoP(currSeq);
530                                         if(!success)                            {       trashCode += 'h';       }
531                                 }
532                                 if(maxAmbig != -1){
533                                         success = cullAmbigs(currSeq);
534                                         if(!success)                            {       trashCode += 'n';       }
535                                 }
536                                 
537                                 if(flip){       currSeq.reverseComplement();            currQual.flipQScores(); }               // should go last                       
538                                 
539                                 if(trashCode.length() == 0){
540                                         currSeq.setAligned(currSeq.getUnaligned());
541                                         currSeq.printSequence(outFASTA);
542                                         currQual.printQScores(outQual);
543                                         
544                                         if(barcodes.size() != 0){
545                                                 string thisGroup = groupVector[groupBar];
546                                                 int indexToFastaFile = groupBar;
547                                                 if (primers.size() != 0){
548                                                         //does this primer have a group
549                                                         if (groupVector[groupPrime] != "") {  
550                                                                 thisGroup += "." + groupVector[groupPrime]; 
551                                                                 indexToFastaFile = combos[thisGroup];
552                                                         }
553                                                 }
554                                                 outGroups << currSeq.getName() << '\t' << thisGroup << endl;
555                                                 if(allFiles){
556                                                         currSeq.printSequence(*fastaFileNames[indexToFastaFile]);
557                                                         
558                                                         if(qFileName != ""){
559                                                                 currQual.printQScores(*qualFileNames[indexToFastaFile]);                                                        
560                                                         }
561                                                 }
562                                         }
563                                 }
564                                 else{
565                                         currSeq.setName(currSeq.getName() + '|' + trashCode);
566                                         currSeq.setUnaligned(origSeq);
567                                         currSeq.setAligned(origSeq);
568                                         currSeq.printSequence(scrapFASTA);
569                                         currQual.printQScores(scrapQual);
570                                 }
571                                 count++;
572                         }
573                         
574                         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
575                                 unsigned long int pos = inFASTA.tellg();
576                                 if ((pos == -1) || (pos >= line->end)) { break; }
577                         #else
578                                 if (inFASTA.eof()) { break; }
579                         #endif
580                                 
581                         //report progress
582                         if((count) % 1000 == 0){        m->mothurOut(toString(count)); m->mothurOutEndLine();           }
583                         
584                 }
585                 //report progress
586                 if((count) % 1000 != 0){        m->mothurOut(toString(count)); m->mothurOutEndLine();           }
587
588                 
589                 inFASTA.close();
590                 outFASTA.close();
591                 scrapFASTA.close();
592                 if (oligoFile != "") {   outGroups.close();   }
593                 if(qFileName != "")     {       qFile.close();  scrapQual.close(); outQual.close();     }
594                 
595                 for(int i=0;i<fastaFileNames.size();i++){
596                         fastaFileNames[i]->close();
597                         delete fastaFileNames[i];
598                 }               
599                 
600                 if(qFileName != ""){
601                         for(int i=0;i<qualFileNames.size();i++){
602                                 qualFileNames[i]->close();
603                                 delete qualFileNames[i];
604                         }               
605                 }                       
606                 
607                 return count;
608         }
609         catch(exception& e) {
610                 m->errorOut(e, "TrimSeqsCommand", "driverCreateTrim");
611                 exit(1);
612         }
613 }
614
615 /**************************************************************************************************/
616
617 int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName, string trimFile, string scrapFile, string trimQFile, string scrapQFile, string groupFile, vector<string> fastaNames, vector<string> qualNames) {
618         try {
619 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
620                 int process = 0;
621                 int exitCommand = 1;
622                 processIDS.clear();
623                 
624                 //loop through and create all the processes you want
625                 while (process != processors) {
626                         int pid = fork();
627                         
628                         if (pid > 0) {
629                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
630                                 process++;
631                         }else if (pid == 0){
632                                 driverCreateTrim(filename, qFileName, (trimFile + toString(getpid()) + ".temp"), (scrapFile + toString(getpid()) + ".temp"), (trimQFile + toString(getpid()) + ".temp"), (scrapQFile + toString(getpid()) + ".temp"), (groupFile + toString(getpid()) + ".temp"), fastaNames, qualNames, lines[process], qLines[process]);
633                                 exit(0);
634                         }else { m->mothurOut("unable to spawn the necessary processes."); m->mothurOutEndLine(); exit(0); }
635                 }
636                 
637                 //force parent to wait until all the processes are done
638                 for (int i=0;i<processors;i++) { 
639                         int temp = processIDS[i];
640                         wait(&temp);
641                 }
642                 
643                 return exitCommand;
644 #endif          
645         }
646         catch(exception& e) {
647                 m->errorOut(e, "TrimSeqsCommand", "createProcessesCreateTrim");
648                 exit(1);
649         }
650 }
651
652 /**************************************************************************************************/
653
654 int TrimSeqsCommand::setLines(string filename, string qfilename, vector<unsigned long int>& fastaFilePos, vector<unsigned long int>& qfileFilePos) {
655         try {
656                 
657                 //set file positions for fasta file
658                 fastaFilePos = m->divideFile(filename, processors);
659                 
660                 if (qfilename == "") { return processors; }
661                 
662                 //get name of first sequence in each chunk
663                 map<string, int> firstSeqNames;
664                 for (int i = 0; i < (fastaFilePos.size()-1); i++) {
665                         ifstream in;
666                         m->openInputFile(filename, in);
667                         in.seekg(fastaFilePos[i]);
668                 
669                         Sequence temp(in); 
670                         firstSeqNames[temp.getName()] = i;
671                 
672                         in.close();
673                 }
674                                 
675                 //seach for filePos of each first name in the qfile and save in qfileFilePos
676                 ifstream inQual;
677                 m->openInputFile(qfilename, inQual);
678                         
679                 string input;
680                 while(!inQual.eof()){   
681                         input = m->getline(inQual);
682
683                         if (input.length() != 0) {
684                                 if(input[0] == '>'){ //this is a sequence name line
685                                         istringstream nameStream(input);
686                                         
687                                         string sname = "";  nameStream >> sname;
688                                         sname = sname.substr(1);
689                                         
690                                         map<string, int>::iterator it = firstSeqNames.find(sname);
691                                         
692                                         if(it != firstSeqNames.end()) { //this is the start of a new chunk
693                                                 unsigned long int pos = inQual.tellg(); 
694                                                 qfileFilePos.push_back(pos - input.length() - 1);       
695                                                 firstSeqNames.erase(it);
696                                         }
697                                 }
698                         }
699                         
700                         if (firstSeqNames.size() == 0) { break; }
701                 }
702                 inQual.close();
703                 
704                 if (firstSeqNames.size() != 0) { 
705                         for (map<string, int>::iterator it = firstSeqNames.begin(); it != firstSeqNames.end(); it++) {
706                                 m->mothurOut(it->first + " is in your fasta file and not in your quality file, not using quality file."); m->mothurOutEndLine();
707                         }
708                         qFileName = "";
709                         return processors;
710                 }
711
712                 //get last file position of qfile
713                 FILE * pFile;
714                 unsigned long int size;
715                 
716                 //get num bytes in file
717                 pFile = fopen (qfilename.c_str(),"rb");
718                 if (pFile==NULL) perror ("Error opening file");
719                 else{
720                         fseek (pFile, 0, SEEK_END);
721                         size=ftell (pFile);
722                         fclose (pFile);
723                 }
724                 
725                 qfileFilePos.push_back(size);
726                 
727                 return processors;
728         }
729         catch(exception& e) {
730                 m->errorOut(e, "TrimSeqsCommand", "setLines");
731                 exit(1);
732         }
733 }
734 //***************************************************************************************************************
735
736 void TrimSeqsCommand::getOligos(vector<string>& outFASTAVec, vector<string>& outQualVec){
737         try {
738                 ifstream inOligos;
739                 m->openInputFile(oligoFile, inOligos);
740                 
741                 ofstream test;
742                 
743                 string type, oligo, group;
744                 int index=0;
745                 //int indexPrimer = 0;
746                 
747                 while(!inOligos.eof()){
748                         inOligos >> type;
749                                         
750                         if(type[0] == '#'){
751                                 while (!inOligos.eof()) {       char c = inOligos.get(); if (c == 10 || c == 13){       break;  }       } // get rest of line if there's any crap there
752                         }
753                         else{
754                                 //make type case insensitive
755                                 for(int i=0;i<type.length();i++){       type[i] = toupper(type[i]);  }
756                                 
757                                 inOligos >> oligo;
758                                 
759                                 for(int i=0;i<oligo.length();i++){
760                                         oligo[i] = toupper(oligo[i]);
761                                         if(oligo[i] == 'U')     {       oligo[i] = 'T'; }
762                                 }
763                                 
764                                 if(type == "FORWARD"){
765                                         group = "";
766                                         
767                                         // get rest of line in case there is a primer name
768                                         while (!inOligos.eof()) {       
769                                                 char c = inOligos.get(); 
770                                                 if (c == 10 || c == 13){        break;  }
771                                                 else if (c == 32 || c == 9){;} //space or tab
772                                                 else {  group += c;  }
773                                         } 
774                                         
775                                         //check for repeat barcodes
776                                         map<string, int>::iterator itPrime = primers.find(oligo);
777                                         if (itPrime != primers.end()) { m->mothurOut("primer " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
778                                         
779                                         primers[oligo]=index; index++;
780                                         groupVector.push_back(group);
781                                         
782                                         if(allFiles){
783                                                 outFASTAVec.push_back((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + group + ".fasta"));
784                                                 if(qFileName != ""){
785                                                         outQualVec.push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + group + ".qual"));
786                                                 }
787                                                 if (group == "") { //if there is not a group for this primer, then this file will not get written to, but we add it to keep the indexes correct
788                                                         filesToRemove.insert((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + group + ".fasta"));
789                                                         if(qFileName != ""){
790                                                                 filesToRemove.insert((outputDir + m->getRootName(m->getSimpleName(qFileName)) + group + ".qual"));
791                                                         }
792                                                 }else {
793                                                         outputNames.push_back((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + group + ".fasta"));
794                                                         if(qFileName != ""){
795                                                                 outputNames.push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + group + ".qual"));
796                                                         }                                                       
797                                                 }
798                                         }
799
800                                 }
801                                 else if(type == "REVERSE"){
802                                         Sequence oligoRC("reverse", oligo);
803                                         oligoRC.reverseComplement();
804                                         revPrimer.push_back(oligoRC.getUnaligned());
805                                 }
806                                 else if(type == "BARCODE"){
807                                         inOligos >> group;
808                                         
809                                         //check for repeat barcodes
810                                         map<string, int>::iterator itBar = barcodes.find(oligo);
811                                         if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
812                                         
813                                         barcodes[oligo]=index; index++;
814                                         groupVector.push_back(group);
815                                         
816                                         if(allFiles){
817                                                 outputNames.push_back((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + group + ".fasta"));
818                                                 outFASTAVec.push_back((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + group + ".fasta"));
819                                                 if(qFileName != ""){
820                                                         outQualVec.push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + group + ".qual"));
821                                                         outputNames.push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + group + ".qual"));
822                                                 }                                                       
823                                         }
824                                 }else{  m->mothurOut(type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine();  }
825                         }
826                         m->gobble(inOligos);
827                 }
828                 
829                 inOligos.close();
830                 
831                 //add in potential combos
832                 if(allFiles){
833                         comboStarts = outFASTAVec.size()-1;
834                         for (map<string, int>::iterator itBar = barcodes.begin(); itBar != barcodes.end(); itBar++) {
835                                 for (map<string, int>::iterator itPrime = primers.begin(); itPrime != primers.end(); itPrime++) {
836                                         if (groupVector[itPrime->second] != "") { //there is a group for this primer
837                                                 outputNames.push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + groupVector[itBar->second] + "." + groupVector[itPrime->second] + ".fasta"));
838                                                 outFASTAVec.push_back((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + groupVector[itBar->second] + "." + groupVector[itPrime->second] + ".fasta"));
839                                                 combos[(groupVector[itBar->second] + "." + groupVector[itPrime->second])] = outFASTAVec.size()-1;
840                                                 
841                                                 if(qFileName != ""){
842                                                         outQualVec.push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + groupVector[itBar->second] + "." + groupVector[itPrime->second] + ".qual"));
843                                                         outputNames.push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + groupVector[itBar->second] + "." + groupVector[itPrime->second] + ".qual"));
844                                                 }
845                                         }
846                                 }
847                         }
848                 }
849                 
850                 numFPrimers = primers.size();
851                 numRPrimers = revPrimer.size();
852                 
853         }
854         catch(exception& e) {
855                 m->errorOut(e, "TrimSeqsCommand", "getOligos");
856                 exit(1);
857         }
858 }
859 //***************************************************************************************************************
860
861 int TrimSeqsCommand::stripBarcode(Sequence& seq, QualityScores& qual, int& group){
862         try {
863                 
864                 string rawSequence = seq.getUnaligned();
865                 int success = bdiffs + 1;       //guilty until proven innocent
866                 
867                 //can you find the barcode
868                 for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
869                         string oligo = it->first;
870                         if(rawSequence.length() < oligo.length()){      //let's just assume that the barcodes are the same length
871                                 success = bdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
872                                 break;  
873                         }
874                         
875                         if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
876                                 group = it->second;
877                                 seq.setUnaligned(rawSequence.substr(oligo.length()));
878                                 
879                                 if(qual.getName() != ""){
880                                         qual.trimQScores(oligo.length(), -1);
881                                 }
882                                 
883                                 success = 0;
884                                 break;
885                         }
886                 }
887                 
888                 //if you found the barcode or if you don't want to allow for diffs
889 //              cout << success;
890                 if ((bdiffs == 0) || (success == 0)) { return success;  }
891                 
892                 else { //try aligning and see if you can find it
893 //                      cout << endl;
894
895                         int maxLength = 0;
896
897                         Alignment* alignment;
898                         if (barcodes.size() > 0) {
899                                 map<string,int>::iterator it=barcodes.begin();
900
901                                 for(it;it!=barcodes.end();it++){
902                                         if(it->first.length() > maxLength){
903                                                 maxLength = it->first.length();
904                                         }
905                                 }
906                                 alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+bdiffs+1));  
907
908                         }else{ alignment = NULL; } 
909                         
910                         //can you find the barcode
911                         int minDiff = 1e6;
912                         int minCount = 1;
913                         int minGroup = -1;
914                         int minPos = 0;
915                         
916                         for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
917                                 string oligo = it->first;
918 //                              int length = oligo.length();
919                                 
920                                 if(rawSequence.length() < maxLength){   //let's just assume that the barcodes are the same length
921                                         success = bdiffs + 10;
922                                         break;
923                                 }
924                                 
925                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
926                                 alignment->align(oligo, rawSequence.substr(0,oligo.length()+bdiffs));
927                                 oligo = alignment->getSeqAAln();
928                                 string temp = alignment->getSeqBAln();
929                 
930                                 int alnLength = oligo.length();
931                                 
932                                 for(int i=oligo.length()-1;i>=0;i--){
933                                         if(oligo[i] != '-'){    alnLength = i+1;        break;  }
934                                 }
935                                 oligo = oligo.substr(0,alnLength);
936                                 temp = temp.substr(0,alnLength);
937                                 
938                                 int newStart=0;
939                                 int numDiff = countDiffs(oligo, temp);
940                                 
941 //                              cout << oligo << '\t' << temp << '\t' << numDiff << endl;                               
942                                 
943                                 if(numDiff < minDiff){
944                                         minDiff = numDiff;
945                                         minCount = 1;
946                                         minGroup = it->second;
947                                         minPos = 0;
948                                         for(int i=0;i<alnLength;i++){
949                                                 if(temp[i] != '-'){
950                                                         minPos++;
951                                                 }
952                                         }
953                                 }
954                                 else if(numDiff == minDiff){
955                                         minCount++;
956                                 }
957
958                         }
959
960                         if(minDiff > bdiffs)    {       success = minDiff;              }       //no good matches
961                         else if(minCount > 1)   {       success = bdiffs + 100; }       //can't tell the difference between multiple barcodes
962                         else{                                                                                                   //use the best match
963                                 group = minGroup;
964                                 seq.setUnaligned(rawSequence.substr(minPos));
965                                 
966                                 if(qual.getName() != ""){
967                                         qual.trimQScores(minPos, -1);
968                                 }
969                                 success = minDiff;
970                         }
971                         
972                         if (alignment != NULL) {  delete alignment;  }
973                         
974                 }
975 //              cout << success << endl;
976                 
977                 return success;
978                 
979         }
980         catch(exception& e) {
981                 m->errorOut(e, "TrimSeqsCommand", "stripBarcode");
982                 exit(1);
983         }
984
985 }
986
987 //***************************************************************************************************************
988
989 int TrimSeqsCommand::stripForward(Sequence& seq, QualityScores& qual, int& group){
990         try {
991                 string rawSequence = seq.getUnaligned();
992                 int success = pdiffs + 1;       //guilty until proven innocent
993                 
994                 //can you find the primer
995                 for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
996                         string oligo = it->first;
997                         if(rawSequence.length() < oligo.length()){      //let's just assume that the primers are the same length
998                                 success = pdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
999                                 break;  
1000                         }
1001                         
1002                         if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1003                                 group = it->second;
1004                                 seq.setUnaligned(rawSequence.substr(oligo.length()));
1005                                 if(qual.getName() != ""){
1006                                         qual.trimQScores(oligo.length(), -1);
1007                                         
1008                                 }
1009                                 success = 0;
1010                                 break;
1011                         }
1012                 }
1013
1014                 //if you found the barcode or if you don't want to allow for diffs
1015 //              cout << success;
1016                 if ((pdiffs == 0) || (success == 0)) { return success;  }
1017                 
1018                 else { //try aligning and see if you can find it
1019 //                      cout << endl;
1020
1021                         int maxLength = 0;
1022
1023                         Alignment* alignment;
1024                         if (primers.size() > 0) {
1025                                 map<string,int>::iterator it=primers.begin();
1026
1027                                 for(it;it!=primers.end();it++){
1028                                         if(it->first.length() > maxLength){
1029                                                 maxLength = it->first.length();
1030                                         }
1031                                 }
1032                                 alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+pdiffs+1));  
1033
1034                         }else{ alignment = NULL; } 
1035                         
1036                         //can you find the barcode
1037                         int minDiff = 1e6;
1038                         int minCount = 1;
1039                         int minGroup = -1;
1040                         int minPos = 0;
1041                         
1042                         for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
1043                                 string oligo = it->first;
1044 //                              int length = oligo.length();
1045                                 
1046                                 if(rawSequence.length() < maxLength){   
1047                                         success = pdiffs + 100;
1048                                         break;
1049                                 }
1050                                 
1051                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1052                                 alignment->align(oligo, rawSequence.substr(0,oligo.length()+pdiffs));
1053                                 oligo = alignment->getSeqAAln();
1054                                 string temp = alignment->getSeqBAln();
1055                 
1056                                 int alnLength = oligo.length();
1057                                 
1058                                 for(int i=oligo.length()-1;i>=0;i--){
1059                                         if(oligo[i] != '-'){    alnLength = i+1;        break;  }
1060                                 }
1061                                 oligo = oligo.substr(0,alnLength);
1062                                 temp = temp.substr(0,alnLength);
1063                                 
1064                                 int newStart=0;
1065                                 int numDiff = countDiffs(oligo, temp);
1066                                 
1067 //                              cout << oligo << '\t' << temp << '\t' << numDiff << endl;                               
1068                                 
1069                                 if(numDiff < minDiff){
1070                                         minDiff = numDiff;
1071                                         minCount = 1;
1072                                         minGroup = it->second;
1073                                         minPos = 0;
1074                                         for(int i=0;i<alnLength;i++){
1075                                                 if(temp[i] != '-'){
1076                                                         minPos++;
1077                                                 }
1078                                         }
1079                                 }
1080                                 else if(numDiff == minDiff){
1081                                         minCount++;
1082                                 }
1083
1084                         }
1085
1086                         if(minDiff > pdiffs)    {       success = minDiff;              }       //no good matches
1087                         else if(minCount > 1)   {       success = pdiffs + 10;  }       //can't tell the difference between multiple primers
1088                         else{                                                                                                   //use the best match
1089                                 group = minGroup;
1090                                 seq.setUnaligned(rawSequence.substr(minPos));
1091                                 if(qual.getName() != ""){
1092                                         qual.trimQScores(minPos, -1);
1093                                 }
1094                                 success = minDiff;
1095                         }
1096                         
1097                         if (alignment != NULL) {  delete alignment;  }
1098                         
1099                 }
1100                 
1101                 return success;
1102
1103         }
1104         catch(exception& e) {
1105                 m->errorOut(e, "TrimSeqsCommand", "stripForward");
1106                 exit(1);
1107         }
1108 }
1109
1110 //***************************************************************************************************************
1111
1112 bool TrimSeqsCommand::stripReverse(Sequence& seq, QualityScores& qual){
1113         try {
1114                 string rawSequence = seq.getUnaligned();
1115                 bool success = 0;       //guilty until proven innocent
1116                 
1117                 for(int i=0;i<numRPrimers;i++){
1118                         string oligo = revPrimer[i];
1119                         
1120                         if(rawSequence.length() < oligo.length()){
1121                                 success = 0;
1122                                 break;
1123                         }
1124                         
1125                         if(compareDNASeq(oligo, rawSequence.substr(rawSequence.length()-oligo.length(),oligo.length()))){
1126                                 seq.setUnaligned(rawSequence.substr(0,rawSequence.length()-oligo.length()));
1127                                 if(qual.getName() != ""){
1128                                         qual.trimQScores(-1, rawSequence.length()-oligo.length());
1129                                 }
1130                                 success = 1;
1131                                 break;
1132                         }
1133                 }       
1134                 return success;
1135                 
1136         }
1137         catch(exception& e) {
1138                 m->errorOut(e, "TrimSeqsCommand", "stripReverse");
1139                 exit(1);
1140         }
1141 }
1142
1143 //***************************************************************************************************************
1144
1145 bool TrimSeqsCommand::cullLength(Sequence& seq){
1146         try {
1147         
1148                 int length = seq.getNumBases();
1149                 bool success = 0;       //guilty until proven innocent
1150                 
1151                 if(length >= minLength && maxLength == 0)                       {       success = 1;    }
1152                 else if(length >= minLength && length <= maxLength)     {       success = 1;    }
1153                 else                                                                                            {       success = 0;    }
1154                 
1155                 return success;
1156         
1157         }
1158         catch(exception& e) {
1159                 m->errorOut(e, "TrimSeqsCommand", "cullLength");
1160                 exit(1);
1161         }
1162         
1163 }
1164
1165 //***************************************************************************************************************
1166
1167 bool TrimSeqsCommand::cullHomoP(Sequence& seq){
1168         try {
1169                 int longHomoP = seq.getLongHomoPolymer();
1170                 bool success = 0;       //guilty until proven innocent
1171                 
1172                 if(longHomoP <= maxHomoP){      success = 1;    }
1173                 else                                    {       success = 0;    }
1174                 
1175                 return success;
1176         }
1177         catch(exception& e) {
1178                 m->errorOut(e, "TrimSeqsCommand", "cullHomoP");
1179                 exit(1);
1180         }
1181         
1182 }
1183
1184 //***************************************************************************************************************
1185
1186 bool TrimSeqsCommand::cullAmbigs(Sequence& seq){
1187         try {
1188                 int numNs = seq.getAmbigBases();
1189                 bool success = 0;       //guilty until proven innocent
1190                 
1191                 if(numNs <= maxAmbig)   {       success = 1;    }
1192                 else                                    {       success = 0;    }
1193                 
1194                 return success;
1195         }
1196         catch(exception& e) {
1197                 m->errorOut(e, "TrimSeqsCommand", "cullAmbigs");
1198                 exit(1);
1199         }
1200         
1201 }
1202
1203 //***************************************************************************************************************
1204
1205 bool TrimSeqsCommand::compareDNASeq(string oligo, string seq){
1206         try {
1207                 bool success = 1;
1208                 int length = oligo.length();
1209                 
1210                 for(int i=0;i<length;i++){
1211                         
1212                         if(oligo[i] != seq[i]){
1213                                 if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C')    {       success = 0;    }
1214                                 else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N'))                                {       success = 0;    }
1215                                 else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G'))                                    {       success = 0;    }
1216                                 else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T'))                                    {       success = 0;    }
1217                                 else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A'))                                    {       success = 0;    }
1218                                 else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G'))                                    {       success = 0;    }
1219                                 else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A'))                                    {       success = 0;    }
1220                                 else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G'))                                    {       success = 0;    }
1221                                 else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G'))   {       success = 0;    }
1222                                 else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G'))   {       success = 0;    }
1223                                 else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C'))   {       success = 0;    }
1224                                 else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G'))   {       success = 0;    }                       
1225                                 
1226                                 if(success == 0)        {       break;   }
1227                         }
1228                         else{
1229                                 success = 1;
1230                         }
1231                 }
1232                 
1233                 return success;
1234         }
1235         catch(exception& e) {
1236                 m->errorOut(e, "TrimSeqsCommand", "compareDNASeq");
1237                 exit(1);
1238         }
1239
1240 }
1241 //***************************************************************************************************************
1242
1243 int TrimSeqsCommand::countDiffs(string oligo, string seq){
1244         try {
1245
1246                 int length = oligo.length();
1247                 int countDiffs = 0;
1248                 
1249                 for(int i=0;i<length;i++){
1250                                                                 
1251                         if(oligo[i] != seq[i]){
1252                                 if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C' || oligo[i] == '-' || oligo[i] == '.')      {       countDiffs++;   }
1253                                 else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N'))                                {       countDiffs++;   }
1254                                 else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G'))                                    {       countDiffs++;   }
1255                                 else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T'))                                    {       countDiffs++;   }
1256                                 else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A'))                                    {       countDiffs++;   }
1257                                 else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G'))                                    {       countDiffs++;   }
1258                                 else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A'))                                    {       countDiffs++;   }
1259                                 else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G'))                                    {       countDiffs++;   }
1260                                 else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G'))   {       countDiffs++;   }
1261                                 else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G'))   {       countDiffs++;   }
1262                                 else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C'))   {       countDiffs++;   }
1263                                 else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G'))   {       countDiffs++;   }       
1264                         }
1265                         
1266                 }
1267                 
1268                 return countDiffs;
1269         }
1270         catch(exception& e) {
1271                 m->errorOut(e, "TrimSeqsCommand", "countDiffs");
1272                 exit(1);
1273         }
1274
1275 }
1276 //***************************************************************************************************************
1277
1278 //bool TrimSeqsCommand::stripQualThreshold(Sequence& seq, ifstream& qFile){
1279 //      try {
1280 //              
1281 //              string rawSequence = seq.getUnaligned();
1282 //              int seqLength = seq.getNumBases();
1283 //              bool success = 0;       //guilty until proven innocent
1284 //              string name;
1285 //              
1286 //              qFile >> name;
1287 //              if (name[0] == '>') {  if(name.substr(1) != seq.getName())      {       m->mothurOut("sequence name mismatch btwn fasta: " + seq.getName() + " and qual file: " + name); m->mothurOutEndLine(); }  }
1288 //              
1289 //              while (!qFile.eof())    {       char c = qFile.get(); if (c == 10 || c == 13){  break;  }       }
1290 //              
1291 //              int score;
1292 //              int end = seqLength;
1293 //              
1294 //              for(int i=0;i<seqLength;i++){
1295 //                      qFile >> score;
1296 //                      
1297 //                      if(score < qThreshold){
1298 //                              end = i;
1299 //                              break;
1300 //                      }
1301 //              }
1302 //              for(int i=end+1;i<seqLength;i++){
1303 //                      qFile >> score;
1304 //              }
1305 //              
1306 //              seq.setUnaligned(rawSequence.substr(0,end));
1307 //              
1308 //              return 1;
1309 //      }
1310 //      catch(exception& e) {
1311 //              m->errorOut(e, "TrimSeqsCommand", "stripQualThreshold");
1312 //              exit(1);
1313 //      }
1314 //}
1315
1316 //***************************************************************************************************************
1317
1318 //bool TrimSeqsCommand::cullQualAverage(Sequence& seq, ifstream& qFile){
1319 //      try {
1320 //              string rawSequence = seq.getUnaligned();
1321 //              int seqLength = seq.getNumBases();
1322 //              bool success = 0;       //guilty until proven innocent
1323 //              string name;
1324 //              
1325 //              qFile >> name;
1326 //              if (name[0] == '>') {  if(name.substr(1) != seq.getName())      {       m->mothurOut("sequence name mismatch btwn fasta: " + seq.getName() + " and qual file: " + name); m->mothurOutEndLine(); }  }
1327 //              
1328 //              while (!qFile.eof())    {       char c = qFile.get(); if (c == 10 || c == 13){  break;  }       }
1329 //              
1330 //              float score;    
1331 //              float average = 0;
1332 //              
1333 //              for(int i=0;i<seqLength;i++){
1334 //                      qFile >> score;
1335 //                      average += score;
1336 //              }
1337 //              average /= seqLength;
1338 //
1339 //              if(average >= qAverage) {       success = 1;    }
1340 //              else                                    {       success = 0;    }
1341 //              
1342 //              return success;
1343 //      }
1344 //      catch(exception& e) {
1345 //              m->errorOut(e, "TrimSeqsCommand", "cullQualAverage");
1346 //              exit(1);
1347 //      }
1348 //}
1349
1350 //***************************************************************************************************************