]> git.donarmstrong.com Git - mothur.git/blob - trimseqscommand.cpp
e47e71d581d9a24f5b4c5ced4ac1875a94b1d269
[mothur.git] / trimseqscommand.cpp
1 /*
2  *  trimseqscommand.cpp
3  *  Mothur
4  *
5  *  Created by Pat Schloss on 6/6/09.
6  *  Copyright 2009 Patrick D. Schloss. All rights reserved.
7  *
8  */
9
10 #include "trimseqscommand.h"
11 #include "needlemanoverlap.hpp"
12
13 //***************************************************************************************************************
14
15 TrimSeqsCommand::TrimSeqsCommand(string option)  {
16         try {
17                 
18                 abort = false;
19                 comboStarts = 0;
20                 
21                 //allow user to run help
22                 if(option == "help") { help(); abort = true; }
23                 
24                 else {
25                         //valid paramters for this command
26                         string AlignArray[] =  {"fasta", "flip", "oligos", "maxambig", "maxhomop", "minlength", "maxlength", "qfile", 
27                                                                         "qthreshold", "qwindowaverage", "qstepsize", "qwindowsize", "qaverage", "rollaverage", "allfiles", "qtrim","tdiffs", "pdiffs", "bdiffs", "processors", "outputdir","inputdir"};
28                         
29                         vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
30                         
31                         OptionParser parser(option);
32                         map<string,string> parameters = parser.getParameters();
33                         
34                         ValidParameters validParameter;
35                         map<string,string>::iterator it;
36                         
37                         //check to make sure all parameters are valid for command
38                         for (it = parameters.begin(); it != parameters.end(); it++) { 
39                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
40                         }
41                         
42                         //if the user changes the input directory command factory will send this info to us in the output parameter 
43                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
44                         if (inputDir == "not found"){   inputDir = "";          }
45                         else {
46                                 string path;
47                                 it = parameters.find("fasta");
48                                 //user has given a template file
49                                 if(it != parameters.end()){ 
50                                         path = m->hasPath(it->second);
51                                         //if the user has not given a path then, add inputdir. else leave path alone.
52                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
53                                 }
54                                 
55                                 it = parameters.find("oligos");
56                                 //user has given a template file
57                                 if(it != parameters.end()){ 
58                                         path = m->hasPath(it->second);
59                                         //if the user has not given a path then, add inputdir. else leave path alone.
60                                         if (path == "") {       parameters["oligos"] = inputDir + it->second;           }
61                                 }
62                                 
63                                 it = parameters.find("qfile");
64                                 //user has given a template file
65                                 if(it != parameters.end()){ 
66                                         path = m->hasPath(it->second);
67                                         //if the user has not given a path then, add inputdir. else leave path alone.
68                                         if (path == "") {       parameters["qfile"] = inputDir + it->second;            }
69                                 }
70                         }
71
72                         
73                         //check for required parameters
74                         fastaFile = validParameter.validFile(parameters, "fasta", true);
75                         if (fastaFile == "not found") { m->mothurOut("fasta is a required parameter for the screen.seqs command."); m->mothurOutEndLine(); abort = true; }
76                         else if (fastaFile == "not open") { abort = true; }     
77                         
78                         //if the user changes the output directory command factory will send this info to us in the output parameter 
79                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
80                                 outputDir = ""; 
81                                 outputDir += m->hasPath(fastaFile); //if user entered a file with a path then preserve it       
82                         }
83                 
84                         
85                         //check for optional parameter and set defaults
86                         // ...at some point should added some additional type checking...
87                         string temp;
88                         temp = validParameter.validFile(parameters, "flip", false);
89                         if (temp == "not found"){       flip = 0;       }
90                         else if(m->isTrue(temp))        {       flip = 1;       }
91                 
92                         temp = validParameter.validFile(parameters, "oligos", true);
93                         if (temp == "not found"){       oligoFile = "";         }
94                         else if(temp == "not open"){    abort = true;   } 
95                         else                                    {       oligoFile = temp;               }
96                         
97                         temp = validParameter.validFile(parameters, "maxambig", false);         if (temp == "not found") { temp = "-1"; }
98                         convert(temp, maxAmbig);  
99
100                         temp = validParameter.validFile(parameters, "maxhomop", false);         if (temp == "not found") { temp = "0"; }
101                         convert(temp, maxHomoP);  
102
103                         temp = validParameter.validFile(parameters, "minlength", false);        if (temp == "not found") { temp = "0"; }
104                         convert(temp, minLength); 
105                         
106                         temp = validParameter.validFile(parameters, "maxlength", false);        if (temp == "not found") { temp = "0"; }
107                         convert(temp, maxLength);
108                         
109                         temp = validParameter.validFile(parameters, "bdiffs", false);           if (temp == "not found") { temp = "0"; }
110                         convert(temp, bdiffs);
111                         
112                         temp = validParameter.validFile(parameters, "pdiffs", false);           if (temp == "not found") { temp = "0"; }
113                         convert(temp, pdiffs);
114                         
115                         temp = validParameter.validFile(parameters, "tdiffs", false);           if (temp == "not found") { int tempTotal = pdiffs + bdiffs;  temp = toString(tempTotal); }
116                         convert(temp, tdiffs);
117                         
118                         if(tdiffs == 0){        tdiffs = bdiffs + pdiffs;       }
119                         
120                         temp = validParameter.validFile(parameters, "qfile", true);     
121                         if (temp == "not found")        {       qFileName = "";         }
122                         else if(temp == "not open")     {       abort = true;           }
123                         else                                            {       qFileName = temp;       }
124                         
125                         temp = validParameter.validFile(parameters, "qthreshold", false);       if (temp == "not found") { temp = "0"; }
126                         convert(temp, qThreshold);
127                         
128                         temp = validParameter.validFile(parameters, "qtrim", false);            if (temp == "not found") { temp = "F"; }
129                         qtrim = m->isTrue(temp);
130
131                         temp = validParameter.validFile(parameters, "rollaverage", false);      if (temp == "not found") { temp = "0"; }
132                         convert(temp, qRollAverage);
133
134                         temp = validParameter.validFile(parameters, "qwindowaverage", false);if (temp == "not found") { temp = "0"; }
135                         convert(temp, qWindowAverage);
136
137                         temp = validParameter.validFile(parameters, "qwindowsize", false);      if (temp == "not found") { temp = "100"; }
138                         convert(temp, qWindowSize);
139
140                         temp = validParameter.validFile(parameters, "qstepsize", false);                if (temp == "not found") { temp = "10"; }
141                         convert(temp, qWindowStep);
142
143                         temp = validParameter.validFile(parameters, "qaverage", false);         if (temp == "not found") { temp = "0"; }
144                         convert(temp, qAverage);
145                         
146                         temp = validParameter.validFile(parameters, "allfiles", false);         if (temp == "not found") { temp = "F"; }
147                         allFiles = m->isTrue(temp);
148                         
149                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found") { temp = "1"; }
150                         convert(temp, processors); 
151                         
152                         if(allFiles && oligoFile == ""){
153                                 m->mothurOut("You selected allfiles, but didn't enter an oligos file.  Ignoring the allfiles request."); m->mothurOutEndLine();
154                         }
155                         if((qAverage != 0 && qThreshold != 0) && qFileName == ""){
156                                 m->mothurOut("You didn't provide a quality file name, quality criteria will be ignored."); m->mothurOutEndLine();
157                                 qAverage=0;
158                                 qThreshold=0;
159                         }
160                         if(!flip && oligoFile=="" && !maxLength && !minLength && (maxAmbig==-1) && !maxHomoP && qFileName == ""){               
161                                 m->mothurOut("You didn't set any options... quiting command."); m->mothurOutEndLine();
162                                 abort = true;
163                         }
164                 }
165
166         }
167         catch(exception& e) {
168                 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
169                 exit(1);
170         }
171 }
172 //**********************************************************************************************************************
173
174 void TrimSeqsCommand::help(){
175         try {
176                 m->mothurOut("The trim.seqs command reads a fastaFile and creates .....\n");
177                 m->mothurOut("The trim.seqs command parameters are fasta, flip, oligos, maxambig, maxhomop, minlength, maxlength, qfile, qthreshold, qaverage, diffs, qtrim and allfiles.\n");
178                 m->mothurOut("The fasta parameter is required.\n");
179                 m->mothurOut("The flip parameter will output the reverse compliment of your trimmed sequence. The default is false.\n");
180                 m->mothurOut("The oligos parameter .... The default is "".\n");
181                 m->mothurOut("The maxambig parameter .... The default is -1.\n");
182                 m->mothurOut("The maxhomop parameter .... The default is 0.\n");
183                 m->mothurOut("The minlength parameter .... The default is 0.\n");
184                 m->mothurOut("The maxlength parameter .... The default is 0.\n");
185                 m->mothurOut("The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs.\n");
186                 m->mothurOut("The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n");
187                 m->mothurOut("The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n");
188                 m->mothurOut("The qfile parameter .....\n");
189                 m->mothurOut("The qthreshold parameter .... The default is 0.\n");
190                 m->mothurOut("The qaverage parameter .... The default is 0.\n");
191                 m->mothurOut("The allfiles parameter .... The default is F.\n");
192                 m->mothurOut("The qtrim parameter .... The default is F.\n");
193                 m->mothurOut("The trim.seqs command should be in the following format: \n");
194                 m->mothurOut("trim.seqs(fasta=yourFastaFile, flip=yourFlip, oligos=yourOligos, maxambig=yourMaxambig,  \n");
195                 m->mothurOut("maxhomop=yourMaxhomop, minlength=youMinlength, maxlength=yourMaxlength)  \n");    
196                 m->mothurOut("Example trim.seqs(fasta=abrecovery.fasta, flip=..., oligos=..., maxambig=..., maxhomop=..., minlength=..., maxlength=...).\n");
197                 m->mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n");
198                 m->mothurOut("For more details please check out the wiki http://www.mothur.org/wiki/Trim.seqs .\n\n");
199
200         }
201         catch(exception& e) {
202                 m->errorOut(e, "TrimSeqsCommand", "help");
203                 exit(1);
204         }
205 }
206
207
208 //***************************************************************************************************************
209
210 TrimSeqsCommand::~TrimSeqsCommand(){    /*      do nothing      */      }
211
212 //***************************************************************************************************************
213
214 int TrimSeqsCommand::execute(){
215         try{
216         
217                 if (abort == true) { return 0; }
218                 
219                 numFPrimers = 0;  //this needs to be initialized
220                 numRPrimers = 0;
221                 
222                 string trimSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim.fasta";
223                 outputNames.push_back(trimSeqFile);
224                 string scrapSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap.fasta";
225                 outputNames.push_back(scrapSeqFile);
226                 string trimQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim.qual";
227                 string scrapQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap.qual";
228                 if (qFileName != "") {  outputNames.push_back(trimQualFile); outputNames.push_back(scrapQualFile);  }
229                 string groupFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "groups";
230                 
231                 vector<string> fastaFileNames;
232                 vector<string> qualFileNames;
233                 if(oligoFile != ""){
234                         outputNames.push_back(groupFile);
235                         getOligos(fastaFileNames, qualFileNames);
236                 }
237
238                 vector<unsigned long int> fastaFilePos;
239                 vector<unsigned long int> qFilePos;
240                 
241                 setLines(fastaFile, qFileName, fastaFilePos, qFilePos);
242                 
243                 for (int i = 0; i < (fastaFilePos.size()-1); i++) {
244                         lines.push_back(new linePair(fastaFilePos[i], fastaFilePos[(i+1)]));
245                         if (qFileName != "") {  qLines.push_back(new linePair(qFilePos[i], qFilePos[(i+1)]));  }
246                 }       
247                 if(qFileName == "")     {       qLines = lines; } //files with duds
248                 
249                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
250                                 if(processors == 1){
251                                                                                 
252                                         driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, groupFile, fastaFileNames, qualFileNames, lines[0], qLines[0]);
253                                         
254                                         for (int j = 0; j < fastaFileNames.size(); j++) {
255                                                 rename((fastaFileNames[j] + toString(getpid()) + ".temp").c_str(), fastaFileNames[j].c_str());
256                                         }
257                                         if(qFileName != ""){
258                                                 for (int j = 0; j < qualFileNames.size(); j++) {
259                                                         rename((qualFileNames[j] + toString(getpid()) + ".temp").c_str(), qualFileNames[j].c_str());
260                                                 }
261                                         }                                               
262
263                                 }else{
264                                         createProcessesCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, groupFile, fastaFileNames, qualFileNames); 
265                                         
266                                         rename((trimSeqFile + toString(processIDS[0]) + ".temp").c_str(), trimSeqFile.c_str());
267                                         rename((scrapSeqFile + toString(processIDS[0]) + ".temp").c_str(), scrapSeqFile.c_str());
268                                         rename((groupFile + toString(processIDS[0]) + ".temp").c_str(), groupFile.c_str());
269                                         
270                                         if(qFileName != ""){
271                                                 rename((trimQualFile + toString(processIDS[0]) + ".temp").c_str(), trimQualFile.c_str());
272                                                 rename((scrapQualFile + toString(processIDS[0]) + ".temp").c_str(), scrapQualFile.c_str());
273                                         }
274                                         
275                                         
276                                         for (int j = 0; j < fastaFileNames.size(); j++) {
277                                                 rename((fastaFileNames[j] + toString(processIDS[0]) + ".temp").c_str(), fastaFileNames[j].c_str());
278                                         }
279                                         if(qFileName != ""){
280                                                 for (int j = 0; j < qualFileNames.size(); j++) {
281                                                         rename((qualFileNames[j] + toString(getpid()) + ".temp").c_str(), qualFileNames[j].c_str());
282                                                 }
283                                         }                                               
284                                         
285                                         //append files
286                                         for(int i=1;i<processors;i++){
287                                                 m->appendFiles((trimSeqFile + toString(processIDS[i]) + ".temp"), trimSeqFile);
288                                                 remove((trimSeqFile + toString(processIDS[i]) + ".temp").c_str());
289                                                 m->appendFiles((scrapSeqFile + toString(processIDS[i]) + ".temp"), scrapSeqFile);
290                                                 remove((scrapSeqFile + toString(processIDS[i]) + ".temp").c_str());
291
292                                                 m->appendFiles((trimQualFile + toString(processIDS[i]) + ".temp"), trimQualFile);
293                                                 remove((trimQualFile + toString(processIDS[i]) + ".temp").c_str());
294                                                 m->appendFiles((scrapQualFile + toString(processIDS[i]) + ".temp"), scrapQualFile);
295                                                 remove((scrapQualFile + toString(processIDS[i]) + ".temp").c_str());
296                                                 
297                                                 m->appendFiles((groupFile + toString(processIDS[i]) + ".temp"), groupFile);
298                                                 remove((groupFile + toString(processIDS[i]) + ".temp").c_str());
299                                                 for (int j = 0; j < fastaFileNames.size(); j++) {
300                                                         m->appendFiles((fastaFileNames[j] + toString(processIDS[i]) + ".temp"), fastaFileNames[j]);
301                                                         remove((fastaFileNames[j] + toString(processIDS[i]) + ".temp").c_str());
302                                                 }
303                                                 
304                                                 if(qFileName != ""){
305                                                         for (int j = 0; j < qualFileNames.size(); j++) {
306                                                                 m->appendFiles((qualFileNames[j] + toString(processIDS[i]) + ".temp"), qualFileNames[j]);
307                                                                 remove((qualFileNames[j] + toString(processIDS[i]) + ".temp").c_str());
308                                                         }
309                                                 }                                               
310                                                 
311                                                 
312                                         }
313                                 }
314                                 
315                                 if (m->control_pressed) {  return 0; }
316                 #else
317                                 driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, groupFile, fastaFileNames, qualFileNames, lines[0], qLines[0]);
318                                 
319                                 for (int j = 0; j < fastaFileNames.size(); j++) {
320                                         rename((fastaFileNames[j] + toString(j) + ".temp").c_str(), fastaFileNames[j].c_str());
321                                 }
322                                 if(qFileName != ""){
323                                         for (int j = 0; j < qualFileNames.size(); j++) {
324                                                 rename((qualFileNames[j] + toString(j) + ".temp").c_str(), qualFileNames[j].c_str());
325                                         }
326                                 }
327                                                                         
328                                 if (m->control_pressed) {  return 0; }
329                 #endif
330                                                 
331                                                                                 
332                 for(int i=0;i<fastaFileNames.size();i++){
333                         if (m->isBlank(fastaFileNames[i])) { remove(fastaFileNames[i].c_str()); }
334                         else if (filesToRemove.count(fastaFileNames[i]) > 0) { remove(fastaFileNames[i].c_str()); }
335                         else {
336                                 ifstream inFASTA;
337                                 string seqName;
338                                 m->openInputFile(fastaFileNames[i], inFASTA);
339                                 ofstream outGroups;
340                                 string outGroupFilename = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[i])) + "groups";
341                                 m->openOutputFile(outGroupFilename, outGroups);
342                                 outputNames.push_back(outGroupFilename);
343                                 
344                                 string thisGroup = "";
345                                 if (i > comboStarts) {
346                                         map<string, int>::iterator itCombo;
347                                         for(itCombo=combos.begin();itCombo!=combos.end(); itCombo++){
348                                                 if(itCombo->second == i){       thisGroup = itCombo->first;     combos.erase(itCombo);  break;  }
349                                         }
350                                 }else{ thisGroup = groupVector[i]; }
351                                 
352                                 while(!inFASTA.eof()){
353                                         if(inFASTA.get() == '>'){
354                                                 inFASTA >> seqName;
355                                                 outGroups << seqName << '\t' << thisGroup << endl;
356                                         }
357                                         while (!inFASTA.eof())  {       char c = inFASTA.get(); if (c == 10 || c == 13){        break;  }       }
358                                 }
359                                 outGroups.close();
360                                 inFASTA.close();
361                         }
362                 }
363                 
364                 if(qFileName != ""){
365                         for(int i=0;i<qualFileNames.size();i++){
366                                 if (m->isBlank(qualFileNames[i])) { remove(qualFileNames[i].c_str());   }
367                                 else if (filesToRemove.count(qualFileNames[i]) > 0) { remove(qualFileNames[i].c_str()); }
368                                 else {
369                                         ifstream inQual;
370                                         string seqName;
371                                         m->openInputFile(qualFileNames[i], inQual);
372 //                                      ofstream outGroups;
373 //                                      
374 //                                      string thisGroup = "";
375 //                                      if (i > comboStarts) {
376 //                                              map<string, int>::iterator itCombo;
377 //                                              for(itCombo=combos.begin();itCombo!=combos.end(); itCombo++){
378 //                                                      if(itCombo->second == i){       thisGroup = itCombo->first;     combos.erase(itCombo);  break;  }
379 //                                              }
380 //                                      }
381 //                                      else{ thisGroup = groupVector[i]; }
382                                         
383                                         inQual.close();
384                                 }
385                         }
386                 }
387                 
388                 
389                 if (m->control_pressed) { 
390                         for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str()); }
391                         return 0;
392                 }
393
394                 m->mothurOutEndLine();
395                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
396                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
397                 m->mothurOutEndLine();
398                 
399                 return 0;       
400                         
401         }
402         catch(exception& e) {
403                 m->errorOut(e, "TrimSeqsCommand", "execute");
404                 exit(1);
405         }
406 }
407                 
408 /**************************************************************************************/
409
410 int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string trimFile, string scrapFile, string trimQFile, string scrapQFile, string groupFile, vector<string> fastaNames, vector<string> qualNames, linePair* line, linePair* qline) {      
411                 
412         try {
413                 
414                 ofstream outFASTA;
415                 int able = m->openOutputFile(trimFile, outFASTA);
416                 
417                 ofstream scrapFASTA;
418                 m->openOutputFile(scrapFile, scrapFASTA);
419                 
420                 ofstream outQual;
421                 ofstream scrapQual;
422                 if(qFileName != ""){
423                         m->openOutputFile(trimQFile, outQual);
424                         m->openOutputFile(scrapQFile, scrapQual);
425                 }
426                 
427                 ofstream outGroups;
428                 vector<ofstream*> fastaFileNames;
429                 vector<ofstream*> qualFileNames;
430                 
431         cout << "here" << endl; 
432                 if (oligoFile != "") {          
433                         m->openOutputFile(groupFile, outGroups);   
434                         for (int i = 0; i < fastaNames.size(); i++) {
435                         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
436                                 fastaFileNames.push_back(new ofstream((fastaNames[i] + toString(getpid()) + ".temp").c_str(), ios::ate)); 
437                                 if(qFileName != ""){
438                                         qualFileNames.push_back(new ofstream((qualNames[i] + toString(getpid()) + ".temp").c_str(), ios::ate)); 
439                                 }
440                         #else
441                                 fastaFileNames.push_back(new ofstream((fastaNames[i] + toString(i) + ".temp").c_str(), ios::ate));                      
442                                 if(qFileName != ""){
443                                         qualFileNames.push_back(new ofstream((qualNames[i] + toString(i) + ".temp").c_str(), ios::ate));                        
444                                 }
445                         #endif
446                         }
447                 }
448 cout << "here " << filename << endl;            
449                 ifstream inFASTA;
450                 m->openInputFile(filename, inFASTA);
451                 inFASTA.seekg(line->start);
452         cout << "here " << qFileName << endl;   
453                 ifstream qFile;
454                 if(qFileName != "")     {       m->openInputFile(qFileName, qFile);     qFile.seekg(qline->start);  }
455                 
456                 bool done = false;
457                 int count = 0;
458         cout << "here" << endl;
459                 while (!done) {
460                                 
461                         if (m->control_pressed) { 
462                                 inFASTA.close(); outFASTA.close(); scrapFASTA.close();
463                                 if (oligoFile != "") {   outGroups.close();   }
464                                 
465                                 for(int i=0;i<fastaFileNames.size();i++){  fastaFileNames[i]->close(); delete fastaFileNames[i];  }     
466
467                                 if(qFileName != ""){
468                                         qFile.close();
469                                         for(int i=0;i<qualFileNames.size();i++){  qualFileNames[i]->close(); delete qualFileNames[i];  }        
470                                 }
471                                 for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str()); }
472
473                                 return 0;
474                         }
475                         
476                         int success = 1;
477                         
478
479                         Sequence currSeq(inFASTA); m->gobble(inFASTA);
480
481                         QualityScores currQual;
482                         if(qFileName != ""){
483                                 currQual = QualityScores(qFile, currSeq.getNumBases());  m->gobble(qFile);
484                         }
485                         
486                         string origSeq = currSeq.getUnaligned();
487                         if (origSeq != "") {
488                                 int groupBar, groupPrime;
489                                 string trashCode = "";
490                                 int currentSeqsDiffs = 0;
491
492                                 if(qFileName != ""){
493                                         if(qThreshold != 0)                     {       success = currQual.stripQualThreshold(currSeq, qThreshold);                     }
494                                         else if(qAverage != 0)          {       success = currQual.cullQualAverage(currSeq, qAverage);                          }
495                                         else if(qRollAverage != 0)      {       success = currQual.stripQualRollingAverage(currSeq, qRollAverage);      }
496                                         else if(qWindowAverage != 0){   success = currQual.stripQualWindowAverage(currSeq, qWindowStep, qWindowSize, qWindowAverage);   }
497
498                                         if (qtrim == 1 && (origSeq.length() != currSeq.getUnaligned().length())) { 
499                                                 success = 0; //if you don't want to trim and the sequence does not meet quality requirements, move to scrap
500                                         }
501
502                                         if(!success)                            {       trashCode += 'q';       }
503                                 }
504                         
505                                 if(barcodes.size() != 0){
506                                         success = stripBarcode(currSeq, currQual, groupBar);
507                                         if(success > bdiffs)            {       trashCode += 'b';       }
508                                         else{ currentSeqsDiffs += success;  }
509                                 }
510
511                                 if(numFPrimers != 0){
512                                         success = stripForward(currSeq, currQual, groupPrime);
513                                         if(success > pdiffs)            {       trashCode += 'f';       }
514                                         else{ currentSeqsDiffs += success;  }
515                                 }
516                                 
517                                 if (currentSeqsDiffs > tdiffs)  {       trashCode += 't';   }
518
519                                 if(numRPrimers != 0){
520                                         success = stripReverse(currSeq, currQual);
521                                         if(!success)                            {       trashCode += 'r';       }
522                                 }
523                 
524                                 if(minLength > 0 || maxLength > 0){
525                                         success = cullLength(currSeq);
526                                         if(!success)                            {       trashCode += 'l';       }
527                                 }
528                                 if(maxHomoP > 0){
529                                         success = cullHomoP(currSeq);
530                                         if(!success)                            {       trashCode += 'h';       }
531                                 }
532                                 if(maxAmbig != -1){
533                                         success = cullAmbigs(currSeq);
534                                         if(!success)                            {       trashCode += 'n';       }
535                                 }
536                                 
537                                 if(flip){       currSeq.reverseComplement();            currQual.flipQScores(); }               // should go last                       
538                                 
539                                 if(trashCode.length() == 0){
540                                         currSeq.setAligned(currSeq.getUnaligned());
541                                         currSeq.printSequence(outFASTA);
542                                         currQual.printQScores(outQual);
543                                         
544                                         if(barcodes.size() != 0){
545                                                 string thisGroup = groupVector[groupBar];
546                                                 int indexToFastaFile = groupBar;
547                                                 if (primers.size() != 0){
548                                                         //does this primer have a group
549                                                         if (groupVector[groupPrime] != "") {  
550                                                                 thisGroup += "." + groupVector[groupPrime]; 
551                                                                 indexToFastaFile = combos[thisGroup];
552                                                         }
553                                                 }
554                                                 outGroups << currSeq.getName() << '\t' << thisGroup << endl;
555                                                 if(allFiles){
556                                                         currSeq.printSequence(*fastaFileNames[indexToFastaFile]);
557                                                         
558                                                         if(qFileName != ""){
559                                                                 currQual.printQScores(*qualFileNames[indexToFastaFile]);                                                        
560                                                         }
561                                                 }
562                                         }
563                                 }
564                                 else{
565                                         currSeq.setName(currSeq.getName() + '|' + trashCode);
566                                         currSeq.setUnaligned(origSeq);
567                                         currSeq.setAligned(origSeq);
568                                         currSeq.printSequence(scrapFASTA);
569                                         currQual.printQScores(scrapQual);
570                                 }
571                                 count++;
572                         }
573                         
574                         unsigned long int pos = inFASTA.tellg();
575                         if ((pos == -1) || (pos >= line->end)) { break; }
576                         
577                         //report progress
578                         if((count) % 1000 == 0){        m->mothurOut(toString(count)); m->mothurOutEndLine();           }
579                         
580                 }
581                 //report progress
582                 if((count) % 1000 != 0){        m->mothurOut(toString(count)); m->mothurOutEndLine();           }
583
584                 
585                 inFASTA.close();
586                 outFASTA.close();
587                 scrapFASTA.close();
588                 if (oligoFile != "") {   outGroups.close();   }
589                 if(qFileName != "")     {       qFile.close();  scrapQual.close(); outQual.close();     }
590                 
591                 for(int i=0;i<fastaFileNames.size();i++){
592                         fastaFileNames[i]->close();
593                         delete fastaFileNames[i];
594                 }               
595                 
596                 if(qFileName != ""){
597                         for(int i=0;i<qualFileNames.size();i++){
598                                 qualFileNames[i]->close();
599                                 delete qualFileNames[i];
600                         }               
601                 }                       
602                 
603                 return count;
604         }
605         catch(exception& e) {
606                 m->errorOut(e, "TrimSeqsCommand", "driverCreateTrim");
607                 exit(1);
608         }
609 }
610
611 /**************************************************************************************************/
612
613 int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName, string trimFile, string scrapFile, string trimQFile, string scrapQFile, string groupFile, vector<string> fastaNames, vector<string> qualNames) {
614         try {
615 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
616                 int process = 0;
617                 int exitCommand = 1;
618                 processIDS.clear();
619                 
620                 //loop through and create all the processes you want
621                 while (process != processors) {
622                         int pid = fork();
623                         
624                         if (pid > 0) {
625                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
626                                 process++;
627                         }else if (pid == 0){
628                                 driverCreateTrim(filename, qFileName, (trimFile + toString(getpid()) + ".temp"), (scrapFile + toString(getpid()) + ".temp"), (trimQFile + toString(getpid()) + ".temp"), (scrapQFile + toString(getpid()) + ".temp"), (groupFile + toString(getpid()) + ".temp"), fastaNames, qualNames, lines[process], qLines[process]);
629                                 exit(0);
630                         }else { m->mothurOut("unable to spawn the necessary processes."); m->mothurOutEndLine(); exit(0); }
631                 }
632                 
633                 //force parent to wait until all the processes are done
634                 for (int i=0;i<processors;i++) { 
635                         int temp = processIDS[i];
636                         wait(&temp);
637                 }
638                 
639                 return exitCommand;
640 #endif          
641         }
642         catch(exception& e) {
643                 m->errorOut(e, "TrimSeqsCommand", "createProcessesCreateTrim");
644                 exit(1);
645         }
646 }
647
648 /**************************************************************************************************/
649
650 int TrimSeqsCommand::setLines(string filename, string qfilename, vector<unsigned long int>& fastaFilePos, vector<unsigned long int>& qfileFilePos) {
651         try {
652                 
653                 //set file positions for fasta file
654                 fastaFilePos = m->divideFile(filename, processors);
655                 
656                 if (qfilename == "") { return processors; }
657                 
658                 //get name of first sequence in each chunk
659                 map<string, int> firstSeqNames;
660                 for (int i = 0; i < (fastaFilePos.size()-1); i++) {
661                         ifstream in;
662                         m->openInputFile(filename, in);
663                         in.seekg(fastaFilePos[i]);
664                 
665                         Sequence temp(in); 
666                         firstSeqNames[temp.getName()] = i;
667                 
668                         in.close();
669                 }
670                                 
671                 //seach for filePos of each first name in the qfile and save in qfileFilePos
672                 ifstream inQual;
673                 m->openInputFile(qfilename, inQual);
674                         
675                 string input;
676                 while(!inQual.eof()){   
677                         input = m->getline(inQual);
678
679                         if (input.length() != 0) {
680                                 if(input[0] == '>'){ //this is a sequence name line
681                                         istringstream nameStream(input);
682                                         
683                                         string sname = "";  nameStream >> sname;
684                                         sname = sname.substr(1);
685                                         
686                                         map<string, int>::iterator it = firstSeqNames.find(sname);
687                                         
688                                         if(it != firstSeqNames.end()) { //this is the start of a new chunk
689                                                 unsigned long int pos = inQual.tellg(); 
690                                                 qfileFilePos.push_back(pos - input.length() - 1);       
691                                                 firstSeqNames.erase(it);
692                                         }
693                                 }
694                         }
695                         
696                         if (firstSeqNames.size() == 0) { break; }
697                 }
698                 inQual.close();
699                 
700                 if (firstSeqNames.size() != 0) { 
701                         for (map<string, int>::iterator it = firstSeqNames.begin(); it != firstSeqNames.end(); it++) {
702                                 m->mothurOut(it->first + " is in your fasta file and not in your quality file, not using quality file."); m->mothurOutEndLine();
703                         }
704                         qFileName = "";
705                         return processors;
706                 }
707
708                 //get last file position of qfile
709                 FILE * pFile;
710                 unsigned long int size;
711                 
712                 //get num bytes in file
713                 pFile = fopen (qfilename.c_str(),"rb");
714                 if (pFile==NULL) perror ("Error opening file");
715                 else{
716                         fseek (pFile, 0, SEEK_END);
717                         size=ftell (pFile);
718                         fclose (pFile);
719                 }
720                 
721                 qfileFilePos.push_back(size);
722                 
723                 return processors;
724         }
725         catch(exception& e) {
726                 m->errorOut(e, "TrimSeqsCommand", "setLines");
727                 exit(1);
728         }
729 }
730 //***************************************************************************************************************
731
732 void TrimSeqsCommand::getOligos(vector<string>& outFASTAVec, vector<string>& outQualVec){
733         try {
734                 ifstream inOligos;
735                 m->openInputFile(oligoFile, inOligos);
736                 
737                 ofstream test;
738                 
739                 string type, oligo, group;
740                 int index=0;
741                 //int indexPrimer = 0;
742                 
743                 while(!inOligos.eof()){
744                         inOligos >> type;
745                                         
746                         if(type[0] == '#'){
747                                 while (!inOligos.eof()) {       char c = inOligos.get(); if (c == 10 || c == 13){       break;  }       } // get rest of line if there's any crap there
748                         }
749                         else{
750                                 //make type case insensitive
751                                 for(int i=0;i<type.length();i++){       type[i] = toupper(type[i]);  }
752                                 
753                                 inOligos >> oligo;
754                                 
755                                 for(int i=0;i<oligo.length();i++){
756                                         oligo[i] = toupper(oligo[i]);
757                                         if(oligo[i] == 'U')     {       oligo[i] = 'T'; }
758                                 }
759                                 
760                                 if(type == "FORWARD"){
761                                         group = "";
762                                         
763                                         // get rest of line in case there is a primer name
764                                         while (!inOligos.eof()) {       
765                                                 char c = inOligos.get(); 
766                                                 if (c == 10 || c == 13){        break;  }
767                                                 else if (c == 32 || c == 9){;} //space or tab
768                                                 else {  group += c;  }
769                                         } 
770                                         
771                                         //check for repeat barcodes
772                                         map<string, int>::iterator itPrime = primers.find(oligo);
773                                         if (itPrime != primers.end()) { m->mothurOut("primer " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
774                                         
775                                         primers[oligo]=index; index++;
776                                         groupVector.push_back(group);
777                                         
778                                         if(allFiles){
779                                                 outFASTAVec.push_back((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + group + ".fasta"));
780                                                 if(qFileName != ""){
781                                                         outQualVec.push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + group + ".qual"));
782                                                 }
783                                                 if (group == "") { //if there is not a group for this primer, then this file will not get written to, but we add it to keep the indexes correct
784                                                         filesToRemove.insert((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + group + ".fasta"));
785                                                         if(qFileName != ""){
786                                                                 filesToRemove.insert((outputDir + m->getRootName(m->getSimpleName(qFileName)) + group + ".qual"));
787                                                         }
788                                                 }else {
789                                                         outputNames.push_back((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + group + ".fasta"));
790                                                         if(qFileName != ""){
791                                                                 outputNames.push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + group + ".qual"));
792                                                         }                                                       
793                                                 }
794                                         }
795
796                                 }
797                                 else if(type == "REVERSE"){
798                                         Sequence oligoRC("reverse", oligo);
799                                         oligoRC.reverseComplement();
800                                         revPrimer.push_back(oligoRC.getUnaligned());
801                                 }
802                                 else if(type == "BARCODE"){
803                                         inOligos >> group;
804                                         
805                                         //check for repeat barcodes
806                                         map<string, int>::iterator itBar = barcodes.find(oligo);
807                                         if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
808                                         
809                                         barcodes[oligo]=index; index++;
810                                         groupVector.push_back(group);
811                                         
812                                         if(allFiles){
813                                                 outputNames.push_back((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + group + ".fasta"));
814                                                 outFASTAVec.push_back((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + group + ".fasta"));
815                                                 if(qFileName != ""){
816                                                         outQualVec.push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + group + ".qual"));
817                                                         outputNames.push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + group + ".qual"));
818                                                 }                                                       
819                                         }
820                                 }else{  m->mothurOut(type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine();  }
821                         }
822                         m->gobble(inOligos);
823                 }
824                 
825                 inOligos.close();
826                 
827                 //add in potential combos
828                 if(allFiles){
829                         comboStarts = outFASTAVec.size()-1;
830                         for (map<string, int>::iterator itBar = barcodes.begin(); itBar != barcodes.end(); itBar++) {
831                                 for (map<string, int>::iterator itPrime = primers.begin(); itPrime != primers.end(); itPrime++) {
832                                         if (groupVector[itPrime->second] != "") { //there is a group for this primer
833                                                 outputNames.push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + groupVector[itBar->second] + "." + groupVector[itPrime->second] + ".fasta"));
834                                                 outFASTAVec.push_back((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + groupVector[itBar->second] + "." + groupVector[itPrime->second] + ".fasta"));
835                                                 combos[(groupVector[itBar->second] + "." + groupVector[itPrime->second])] = outFASTAVec.size()-1;
836                                                 
837                                                 if(qFileName != ""){
838                                                         outQualVec.push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + groupVector[itBar->second] + "." + groupVector[itPrime->second] + ".qual"));
839                                                         outputNames.push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + groupVector[itBar->second] + "." + groupVector[itPrime->second] + ".qual"));
840                                                 }
841                                         }
842                                 }
843                         }
844                 }
845                 
846                 numFPrimers = primers.size();
847                 numRPrimers = revPrimer.size();
848                 
849         }
850         catch(exception& e) {
851                 m->errorOut(e, "TrimSeqsCommand", "getOligos");
852                 exit(1);
853         }
854 }
855 //***************************************************************************************************************
856
857 int TrimSeqsCommand::stripBarcode(Sequence& seq, QualityScores& qual, int& group){
858         try {
859                 
860                 string rawSequence = seq.getUnaligned();
861                 int success = bdiffs + 1;       //guilty until proven innocent
862                 
863                 //can you find the barcode
864                 for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
865                         string oligo = it->first;
866                         if(rawSequence.length() < oligo.length()){      //let's just assume that the barcodes are the same length
867                                 success = bdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
868                                 break;  
869                         }
870                         
871                         if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
872                                 group = it->second;
873                                 seq.setUnaligned(rawSequence.substr(oligo.length()));
874                                 
875                                 if(qual.getName() != ""){
876                                         qual.trimQScores(oligo.length(), -1);
877                                 }
878                                 
879                                 success = 0;
880                                 break;
881                         }
882                 }
883                 
884                 //if you found the barcode or if you don't want to allow for diffs
885 //              cout << success;
886                 if ((bdiffs == 0) || (success == 0)) { return success;  }
887                 
888                 else { //try aligning and see if you can find it
889 //                      cout << endl;
890
891                         int maxLength = 0;
892
893                         Alignment* alignment;
894                         if (barcodes.size() > 0) {
895                                 map<string,int>::iterator it=barcodes.begin();
896
897                                 for(it;it!=barcodes.end();it++){
898                                         if(it->first.length() > maxLength){
899                                                 maxLength = it->first.length();
900                                         }
901                                 }
902                                 alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+bdiffs+1));  
903
904                         }else{ alignment = NULL; } 
905                         
906                         //can you find the barcode
907                         int minDiff = 1e6;
908                         int minCount = 1;
909                         int minGroup = -1;
910                         int minPos = 0;
911                         
912                         for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
913                                 string oligo = it->first;
914 //                              int length = oligo.length();
915                                 
916                                 if(rawSequence.length() < maxLength){   //let's just assume that the barcodes are the same length
917                                         success = bdiffs + 10;
918                                         break;
919                                 }
920                                 
921                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
922                                 alignment->align(oligo, rawSequence.substr(0,oligo.length()+bdiffs));
923                                 oligo = alignment->getSeqAAln();
924                                 string temp = alignment->getSeqBAln();
925                 
926                                 int alnLength = oligo.length();
927                                 
928                                 for(int i=oligo.length()-1;i>=0;i--){
929                                         if(oligo[i] != '-'){    alnLength = i+1;        break;  }
930                                 }
931                                 oligo = oligo.substr(0,alnLength);
932                                 temp = temp.substr(0,alnLength);
933                                 
934                                 int newStart=0;
935                                 int numDiff = countDiffs(oligo, temp);
936                                 
937 //                              cout << oligo << '\t' << temp << '\t' << numDiff << endl;                               
938                                 
939                                 if(numDiff < minDiff){
940                                         minDiff = numDiff;
941                                         minCount = 1;
942                                         minGroup = it->second;
943                                         minPos = 0;
944                                         for(int i=0;i<alnLength;i++){
945                                                 if(temp[i] != '-'){
946                                                         minPos++;
947                                                 }
948                                         }
949                                 }
950                                 else if(numDiff == minDiff){
951                                         minCount++;
952                                 }
953
954                         }
955
956                         if(minDiff > bdiffs)    {       success = minDiff;              }       //no good matches
957                         else if(minCount > 1)   {       success = bdiffs + 100; }       //can't tell the difference between multiple barcodes
958                         else{                                                                                                   //use the best match
959                                 group = minGroup;
960                                 seq.setUnaligned(rawSequence.substr(minPos));
961                                 
962                                 if(qual.getName() != ""){
963                                         qual.trimQScores(minPos, -1);
964                                 }
965                                 success = minDiff;
966                         }
967                         
968                         if (alignment != NULL) {  delete alignment;  }
969                         
970                 }
971 //              cout << success << endl;
972                 
973                 return success;
974                 
975         }
976         catch(exception& e) {
977                 m->errorOut(e, "TrimSeqsCommand", "stripBarcode");
978                 exit(1);
979         }
980
981 }
982
983 //***************************************************************************************************************
984
985 int TrimSeqsCommand::stripForward(Sequence& seq, QualityScores& qual, int& group){
986         try {
987                 string rawSequence = seq.getUnaligned();
988                 int success = pdiffs + 1;       //guilty until proven innocent
989                 
990                 //can you find the primer
991                 for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
992                         string oligo = it->first;
993                         if(rawSequence.length() < oligo.length()){      //let's just assume that the primers are the same length
994                                 success = pdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
995                                 break;  
996                         }
997                         
998                         if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
999                                 group = it->second;
1000                                 seq.setUnaligned(rawSequence.substr(oligo.length()));
1001                                 if(qual.getName() != ""){
1002                                         qual.trimQScores(oligo.length(), -1);
1003                                         
1004                                 }
1005                                 success = 0;
1006                                 break;
1007                         }
1008                 }
1009
1010                 //if you found the barcode or if you don't want to allow for diffs
1011 //              cout << success;
1012                 if ((pdiffs == 0) || (success == 0)) { return success;  }
1013                 
1014                 else { //try aligning and see if you can find it
1015 //                      cout << endl;
1016
1017                         int maxLength = 0;
1018
1019                         Alignment* alignment;
1020                         if (primers.size() > 0) {
1021                                 map<string,int>::iterator it=primers.begin();
1022
1023                                 for(it;it!=primers.end();it++){
1024                                         if(it->first.length() > maxLength){
1025                                                 maxLength = it->first.length();
1026                                         }
1027                                 }
1028                                 alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+pdiffs+1));  
1029
1030                         }else{ alignment = NULL; } 
1031                         
1032                         //can you find the barcode
1033                         int minDiff = 1e6;
1034                         int minCount = 1;
1035                         int minGroup = -1;
1036                         int minPos = 0;
1037                         
1038                         for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
1039                                 string oligo = it->first;
1040 //                              int length = oligo.length();
1041                                 
1042                                 if(rawSequence.length() < maxLength){   
1043                                         success = pdiffs + 100;
1044                                         break;
1045                                 }
1046                                 
1047                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1048                                 alignment->align(oligo, rawSequence.substr(0,oligo.length()+pdiffs));
1049                                 oligo = alignment->getSeqAAln();
1050                                 string temp = alignment->getSeqBAln();
1051                 
1052                                 int alnLength = oligo.length();
1053                                 
1054                                 for(int i=oligo.length()-1;i>=0;i--){
1055                                         if(oligo[i] != '-'){    alnLength = i+1;        break;  }
1056                                 }
1057                                 oligo = oligo.substr(0,alnLength);
1058                                 temp = temp.substr(0,alnLength);
1059                                 
1060                                 int newStart=0;
1061                                 int numDiff = countDiffs(oligo, temp);
1062                                 
1063 //                              cout << oligo << '\t' << temp << '\t' << numDiff << endl;                               
1064                                 
1065                                 if(numDiff < minDiff){
1066                                         minDiff = numDiff;
1067                                         minCount = 1;
1068                                         minGroup = it->second;
1069                                         minPos = 0;
1070                                         for(int i=0;i<alnLength;i++){
1071                                                 if(temp[i] != '-'){
1072                                                         minPos++;
1073                                                 }
1074                                         }
1075                                 }
1076                                 else if(numDiff == minDiff){
1077                                         minCount++;
1078                                 }
1079
1080                         }
1081
1082                         if(minDiff > pdiffs)    {       success = minDiff;              }       //no good matches
1083                         else if(minCount > 1)   {       success = pdiffs + 10;  }       //can't tell the difference between multiple primers
1084                         else{                                                                                                   //use the best match
1085                                 group = minGroup;
1086                                 seq.setUnaligned(rawSequence.substr(minPos));
1087                                 if(qual.getName() != ""){
1088                                         qual.trimQScores(minPos, -1);
1089                                 }
1090                                 success = minDiff;
1091                         }
1092                         
1093                         if (alignment != NULL) {  delete alignment;  }
1094                         
1095                 }
1096                 
1097                 return success;
1098
1099         }
1100         catch(exception& e) {
1101                 m->errorOut(e, "TrimSeqsCommand", "stripForward");
1102                 exit(1);
1103         }
1104 }
1105
1106 //***************************************************************************************************************
1107
1108 bool TrimSeqsCommand::stripReverse(Sequence& seq, QualityScores& qual){
1109         try {
1110                 string rawSequence = seq.getUnaligned();
1111                 bool success = 0;       //guilty until proven innocent
1112                 
1113                 for(int i=0;i<numRPrimers;i++){
1114                         string oligo = revPrimer[i];
1115                         
1116                         if(rawSequence.length() < oligo.length()){
1117                                 success = 0;
1118                                 break;
1119                         }
1120                         
1121                         if(compareDNASeq(oligo, rawSequence.substr(rawSequence.length()-oligo.length(),oligo.length()))){
1122                                 seq.setUnaligned(rawSequence.substr(0,rawSequence.length()-oligo.length()));
1123                                 if(qual.getName() != ""){
1124                                         qual.trimQScores(-1, rawSequence.length()-oligo.length());
1125                                 }
1126                                 success = 1;
1127                                 break;
1128                         }
1129                 }       
1130                 return success;
1131                 
1132         }
1133         catch(exception& e) {
1134                 m->errorOut(e, "TrimSeqsCommand", "stripReverse");
1135                 exit(1);
1136         }
1137 }
1138
1139 //***************************************************************************************************************
1140
1141 bool TrimSeqsCommand::cullLength(Sequence& seq){
1142         try {
1143         
1144                 int length = seq.getNumBases();
1145                 bool success = 0;       //guilty until proven innocent
1146                 
1147                 if(length >= minLength && maxLength == 0)                       {       success = 1;    }
1148                 else if(length >= minLength && length <= maxLength)     {       success = 1;    }
1149                 else                                                                                            {       success = 0;    }
1150                 
1151                 return success;
1152         
1153         }
1154         catch(exception& e) {
1155                 m->errorOut(e, "TrimSeqsCommand", "cullLength");
1156                 exit(1);
1157         }
1158         
1159 }
1160
1161 //***************************************************************************************************************
1162
1163 bool TrimSeqsCommand::cullHomoP(Sequence& seq){
1164         try {
1165                 int longHomoP = seq.getLongHomoPolymer();
1166                 bool success = 0;       //guilty until proven innocent
1167                 
1168                 if(longHomoP <= maxHomoP){      success = 1;    }
1169                 else                                    {       success = 0;    }
1170                 
1171                 return success;
1172         }
1173         catch(exception& e) {
1174                 m->errorOut(e, "TrimSeqsCommand", "cullHomoP");
1175                 exit(1);
1176         }
1177         
1178 }
1179
1180 //***************************************************************************************************************
1181
1182 bool TrimSeqsCommand::cullAmbigs(Sequence& seq){
1183         try {
1184                 int numNs = seq.getAmbigBases();
1185                 bool success = 0;       //guilty until proven innocent
1186                 
1187                 if(numNs <= maxAmbig)   {       success = 1;    }
1188                 else                                    {       success = 0;    }
1189                 
1190                 return success;
1191         }
1192         catch(exception& e) {
1193                 m->errorOut(e, "TrimSeqsCommand", "cullAmbigs");
1194                 exit(1);
1195         }
1196         
1197 }
1198
1199 //***************************************************************************************************************
1200
1201 bool TrimSeqsCommand::compareDNASeq(string oligo, string seq){
1202         try {
1203                 bool success = 1;
1204                 int length = oligo.length();
1205                 
1206                 for(int i=0;i<length;i++){
1207                         
1208                         if(oligo[i] != seq[i]){
1209                                 if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C')    {       success = 0;    }
1210                                 else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N'))                                {       success = 0;    }
1211                                 else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G'))                                    {       success = 0;    }
1212                                 else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T'))                                    {       success = 0;    }
1213                                 else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A'))                                    {       success = 0;    }
1214                                 else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G'))                                    {       success = 0;    }
1215                                 else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A'))                                    {       success = 0;    }
1216                                 else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G'))                                    {       success = 0;    }
1217                                 else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G'))   {       success = 0;    }
1218                                 else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G'))   {       success = 0;    }
1219                                 else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C'))   {       success = 0;    }
1220                                 else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G'))   {       success = 0;    }                       
1221                                 
1222                                 if(success == 0)        {       break;   }
1223                         }
1224                         else{
1225                                 success = 1;
1226                         }
1227                 }
1228                 
1229                 return success;
1230         }
1231         catch(exception& e) {
1232                 m->errorOut(e, "TrimSeqsCommand", "compareDNASeq");
1233                 exit(1);
1234         }
1235
1236 }
1237 //***************************************************************************************************************
1238
1239 int TrimSeqsCommand::countDiffs(string oligo, string seq){
1240         try {
1241
1242                 int length = oligo.length();
1243                 int countDiffs = 0;
1244                 
1245                 for(int i=0;i<length;i++){
1246                                                                 
1247                         if(oligo[i] != seq[i]){
1248                                 if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C' || oligo[i] == '-' || oligo[i] == '.')      {       countDiffs++;   }
1249                                 else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N'))                                {       countDiffs++;   }
1250                                 else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G'))                                    {       countDiffs++;   }
1251                                 else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T'))                                    {       countDiffs++;   }
1252                                 else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A'))                                    {       countDiffs++;   }
1253                                 else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G'))                                    {       countDiffs++;   }
1254                                 else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A'))                                    {       countDiffs++;   }
1255                                 else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G'))                                    {       countDiffs++;   }
1256                                 else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G'))   {       countDiffs++;   }
1257                                 else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G'))   {       countDiffs++;   }
1258                                 else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C'))   {       countDiffs++;   }
1259                                 else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G'))   {       countDiffs++;   }       
1260                         }
1261                         
1262                 }
1263                 
1264                 return countDiffs;
1265         }
1266         catch(exception& e) {
1267                 m->errorOut(e, "TrimSeqsCommand", "countDiffs");
1268                 exit(1);
1269         }
1270
1271 }
1272 //***************************************************************************************************************
1273
1274 //bool TrimSeqsCommand::stripQualThreshold(Sequence& seq, ifstream& qFile){
1275 //      try {
1276 //              
1277 //              string rawSequence = seq.getUnaligned();
1278 //              int seqLength = seq.getNumBases();
1279 //              bool success = 0;       //guilty until proven innocent
1280 //              string name;
1281 //              
1282 //              qFile >> name;
1283 //              if (name[0] == '>') {  if(name.substr(1) != seq.getName())      {       m->mothurOut("sequence name mismatch btwn fasta: " + seq.getName() + " and qual file: " + name); m->mothurOutEndLine(); }  }
1284 //              
1285 //              while (!qFile.eof())    {       char c = qFile.get(); if (c == 10 || c == 13){  break;  }       }
1286 //              
1287 //              int score;
1288 //              int end = seqLength;
1289 //              
1290 //              for(int i=0;i<seqLength;i++){
1291 //                      qFile >> score;
1292 //                      
1293 //                      if(score < qThreshold){
1294 //                              end = i;
1295 //                              break;
1296 //                      }
1297 //              }
1298 //              for(int i=end+1;i<seqLength;i++){
1299 //                      qFile >> score;
1300 //              }
1301 //              
1302 //              seq.setUnaligned(rawSequence.substr(0,end));
1303 //              
1304 //              return 1;
1305 //      }
1306 //      catch(exception& e) {
1307 //              m->errorOut(e, "TrimSeqsCommand", "stripQualThreshold");
1308 //              exit(1);
1309 //      }
1310 //}
1311
1312 //***************************************************************************************************************
1313
1314 //bool TrimSeqsCommand::cullQualAverage(Sequence& seq, ifstream& qFile){
1315 //      try {
1316 //              string rawSequence = seq.getUnaligned();
1317 //              int seqLength = seq.getNumBases();
1318 //              bool success = 0;       //guilty until proven innocent
1319 //              string name;
1320 //              
1321 //              qFile >> name;
1322 //              if (name[0] == '>') {  if(name.substr(1) != seq.getName())      {       m->mothurOut("sequence name mismatch btwn fasta: " + seq.getName() + " and qual file: " + name); m->mothurOutEndLine(); }  }
1323 //              
1324 //              while (!qFile.eof())    {       char c = qFile.get(); if (c == 10 || c == 13){  break;  }       }
1325 //              
1326 //              float score;    
1327 //              float average = 0;
1328 //              
1329 //              for(int i=0;i<seqLength;i++){
1330 //                      qFile >> score;
1331 //                      average += score;
1332 //              }
1333 //              average /= seqLength;
1334 //
1335 //              if(average >= qAverage) {       success = 1;    }
1336 //              else                                    {       success = 0;    }
1337 //              
1338 //              return success;
1339 //      }
1340 //      catch(exception& e) {
1341 //              m->errorOut(e, "TrimSeqsCommand", "cullQualAverage");
1342 //              exit(1);
1343 //      }
1344 //}
1345
1346 //***************************************************************************************************************