]> git.donarmstrong.com Git - mothur.git/blob - trimseqscommand.cpp
a3abd597f0c3bdb9319849e675f722979d0c053f
[mothur.git] / trimseqscommand.cpp
1 /*
2  *  trimseqscommand.cpp
3  *  Mothur
4  *
5  *  Created by Pat Schloss on 6/6/09.
6  *  Copyright 2009 Patrick D. Schloss. All rights reserved.
7  *
8  */
9
10 #include "trimseqscommand.h"
11 #include "needlemanoverlap.hpp"
12
13 //***************************************************************************************************************
14
15 TrimSeqsCommand::TrimSeqsCommand(string option)  {
16         try {
17                 
18                 abort = false;
19                 comboStarts = 0;
20                 
21                 //allow user to run help
22                 if(option == "help") { help(); abort = true; }
23                 
24                 else {
25                         //valid paramters for this command
26                         string AlignArray[] =  {"fasta", "flip", "oligos", "maxambig", "maxhomop", "minlength", "maxlength", "qfile", 
27                                                                         "qthreshold", "qwindowaverage", "qstepsize", "qwindowsize", "qaverage", "rollaverage", "allfiles", "qtrim","tdiffs", "pdiffs", "bdiffs", "processors", "outputdir","inputdir"};
28                         
29                         vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
30                         
31                         OptionParser parser(option);
32                         map<string,string> parameters = parser.getParameters();
33                         
34                         ValidParameters validParameter;
35                         map<string,string>::iterator it;
36                         
37                         //check to make sure all parameters are valid for command
38                         for (it = parameters.begin(); it != parameters.end(); it++) { 
39                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
40                         }
41                         
42                         //if the user changes the input directory command factory will send this info to us in the output parameter 
43                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
44                         if (inputDir == "not found"){   inputDir = "";          }
45                         else {
46                                 string path;
47                                 it = parameters.find("fasta");
48                                 //user has given a template file
49                                 if(it != parameters.end()){ 
50                                         path = hasPath(it->second);
51                                         //if the user has not given a path then, add inputdir. else leave path alone.
52                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
53                                 }
54                                 
55                                 it = parameters.find("oligos");
56                                 //user has given a template file
57                                 if(it != parameters.end()){ 
58                                         path = hasPath(it->second);
59                                         //if the user has not given a path then, add inputdir. else leave path alone.
60                                         if (path == "") {       parameters["oligos"] = inputDir + it->second;           }
61                                 }
62                                 
63                                 it = parameters.find("qfile");
64                                 //user has given a template file
65                                 if(it != parameters.end()){ 
66                                         path = hasPath(it->second);
67                                         //if the user has not given a path then, add inputdir. else leave path alone.
68                                         if (path == "") {       parameters["qfile"] = inputDir + it->second;            }
69                                 }
70                         }
71
72                         
73                         //check for required parameters
74                         fastaFile = validParameter.validFile(parameters, "fasta", true);
75                         if (fastaFile == "not found") { m->mothurOut("fasta is a required parameter for the screen.seqs command."); m->mothurOutEndLine(); abort = true; }
76                         else if (fastaFile == "not open") { abort = true; }     
77                         
78                         //if the user changes the output directory command factory will send this info to us in the output parameter 
79                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
80                                 outputDir = ""; 
81                                 outputDir += hasPath(fastaFile); //if user entered a file with a path then preserve it  
82                         }
83                 
84                         
85                         //check for optional parameter and set defaults
86                         // ...at some point should added some additional type checking...
87                         string temp;
88                         temp = validParameter.validFile(parameters, "flip", false);
89                         if (temp == "not found"){       flip = 0;       }
90                         else if(isTrue(temp))   {       flip = 1;       }
91                 
92                         temp = validParameter.validFile(parameters, "oligos", true);
93                         if (temp == "not found"){       oligoFile = "";         }
94                         else if(temp == "not open"){    abort = true;   } 
95                         else                                    {       oligoFile = temp;               }
96                         
97                         temp = validParameter.validFile(parameters, "maxambig", false);         if (temp == "not found") { temp = "-1"; }
98                         convert(temp, maxAmbig);  
99
100                         temp = validParameter.validFile(parameters, "maxhomop", false);         if (temp == "not found") { temp = "0"; }
101                         convert(temp, maxHomoP);  
102
103                         temp = validParameter.validFile(parameters, "minlength", false);        if (temp == "not found") { temp = "0"; }
104                         convert(temp, minLength); 
105                         
106                         temp = validParameter.validFile(parameters, "maxlength", false);        if (temp == "not found") { temp = "0"; }
107                         convert(temp, maxLength);
108                         
109                         temp = validParameter.validFile(parameters, "bdiffs", false);           if (temp == "not found") { temp = "0"; }
110                         convert(temp, bdiffs);
111                         
112                         temp = validParameter.validFile(parameters, "pdiffs", false);           if (temp == "not found") { temp = "0"; }
113                         convert(temp, pdiffs);
114                         
115                         temp = validParameter.validFile(parameters, "tdiffs", false);           if (temp == "not found") { int tempTotal = pdiffs + bdiffs;  temp = toString(tempTotal); }
116                         convert(temp, tdiffs);
117                         
118                         if(tdiffs == 0){        tdiffs = bdiffs + pdiffs;       }
119                         
120                         temp = validParameter.validFile(parameters, "qfile", true);     
121                         if (temp == "not found")        {       qFileName = "";         }
122                         else if(temp == "not open")     {       abort = true;           }
123                         else                                            {       qFileName = temp;       }
124                         
125                         temp = validParameter.validFile(parameters, "qthreshold", false);       if (temp == "not found") { temp = "0"; }
126                         convert(temp, qThreshold);
127                         
128                         temp = validParameter.validFile(parameters, "qtrim", false);            if (temp == "not found") { temp = "F"; }
129                         qtrim = isTrue(temp);
130
131                         temp = validParameter.validFile(parameters, "rollaverage", false);      if (temp == "not found") { temp = "0"; }
132                         convert(temp, qRollAverage);
133
134                         temp = validParameter.validFile(parameters, "qwindowaverage", false);if (temp == "not found") { temp = "0"; }
135                         convert(temp, qWindowAverage);
136
137                         temp = validParameter.validFile(parameters, "qwindowsize", false);      if (temp == "not found") { temp = "100"; }
138                         convert(temp, qWindowSize);
139
140                         temp = validParameter.validFile(parameters, "qstepsize", false);                if (temp == "not found") { temp = "10"; }
141                         convert(temp, qWindowStep);
142
143                         temp = validParameter.validFile(parameters, "qaverage", false);         if (temp == "not found") { temp = "0"; }
144                         convert(temp, qAverage);
145                         
146                         temp = validParameter.validFile(parameters, "allfiles", false);         if (temp == "not found") { temp = "F"; }
147                         allFiles = isTrue(temp);
148                         
149                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found") { temp = "1"; }
150                         convert(temp, processors); 
151                         
152                         if(allFiles && oligoFile == ""){
153                                 m->mothurOut("You selected allfiles, but didn't enter an oligos file.  Ignoring the allfiles request."); m->mothurOutEndLine();
154                         }
155                         if((qAverage != 0 && qThreshold != 0) && qFileName == ""){
156                                 m->mothurOut("You didn't provide a quality file name, quality criteria will be ignored."); m->mothurOutEndLine();
157                                 qAverage=0;
158                                 qThreshold=0;
159                         }
160                         if(!flip && oligoFile=="" && !maxLength && !minLength && (maxAmbig==-1) && !maxHomoP && qFileName == ""){               
161                                 m->mothurOut("You didn't set any options... quiting command."); m->mothurOutEndLine();
162                                 abort = true;
163                         }
164                 }
165
166         }
167         catch(exception& e) {
168                 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
169                 exit(1);
170         }
171 }
172 //**********************************************************************************************************************
173
174 void TrimSeqsCommand::help(){
175         try {
176                 m->mothurOut("The trim.seqs command reads a fastaFile and creates .....\n");
177                 m->mothurOut("The trim.seqs command parameters are fasta, flip, oligos, maxambig, maxhomop, minlength, maxlength, qfile, qthreshold, qaverage, diffs, qtrim and allfiles.\n");
178                 m->mothurOut("The fasta parameter is required.\n");
179                 m->mothurOut("The flip parameter will output the reverse compliment of your trimmed sequence. The default is false.\n");
180                 m->mothurOut("The oligos parameter .... The default is "".\n");
181                 m->mothurOut("The maxambig parameter .... The default is -1.\n");
182                 m->mothurOut("The maxhomop parameter .... The default is 0.\n");
183                 m->mothurOut("The minlength parameter .... The default is 0.\n");
184                 m->mothurOut("The maxlength parameter .... The default is 0.\n");
185                 m->mothurOut("The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs.\n");
186                 m->mothurOut("The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n");
187                 m->mothurOut("The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n");
188                 m->mothurOut("The qfile parameter .....\n");
189                 m->mothurOut("The qthreshold parameter .... The default is 0.\n");
190                 m->mothurOut("The qaverage parameter .... The default is 0.\n");
191                 m->mothurOut("The allfiles parameter .... The default is F.\n");
192                 m->mothurOut("The qtrim parameter .... The default is F.\n");
193                 m->mothurOut("The trim.seqs command should be in the following format: \n");
194                 m->mothurOut("trim.seqs(fasta=yourFastaFile, flip=yourFlip, oligos=yourOligos, maxambig=yourMaxambig,  \n");
195                 m->mothurOut("maxhomop=yourMaxhomop, minlength=youMinlength, maxlength=yourMaxlength)  \n");    
196                 m->mothurOut("Example trim.seqs(fasta=abrecovery.fasta, flip=..., oligos=..., maxambig=..., maxhomop=..., minlength=..., maxlength=...).\n");
197                 m->mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n");
198                 m->mothurOut("For more details please check out the wiki http://www.mothur.org/wiki/Trim.seqs .\n\n");
199
200         }
201         catch(exception& e) {
202                 m->errorOut(e, "TrimSeqsCommand", "help");
203                 exit(1);
204         }
205 }
206
207
208 //***************************************************************************************************************
209
210 TrimSeqsCommand::~TrimSeqsCommand(){    /*      do nothing      */      }
211
212 //***************************************************************************************************************
213
214 int TrimSeqsCommand::execute(){
215         try{
216         
217                 if (abort == true) { return 0; }
218                 
219                 numFPrimers = 0;  //this needs to be initialized
220                 numRPrimers = 0;
221                 
222                 string trimSeqFile = outputDir + getRootName(getSimpleName(fastaFile)) + "trim.fasta";
223                 outputNames.push_back(trimSeqFile);
224                 string scrapSeqFile = outputDir + getRootName(getSimpleName(fastaFile)) + "scrap.fasta";
225                 outputNames.push_back(scrapSeqFile);
226                 string trimQualFile = outputDir + getRootName(getSimpleName(fastaFile)) + "trim.qual";
227                 outputNames.push_back(trimQualFile);
228                 string scrapQualFile = outputDir + getRootName(getSimpleName(fastaFile)) + "scrap.qual";
229                 outputNames.push_back(scrapQualFile);
230                 string groupFile = outputDir + getRootName(getSimpleName(fastaFile)) + "groups";
231                 
232                 vector<string> fastaFileNames;
233                 vector<string> qualFileNames;
234                 if(oligoFile != ""){
235                         outputNames.push_back(groupFile);
236                         getOligos(fastaFileNames, qualFileNames);
237                 }
238                 
239                 if(qFileName != "")     {       setLines(qFileName, qLines);    }
240
241                 
242                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
243                                 if(processors == 1){
244                                         ifstream inFASTA;
245                                         int numSeqs;
246                                         openInputFile(fastaFile, inFASTA);
247                                         getNumSeqs(inFASTA, numSeqs);
248                                         inFASTA.close();
249                                         
250                                         lines.push_back(new linePair(0, numSeqs));
251                                         
252                                         driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, groupFile, fastaFileNames, qualFileNames, lines[0], lines[0]);
253                                         
254                                         for (int j = 0; j < fastaFileNames.size(); j++) {
255                                                 rename((fastaFileNames[j] + toString(getpid()) + ".temp").c_str(), fastaFileNames[j].c_str());
256                                         }
257                                         if(qFileName != ""){
258                                                 for (int j = 0; j < qualFileNames.size(); j++) {
259                                                         rename((qualFileNames[j] + toString(getpid()) + ".temp").c_str(), qualFileNames[j].c_str());
260                                                 }
261                                         }                                               
262
263                                 }else{
264                                         setLines(fastaFile, lines);     
265                                         if(qFileName == "")     {       qLines = lines; }       
266                                                                 
267                                         createProcessesCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, groupFile, fastaFileNames, qualFileNames); 
268                                         
269                                         rename((trimSeqFile + toString(processIDS[0]) + ".temp").c_str(), trimSeqFile.c_str());
270                                         rename((scrapSeqFile + toString(processIDS[0]) + ".temp").c_str(), scrapSeqFile.c_str());
271                                         rename((groupFile + toString(processIDS[0]) + ".temp").c_str(), groupFile.c_str());
272                                         
273                                         if(qFileName != ""){
274                                                 rename((trimQualFile + toString(processIDS[0]) + ".temp").c_str(), trimQualFile.c_str());
275                                                 rename((scrapQualFile + toString(processIDS[0]) + ".temp").c_str(), scrapQualFile.c_str());
276                                         }
277                                         
278                                         
279                                         for (int j = 0; j < fastaFileNames.size(); j++) {
280                                                 rename((fastaFileNames[j] + toString(processIDS[0]) + ".temp").c_str(), fastaFileNames[j].c_str());
281                                         }
282                                         if(qFileName != ""){
283                                                 for (int j = 0; j < qualFileNames.size(); j++) {
284                                                         rename((qualFileNames[j] + toString(getpid()) + ".temp").c_str(), qualFileNames[j].c_str());
285                                                 }
286                                         }                                               
287                                         
288                                         //append files
289                                         for(int i=1;i<processors;i++){
290                                                 appendFiles((trimSeqFile + toString(processIDS[i]) + ".temp"), trimSeqFile);
291                                                 remove((trimSeqFile + toString(processIDS[i]) + ".temp").c_str());
292                                                 appendFiles((scrapSeqFile + toString(processIDS[i]) + ".temp"), scrapSeqFile);
293                                                 remove((scrapSeqFile + toString(processIDS[i]) + ".temp").c_str());
294
295                                                 appendFiles((trimQualFile + toString(processIDS[i]) + ".temp"), trimQualFile);
296                                                 remove((trimQualFile + toString(processIDS[i]) + ".temp").c_str());
297                                                 appendFiles((scrapQualFile + toString(processIDS[i]) + ".temp"), scrapQualFile);
298                                                 remove((scrapQualFile + toString(processIDS[i]) + ".temp").c_str());
299                                                 
300                                                 appendFiles((groupFile + toString(processIDS[i]) + ".temp"), groupFile);
301                                                 remove((groupFile + toString(processIDS[i]) + ".temp").c_str());
302                                                 for (int j = 0; j < fastaFileNames.size(); j++) {
303                                                         appendFiles((fastaFileNames[j] + toString(processIDS[i]) + ".temp"), fastaFileNames[j]);
304                                                         remove((fastaFileNames[j] + toString(processIDS[i]) + ".temp").c_str());
305                                                 }
306                                                 
307                                                 if(qFileName != ""){
308                                                         for (int j = 0; j < qualFileNames.size(); j++) {
309                                                                 appendFiles((qualFileNames[j] + toString(processIDS[i]) + ".temp"), qualFileNames[j]);
310                                                                 remove((qualFileNames[j] + toString(processIDS[i]) + ".temp").c_str());
311                                                         }
312                                                 }                                               
313                                                 
314                                                 
315                                         }
316                                 }
317                                 
318                                 if (m->control_pressed) {  return 0; }
319                 #else
320                                 ifstream inFASTA;
321                                 int numSeqs;
322                                 openInputFile(fastaFile, inFASTA);
323                                 getNumSeqs(inFASTA, numSeqs);
324                                 inFASTA.close();
325                                 
326                                 lines.push_back(new linePair(0, numSeqs));
327                                 
328                                 driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, groupFile, fastaFileNames, qualFileNames, lines[0], lines[0]);
329                                 
330                                 for (int j = 0; j < fastaFileNames.size(); j++) {
331                                         rename((fastaFileNames[j] + toString(j) + ".temp").c_str(), fastaFileNames[j].c_str());
332                                 }
333                                 if(qFileName != ""){
334                                         for (int j = 0; j < qualFileNames.size(); j++) {
335                                                 rename((qualFileNames[j] + toString(j) + ".temp").c_str(), qualFileNames[j].c_str());
336                                         }
337                                 }
338                                                                         
339                                 if (m->control_pressed) {  return 0; }
340                 #endif
341                                                 
342                                                                                 
343                 for(int i=0;i<fastaFileNames.size();i++){
344                         if (isBlank(fastaFileNames[i])) { remove(fastaFileNames[i].c_str());    }
345                         else if (filesToRemove.count(fastaFileNames[i]) > 0) { remove(fastaFileNames[i].c_str()); }
346                         else {
347                                 ifstream inFASTA;
348                                 string seqName;
349                                 openInputFile(fastaFileNames[i], inFASTA);
350                                 ofstream outGroups;
351                                 string outGroupFilename = outputDir + getRootName(getSimpleName(fastaFileNames[i])) + "groups";
352                                 openOutputFile(outGroupFilename, outGroups);
353                                 outputNames.push_back(outGroupFilename);
354                                 
355                                 string thisGroup = "";
356                                 if (i > comboStarts) {
357                                         map<string, int>::iterator itCombo;
358                                         for(itCombo=combos.begin();itCombo!=combos.end(); itCombo++){
359                                                 if(itCombo->second == i){       thisGroup = itCombo->first;     combos.erase(itCombo);  break;  }
360                                         }
361                                 }else{ thisGroup = groupVector[i]; }
362                                 
363                                 while(!inFASTA.eof()){
364                                         if(inFASTA.get() == '>'){
365                                                 inFASTA >> seqName;
366                                                 outGroups << seqName << '\t' << thisGroup << endl;
367                                         }
368                                         while (!inFASTA.eof())  {       char c = inFASTA.get(); if (c == 10 || c == 13){        break;  }       }
369                                 }
370                                 outGroups.close();
371                                 inFASTA.close();
372                         }
373                 }
374                 
375                 if(qFileName != ""){
376                         for(int i=0;i<qualFileNames.size();i++){
377                                 if (isBlank(qualFileNames[i])) { remove(qualFileNames[i].c_str());      }
378                                 else if (filesToRemove.count(qualFileNames[i]) > 0) { remove(qualFileNames[i].c_str()); }
379                                 else {
380                                         ifstream inQual;
381                                         string seqName;
382                                         openInputFile(qualFileNames[i], inQual);
383 //                                      ofstream outGroups;
384 //                                      
385 //                                      string thisGroup = "";
386 //                                      if (i > comboStarts) {
387 //                                              map<string, int>::iterator itCombo;
388 //                                              for(itCombo=combos.begin();itCombo!=combos.end(); itCombo++){
389 //                                                      if(itCombo->second == i){       thisGroup = itCombo->first;     combos.erase(itCombo);  break;  }
390 //                                              }
391 //                                      }
392 //                                      else{ thisGroup = groupVector[i]; }
393                                         
394                                         inQual.close();
395                                 }
396                         }
397                 }
398                 
399                 
400                 if (m->control_pressed) { 
401                         for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str()); }
402                         return 0;
403                 }
404
405                 m->mothurOutEndLine();
406                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
407                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
408                 m->mothurOutEndLine();
409                 
410                 return 0;       
411                         
412         }
413         catch(exception& e) {
414                 m->errorOut(e, "TrimSeqsCommand", "execute");
415                 exit(1);
416         }
417 }
418                 
419 /**************************************************************************************/
420
421 int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string trimFile, string scrapFile, string trimQFile, string scrapQFile, string groupFile, vector<string> fastaNames, vector<string> qualNames, linePair* line, linePair* qline) {      
422                 
423         try {
424                 
425                 ofstream outFASTA;
426                 int able = openOutputFile(trimFile, outFASTA);
427                 
428                 ofstream scrapFASTA;
429                 openOutputFile(scrapFile, scrapFASTA);
430                 
431                 ofstream outQual;
432                 ofstream scrapQual;
433                 if(qFileName != ""){
434                         openOutputFile(trimQFile, outQual);
435                         openOutputFile(scrapQFile, scrapQual);
436                 }
437                 
438                 ofstream outGroups;
439                 vector<ofstream*> fastaFileNames;
440                 vector<ofstream*> qualFileNames;
441                 
442                 
443                 if (oligoFile != "") {          
444                         openOutputFile(groupFile, outGroups);   
445                         for (int i = 0; i < fastaNames.size(); i++) {
446                         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
447                                 fastaFileNames.push_back(new ofstream((fastaNames[i] + toString(getpid()) + ".temp").c_str(), ios::ate)); 
448                                 if(qFileName != ""){
449                                         qualFileNames.push_back(new ofstream((qualNames[i] + toString(getpid()) + ".temp").c_str(), ios::ate)); 
450                                 }
451                         #else
452                                 fastaFileNames.push_back(new ofstream((fastaNames[i] + toString(i) + ".temp").c_str(), ios::ate));                      
453                                 if(qFileName != ""){
454                                         qualFileNames.push_back(new ofstream((qualNames[i] + toString(i) + ".temp").c_str(), ios::ate));                        
455                                 }
456                         #endif
457                         }
458                 }
459                 
460                 ifstream inFASTA;
461                 openInputFile(filename, inFASTA);
462                 
463                 ifstream qFile;
464                 if(qFileName != "")     {       openInputFile(qFileName, qFile);        }
465                 
466                 qFile.seekg(qline->start);
467                 inFASTA.seekg(line->start);
468                 
469                 for(int i=0;i<line->num;i++){
470                                 
471                         if (m->control_pressed) { 
472                                 inFASTA.close(); outFASTA.close(); scrapFASTA.close();
473                                 if (oligoFile != "") {   outGroups.close();   }
474                                 
475                                 for(int i=0;i<fastaFileNames.size();i++){  fastaFileNames[i]->close(); delete fastaFileNames[i];  }     
476
477                                 if(qFileName != ""){
478                                         qFile.close();
479                                         for(int i=0;i<qualFileNames.size();i++){  qualFileNames[i]->close(); delete qualFileNames[i];  }        
480                                 }
481                                 for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str()); }
482
483                                 return 0;
484                         }
485                         
486                         int success = 1;
487                         
488                         Sequence currSeq(inFASTA);
489                         QualityScores currQual;
490                         if(qFileName != ""){
491                                 currQual = QualityScores(qFile, currSeq.getNumBases());
492                         }
493                         
494                         string origSeq = currSeq.getUnaligned();
495                         if (origSeq != "") {
496                                 int groupBar, groupPrime;
497                                 string trashCode = "";
498                                 int currentSeqsDiffs = 0;
499
500                                 if(qFileName != ""){
501                                         if(qThreshold != 0)                     {       success = currQual.stripQualThreshold(currSeq, qThreshold);                     }
502                                         else if(qAverage != 0)          {       success = currQual.cullQualAverage(currSeq, qAverage);                          }
503                                         else if(qRollAverage != 0)      {       success = currQual.stripQualRollingAverage(currSeq, qRollAverage);      }
504                                         else if(qWindowAverage != 0){   success = currQual.stripQualWindowAverage(currSeq, qWindowStep, qWindowSize, qWindowAverage);   }
505
506                                         if (qtrim == 1 && (origSeq.length() != currSeq.getUnaligned().length())) { 
507                                                 success = 0; //if you don't want to trim and the sequence does not meet quality requirements, move to scrap
508                                         }
509
510                                         if(!success)                            {       trashCode += 'q';       }
511                                 }
512                         
513                                 if(barcodes.size() != 0){
514                                         success = stripBarcode(currSeq, currQual, groupBar);
515                                         if(success > bdiffs)            {       trashCode += 'b';       }
516                                         else{ currentSeqsDiffs += success;  }
517                                 }
518
519                                 if(numFPrimers != 0){
520                                         success = stripForward(currSeq, currQual, groupPrime);
521                                         if(success > pdiffs)            {       trashCode += 'f';       }
522                                         else{ currentSeqsDiffs += success;  }
523                                 }
524                                 
525                                 if (currentSeqsDiffs > tdiffs)  {       trashCode += 't';   }
526
527                                 if(numRPrimers != 0){
528                                         success = stripReverse(currSeq, currQual);
529                                         if(!success)                            {       trashCode += 'r';       }
530                                 }
531                 
532                                 if(minLength > 0 || maxLength > 0){
533                                         success = cullLength(currSeq);
534                                         if(!success)                            {       trashCode += 'l';       }
535                                 }
536                                 if(maxHomoP > 0){
537                                         success = cullHomoP(currSeq);
538                                         if(!success)                            {       trashCode += 'h';       }
539                                 }
540                                 if(maxAmbig != -1){
541                                         success = cullAmbigs(currSeq);
542                                         if(!success)                            {       trashCode += 'n';       }
543                                 }
544                                 
545                                 if(flip){       currSeq.reverseComplement();            currQual.flipQScores(); }               // should go last                       
546                                 
547                                 if(trashCode.length() == 0){
548                                         currSeq.setAligned(currSeq.getUnaligned());
549                                         currSeq.printSequence(outFASTA);
550                                         currQual.printQScores(outQual);
551                                         
552                                         if(barcodes.size() != 0){
553                                                 string thisGroup = groupVector[groupBar];
554                                                 int indexToFastaFile = groupBar;
555                                                 if (primers.size() != 0){
556                                                         //does this primer have a group
557                                                         if (groupVector[groupPrime] != "") {  
558                                                                 thisGroup += "." + groupVector[groupPrime]; 
559                                                                 indexToFastaFile = combos[thisGroup];
560                                                         }
561                                                 }
562                                                 outGroups << currSeq.getName() << '\t' << thisGroup << endl;
563                                                 if(allFiles){
564                                                         currSeq.printSequence(*fastaFileNames[indexToFastaFile]);
565                                                         
566                                                         if(qFileName != ""){
567                                                                 currQual.printQScores(*qualFileNames[indexToFastaFile]);                                                        
568                                                         }
569                                                 }
570                                         }
571                                 }
572                                 else{
573                                         currSeq.setName(currSeq.getName() + '|' + trashCode);
574                                         currSeq.setUnaligned(origSeq);
575                                         currSeq.setAligned(origSeq);
576                                         currSeq.printSequence(scrapFASTA);
577                                         currQual.printQScores(scrapQual);
578                                 }
579                         }
580                         gobble(inFASTA);
581                         gobble(qFile);
582                 }
583                 
584                 inFASTA.close();
585                 outFASTA.close();
586                 scrapFASTA.close();
587                 if (oligoFile != "") {   outGroups.close();   }
588                 if(qFileName != "")     {       qFile.close();  scrapQual.close(); outQual.close();     }
589                 
590                 for(int i=0;i<fastaFileNames.size();i++){
591                         fastaFileNames[i]->close();
592                         delete fastaFileNames[i];
593                 }               
594                 
595                 if(qFileName != ""){
596                         for(int i=0;i<qualFileNames.size();i++){
597                                 qualFileNames[i]->close();
598                                 delete qualFileNames[i];
599                         }               
600                 }                       
601                 
602                 return 0;
603         }
604         catch(exception& e) {
605                 m->errorOut(e, "TrimSeqsCommand", "driverCreateTrim");
606                 exit(1);
607         }
608 }
609
610 /**************************************************************************************************/
611
612 int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName, string trimFile, string scrapFile, string trimQFile, string scrapQFile, string groupFile, vector<string> fastaNames, vector<string> qualNames) {
613         try {
614 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
615                 int process = 0;
616                 int exitCommand = 1;
617                 processIDS.clear();
618                 
619                 //loop through and create all the processes you want
620                 while (process != processors) {
621                         int pid = fork();
622                         
623                         if (pid > 0) {
624                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
625                                 process++;
626                         }else if (pid == 0){
627                                 driverCreateTrim(filename, qFileName, (trimFile + toString(getpid()) + ".temp"), (scrapFile + toString(getpid()) + ".temp"), (trimQFile + toString(getpid()) + ".temp"), (scrapQFile + toString(getpid()) + ".temp"), (groupFile + toString(getpid()) + ".temp"), fastaNames, qualNames, lines[process], qLines[process]);
628                                 exit(0);
629                         }else { m->mothurOut("unable to spawn the necessary processes."); m->mothurOutEndLine(); exit(0); }
630                 }
631                 
632                 //force parent to wait until all the processes are done
633                 for (int i=0;i<processors;i++) { 
634                         int temp = processIDS[i];
635                         wait(&temp);
636                 }
637                 
638                 return exitCommand;
639 #endif          
640         }
641         catch(exception& e) {
642                 m->errorOut(e, "TrimSeqsCommand", "createProcessesCreateTrim");
643                 exit(1);
644         }
645 }
646
647 /**************************************************************************************************/
648
649 int TrimSeqsCommand::setLines(string filename, vector<linePair*>& lines) {
650         try {
651                 
652                 lines.clear();
653                 
654                 vector<unsigned long int> positions;
655                 
656                 ifstream inFASTA;
657                 openInputFile(filename, inFASTA);
658                         
659                 string input;
660                 while(!inFASTA.eof()){  
661                         input = getline(inFASTA);
662
663                         if (input.length() != 0) {
664                                 if(input[0] == '>'){ unsigned long int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1);    }
665                         }
666                 }
667                 inFASTA.close();
668                 
669                 int numFastaSeqs = positions.size();
670         
671                 FILE * pFile;
672                 unsigned long int size;
673                 
674                 //get num bytes in file
675                 pFile = fopen (filename.c_str(),"rb");
676                 if (pFile==NULL) perror ("Error opening file");
677                 else{
678                         fseek (pFile, 0, SEEK_END);
679                         size=ftell (pFile);
680                         fclose (pFile);
681                 }
682                 
683                 int numSeqsPerProcessor = numFastaSeqs / processors;
684                 
685                 for (int i = 0; i < processors; i++) {
686
687                         unsigned long int startPos = positions[ i * numSeqsPerProcessor ];
688                         if(i == processors - 1){
689                                 numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor;
690                         }else{  
691                                 unsigned long int myEnd = positions[ (i+1) * numSeqsPerProcessor ];
692                         }
693                         lines.push_back(new linePair(startPos, numSeqsPerProcessor));
694                 }
695                 
696                 return numFastaSeqs;
697         }
698         catch(exception& e) {
699                 m->errorOut(e, "TrimSeqsCommand", "setLines");
700                 exit(1);
701         }
702 }
703 //***************************************************************************************************************
704
705 void TrimSeqsCommand::getOligos(vector<string>& outFASTAVec, vector<string>& outQualVec){
706         try {
707                 ifstream inOligos;
708                 openInputFile(oligoFile, inOligos);
709                 
710                 ofstream test;
711                 
712                 string type, oligo, group;
713                 int index=0;
714                 //int indexPrimer = 0;
715                 
716                 while(!inOligos.eof()){
717                         inOligos >> type;
718                                         
719                         if(type[0] == '#'){
720                                 while (!inOligos.eof()) {       char c = inOligos.get(); if (c == 10 || c == 13){       break;  }       } // get rest of line if there's any crap there
721                         }
722                         else{
723                                 //make type case insensitive
724                                 for(int i=0;i<type.length();i++){       type[i] = toupper(type[i]);  }
725                                 
726                                 inOligos >> oligo;
727                                 
728                                 for(int i=0;i<oligo.length();i++){
729                                         oligo[i] = toupper(oligo[i]);
730                                         if(oligo[i] == 'U')     {       oligo[i] = 'T'; }
731                                 }
732                                 
733                                 if(type == "FORWARD"){
734                                         group = "";
735                                         
736                                         // get rest of line in case there is a primer name
737                                         while (!inOligos.eof()) {       
738                                                 char c = inOligos.get(); 
739                                                 if (c == 10 || c == 13){        break;  }
740                                                 else if (c == 32 || c == 9){;} //space or tab
741                                                 else {  group += c;  }
742                                         } 
743                                         
744                                         //check for repeat barcodes
745                                         map<string, int>::iterator itPrime = primers.find(oligo);
746                                         if (itPrime != primers.end()) { m->mothurOut("primer " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
747                                         
748                                         primers[oligo]=index; index++;
749                                         groupVector.push_back(group);
750                                         
751                                         if(allFiles){
752                                                 outFASTAVec.push_back((outputDir + getRootName(getSimpleName(fastaFile)) + group + ".fasta"));
753                                                 if(qFileName != ""){
754                                                         outQualVec.push_back((outputDir + getRootName(getSimpleName(qFileName)) + group + ".qual"));
755                                                 }
756                                                 if (group == "") { //if there is not a group for this primer, then this file will not get written to, but we add it to keep the indexes correct
757                                                         filesToRemove.insert((outputDir + getRootName(getSimpleName(fastaFile)) + group + ".fasta"));
758                                                         if(qFileName != ""){
759                                                                 filesToRemove.insert((outputDir + getRootName(getSimpleName(qFileName)) + group + ".qual"));
760                                                         }
761                                                 }else {
762                                                         outputNames.push_back((outputDir + getRootName(getSimpleName(fastaFile)) + group + ".fasta"));
763                                                         if(qFileName != ""){
764                                                                 outputNames.push_back((outputDir + getRootName(getSimpleName(qFileName)) + group + ".qual"));
765                                                         }                                                       
766                                                 }
767                                         }
768
769                                 }
770                                 else if(type == "REVERSE"){
771                                         Sequence oligoRC("reverse", oligo);
772                                         oligoRC.reverseComplement();
773                                         revPrimer.push_back(oligoRC.getUnaligned());
774                                 }
775                                 else if(type == "BARCODE"){
776                                         inOligos >> group;
777                                         
778                                         //check for repeat barcodes
779                                         map<string, int>::iterator itBar = barcodes.find(oligo);
780                                         if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
781                                         
782                                         barcodes[oligo]=index; index++;
783                                         groupVector.push_back(group);
784                                         
785                                         if(allFiles){
786                                                 outputNames.push_back((outputDir + getRootName(getSimpleName(fastaFile)) + group + ".fasta"));
787                                                 outFASTAVec.push_back((outputDir + getRootName(getSimpleName(fastaFile)) + group + ".fasta"));
788                                                 if(qFileName != ""){
789                                                         outQualVec.push_back((outputDir + getRootName(getSimpleName(qFileName)) + group + ".qual"));
790                                                         outputNames.push_back((outputDir + getRootName(getSimpleName(qFileName)) + group + ".qual"));
791                                                 }                                                       
792                                         }
793                                 }else{  m->mothurOut(type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine();  }
794                         }
795                         gobble(inOligos);
796                 }
797                 
798                 inOligos.close();
799                 
800                 //add in potential combos
801                 if(allFiles){
802                         comboStarts = outFASTAVec.size()-1;
803                         for (map<string, int>::iterator itBar = barcodes.begin(); itBar != barcodes.end(); itBar++) {
804                                 for (map<string, int>::iterator itPrime = primers.begin(); itPrime != primers.end(); itPrime++) {
805                                         if (groupVector[itPrime->second] != "") { //there is a group for this primer
806                                                 outputNames.push_back((outputDir + getRootName(getSimpleName(qFileName)) + groupVector[itBar->second] + "." + groupVector[itPrime->second] + ".fasta"));
807                                                 outFASTAVec.push_back((outputDir + getRootName(getSimpleName(fastaFile)) + groupVector[itBar->second] + "." + groupVector[itPrime->second] + ".fasta"));
808                                                 combos[(groupVector[itBar->second] + "." + groupVector[itPrime->second])] = outFASTAVec.size()-1;
809                                                 
810                                                 if(qFileName != ""){
811                                                         outQualVec.push_back((outputDir + getRootName(getSimpleName(qFileName)) + groupVector[itBar->second] + "." + groupVector[itPrime->second] + ".qual"));
812                                                         outputNames.push_back((outputDir + getRootName(getSimpleName(qFileName)) + groupVector[itBar->second] + "." + groupVector[itPrime->second] + ".qual"));
813                                                 }
814                                         }
815                                 }
816                         }
817                 }
818                 
819                 numFPrimers = primers.size();
820                 numRPrimers = revPrimer.size();
821                 
822         }
823         catch(exception& e) {
824                 m->errorOut(e, "TrimSeqsCommand", "getOligos");
825                 exit(1);
826         }
827 }
828 //***************************************************************************************************************
829
830 int TrimSeqsCommand::stripBarcode(Sequence& seq, QualityScores& qual, int& group){
831         try {
832                 
833                 string rawSequence = seq.getUnaligned();
834                 int success = bdiffs + 1;       //guilty until proven innocent
835                 
836                 //can you find the barcode
837                 for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
838                         string oligo = it->first;
839                         if(rawSequence.length() < oligo.length()){      //let's just assume that the barcodes are the same length
840                                 success = bdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
841                                 break;  
842                         }
843                         
844                         if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
845                                 group = it->second;
846                                 seq.setUnaligned(rawSequence.substr(oligo.length()));
847                                 
848                                 if(qual.getName() != ""){
849                                         qual.trimQScores(oligo.length(), -1);
850                                 }
851                                 
852                                 success = 0;
853                                 break;
854                         }
855                 }
856                 
857                 //if you found the barcode or if you don't want to allow for diffs
858 //              cout << success;
859                 if ((bdiffs == 0) || (success == 0)) { return success;  }
860                 
861                 else { //try aligning and see if you can find it
862 //                      cout << endl;
863
864                         int maxLength = 0;
865
866                         Alignment* alignment;
867                         if (barcodes.size() > 0) {
868                                 map<string,int>::iterator it=barcodes.begin();
869
870                                 for(it;it!=barcodes.end();it++){
871                                         if(it->first.length() > maxLength){
872                                                 maxLength = it->first.length();
873                                         }
874                                 }
875                                 alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+bdiffs+1));  
876
877                         }else{ alignment = NULL; } 
878                         
879                         //can you find the barcode
880                         int minDiff = 1e6;
881                         int minCount = 1;
882                         int minGroup = -1;
883                         int minPos = 0;
884                         
885                         for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
886                                 string oligo = it->first;
887 //                              int length = oligo.length();
888                                 
889                                 if(rawSequence.length() < maxLength){   //let's just assume that the barcodes are the same length
890                                         success = bdiffs + 10;
891                                         break;
892                                 }
893                                 
894                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
895                                 alignment->align(oligo, rawSequence.substr(0,oligo.length()+bdiffs));
896                                 oligo = alignment->getSeqAAln();
897                                 string temp = alignment->getSeqBAln();
898                 
899                                 int alnLength = oligo.length();
900                                 
901                                 for(int i=oligo.length()-1;i>=0;i--){
902                                         if(oligo[i] != '-'){    alnLength = i+1;        break;  }
903                                 }
904                                 oligo = oligo.substr(0,alnLength);
905                                 temp = temp.substr(0,alnLength);
906                                 
907                                 int newStart=0;
908                                 int numDiff = countDiffs(oligo, temp);
909                                 
910 //                              cout << oligo << '\t' << temp << '\t' << numDiff << endl;                               
911                                 
912                                 if(numDiff < minDiff){
913                                         minDiff = numDiff;
914                                         minCount = 1;
915                                         minGroup = it->second;
916                                         minPos = 0;
917                                         for(int i=0;i<alnLength;i++){
918                                                 if(temp[i] != '-'){
919                                                         minPos++;
920                                                 }
921                                         }
922                                 }
923                                 else if(numDiff == minDiff){
924                                         minCount++;
925                                 }
926
927                         }
928
929                         if(minDiff > bdiffs)    {       success = minDiff;              }       //no good matches
930                         else if(minCount > 1)   {       success = bdiffs + 100; }       //can't tell the difference between multiple barcodes
931                         else{                                                                                                   //use the best match
932                                 group = minGroup;
933                                 seq.setUnaligned(rawSequence.substr(minPos));
934                                 
935                                 if(qual.getName() != ""){
936                                         qual.trimQScores(minPos, -1);
937                                 }
938                                 success = minDiff;
939                         }
940                         
941                         if (alignment != NULL) {  delete alignment;  }
942                         
943                 }
944 //              cout << success << endl;
945                 
946                 return success;
947                 
948         }
949         catch(exception& e) {
950                 m->errorOut(e, "TrimSeqsCommand", "stripBarcode");
951                 exit(1);
952         }
953
954 }
955
956 //***************************************************************************************************************
957
958 int TrimSeqsCommand::stripForward(Sequence& seq, QualityScores& qual, int& group){
959         try {
960                 string rawSequence = seq.getUnaligned();
961                 int success = pdiffs + 1;       //guilty until proven innocent
962                 
963                 //can you find the primer
964                 for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
965                         string oligo = it->first;
966                         if(rawSequence.length() < oligo.length()){      //let's just assume that the primers are the same length
967                                 success = pdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
968                                 break;  
969                         }
970                         
971                         if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
972                                 group = it->second;
973                                 seq.setUnaligned(rawSequence.substr(oligo.length()));
974                                 if(qual.getName() != ""){
975                                         qual.trimQScores(oligo.length(), -1);
976                                         
977                                 }
978                                 success = 0;
979                                 break;
980                         }
981                 }
982
983                 //if you found the barcode or if you don't want to allow for diffs
984 //              cout << success;
985                 if ((pdiffs == 0) || (success == 0)) { return success;  }
986                 
987                 else { //try aligning and see if you can find it
988 //                      cout << endl;
989
990                         int maxLength = 0;
991
992                         Alignment* alignment;
993                         if (primers.size() > 0) {
994                                 map<string,int>::iterator it=primers.begin();
995
996                                 for(it;it!=primers.end();it++){
997                                         if(it->first.length() > maxLength){
998                                                 maxLength = it->first.length();
999                                         }
1000                                 }
1001                                 alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+pdiffs+1));  
1002
1003                         }else{ alignment = NULL; } 
1004                         
1005                         //can you find the barcode
1006                         int minDiff = 1e6;
1007                         int minCount = 1;
1008                         int minGroup = -1;
1009                         int minPos = 0;
1010                         
1011                         for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
1012                                 string oligo = it->first;
1013 //                              int length = oligo.length();
1014                                 
1015                                 if(rawSequence.length() < maxLength){   
1016                                         success = pdiffs + 100;
1017                                         break;
1018                                 }
1019                                 
1020                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1021                                 alignment->align(oligo, rawSequence.substr(0,oligo.length()+pdiffs));
1022                                 oligo = alignment->getSeqAAln();
1023                                 string temp = alignment->getSeqBAln();
1024                 
1025                                 int alnLength = oligo.length();
1026                                 
1027                                 for(int i=oligo.length()-1;i>=0;i--){
1028                                         if(oligo[i] != '-'){    alnLength = i+1;        break;  }
1029                                 }
1030                                 oligo = oligo.substr(0,alnLength);
1031                                 temp = temp.substr(0,alnLength);
1032                                 
1033                                 int newStart=0;
1034                                 int numDiff = countDiffs(oligo, temp);
1035                                 
1036 //                              cout << oligo << '\t' << temp << '\t' << numDiff << endl;                               
1037                                 
1038                                 if(numDiff < minDiff){
1039                                         minDiff = numDiff;
1040                                         minCount = 1;
1041                                         minGroup = it->second;
1042                                         minPos = 0;
1043                                         for(int i=0;i<alnLength;i++){
1044                                                 if(temp[i] != '-'){
1045                                                         minPos++;
1046                                                 }
1047                                         }
1048                                 }
1049                                 else if(numDiff == minDiff){
1050                                         minCount++;
1051                                 }
1052
1053                         }
1054
1055                         if(minDiff > pdiffs)    {       success = minDiff;              }       //no good matches
1056                         else if(minCount > 1)   {       success = pdiffs + 10;  }       //can't tell the difference between multiple primers
1057                         else{                                                                                                   //use the best match
1058                                 group = minGroup;
1059                                 seq.setUnaligned(rawSequence.substr(minPos));
1060                                 if(qual.getName() != ""){
1061                                         qual.trimQScores(minPos, -1);
1062                                 }
1063                                 success = minDiff;
1064                         }
1065                         
1066                         if (alignment != NULL) {  delete alignment;  }
1067                         
1068                 }
1069                 
1070                 return success;
1071
1072         }
1073         catch(exception& e) {
1074                 m->errorOut(e, "TrimSeqsCommand", "stripForward");
1075                 exit(1);
1076         }
1077 }
1078
1079 //***************************************************************************************************************
1080
1081 bool TrimSeqsCommand::stripReverse(Sequence& seq, QualityScores& qual){
1082         try {
1083                 string rawSequence = seq.getUnaligned();
1084                 bool success = 0;       //guilty until proven innocent
1085                 
1086                 for(int i=0;i<numRPrimers;i++){
1087                         string oligo = revPrimer[i];
1088                         
1089                         if(rawSequence.length() < oligo.length()){
1090                                 success = 0;
1091                                 break;
1092                         }
1093                         
1094                         if(compareDNASeq(oligo, rawSequence.substr(rawSequence.length()-oligo.length(),oligo.length()))){
1095                                 seq.setUnaligned(rawSequence.substr(0,rawSequence.length()-oligo.length()));
1096                                 if(qual.getName() != ""){
1097                                         qual.trimQScores(-1, rawSequence.length()-oligo.length());
1098                                 }
1099                                 success = 1;
1100                                 break;
1101                         }
1102                 }       
1103                 return success;
1104                 
1105         }
1106         catch(exception& e) {
1107                 m->errorOut(e, "TrimSeqsCommand", "stripReverse");
1108                 exit(1);
1109         }
1110 }
1111
1112 //***************************************************************************************************************
1113
1114 bool TrimSeqsCommand::cullLength(Sequence& seq){
1115         try {
1116         
1117                 int length = seq.getNumBases();
1118                 bool success = 0;       //guilty until proven innocent
1119                 
1120                 if(length >= minLength && maxLength == 0)                       {       success = 1;    }
1121                 else if(length >= minLength && length <= maxLength)     {       success = 1;    }
1122                 else                                                                                            {       success = 0;    }
1123                 
1124                 return success;
1125         
1126         }
1127         catch(exception& e) {
1128                 m->errorOut(e, "TrimSeqsCommand", "cullLength");
1129                 exit(1);
1130         }
1131         
1132 }
1133
1134 //***************************************************************************************************************
1135
1136 bool TrimSeqsCommand::cullHomoP(Sequence& seq){
1137         try {
1138                 int longHomoP = seq.getLongHomoPolymer();
1139                 bool success = 0;       //guilty until proven innocent
1140                 
1141                 if(longHomoP <= maxHomoP){      success = 1;    }
1142                 else                                    {       success = 0;    }
1143                 
1144                 return success;
1145         }
1146         catch(exception& e) {
1147                 m->errorOut(e, "TrimSeqsCommand", "cullHomoP");
1148                 exit(1);
1149         }
1150         
1151 }
1152
1153 //***************************************************************************************************************
1154
1155 bool TrimSeqsCommand::cullAmbigs(Sequence& seq){
1156         try {
1157                 int numNs = seq.getAmbigBases();
1158                 bool success = 0;       //guilty until proven innocent
1159                 
1160                 if(numNs <= maxAmbig)   {       success = 1;    }
1161                 else                                    {       success = 0;    }
1162                 
1163                 return success;
1164         }
1165         catch(exception& e) {
1166                 m->errorOut(e, "TrimSeqsCommand", "cullAmbigs");
1167                 exit(1);
1168         }
1169         
1170 }
1171
1172 //***************************************************************************************************************
1173
1174 bool TrimSeqsCommand::compareDNASeq(string oligo, string seq){
1175         try {
1176                 bool success = 1;
1177                 int length = oligo.length();
1178                 
1179                 for(int i=0;i<length;i++){
1180                         
1181                         if(oligo[i] != seq[i]){
1182                                 if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C')    {       success = 0;    }
1183                                 else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N'))                                {       success = 0;    }
1184                                 else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G'))                                    {       success = 0;    }
1185                                 else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T'))                                    {       success = 0;    }
1186                                 else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A'))                                    {       success = 0;    }
1187                                 else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G'))                                    {       success = 0;    }
1188                                 else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A'))                                    {       success = 0;    }
1189                                 else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G'))                                    {       success = 0;    }
1190                                 else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G'))   {       success = 0;    }
1191                                 else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G'))   {       success = 0;    }
1192                                 else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C'))   {       success = 0;    }
1193                                 else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G'))   {       success = 0;    }                       
1194                                 
1195                                 if(success == 0)        {       break;   }
1196                         }
1197                         else{
1198                                 success = 1;
1199                         }
1200                 }
1201                 
1202                 return success;
1203         }
1204         catch(exception& e) {
1205                 m->errorOut(e, "TrimSeqsCommand", "compareDNASeq");
1206                 exit(1);
1207         }
1208
1209 }
1210 //***************************************************************************************************************
1211
1212 int TrimSeqsCommand::countDiffs(string oligo, string seq){
1213         try {
1214
1215                 int length = oligo.length();
1216                 int countDiffs = 0;
1217                 
1218                 for(int i=0;i<length;i++){
1219                                                                 
1220                         if(oligo[i] != seq[i]){
1221                                 if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C' || oligo[i] == '-' || oligo[i] == '.')      {       countDiffs++;   }
1222                                 else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N'))                                {       countDiffs++;   }
1223                                 else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G'))                                    {       countDiffs++;   }
1224                                 else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T'))                                    {       countDiffs++;   }
1225                                 else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A'))                                    {       countDiffs++;   }
1226                                 else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G'))                                    {       countDiffs++;   }
1227                                 else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A'))                                    {       countDiffs++;   }
1228                                 else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G'))                                    {       countDiffs++;   }
1229                                 else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G'))   {       countDiffs++;   }
1230                                 else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G'))   {       countDiffs++;   }
1231                                 else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C'))   {       countDiffs++;   }
1232                                 else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G'))   {       countDiffs++;   }                       
1233                                 
1234                         }
1235                 }
1236                 
1237                 return countDiffs;
1238         }
1239         catch(exception& e) {
1240                 m->errorOut(e, "TrimSeqsCommand", "countDiffs");
1241                 exit(1);
1242         }
1243
1244 }
1245 //***************************************************************************************************************
1246
1247 //bool TrimSeqsCommand::stripQualThreshold(Sequence& seq, ifstream& qFile){
1248 //      try {
1249 //              
1250 //              string rawSequence = seq.getUnaligned();
1251 //              int seqLength = seq.getNumBases();
1252 //              bool success = 0;       //guilty until proven innocent
1253 //              string name;
1254 //              
1255 //              qFile >> name;
1256 //              if (name[0] == '>') {  if(name.substr(1) != seq.getName())      {       m->mothurOut("sequence name mismatch btwn fasta: " + seq.getName() + " and qual file: " + name); m->mothurOutEndLine(); }  }
1257 //              
1258 //              while (!qFile.eof())    {       char c = qFile.get(); if (c == 10 || c == 13){  break;  }       }
1259 //              
1260 //              int score;
1261 //              int end = seqLength;
1262 //              
1263 //              for(int i=0;i<seqLength;i++){
1264 //                      qFile >> score;
1265 //                      
1266 //                      if(score < qThreshold){
1267 //                              end = i;
1268 //                              break;
1269 //                      }
1270 //              }
1271 //              for(int i=end+1;i<seqLength;i++){
1272 //                      qFile >> score;
1273 //              }
1274 //              
1275 //              seq.setUnaligned(rawSequence.substr(0,end));
1276 //              
1277 //              return 1;
1278 //      }
1279 //      catch(exception& e) {
1280 //              m->errorOut(e, "TrimSeqsCommand", "stripQualThreshold");
1281 //              exit(1);
1282 //      }
1283 //}
1284
1285 //***************************************************************************************************************
1286
1287 //bool TrimSeqsCommand::cullQualAverage(Sequence& seq, ifstream& qFile){
1288 //      try {
1289 //              string rawSequence = seq.getUnaligned();
1290 //              int seqLength = seq.getNumBases();
1291 //              bool success = 0;       //guilty until proven innocent
1292 //              string name;
1293 //              
1294 //              qFile >> name;
1295 //              if (name[0] == '>') {  if(name.substr(1) != seq.getName())      {       m->mothurOut("sequence name mismatch btwn fasta: " + seq.getName() + " and qual file: " + name); m->mothurOutEndLine(); }  }
1296 //              
1297 //              while (!qFile.eof())    {       char c = qFile.get(); if (c == 10 || c == 13){  break;  }       }
1298 //              
1299 //              float score;    
1300 //              float average = 0;
1301 //              
1302 //              for(int i=0;i<seqLength;i++){
1303 //                      qFile >> score;
1304 //                      average += score;
1305 //              }
1306 //              average /= seqLength;
1307 //
1308 //              if(average >= qAverage) {       success = 1;    }
1309 //              else                                    {       success = 0;    }
1310 //              
1311 //              return success;
1312 //      }
1313 //      catch(exception& e) {
1314 //              m->errorOut(e, "TrimSeqsCommand", "cullQualAverage");
1315 //              exit(1);
1316 //      }
1317 //}
1318
1319 //***************************************************************************************************************