]> git.donarmstrong.com Git - mothur.git/blob - trimseqscommand.cpp
1975f9ad26415f0b30e8dc0a15cec072b1eb0122
[mothur.git] / trimseqscommand.cpp
1 /*
2  *  trimseqscommand.cpp
3  *  Mothur
4  *
5  *  Created by Pat Schloss on 6/6/09.
6  *  Copyright 2009 Patrick D. Schloss. All rights reserved.
7  *
8  */
9
10 #include "trimseqscommand.h"
11 #include "needlemanoverlap.hpp"
12
13 //***************************************************************************************************************
14
15 TrimSeqsCommand::TrimSeqsCommand(string option)  {
16         try {
17                 
18                 abort = false;
19                 comboStarts = 0;
20                 
21                 //allow user to run help
22                 if(option == "help") { help(); abort = true; }
23                 
24                 else {
25                         //valid paramters for this command
26                         string AlignArray[] =  {"fasta", "flip", "oligos", "maxambig", "maxhomop", "minlength", "maxlength", "qfile", 
27                                                                         "qthreshold", "qaverage", "allfiles", "qtrim","tdiffs", "pdiffs", "bdiffs", "processors", "outputdir","inputdir"};
28                         
29                         vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
30                         
31                         OptionParser parser(option);
32                         map<string,string> parameters = parser.getParameters();
33                         
34                         ValidParameters validParameter;
35                         map<string,string>::iterator it;
36                         
37                         //check to make sure all parameters are valid for command
38                         for (it = parameters.begin(); it != parameters.end(); it++) { 
39                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
40                         }
41                         
42                         //if the user changes the input directory command factory will send this info to us in the output parameter 
43                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
44                         if (inputDir == "not found"){   inputDir = "";          }
45                         else {
46                                 string path;
47                                 it = parameters.find("fasta");
48                                 //user has given a template file
49                                 if(it != parameters.end()){ 
50                                         path = hasPath(it->second);
51                                         //if the user has not given a path then, add inputdir. else leave path alone.
52                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
53                                 }
54                                 
55                                 it = parameters.find("oligos");
56                                 //user has given a template file
57                                 if(it != parameters.end()){ 
58                                         path = hasPath(it->second);
59                                         //if the user has not given a path then, add inputdir. else leave path alone.
60                                         if (path == "") {       parameters["oligos"] = inputDir + it->second;           }
61                                 }
62                                 
63                                 it = parameters.find("qfile");
64                                 //user has given a template file
65                                 if(it != parameters.end()){ 
66                                         path = hasPath(it->second);
67                                         //if the user has not given a path then, add inputdir. else leave path alone.
68                                         if (path == "") {       parameters["qfile"] = inputDir + it->second;            }
69                                 }
70                         }
71
72                         
73                         //check for required parameters
74                         fastaFile = validParameter.validFile(parameters, "fasta", true);
75                         if (fastaFile == "not found") { m->mothurOut("fasta is a required parameter for the screen.seqs command."); m->mothurOutEndLine(); abort = true; }
76                         else if (fastaFile == "not open") { abort = true; }     
77                         
78                         //if the user changes the output directory command factory will send this info to us in the output parameter 
79                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
80                                 outputDir = ""; 
81                                 outputDir += hasPath(fastaFile); //if user entered a file with a path then preserve it  
82                         }
83                 
84                         //check for optional parameter and set defaults
85                         // ...at some point should added some additional type checking...
86                         string temp;
87                         temp = validParameter.validFile(parameters, "flip", false);
88                         if (temp == "not found"){       flip = 0;       }
89                         else if(isTrue(temp))   {       flip = 1;       }
90                 
91                         temp = validParameter.validFile(parameters, "oligos", true);
92                         if (temp == "not found"){       oligoFile = "";         }
93                         else if(temp == "not open"){    abort = true;   } 
94                         else                                    {       oligoFile = temp;               }
95                         
96                         temp = validParameter.validFile(parameters, "maxambig", false);         if (temp == "not found") { temp = "-1"; }
97                         convert(temp, maxAmbig);  
98
99                         temp = validParameter.validFile(parameters, "maxhomop", false);         if (temp == "not found") { temp = "0"; }
100                         convert(temp, maxHomoP);  
101
102                         temp = validParameter.validFile(parameters, "minlength", false);        if (temp == "not found") { temp = "0"; }
103                         convert(temp, minLength); 
104                         
105                         temp = validParameter.validFile(parameters, "maxlength", false);        if (temp == "not found") { temp = "0"; }
106                         convert(temp, maxLength);
107                         
108                         
109                         temp = validParameter.validFile(parameters, "bdiffs", false);           if (temp == "not found") { temp = "0"; }
110                         convert(temp, bdiffs);
111                         
112                         temp = validParameter.validFile(parameters, "pdiffs", false);           if (temp == "not found") { temp = "0"; }
113                         convert(temp, pdiffs);
114                         
115                         temp = validParameter.validFile(parameters, "tdiffs", false);           if (temp == "not found") { int tempTotal = pdiffs + bdiffs;  temp = toString(tempTotal); }
116                         convert(temp, tdiffs);
117                         
118                         if(tdiffs == 0){        tdiffs = bdiffs + pdiffs;       }
119                         
120                         temp = validParameter.validFile(parameters, "qfile", true);     
121                         if (temp == "not found")        {       qFileName = "";         }
122                         else if(temp == "not open")     {       abort = true;           }
123                         else                                            {       qFileName = temp;       }
124                         
125                         temp = validParameter.validFile(parameters, "qthreshold", false);       if (temp == "not found") { temp = "0"; }
126                         convert(temp, qThreshold);
127                         
128                         temp = validParameter.validFile(parameters, "qtrim", false);    if (temp == "not found") { temp = "F"; }
129                         qtrim = isTrue(temp);
130
131                         temp = validParameter.validFile(parameters, "qaverage", false);         if (temp == "not found") { temp = "0"; }
132                         convert(temp, qAverage);
133                         
134                         temp = validParameter.validFile(parameters, "allfiles", false);         if (temp == "not found") { temp = "F"; }
135                         allFiles = isTrue(temp);
136                         
137                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = "1";                             }
138                         convert(temp, processors); 
139                         
140                         if(allFiles && oligoFile == ""){
141                                 m->mothurOut("You selected allfiles, but didn't enter an oligos file.  Ignoring the allfiles request."); m->mothurOutEndLine();
142                         }
143                         if((qAverage != 0 && qThreshold != 0) && qFileName == ""){
144                                 m->mothurOut("You didn't provide a quality file name, quality criteria will be ignored."); m->mothurOutEndLine();
145                                 qAverage=0;
146                                 qThreshold=0;
147                         }
148                         if(!flip && oligoFile=="" && !maxLength && !minLength && (maxAmbig==-1) && !maxHomoP && qFileName == ""){               
149                                 m->mothurOut("You didn't set any options... quiting command."); m->mothurOutEndLine();
150                                 abort = true;
151                         }
152                 }
153
154         }
155         catch(exception& e) {
156                 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
157                 exit(1);
158         }
159 }
160 //**********************************************************************************************************************
161
162 void TrimSeqsCommand::help(){
163         try {
164                 m->mothurOut("The trim.seqs command reads a fastaFile and creates .....\n");
165                 m->mothurOut("The trim.seqs command parameters are fasta, flip, oligos, maxambig, maxhomop, minlength, maxlength, qfile, qthreshold, qaverage, diffs, qtrim and allfiles.\n");
166                 m->mothurOut("The fasta parameter is required.\n");
167                 m->mothurOut("The flip parameter will output the reverse compliment of your trimmed sequence. The default is false.\n");
168                 m->mothurOut("The oligos parameter .... The default is "".\n");
169                 m->mothurOut("The maxambig parameter .... The default is -1.\n");
170                 m->mothurOut("The maxhomop parameter .... The default is 0.\n");
171                 m->mothurOut("The minlength parameter .... The default is 0.\n");
172                 m->mothurOut("The maxlength parameter .... The default is 0.\n");
173                 m->mothurOut("The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs.\n");
174                 m->mothurOut("The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n");
175                 m->mothurOut("The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n");
176                 m->mothurOut("The qfile parameter .....\n");
177                 m->mothurOut("The qthreshold parameter .... The default is 0.\n");
178                 m->mothurOut("The qaverage parameter .... The default is 0.\n");
179                 m->mothurOut("The allfiles parameter .... The default is F.\n");
180                 m->mothurOut("The qtrim parameter .... The default is F.\n");
181                 m->mothurOut("The trim.seqs command should be in the following format: \n");
182                 m->mothurOut("trim.seqs(fasta=yourFastaFile, flip=yourFlip, oligos=yourOligos, maxambig=yourMaxambig,  \n");
183                 m->mothurOut("maxhomop=yourMaxhomop, minlength=youMinlength, maxlength=yourMaxlength)  \n");    
184                 m->mothurOut("Example trim.seqs(fasta=abrecovery.fasta, flip=..., oligos=..., maxambig=..., maxhomop=..., minlength=..., maxlength=...).\n");
185                 m->mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n");
186                 m->mothurOut("For more details please check out the wiki http://www.mothur.org/wiki/Trim.seqs .\n\n");
187
188         }
189         catch(exception& e) {
190                 m->errorOut(e, "TrimSeqsCommand", "help");
191                 exit(1);
192         }
193 }
194
195
196 //***************************************************************************************************************
197
198 TrimSeqsCommand::~TrimSeqsCommand(){    /*      do nothing      */      }
199
200 //***************************************************************************************************************
201
202 int TrimSeqsCommand::execute(){
203         try{
204         
205                 if (abort == true) { return 0; }
206                 
207                 numFPrimers = 0;  //this needs to be initialized
208                 numRPrimers = 0;
209                 
210                 string trimSeqFile = outputDir + getRootName(getSimpleName(fastaFile)) + "trim.fasta";
211                 outputNames.push_back(trimSeqFile);
212                 string scrapSeqFile = outputDir + getRootName(getSimpleName(fastaFile)) + "scrap.fasta";
213                 outputNames.push_back(scrapSeqFile);
214                 string groupFile = outputDir + getRootName(getSimpleName(fastaFile)) + "groups";
215                 
216                 vector<string> fastaFileNames;
217                 if(oligoFile != ""){
218                         outputNames.push_back(groupFile);
219                         getOligos(fastaFileNames);
220                 }
221                 
222                 if(qFileName != "")     {       setLines(qFileName, qLines);    }
223
224                 
225                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
226                                 if(processors == 1){
227                                         ifstream inFASTA;
228                                         int numSeqs;
229                                         openInputFile(fastaFile, inFASTA);
230                                         getNumSeqs(inFASTA, numSeqs);
231                                         inFASTA.close();
232                                         
233                                         lines.push_back(new linePair(0, numSeqs));
234                                         
235                                         driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, groupFile, fastaFileNames, lines[0], lines[0]);
236                                         
237                                         for (int j = 0; j < fastaFileNames.size(); j++) {
238                                                 rename((fastaFileNames[j] + toString(getpid()) + ".temp").c_str(), fastaFileNames[j].c_str());
239                                         }
240
241                                 }else{
242                                         setLines(fastaFile, lines);     
243                                         if(qFileName == "")     {       qLines = lines; }       
244                                                                 
245                                         createProcessesCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, groupFile, fastaFileNames); 
246                                         
247                                         rename((trimSeqFile + toString(processIDS[0]) + ".temp").c_str(), trimSeqFile.c_str());
248                                         rename((scrapSeqFile + toString(processIDS[0]) + ".temp").c_str(), scrapSeqFile.c_str());
249                                         rename((groupFile + toString(processIDS[0]) + ".temp").c_str(), groupFile.c_str());
250                                         for (int j = 0; j < fastaFileNames.size(); j++) {
251                                                 rename((fastaFileNames[j] + toString(processIDS[0]) + ".temp").c_str(), fastaFileNames[j].c_str());
252                                         }
253                                         //append files
254                                         for(int i=1;i<processors;i++){
255                                                 appendFiles((trimSeqFile + toString(processIDS[i]) + ".temp"), trimSeqFile);
256                                                 remove((trimSeqFile + toString(processIDS[i]) + ".temp").c_str());
257                                                 appendFiles((scrapSeqFile + toString(processIDS[i]) + ".temp"), scrapSeqFile);
258                                                 remove((scrapSeqFile + toString(processIDS[i]) + ".temp").c_str());
259                                                 appendFiles((groupFile + toString(processIDS[i]) + ".temp"), groupFile);
260                                                 remove((groupFile + toString(processIDS[i]) + ".temp").c_str());
261                                                 for (int j = 0; j < fastaFileNames.size(); j++) {
262                                                         appendFiles((fastaFileNames[j] + toString(processIDS[i]) + ".temp"), fastaFileNames[j]);
263                                                         remove((fastaFileNames[j] + toString(processIDS[i]) + ".temp").c_str());
264                                                 }
265                                         }
266                                 }
267                                 
268                                 if (m->control_pressed) {  return 0; }
269                 #else
270                                 ifstream inFASTA;
271                                 int numSeqs;
272                                 openInputFile(fastaFile, inFASTA);
273                                 getNumSeqs(inFASTA, numSeqs);
274                                 inFASTA.close();
275                                 
276                                 lines.push_back(new linePair(0, numSeqs));
277                                 
278                                 driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, groupFile, fastaFileNames, lines[0], lines[0]);
279                                 
280                                 if (m->control_pressed) {  return 0; }
281                 #endif
282                                                 
283                                                                                 
284                 for(int i=0;i<fastaFileNames.size();i++){
285                         if (isBlank(fastaFileNames[i])) { remove(fastaFileNames[i].c_str()); }
286                         else if (filesToRemove.count(fastaFileNames[i]) > 0) { remove(fastaFileNames[i].c_str()); }
287                         else {
288                                 ifstream inFASTA;
289                                 string seqName;
290                                 //openInputFile(getRootName(fastaFile) +  groupVector[i] + ".fasta", inFASTA);
291                                 openInputFile(fastaFileNames[i], inFASTA);
292                                 ofstream outGroups;
293                                 string outGroupFilename = outputDir + getRootName(getSimpleName(fastaFileNames[i])) + "groups";
294                                 openOutputFile(outGroupFilename, outGroups);
295                                 //openOutputFile(outputDir + getRootName(getSimpleName(fastaFile)) + groupVector[i] + ".groups", outGroups);
296                                 outputNames.push_back(outGroupFilename);
297                                 
298                                 string thisGroup = "";
299                                 if (i > comboStarts) {
300                                         map<string, int>::iterator itCombo;
301                                         for(itCombo=combos.begin();itCombo!=combos.end(); itCombo++){
302                                                 if(itCombo->second == i){       thisGroup = itCombo->first;     combos.erase(itCombo);  break;  }
303                                         }
304                                 }else{ thisGroup = groupVector[i]; }
305                                 
306                                 while(!inFASTA.eof()){
307                                         if(inFASTA.get() == '>'){
308                                                 inFASTA >> seqName;
309                                                 outGroups << seqName << '\t' << thisGroup << endl;
310                                         }
311                                         while (!inFASTA.eof())  {       char c = inFASTA.get(); if (c == 10 || c == 13){        break;  }       }
312                                 }
313                                 outGroups.close();
314                                 inFASTA.close();
315                         }
316                 }
317                 
318                 if (m->control_pressed) { 
319                         for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str()); }
320                         return 0;
321                 }
322
323                 m->mothurOutEndLine();
324                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
325                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
326                 m->mothurOutEndLine();
327                 
328                 return 0;       
329                         
330         }
331         catch(exception& e) {
332                 m->errorOut(e, "TrimSeqsCommand", "execute");
333                 exit(1);
334         }
335 }
336                 
337 /**************************************************************************************/
338 int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string trimFile, string scrapFile, string groupFile, vector<string> fastaNames, linePair* line, linePair* qline) {     
339         try {
340                 
341                 ofstream outFASTA;
342                 int able = openOutputFile(trimFile, outFASTA);
343                 
344                 ofstream scrapFASTA;
345                 openOutputFile(scrapFile, scrapFASTA);
346                 
347                 ofstream outGroups;
348                 vector<ofstream*> fastaFileNames;
349                 if (oligoFile != "") {          
350                         openOutputFile(groupFile, outGroups);   
351                         for (int i = 0; i < fastaNames.size(); i++) {
352                         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
353                                 fastaFileNames.push_back(new ofstream((fastaNames[i] + toString(getpid()) + ".temp").c_str(), ios::ate)); 
354                         #else
355                                 fastaFileNames.push_back(new ofstream((fastaNames[i] + toString(i) + ".temp").c_str(), ios::ate));                      
356                         #endif
357                         }
358                 }
359                 
360                 ifstream inFASTA;
361                 openInputFile(filename, inFASTA);
362                 
363                 ifstream qFile;
364                 if(qFileName != "")     {       openInputFile(qFileName, qFile);        }
365                 
366                 qFile.seekg(qline->start);
367                 inFASTA.seekg(line->start);
368                 
369                 for(int i=0;i<line->num;i++){
370                                 
371                         if (m->control_pressed) { 
372                                 inFASTA.close(); outFASTA.close(); scrapFASTA.close(); if (oligoFile != "") {    outGroups.close();   } if(qFileName != "")     {       qFile.close();  }
373                                 for(int i=0;i<fastaFileNames.size();i++){  fastaFileNames[i]->close(); delete fastaFileNames[i];  }     
374                                 for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str()); }
375                                 return 0;
376                         }
377                         
378                         int success = 1;
379                         
380                         Sequence currSeq(inFASTA);
381         
382
383                         string origSeq = currSeq.getUnaligned();
384                         if (origSeq != "") {
385                                 int groupBar, groupPrime;
386                                 string trashCode = "";
387                                 int currentSeqsDiffs = 0;
388                                 
389                                 if(qFileName != ""){
390                                         if(qThreshold != 0)             {       success = stripQualThreshold(currSeq, qFile);   }
391                                         else if(qAverage != 0)  {       success = cullQualAverage(currSeq, qFile);              }
392                                         
393                                         if (qtrim == 1 && (origSeq.length() != currSeq.getUnaligned().length())) { 
394                                                 success = 0; //if you don't want to trim and the sequence does not meet quality requirements, move to scrap
395                                         }
396
397                                         if(!success)                    {       trashCode += 'q';                                                               }
398                                 }
399                         
400                                 if(barcodes.size() != 0){
401                                         success = stripBarcode(currSeq, groupBar);
402                                         if(success > bdiffs){   trashCode += 'b';       }
403                                         else{ currentSeqsDiffs += success;  }
404                                 }
405
406                                 if(numFPrimers != 0){
407                                         success = stripForward(currSeq, groupPrime);
408                                         if(success > pdiffs){   trashCode += 'f';       }
409                                         else{ currentSeqsDiffs += success;  }
410                                 }
411                                 
412                                 if (currentSeqsDiffs > tdiffs) { trashCode += 't';   }
413
414                                 if(numRPrimers != 0){
415                                         success = stripReverse(currSeq);
416                                         if(!success){   trashCode += 'r';       }
417                                 }
418                 
419                                 if(minLength > 0 || maxLength > 0){
420                                         success = cullLength(currSeq);
421                                         if(!success){   trashCode += 'l'; }
422                                 }
423                                 if(maxHomoP > 0){
424                                         success = cullHomoP(currSeq);
425                                         if(!success){   trashCode += 'h';       }
426                                 }
427                                 if(maxAmbig != -1){
428                                         success = cullAmbigs(currSeq);
429                                         if(!success){   trashCode += 'n';       }
430                                 }
431                                 
432                                 if(flip){       currSeq.reverseComplement();    }               // should go last                       
433                                 
434                                 if(trashCode.length() == 0){
435                                         currSeq.setAligned(currSeq.getUnaligned());
436                                         currSeq.printSequence(outFASTA);
437                                         if(barcodes.size() != 0){
438                                                 string thisGroup = groupVector[groupBar];
439                                                 int indexToFastaFile = groupBar;
440                                                 if (primers.size() != 0){
441                                                         //does this primer have a group
442                                                         if (groupVector[groupPrime] != "") {  
443                                                                 thisGroup += "." + groupVector[groupPrime]; 
444                                                                 indexToFastaFile = combos[thisGroup];
445                                                         }
446                                                 }
447                                                 outGroups << currSeq.getName() << '\t' << thisGroup << endl;
448                                                 if(allFiles){
449                                                         currSeq.printSequence(*fastaFileNames[indexToFastaFile]);                                       
450                                                 }
451                                         }
452                                 }
453                                 else{
454                                         currSeq.setName(currSeq.getName() + '|' + trashCode);
455                                         currSeq.setUnaligned(origSeq);
456                                         currSeq.setAligned(origSeq);
457                                         currSeq.printSequence(scrapFASTA);
458                                 }
459                         }
460                         gobble(inFASTA);
461                 }
462                 
463                 inFASTA.close();
464                 outFASTA.close();
465                 scrapFASTA.close();
466                 if (oligoFile != "") {   outGroups.close();   }
467                 if(qFileName != "")     {       qFile.close();  }
468                 
469                 for(int i=0;i<fastaFileNames.size();i++){
470                         fastaFileNames[i]->close();
471                         delete fastaFileNames[i];
472                 }               
473                 
474                 return 0;
475         }
476         catch(exception& e) {
477                 m->errorOut(e, "TrimSeqsCommand", "driverCreateTrim");
478                 exit(1);
479         }
480 }
481 /**************************************************************************************************/
482 int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName, string trimFile, string scrapFile, string groupFile, vector<string> fastaNames) {
483         try {
484 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
485                 int process = 0;
486                 int exitCommand = 1;
487                 processIDS.clear();
488                 
489                 //loop through and create all the processes you want
490                 while (process != processors) {
491                         int pid = fork();
492                         
493                         if (pid > 0) {
494                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
495                                 process++;
496                         }else if (pid == 0){
497                                 driverCreateTrim(filename, qFileName, (trimFile + toString(getpid()) + ".temp"), (scrapFile + toString(getpid()) + ".temp"), (groupFile + toString(getpid()) + ".temp"), fastaNames, lines[process], qLines[process]);
498                                 exit(0);
499                         }else { m->mothurOut("unable to spawn the necessary processes."); m->mothurOutEndLine(); exit(0); }
500                 }
501                 
502                 //force parent to wait until all the processes are done
503                 for (int i=0;i<processors;i++) { 
504                         int temp = processIDS[i];
505                         wait(&temp);
506                 }
507                 
508                 return exitCommand;
509 #endif          
510         }
511         catch(exception& e) {
512                 m->errorOut(e, "TrimSeqsCommand", "createProcessesCreateTrim");
513                 exit(1);
514         }
515 }
516 /**************************************************************************************************/
517
518 int TrimSeqsCommand::setLines(string filename, vector<linePair*>& lines) {
519         try {
520                 
521                 lines.clear();
522                 
523                 vector<long int> positions;
524                 
525                 ifstream inFASTA;
526                 openInputFile(filename, inFASTA);
527                         
528                 string input;
529                 while(!inFASTA.eof()){  
530                         input = getline(inFASTA);
531
532                         if (input.length() != 0) {
533                                 if(input[0] == '>'){ long int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1);     }
534                         }
535                 }
536                 inFASTA.close();
537                 
538                 int numFastaSeqs = positions.size();
539         
540                 FILE * pFile;
541                 long size;
542                 
543                 //get num bytes in file
544                 pFile = fopen (filename.c_str(),"rb");
545                 if (pFile==NULL) perror ("Error opening file");
546                 else{
547                         fseek (pFile, 0, SEEK_END);
548                         size=ftell (pFile);
549                         fclose (pFile);
550                 }
551                 
552                 int numSeqsPerProcessor = numFastaSeqs / processors;
553                 
554                 for (int i = 0; i < processors; i++) {
555
556                         long int startPos = positions[ i * numSeqsPerProcessor ];
557                         if(i == processors - 1){
558                                 numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor;
559                         }else{  
560                                 long int myEnd = positions[ (i+1) * numSeqsPerProcessor ];
561                         }
562                         lines.push_back(new linePair(startPos, numSeqsPerProcessor));
563                 }
564                 
565                 return numFastaSeqs;
566         }
567         catch(exception& e) {
568                 m->errorOut(e, "TrimSeqsCommand", "setLines");
569                 exit(1);
570         }
571 }
572 //***************************************************************************************************************
573
574 void TrimSeqsCommand::getOligos(vector<string>& outFASTAVec){ //vector<ofstream*>& outFASTAVec
575         try {
576                 ifstream inOligos;
577                 openInputFile(oligoFile, inOligos);
578                 
579                 ofstream test;
580                 
581                 string type, oligo, group;
582                 int index=0;
583                 //int indexPrimer = 0;
584                 
585                 while(!inOligos.eof()){
586                         inOligos >> type;
587                                         
588                         if(type[0] == '#'){
589                                 while (!inOligos.eof()) {       char c = inOligos.get(); if (c == 10 || c == 13){       break;  }       } // get rest of line if there's any crap there
590                         }
591                         else{
592                                 //make type case insensitive
593                                 for(int i=0;i<type.length();i++){       type[i] = toupper(type[i]);  }
594                                 
595                                 inOligos >> oligo;
596                                 
597                                 for(int i=0;i<oligo.length();i++){
598                                         oligo[i] = toupper(oligo[i]);
599                                         if(oligo[i] == 'U')     {       oligo[i] = 'T'; }
600                                 }
601                                 
602                                 if(type == "FORWARD"){
603                                         group = "";
604                                         
605                                         // get rest of line in case there is a primer name
606                                         while (!inOligos.eof()) {       
607                                                 char c = inOligos.get(); 
608                                                 if (c == 10 || c == 13){        break;  }
609                                                 else if (c == 32 || c == 9){;} //space or tab
610                                                 else {  group += c;  }
611                                         } 
612                                         
613                                         //check for repeat barcodes
614                                         map<string, int>::iterator itPrime = primers.find(oligo);
615                                         if (itPrime != primers.end()) { m->mothurOut("primer " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
616                                         
617                                         primers[oligo]=index; index++;
618                                         groupVector.push_back(group);
619                                         
620                                         if(allFiles){
621                                                 outFASTAVec.push_back((outputDir + getRootName(getSimpleName(fastaFile)) + toString(index) + "." + group + ".fasta"));
622                                                 if (group == "") { //if there is not a group for this primer, then this file will not get written to, but we add it to keep the indexes correct
623                                                         filesToRemove.insert((outputDir + getRootName(getSimpleName(fastaFile)) + toString(index) + "." + group + ".fasta"));
624                                                 }else {
625                                                         outputNames.push_back((outputDir + getRootName(getSimpleName(fastaFile)) + toString(index) + "." + group + ".fasta"));
626                                                 }
627                                         }
628
629                                 }
630                                 else if(type == "REVERSE"){
631                                         Sequence oligoRC("reverse", oligo);
632                                         oligoRC.reverseComplement();
633                                         revPrimer.push_back(oligoRC.getUnaligned());
634                                 }
635                                 else if(type == "BARCODE"){
636                                         inOligos >> group;
637                                         
638                                         //check for repeat barcodes
639                                         map<string, int>::iterator itBar = barcodes.find(oligo);
640                                         if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
641                                         
642                                         barcodes[oligo]=index; index++;
643                                         groupVector.push_back(group);
644                                         
645                                         if(allFiles){
646                                                 outputNames.push_back((outputDir + getRootName(getSimpleName(fastaFile)) + toString(index) + "." + group + ".fasta"));
647                                                 outFASTAVec.push_back((outputDir + getRootName(getSimpleName(fastaFile)) + toString(index) + "." + group + ".fasta"));
648                                         }
649                                 }else{  m->mothurOut(type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine();  }
650                         }
651                         gobble(inOligos);
652                 }
653                 
654                 inOligos.close();
655                 
656                 //add in potential combos
657                 if(allFiles){
658                         comboStarts = outFASTAVec.size()-1;
659                         for (map<string, int>::iterator itBar = barcodes.begin(); itBar != barcodes.end(); itBar++) {
660                                 for (map<string, int>::iterator itPrime = primers.begin(); itPrime != primers.end(); itPrime++) {
661                                         if (groupVector[itPrime->second] != "") { //there is a group for this primer
662                                                 outputNames.push_back((outputDir + getRootName(getSimpleName(fastaFile)) + toString(itBar->second) + "." + groupVector[itBar->second] + "." + toString(itPrime->second) + "." + groupVector[itPrime->second] + ".fasta"));
663                                                 outFASTAVec.push_back((outputDir + getRootName(getSimpleName(fastaFile)) + toString(itBar->second) + "." + groupVector[itBar->second] + "." + toString(itPrime->second) + "." + groupVector[itPrime->second] + ".fasta"));
664                                                 combos[(groupVector[itBar->second] + "." + groupVector[itPrime->second])] = outFASTAVec.size()-1;
665                                         }
666                                 }
667                         }
668                 }
669                 
670                 numFPrimers = primers.size();
671                 numRPrimers = revPrimer.size();
672                 
673         }
674         catch(exception& e) {
675                 m->errorOut(e, "TrimSeqsCommand", "getOligos");
676                 exit(1);
677         }
678 }
679 //***************************************************************************************************************
680
681 int TrimSeqsCommand::stripBarcode(Sequence& seq, int& group){
682         try {
683                 
684                 string rawSequence = seq.getUnaligned();
685                 int success = bdiffs + 1;       //guilty until proven innocent
686                 
687                 //can you find the barcode
688                 for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
689                         string oligo = it->first;
690                         if(rawSequence.length() < oligo.length()){      //let's just assume that the barcodes are the same length
691                                 success = bdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
692                                 break;  
693                         }
694                         
695                         if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
696                                 group = it->second;
697                                 seq.setUnaligned(rawSequence.substr(oligo.length()));
698                                 success = 0;
699                                 break;
700                         }
701                 }
702                 
703                 //if you found the barcode or if you don't want to allow for diffs
704 //              cout << success;
705                 if ((bdiffs == 0) || (success == 0)) { return success;  }
706                 
707                 else { //try aligning and see if you can find it
708 //                      cout << endl;
709
710                         int maxLength = 0;
711
712                         Alignment* alignment;
713                         if (barcodes.size() > 0) {
714                                 map<string,int>::iterator it=barcodes.begin();
715
716                                 for(it;it!=barcodes.end();it++){
717                                         if(it->first.length() > maxLength){
718                                                 maxLength = it->first.length();
719                                         }
720                                 }
721                                 alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+bdiffs+1));  
722
723                         }else{ alignment = NULL; } 
724                         
725                         //can you find the barcode
726                         int minDiff = 1e6;
727                         int minCount = 1;
728                         int minGroup = -1;
729                         int minPos = 0;
730                         
731                         for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
732                                 string oligo = it->first;
733 //                              int length = oligo.length();
734                                 
735                                 if(rawSequence.length() < maxLength){   //let's just assume that the barcodes are the same length
736                                         success = bdiffs + 10;
737                                         break;
738                                 }
739                                 
740                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
741                                 alignment->align(oligo, rawSequence.substr(0,oligo.length()+bdiffs));
742                                 oligo = alignment->getSeqAAln();
743                                 string temp = alignment->getSeqBAln();
744                 
745                                 int alnLength = oligo.length();
746                                 
747                                 for(int i=oligo.length()-1;i>=0;i--){
748                                         if(oligo[i] != '-'){    alnLength = i+1;        break;  }
749                                 }
750                                 oligo = oligo.substr(0,alnLength);
751                                 temp = temp.substr(0,alnLength);
752                                 
753                                 int newStart=0;
754                                 int numDiff = countDiffs(oligo, temp);
755                                 
756 //                              cout << oligo << '\t' << temp << '\t' << numDiff << endl;                               
757                                 
758                                 if(numDiff < minDiff){
759                                         minDiff = numDiff;
760                                         minCount = 1;
761                                         minGroup = it->second;
762                                         minPos = 0;
763                                         for(int i=0;i<alnLength;i++){
764                                                 if(temp[i] != '-'){
765                                                         minPos++;
766                                                 }
767                                         }
768                                 }
769                                 else if(numDiff == minDiff){
770                                         minCount++;
771                                 }
772
773                         }
774
775                         if(minDiff > bdiffs)    {       success = minDiff;              }       //no good matches
776                         else if(minCount > 1)   {       success = bdiffs + 100; }       //can't tell the difference between multiple barcodes
777                         else{                                                                                                   //use the best match
778                                 group = minGroup;
779                                 seq.setUnaligned(rawSequence.substr(minPos));
780                                 success = minDiff;
781                         }
782                         
783                         if (alignment != NULL) {  delete alignment;  }
784                         
785                 }
786 //              cout << success << endl;
787                 
788                 return success;
789                 
790         }
791         catch(exception& e) {
792                 m->errorOut(e, "TrimSeqsCommand", "stripBarcode");
793                 exit(1);
794         }
795
796 }
797
798 //***************************************************************************************************************
799
800 int TrimSeqsCommand::stripForward(Sequence& seq, int& group){
801         try {
802                 string rawSequence = seq.getUnaligned();
803                 int success = pdiffs + 1;       //guilty until proven innocent
804                 
805                 //can you find the primer
806                 for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
807                         string oligo = it->first;
808                         if(rawSequence.length() < oligo.length()){      //let's just assume that the primers are the same length
809                                 success = pdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
810                                 break;  
811                         }
812                         
813                         if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
814                                 group = it->second;
815                                 seq.setUnaligned(rawSequence.substr(oligo.length()));
816                                 success = 0;
817                                 break;
818                         }
819                 }
820
821                 //if you found the barcode or if you don't want to allow for diffs
822 //              cout << success;
823                 if ((pdiffs == 0) || (success == 0)) { return success;  }
824                 
825                 else { //try aligning and see if you can find it
826 //                      cout << endl;
827
828                         int maxLength = 0;
829
830                         Alignment* alignment;
831                         if (primers.size() > 0) {
832                                 map<string,int>::iterator it=primers.begin();
833
834                                 for(it;it!=primers.end();it++){
835                                         if(it->first.length() > maxLength){
836                                                 maxLength = it->first.length();
837                                         }
838                                 }
839                                 alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+pdiffs+1));  
840
841                         }else{ alignment = NULL; } 
842                         
843                         //can you find the barcode
844                         int minDiff = 1e6;
845                         int minCount = 1;
846                         int minGroup = -1;
847                         int minPos = 0;
848                         
849                         for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
850                                 string oligo = it->first;
851 //                              int length = oligo.length();
852                                 
853                                 if(rawSequence.length() < maxLength){   
854                                         success = pdiffs + 100;
855                                         break;
856                                 }
857                                 
858                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
859                                 alignment->align(oligo, rawSequence.substr(0,oligo.length()+pdiffs));
860                                 oligo = alignment->getSeqAAln();
861                                 string temp = alignment->getSeqBAln();
862                 
863                                 int alnLength = oligo.length();
864                                 
865                                 for(int i=oligo.length()-1;i>=0;i--){
866                                         if(oligo[i] != '-'){    alnLength = i+1;        break;  }
867                                 }
868                                 oligo = oligo.substr(0,alnLength);
869                                 temp = temp.substr(0,alnLength);
870                                 
871                                 int newStart=0;
872                                 int numDiff = countDiffs(oligo, temp);
873                                 
874 //                              cout << oligo << '\t' << temp << '\t' << numDiff << endl;                               
875                                 
876                                 if(numDiff < minDiff){
877                                         minDiff = numDiff;
878                                         minCount = 1;
879                                         minGroup = it->second;
880                                         minPos = 0;
881                                         for(int i=0;i<alnLength;i++){
882                                                 if(temp[i] != '-'){
883                                                         minPos++;
884                                                 }
885                                         }
886                                 }
887                                 else if(numDiff == minDiff){
888                                         minCount++;
889                                 }
890
891                         }
892
893                         if(minDiff > pdiffs)    {       success = minDiff;              }       //no good matches
894                         else if(minCount > 1)   {       success = pdiffs + 10;  }       //can't tell the difference between multiple primers
895                         else{                                                                                                   //use the best match
896                                 group = minGroup;
897                                 seq.setUnaligned(rawSequence.substr(minPos));
898                                 success = minDiff;
899                         }
900                         
901                         if (alignment != NULL) {  delete alignment;  }
902                         
903                 }
904                 
905                 return success;
906
907         }
908         catch(exception& e) {
909                 m->errorOut(e, "TrimSeqsCommand", "stripForward");
910                 exit(1);
911         }
912 }
913
914 //***************************************************************************************************************
915
916 bool TrimSeqsCommand::stripReverse(Sequence& seq){
917         try {
918                 string rawSequence = seq.getUnaligned();
919                 bool success = 0;       //guilty until proven innocent
920                 
921                 for(int i=0;i<numRPrimers;i++){
922                         string oligo = revPrimer[i];
923                         
924                         if(rawSequence.length() < oligo.length()){
925                                 success = 0;
926                                 break;
927                         }
928                         
929                         if(compareDNASeq(oligo, rawSequence.substr(rawSequence.length()-oligo.length(),oligo.length()))){
930                                 seq.setUnaligned(rawSequence.substr(0,rawSequence.length()-oligo.length()));
931                                 success = 1;
932                                 break;
933                         }
934                 }       
935                 return success;
936                 
937         }
938         catch(exception& e) {
939                 m->errorOut(e, "TrimSeqsCommand", "stripReverse");
940                 exit(1);
941         }
942 }
943
944 //***************************************************************************************************************
945
946 bool TrimSeqsCommand::cullLength(Sequence& seq){
947         try {
948         
949                 int length = seq.getNumBases();
950                 bool success = 0;       //guilty until proven innocent
951                 
952                 if(length >= minLength && maxLength == 0)                       {       success = 1;    }
953                 else if(length >= minLength && length <= maxLength)     {       success = 1;    }
954                 else                                                                                            {       success = 0;    }
955                 
956                 return success;
957         
958         }
959         catch(exception& e) {
960                 m->errorOut(e, "TrimSeqsCommand", "cullLength");
961                 exit(1);
962         }
963         
964 }
965
966 //***************************************************************************************************************
967
968 bool TrimSeqsCommand::cullHomoP(Sequence& seq){
969         try {
970                 int longHomoP = seq.getLongHomoPolymer();
971                 bool success = 0;       //guilty until proven innocent
972                 
973                 if(longHomoP <= maxHomoP){      success = 1;    }
974                 else                                    {       success = 0;    }
975                 
976                 return success;
977         }
978         catch(exception& e) {
979                 m->errorOut(e, "TrimSeqsCommand", "cullHomoP");
980                 exit(1);
981         }
982         
983 }
984
985 //***************************************************************************************************************
986
987 bool TrimSeqsCommand::cullAmbigs(Sequence& seq){
988         try {
989                 int numNs = seq.getAmbigBases();
990                 bool success = 0;       //guilty until proven innocent
991                 
992                 if(numNs <= maxAmbig)   {       success = 1;    }
993                 else                                    {       success = 0;    }
994                 
995                 return success;
996         }
997         catch(exception& e) {
998                 m->errorOut(e, "TrimSeqsCommand", "cullAmbigs");
999                 exit(1);
1000         }
1001         
1002 }
1003
1004 //***************************************************************************************************************
1005
1006 bool TrimSeqsCommand::compareDNASeq(string oligo, string seq){
1007         try {
1008                 bool success = 1;
1009                 int length = oligo.length();
1010                 
1011                 for(int i=0;i<length;i++){
1012                         
1013                         if(oligo[i] != seq[i]){
1014                                 if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C')    {       success = 0;    }
1015                                 else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N'))                                {       success = 0;    }
1016                                 else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G'))                                    {       success = 0;    }
1017                                 else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T'))                                    {       success = 0;    }
1018                                 else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A'))                                    {       success = 0;    }
1019                                 else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G'))                                    {       success = 0;    }
1020                                 else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A'))                                    {       success = 0;    }
1021                                 else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G'))                                    {       success = 0;    }
1022                                 else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G'))   {       success = 0;    }
1023                                 else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G'))   {       success = 0;    }
1024                                 else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C'))   {       success = 0;    }
1025                                 else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G'))   {       success = 0;    }                       
1026                                 
1027                                 if(success == 0)        {       break;   }
1028                         }
1029                         else{
1030                                 success = 1;
1031                         }
1032                 }
1033                 
1034                 return success;
1035         }
1036         catch(exception& e) {
1037                 m->errorOut(e, "TrimSeqsCommand", "compareDNASeq");
1038                 exit(1);
1039         }
1040
1041 }
1042 //***************************************************************************************************************
1043
1044 int TrimSeqsCommand::countDiffs(string oligo, string seq){
1045         try {
1046
1047                 int length = oligo.length();
1048                 int countDiffs = 0;
1049                 
1050                 for(int i=0;i<length;i++){
1051                                                                 
1052                         if(oligo[i] != seq[i]){
1053                                 if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C' || oligo[i] == '-' || oligo[i] == '.')      {       countDiffs++;   }
1054                                 else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N'))                                {       countDiffs++;   }
1055                                 else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G'))                                    {       countDiffs++;   }
1056                                 else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T'))                                    {       countDiffs++;   }
1057                                 else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A'))                                    {       countDiffs++;   }
1058                                 else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G'))                                    {       countDiffs++;   }
1059                                 else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A'))                                    {       countDiffs++;   }
1060                                 else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G'))                                    {       countDiffs++;   }
1061                                 else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G'))   {       countDiffs++;   }
1062                                 else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G'))   {       countDiffs++;   }
1063                                 else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C'))   {       countDiffs++;   }
1064                                 else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G'))   {       countDiffs++;   }                       
1065                                 
1066                         }
1067                 }
1068                 
1069                 return countDiffs;
1070         }
1071         catch(exception& e) {
1072                 m->errorOut(e, "TrimSeqsCommand", "countDiffs");
1073                 exit(1);
1074         }
1075
1076 }
1077 //***************************************************************************************************************
1078
1079 bool TrimSeqsCommand::stripQualThreshold(Sequence& seq, ifstream& qFile){
1080         try {
1081 //              string rawSequence = seq.getUnaligned();
1082 //              int seqLength;  // = rawSequence.length();
1083 //              string name, temp, temp2;
1084 //              
1085 //              qFile >> name;
1086 //              
1087 //              //get rest of line
1088 //              temp = "";
1089 //              while (!qFile.eof())    {       
1090 //                      char c = qFile.get(); 
1091 //                      if (c == 10 || c == 13){        break;  }       
1092 //                      else { temp += c; }
1093 //              } 
1094 //      
1095 //              int pos = temp.find("length");
1096 //              if (pos == temp.npos) { m->mothurOut("Cannot find length in qfile for " + seq.getName()); m->mothurOutEndLine();  seqLength = 0;  }
1097 //              else {
1098 //                      string tempLength = temp.substr(pos);
1099 //                      istringstream iss (tempLength,istringstream::in);
1100 //                      iss >> temp;
1101 //              }
1102 //              
1103 //              splitAtEquals(temp2, temp); //separates length=242, temp=length, temp2=242
1104 //              convert(temp, seqLength); //converts string to int
1105 //      
1106 //              if (name.length() != 0) {  if(name.substr(1) != seq.getName())  {       m->mothurOut("sequence name mismatch btwn fasta and qual file"); m->mothurOutEndLine(); }  } 
1107                 
1108                 string rawSequence = seq.getUnaligned();
1109                 int seqLength = seq.getNumBases();
1110                 bool success = 0;       //guilty until proven innocent
1111                 string name;
1112                 
1113                 qFile >> name;
1114                 if (name[0] == '>') {  if(name.substr(1) != seq.getName())      {       m->mothurOut("sequence name mismatch btwn fasta: " + seq.getName() + " and qual file: " + name); m->mothurOutEndLine(); }  }
1115                 
1116                 while (!qFile.eof())    {       char c = qFile.get(); if (c == 10 || c == 13){  break;  }       }
1117                 
1118                 int score;
1119                 int end = seqLength;
1120                 
1121                 for(int i=0;i<seqLength;i++){
1122                         qFile >> score;
1123                         
1124                         if(score < qThreshold){
1125                                 end = i;
1126                                 break;
1127                         }
1128                 }
1129                 for(int i=end+1;i<seqLength;i++){
1130                         qFile >> score;
1131                 }
1132                 
1133                 seq.setUnaligned(rawSequence.substr(0,end));
1134                 
1135                 return 1;
1136         }
1137         catch(exception& e) {
1138                 m->errorOut(e, "TrimSeqsCommand", "stripQualThreshold");
1139                 exit(1);
1140         }
1141 }
1142
1143 //***************************************************************************************************************
1144
1145 bool TrimSeqsCommand::cullQualAverage(Sequence& seq, ifstream& qFile){
1146         try {
1147                 string rawSequence = seq.getUnaligned();
1148                 int seqLength = seq.getNumBases();
1149                 bool success = 0;       //guilty until proven innocent
1150                 string name;
1151                 
1152                 qFile >> name;
1153                 if (name[0] == '>') {  if(name.substr(1) != seq.getName())      {       m->mothurOut("sequence name mismatch btwn fasta: " + seq.getName() + " and qual file: " + name); m->mothurOutEndLine(); }  }
1154                 
1155                 while (!qFile.eof())    {       char c = qFile.get(); if (c == 10 || c == 13){  break;  }       }
1156                 
1157                 float score;    
1158                 float average = 0;
1159                 
1160                 for(int i=0;i<seqLength;i++){
1161                         qFile >> score;
1162                         average += score;
1163                 }
1164                 average /= seqLength;
1165
1166                 if(average >= qAverage) {       success = 1;    }
1167                 else                                    {       success = 0;    }
1168                 
1169                 return success;
1170         }
1171         catch(exception& e) {
1172                 m->errorOut(e, "TrimSeqsCommand", "cullQualAverage");
1173                 exit(1);
1174         }
1175 }
1176
1177 //***************************************************************************************************************