]> git.donarmstrong.com Git - mothur.git/commitdiff
added some features to trim.seqs
authorpschloss <pschloss>
Mon, 8 Jun 2009 14:34:08 +0000 (14:34 +0000)
committerpschloss <pschloss>
Mon, 8 Jun 2009 14:34:08 +0000 (14:34 +0000)
Mothur.xcodeproj/project.pbxproj
trimseqscommand.cpp
trimseqscommand.h
validparameter.cpp

index 6ea76d739c9f338b11ad6479dc0f7487610f6b77..f6eeacb79539bb1f1eebe7a8bf0c2fb7e92a3e74 100644 (file)
                37E5F3E20F29FD4200F8D827 /* treenode.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = treenode.cpp; sourceTree = SOURCE_ROOT; };
                37E5F4900F2A3DA800F8D827 /* readtreecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = readtreecommand.h; sourceTree = SOURCE_ROOT; };
                37E5F4910F2A3DA800F8D827 /* readtreecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = readtreecommand.cpp; sourceTree = SOURCE_ROOT; };
-               7E09C5120FDA79C5002ECAE5 /* reversecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = reversecommand.h; sourceTree = "<group>"; };
-               7E09C5130FDA79C5002ECAE5 /* reversecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = reversecommand.cpp; sourceTree = "<group>"; };
-               7E09C5340FDA7F65002ECAE5 /* trimseqscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = trimseqscommand.h; sourceTree = "<group>"; };
-               7E09C5350FDA7F65002ECAE5 /* trimseqscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = trimseqscommand.cpp; sourceTree = "<group>"; };
+               7E09C5120FDA79C5002ECAE5 /* reversecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = reversecommand.h; sourceTree = SOURCE_ROOT; };
+               7E09C5130FDA79C5002ECAE5 /* reversecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = reversecommand.cpp; sourceTree = SOURCE_ROOT; };
+               7E09C5340FDA7F65002ECAE5 /* trimseqscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = trimseqscommand.h; sourceTree = SOURCE_ROOT; };
+               7E09C5350FDA7F65002ECAE5 /* trimseqscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = trimseqscommand.cpp; sourceTree = SOURCE_ROOT; };
                7E412F420F8D213C00381DD0 /* libshuff.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = libshuff.h; sourceTree = SOURCE_ROOT; };
                7E412F470F8D21B600381DD0 /* slibshuff.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = slibshuff.h; sourceTree = SOURCE_ROOT; };
                7E412F480F8D21B600381DD0 /* slibshuff.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = slibshuff.cpp; sourceTree = SOURCE_ROOT; };
index 95a85ac23b3a347e25cbc11c03350ef49ae7cb58..259adc1485222c2070e44c2f1b0bb32e3c86815d 100644 (file)
 TrimSeqsCommand::TrimSeqsCommand(){
        try {
                
-               oligos = 0;             
-               totalBarcodeCount = 0;
-               matchBarcodeCount = 0;
-               
                globaldata = GlobalData::getInstance();
+               
+               oligos = 0;             
+
                if(globaldata->getFastaFile() == ""){
                        cout << "you need to at least enter a fasta file name" << endl;
                }
                
-               if(isTrue(globaldata->getFlip()))               {       flip = 1;                               }
-               if(globaldata->getOligosFile() != "")   {       oligos = 1;                             }
-               if(!flip && !oligos)                                    {       cout << "huh?" << endl; }
+               if(isTrue(globaldata->getFlip()))                       {       flip = 1;                                                                                                       }
+               if(globaldata->getOligosFile() != "")           {       oligos = 1;                                                                                                     }
+               
+               if(globaldata->getMaxAmbig() != "-1")           {       maxAmbig = atoi(globaldata->getMaxAmbig().c_str());                     }
+               else                                                                            {       maxAmbig = -1;                                                                                          }
+               
+               if(globaldata->getMaxHomoPolymer() != "-1")     {       maxHomoP = atoi(globaldata->getMaxHomoPolymer().c_str());       }
+               else                                                                            {       maxHomoP = 0;                                                                                           }
+               
+               if(globaldata->getMinLength() != "-1")          {       minLength = atoi(globaldata->getMinLength().c_str());           }
+               else                                                                            {       minLength = 0;                                                                                          }
+               
+               if(globaldata->getMaxLength() != "-1")          {       maxLength = atoi(globaldata->getMaxLength().c_str());           }
+               else                                                                            {       maxLength = 0;                                                                                          }
+               
+               if(!flip && !oligos && !maxLength && !minLength && (maxAmbig==-1) && !maxHomoP ){       cout << "huh?" << endl; }
 
        }
        catch(exception& e) {
@@ -83,6 +95,19 @@ int TrimSeqsCommand::execute(){
                                success = stripReverse(currSeq);
                                if(!success){   trashCode += 'r';       }
                        }
+                       if(minLength > 0 || maxLength > 0){
+                               success = cullLength(currSeq);
+                               if(!success){   trashCode += 'l';       }
+                       }
+                       if(maxHomoP > 0){
+                               success = cullHomoP(currSeq);
+                               if(!success){   trashCode += 'h';       }
+                       }
+                       if(maxAmbig != -1){
+                               success = cullAmbigs(currSeq);
+                               if(!success){   trashCode += 'n';       }
+                       }
+                       
                        if(flip){       currSeq.reverseComplement();    }               // should go last                       
 
                        if(trashCode.length() == 0){
@@ -168,12 +193,10 @@ bool TrimSeqsCommand::stripBarcode(Sequence& seq, string& group){
                if (rawSequence.compare(0,oligo.length(),oligo) == 0){
                        group = it->second;
                        seq.setUnaligned(rawSequence.substr(oligo.length()));
-                       matchBarcodeCount++;
                        success = 1;
                        break;
                }
        }
-       totalBarcodeCount++;
        return success;
        
 }
@@ -195,13 +218,11 @@ bool TrimSeqsCommand::stripForward(Sequence& seq){
                
                if (rawSequence.compare(0,oligo.length(),oligo) == 0){
                        seq.setUnaligned(rawSequence.substr(oligo.length()));
-                       matchFPrimerCount++;
                        success = 1;
                        break;
                }
        }
        
-       totalFPrimerCount++;
        return success;
        
 }
@@ -223,13 +244,53 @@ bool TrimSeqsCommand::stripReverse(Sequence& seq){
                
                if(rawSequence.compare(rawSequence.length()-oligo.length(),oligo.length(),oligo) == 0){
                        seq.setUnaligned(rawSequence.substr(rawSequence.length()-oligo.length()));
-                       matchRPrimerCount++;
                        success = 1;
                        break;
                }
        }       
+       return success;
+       
+}
+
+//***************************************************************************************************************
+
+bool TrimSeqsCommand::cullLength(Sequence& seq){
+       
+       int length = seq.getNumBases();
+       bool success = 0;       //guilty until proven innocent
+       
+       if(length >= minLength && maxLength == 0)                       {       success = 1;    }
+       else if(length >= minLength && length <= maxLength)     {       success = 1;    }
+       else                                                                                            {       success = 0;    }
+       
+       return success;
+       
+}
+
+//***************************************************************************************************************
+
+bool TrimSeqsCommand::cullHomoP(Sequence& seq){
+       
+       int longHomoP = seq.getLongHomoPolymer();
+       bool success = 0;       //guilty until proven innocent
+       
+       if(longHomoP <= maxHomoP){      success = 1;    }
+       else                                    {       success = 0;    }
+       
+       return success;
+       
+}
+
+//***************************************************************************************************************
+
+bool TrimSeqsCommand::cullAmbigs(Sequence& seq){
+       
+       int numNs = seq.getAmbigBases();
+       bool success = 0;       //guilty until proven innocent
+       
+       if(numNs <= maxAmbig){  success = 1;    }
+       else                                    {       success = 0;    }
        
-       totalRPrimerCount++;
        return success;
        
 }
index 565d5283329124a65aca975d51a9588cb791670d..800ec3d047fac0221edde0dec47fb130058519ad 100644 (file)
@@ -26,15 +26,14 @@ private:
        bool stripBarcode(Sequence&, string&);
        bool stripForward(Sequence&);
        bool stripReverse(Sequence&);
+       bool cullLength(Sequence&);
+       bool cullHomoP(Sequence&);
+       bool cullAmbigs(Sequence&);
        
        GlobalData* globaldata;
 
-       int totalBarcodeCount, matchBarcodeCount; // to be removed
-       int totalFPrimerCount, matchFPrimerCount; // to be removed
-       int totalRPrimerCount, matchRPrimerCount; // to be removed
-       
        bool oligos, flip;
-       int numFPrimers, numRPrimers;
+       int numFPrimers, numRPrimers, maxAmbig, maxHomoP, minLength, maxLength;
        vector<string> forPrimer, revPrimer;
        map<string, string> barcodes;
 };
index 4845425883cfbb57b05eac1aa2156efd1841090f..ae5fc3ff8590d0f9291aec0618dfd08d3d989605 100644 (file)
@@ -285,7 +285,7 @@ void ValidParameters::initCommandParameters() {
                string reverseseqsArray[] =  {"fasta"};
                commandParameters["reverse.seqs"] = addParameters(reverseseqsArray, sizeof(reverseseqsArray)/sizeof(string));
 
-               string trimseqsArray[] =  {"fasta", "flip", "oligos"};
+               string trimseqsArray[] =  {"fasta", "flip", "oligos", "maxambig", "maxhomop", "minlength", "maxlength"};
                commandParameters["trim.seqs"] = addParameters(trimseqsArray, sizeof(trimseqsArray)/sizeof(string));
 
                string vennArray[] =  {"groups","line","label","calc"};