]> git.donarmstrong.com Git - mothur.git/blobdiff - trimflowscommand.cpp
fixes while testing 1.33.0
[mothur.git] / trimflowscommand.cpp
index 58380942b021bfa370d71e5103118826924b5f67..33349decab52e90be19c928b44b9b07c8f6c710a 100644 (file)
@@ -28,7 +28,7 @@ vector<string> TrimFlowsCommand::setParameters(){
                CommandParameter psignal("signal", "Number", "", "0.50", "", "", "","",false,false); parameters.push_back(psignal);
                CommandParameter pnoise("noise", "Number", "", "0.70", "", "", "","",false,false); parameters.push_back(pnoise);
                CommandParameter pallfiles("allfiles", "Boolean", "", "t", "", "", "","",false,false); parameters.push_back(pallfiles);
-        CommandParameter porder("order", "Multiple", "A-B", "A", "", "", "","",false,false, true); parameters.push_back(porder);
+        CommandParameter porder("order", "Multiple", "A-B-I", "A", "", "", "","",false,false, true); parameters.push_back(porder);
                CommandParameter pfasta("fasta", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pfasta);
                CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
                CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
@@ -47,6 +47,7 @@ string TrimFlowsCommand::getHelpString(){
        try {
                string helpString = "";
                helpString += "The trim.flows command reads a flowgram file and creates .....\n";
+        helpString += "The order parameter options are A, B or I.  Default=A. A = TACG and B = TACGTACGTACGATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGC and I = TACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGC.\n";
                helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
                helpString += "For more details please check out the wiki http://www.mothur.org/wiki/Trim.flows.\n";
                return helpString;
@@ -394,7 +395,7 @@ int TrimFlowsCommand::driverCreateTrim(string flowFileName, string trimFlowFileN
                
                if(line->start == 0){
                        flowFile >> numFlows; m->gobble(flowFile);
-                       scrapFlowFile << maxFlows << endl;
+                       scrapFlowFile << numFlows << endl;
                        trimFlowFile << maxFlows << endl;
                        if(allFiles){
                                for(int i=0;i<thisBarcodePrimerComboFileNames.size();i++){
@@ -429,11 +430,16 @@ int TrimFlowsCommand::driverCreateTrim(string flowFileName, string trimFlowFileN
                        flowData.capFlows(maxFlows);    
                        
                        Sequence currSeq = flowData.getSequence();
+            //cout << currSeq.getName() << '\t' << currSeq.getUnaligned() << endl;
                        if(!flowData.hasMinFlows(minFlows)){    //screen to see if sequence is of a minimum number of flows
                                success = 0;
                                trashCode += 'l';
                        }
-                       
+            if(!flowData.hasGoodHomoP()){      //screen to see if sequence meets the maximum homopolymer limit
+                               success = 0;
+                               trashCode += 'h';
+                       }
+
                        int primerIndex = 0;
                        int barcodeIndex = 0;
                        
@@ -491,7 +497,7 @@ int TrimFlowsCommand::driverCreateTrim(string flowFileName, string trimFlowFileN
                 if (pos == string::npos) {             
                     flowData.printFlows(trimFlowFile);
                     
-                    if(fasta)  {       currSeq.printSequence(fastaFile);       }
+                    if(fasta)  { currSeq.printSequence(fastaFile);     }
                     
                     if(allFiles){
                         ofstream output;
@@ -551,13 +557,16 @@ void TrimFlowsCommand::getOligos(vector<vector<string> >& outFlowFileNames){
                
                while(!oligosFile.eof()){
                
-                       oligosFile >> type; m->gobble(oligosFile);      //get the first column value of the row - is it a comment or a feature we are interested in?
-
+                       oligosFile >> type;     //get the first column value of the row - is it a comment or a feature we are interested in?
+            
+            if (m->debug) { m->mothurOut("[DEBUG]: type = " + type + ".\n"); }
+            
                        if(type[0] == '#'){     //igore the line because there's a comment
-                               while (!oligosFile.eof())       {       char c = oligosFile.get(); if (c == 10 || c == 13){     break;  }       } // get rest of line if there's any crap there
+                               while (!oligosFile.eof())       {       char c = oligosFile.get(); if (c == 10 || c == 13){     break;  }       }
+                m->gobble(oligosFile);// get rest of line if there's any crap there
                        }
                        else{                           //there's a feature we're interested in
-
+                m->gobble(oligosFile);
                                for(int i=0;i<type.length();i++){       type[i] = toupper(type[i]);  }                                  //make type case insensitive
 
                                oligosFile >> oligo;    //get the DNA sequence for the feature
@@ -566,13 +575,15 @@ void TrimFlowsCommand::getOligos(vector<vector<string> >& outFlowFileNames){
                                        oligo[i] = toupper(oligo[i]);
                                        if(oligo[i] == 'U')     {       oligo[i] = 'T'; }
                                }
-
+                
+                if (m->debug) { m->mothurOut("[DEBUG]: oligos = " + oligo + ".\n"); }
+                
                                if(type == "FORWARD"){  //if the feature is a forward primer...
                                        group = "";
 
                                        while (!oligosFile.eof())       {       // get rest of line in case there is a primer name = will have the name of the primer
                                                char c = oligosFile.get(); 
-                                               if (c == 10 || c == 13){        break;  }
+                                               if (c == 10 || c == 13 || c == -1){     break;  }
                                                else if (c == 32 || c == 9){;} //space or tab
                                                else {  group += c;  }
                                        } 
@@ -588,6 +599,7 @@ void TrimFlowsCommand::getOligos(vector<vector<string> >& outFlowFileNames){
                                else if(type == "REVERSE"){
                                        string oligoRC = reverseOligo(oligo);
                                        revPrimer.push_back(oligoRC);
+                    if (m->debug) { m->mothurOut("[DEBUG]: reverse oligos = " + oligoRC + ".\n"); }
                                }
                                else if(type == "BARCODE"){
                                        oligosFile >> group;
@@ -595,7 +607,9 @@ void TrimFlowsCommand::getOligos(vector<vector<string> >& outFlowFileNames){
                                        //check for repeat barcodes
                                        map<string, int>::iterator itBar = barcodes.find(oligo);
                                        if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
-
+                    
+                    if (m->debug) { m->mothurOut("[DEBUG]: group = " + group + ".\n"); }
+                    
                                        barcodes[oligo]=indexBarcode; indexBarcode++;
                                        barcodeNameVector.push_back(group);
                                }else if(type == "LINKER"){
@@ -624,8 +638,7 @@ void TrimFlowsCommand::getOligos(vector<vector<string> >& outFlowFileNames){
                        primers[""] = 0;
                        primerNameVector.push_back("");                 
                }
-               
-               
+
                outFlowFileNames.resize(barcodeNameVector.size());
                for(int i=0;i<outFlowFileNames.size();i++){
                        outFlowFileNames[i].assign(primerNameVector.size(), "");