]> git.donarmstrong.com Git - mothur.git/commitdiff
pcr.seqs bug fix. working on shannon range calc
authorSarah Westcott <mothur.westcott@gmail.com>
Fri, 28 Feb 2014 19:59:14 +0000 (14:59 -0500)
committerSarah Westcott <mothur.westcott@gmail.com>
Fri, 28 Feb 2014 19:59:14 +0000 (14:59 -0500)
heatmap.cpp
prcseqscommand.cpp
shannonrange.cpp
shannonrange.h
sracommand.cpp
sracommand.h

index 514c7af15312b60d4787bb0aa659dbd9f1ccaf08..254b70668f9539929dcd3d60360cd23bd7c6dc2a 100644 (file)
@@ -144,12 +144,17 @@ string HeatMap::getPic(vector<SharedRAbundVector*> lookup) {
                                
                                if (lookup[i]->getAbundance(j) != 0) { //don't want log value of 0.
                                        if (scaler == "log10") {
                                
                                if (lookup[i]->getAbundance(j) != 0) { //don't want log value of 0.
                                        if (scaler == "log10") {
-                                               scaleRelAbund[i][j] = toHex(int(255 * log10(relAbund) / log10(maxRelAbund[i]))) + "0000";  
+                        if (maxRelAbund[i] == 1) { maxRelAbund[i] -= 0.001; }
+                        if (relAbund == 1) { relAbund -= 0.001; }
+                                               scaleRelAbund[i][j] = toHex(int(255 * log10(relAbund) / log10(maxRelAbund[i]))) + "0000";
                                        }else if (scaler == "log2") {
                                        }else if (scaler == "log2") {
+                        if (maxRelAbund[i] == 1) { maxRelAbund[i] -= 0.001; }
+                        if (relAbund == 1) { relAbund -= 0.001; }
                                                scaleRelAbund[i][j] = toHex(int(255 * log2(relAbund) / log2(maxRelAbund[i]))) + "0000";  
                                        }else if (scaler == "linear") {
                                                scaleRelAbund[i][j] = toHex(int(255 * log2(relAbund) / log2(maxRelAbund[i]))) + "0000";  
                                        }else if (scaler == "linear") {
-                                               scaleRelAbund[i][j] = toHex(int(255 * relAbund / maxRelAbund[i])) + "0000";  
+                                               scaleRelAbund[i][j] = toHex(int(255 * relAbund / maxRelAbund[i])) + "0000";
                                        }else {  //if user enters invalid scaler option.
                                        }else {  //if user enters invalid scaler option.
+                        if (maxRelAbund[i] == 1) { maxRelAbund[i] += 0.001; }
                                                scaleRelAbund[i][j] = toHex(int(255 * log10(relAbund / log10(maxRelAbund[i]))))  + "0000"; 
                                        } 
                                }else { scaleRelAbund[i][j] = "FFFFFF";  }
                                                scaleRelAbund[i][j] = toHex(int(255 * log10(relAbund / log10(maxRelAbund[i]))))  + "0000"; 
                                        } 
                                }else { scaleRelAbund[i][j] = "FFFFFF";  }
@@ -455,11 +460,15 @@ string HeatMap::getPic(vector<SharedRAbundFloatVector*> lookup) {
                                
                                if (lookup[i]->getAbundance(j) != 0) { //don't want log value of 0.
                                        if (scaler == "log10") {
                                
                                if (lookup[i]->getAbundance(j) != 0) { //don't want log value of 0.
                                        if (scaler == "log10") {
+                        if (maxRelAbund[i] == 1) { maxRelAbund[i] -= 0.001; }
+                        if (relAbund == 1) { relAbund -= 0.001; }
                                                scaleRelAbund[i][j] = toHex(int(255 * log10(relAbund) / log10(maxRelAbund[i]))) + "0000";  
                                        }else if (scaler == "log2") {
                                                scaleRelAbund[i][j] = toHex(int(255 * log10(relAbund) / log10(maxRelAbund[i]))) + "0000";  
                                        }else if (scaler == "log2") {
+                        if (maxRelAbund[i] == 1) { maxRelAbund[i] -= 0.001; }
+                        if (relAbund == 1) { relAbund -= 0.001; }
                                                scaleRelAbund[i][j] = toHex(int(255 * log2(relAbund) / log2(maxRelAbund[i]))) + "0000";  
                                        }else if (scaler == "linear") {
                                                scaleRelAbund[i][j] = toHex(int(255 * log2(relAbund) / log2(maxRelAbund[i]))) + "0000";  
                                        }else if (scaler == "linear") {
-                                               scaleRelAbund[i][j] = toHex(int(255 * relAbund / maxRelAbund[i])) + "0000";  
+                                               scaleRelAbund[i][j] = toHex(int(255 * relAbund / maxRelAbund[i])) + "0000";
                                        }else {  //if user enters invalid scaler option.
                                                scaleRelAbund[i][j] = toHex(int(255 * log10(relAbund / log10(maxRelAbund[i]))))  + "0000"; 
                                        } 
                                        }else {  //if user enters invalid scaler option.
                                                scaleRelAbund[i][j] = toHex(int(255 * log10(relAbund / log10(maxRelAbund[i]))))  + "0000"; 
                                        } 
index a1eae4687c4d49b2ce8e0f90895a36a994aec689..6a4df6388683b1cda6aa5aa41d1ec3d2d672a15f 100644 (file)
@@ -575,7 +575,7 @@ int PcrSeqsCommand::createProcesses(string filename, string goodFileName, string
         
         
 
         
         
 
-        if (fileAligned) {
+        if (fileAligned && adjustNeeded) {
             //find pend - pend is the biggest ending value, but we must account for when we adjust the start.  That adjustment may make the "new" end larger then the largest end. So lets find out what that "new" end will be.
             ifstream inLocations;
             m->openInputFile(locationsFile, inLocations);
             //find pend - pend is the biggest ending value, but we must account for when we adjust the start.  That adjustment may make the "new" end larger then the largest end. So lets find out what that "new" end will be.
             ifstream inLocations;
             m->openInputFile(locationsFile, inLocations);
@@ -608,7 +608,7 @@ int PcrSeqsCommand::createProcesses(string filename, string goodFileName, string
             inLocations.close();
             
             adjustDots(goodFileName, locationsFile, pstart, pend);
             inLocations.close();
             
             adjustDots(goodFileName, locationsFile, pstart, pend);
-        }
+        }else { m->mothurRemove(locationsFile); }
         
         return num;
         
         
         return num;
         
index f26f08a1f541177ae607d60be6aec0964fb19241..2b8468bd2d096f71c6c33289d371829d0a69dcba 100644 (file)
 
 /***********************************************************************/
 
 
 /***********************************************************************/
 
-EstOutput RangeShannon::getValues(vector<SharedRAbundVector*> shared) {
+EstOutput RangeShannon::getValues(SAbundVector* rank){
        try {
         data.resize(3,0);
         
         double commSize = 1e20;
        try {
         data.resize(3,0);
         
         double commSize = 1e20;
+        double sampleSize = rank->getNumSeqs();
         
         
-        SAbundVector sabund1 = shared[0]->getSAbundVector();
-        SAbundVector sabund2 = shared[1]->getSAbundVector();
+        double aux = ceil(pow(sampleSize+1, (1/(double)3)));
+        double est0 = rank->get(1)+1;
+        if (aux > est0) { est0 = aux; } //est0 = max(rank->get(1)+1, aux)
+        
+        vector<double> ests;
+        double numr = 0.0;
+        for (int i = 1; i < rank->getNumBins()-1; i++) {
+            
+            if (m->control_pressed) { break; }
+            
+            int abund = rank->get(i);
+            
+            if (abund != 0) {
+            
+                int abundNext = rank->get(i+1);
+                if (abundNext == 0) {  numr = aux;  }
+                else {
+                    if (abundNext+1 > aux) { numr = abundNext+1; } //numr = max(abundNext+1, aux)
+                    else { numr = aux; }
+                }
+                double denr = aux;
+                if (abund > aux) { denr = abund; } //denr = max(abund, aux)
+                ests.push_back((abund+1)*numr/denr);
+             }
+        }
+        numr = aux;
         
         
-        double sampleSize = 0;
-        for (int i = 0; i < sabund1.getNumBins(); i++) {  sampleSize += (sabund1.get(i) * sabund2.get(i));  }
-        int aux = ceil(pow((sampleSize+1), 0.33333));
                
                if (isnan(data[0]) || isinf(data[0])) { data[0] = 0; }
                
                
                if (isnan(data[0]) || isinf(data[0])) { data[0] = 0; }
                
index 34b73f45f6b50b8e3fe4d8c6c4c48bd2a6fb8fea..2123e05928c0be0bef9db1f18dbcffae0d3940fe 100644 (file)
@@ -17,8 +17,8 @@ class RangeShannon : public Calculator  {
        
 public:
        RangeShannon() : Calculator("rangeshannon", 3, false) {};
        
 public:
        RangeShannon() : Calculator("rangeshannon", 3, false) {};
-       EstOutput getValues(SAbundVector*) {return data;};
-    EstOutput getValues(vector<SharedRAbundVector*>);
+       EstOutput getValues(SAbundVector*);
+       EstOutput getValues(vector<SharedRAbundVector*>) {return data;};
        string getCitation() { return "http://www.mothur.org/wiki/rangeshannon"; }
 };
 
        string getCitation() { return "http://www.mothur.org/wiki/rangeshannon"; }
 };
 
index 65ccda469c006305c7357310c7f344f0d2d4c326..2c4c2b10b4f1b1585fb7e471d53a81f16dff268f 100644 (file)
 //**********************************************************************************************************************
 vector<string> SRACommand::setParameters(){
        try {
 //**********************************************************************************************************************
 vector<string> SRACommand::setParameters(){
        try {
-        CommandParameter psff("sff", "InputTypes", "", "", "sffFastQFile", "sffFastQFile", "none","sra",false,false); parameters.push_back(psff);
-        CommandParameter pgroup("group", "InputTypes", "", "", "groupOligos", "none", "none","sra",false,false); parameters.push_back(pgroup);
-        CommandParameter poligos("oligos", "InputTypes", "", "", "groupOligos", "none", "none","sra",false,false); parameters.push_back(poligos);
-        CommandParameter pfile("file", "InputTypes", "", "", "sffFastQFile", "sffFastQFile", "none","sra",false,false); parameters.push_back(pfile);
-               CommandParameter pfastq("fastq", "InputTypes", "", "", "sffFastQFile", "sffFastQFile", "none","sra",false,false); parameters.push_back(pfastq);
+        CommandParameter psff("sff", "InputTypes", "", "", "sffFastQFile", "sffFastQFile", "none","xml",false,false); parameters.push_back(psff);
+        CommandParameter pgroup("group", "InputTypes", "", "", "groupOligos", "none", "none","",false,false); parameters.push_back(pgroup);
+        CommandParameter poligos("oligos", "InputTypes", "", "", "groupOligos", "none", "none","",false,false); parameters.push_back(poligos);
+        CommandParameter pfile("file", "InputTypes", "", "", "sffFastQFile", "sffFastQFile", "none","xml",false,false); parameters.push_back(pfile);
+               CommandParameter pfastq("fastq", "InputTypes", "", "", "sffFastQFile", "sffFastQFile", "none","xml",false,false); parameters.push_back(pfastq);
         //choose only one multiple options
         CommandParameter pplatform("platform", "Multiple", "454-???-???", "454", "", "", "","",false,false); parameters.push_back(pplatform);
         CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(ppdiffs);
         //choose only one multiple options
         CommandParameter pplatform("platform", "Multiple", "454-???-???", "454", "", "", "","",false,false); parameters.push_back(pplatform);
         CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(ppdiffs);
@@ -43,9 +43,13 @@ vector<string> SRACommand::setParameters(){
 string SRACommand::getHelpString(){
        try {
                string helpString = "";
 string SRACommand::getHelpString(){
        try {
                string helpString = "";
-               helpString += "The sra command creates a sequence read archive from sff or fastq files.\n";
-               helpString += "The sra command parameters are: sff, fastqfiles, oligos, platform....\n";
-               helpString += "The sffiles parameter is used to provide a file containing a \n";
+               helpString += "The sra command creates the necessary files for a NCBI submission. The xml file and individual sff or fastq files parsed from the original sff or fastq file.\n";
+               helpString += "The sra command parameters are: sff, fastq, file, oligos, pdiffs, bdiffs, ldiffs, sdiffs, tdiffs, group.\n";
+        helpString += "The sff parameter is used to provide the original sff file.\n";
+               helpString += "The fastq parameter is used to provide the original fastq file.\n";
+        helpString += "The oligos parameter is used to provide an oligos file to parse your sff or fastq file by.\n";
+        helpString += "The group parameter is used to provide the group file to parse your sff or fastq file by.\n";
+               helpString += "The file parameter is used to provide a file containing a list of individual fastq or sff files.\n";
         helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs + sdiffs + ldiffs.\n";
                helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n";
                helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";
         helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs + sdiffs + ldiffs.\n";
                helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n";
                helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";
@@ -66,7 +70,7 @@ string SRACommand::getOutputPattern(string type) {
     try {
         string pattern = "";
         
     try {
         string pattern = "";
         
-        if (type == "sra") {  pattern = "[filename],sra"; }
+        if (type == "xml") {  pattern = "[filename],xml"; }
         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
         
         return pattern;
         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
         
         return pattern;
@@ -82,7 +86,7 @@ SRACommand::SRACommand(){
                abort = true; calledHelp = true;
                setParameters();
         vector<string> tempOutNames;
                abort = true; calledHelp = true;
                setParameters();
         vector<string> tempOutNames;
-               outputTypes["sra"] = tempOutNames; 
+               outputTypes["xml"] = tempOutNames;
        }
        catch(exception& e) {
                m->errorOut(e, "SRACommand", "SRACommand");
        }
        catch(exception& e) {
                m->errorOut(e, "SRACommand", "SRACommand");
@@ -112,6 +116,8 @@ SRACommand::SRACommand(string option)  {
                                if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
                        }
                        
                                if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
                        }
                        
+            vector<string> tempOutNames;
+            outputTypes["xml"] = tempOutNames;
                        
                        //if the user changes the input directory command factory will send this info to us in the output parameter
                        string inputDir = validParameter.validFile(parameters, "inputdir", false);
                        
                        //if the user changes the input directory command factory will send this info to us in the output parameter
                        string inputDir = validParameter.validFile(parameters, "inputdir", false);
@@ -234,8 +240,6 @@ SRACommand::SRACommand(string option)  {
                        m->mothurConvert(temp, tdiffs);
                        
                        if(tdiffs == 0){        tdiffs = bdiffs + pdiffs + ldiffs + sdiffs;     }
                        m->mothurConvert(temp, tdiffs);
                        
                        if(tdiffs == 0){        tdiffs = bdiffs + pdiffs + ldiffs + sdiffs;     }
-            
-
                                
                }
                
                                
                }
                
@@ -253,12 +257,13 @@ int SRACommand::execute(){
         
         //parse files
         vector<string> filesBySample;
         
         //parse files
         vector<string> filesBySample;
+        isSFF = false;
         
         if (file != "")             {       readFile(filesBySample);        }
         else if (sfffile != "")     {       parseSffFile(filesBySample);    }
         else if (fastqfile != "")   {       parseFastqFile(filesBySample);  }
         
         
         if (file != "")             {       readFile(filesBySample);        }
         else if (sfffile != "")     {       parseSffFile(filesBySample);    }
         else if (fastqfile != "")   {       parseFastqFile(filesBySample);  }
         
-        
+        //create xml file
         
                
         //output files created by command
         
                
         //output files created by command
@@ -277,6 +282,27 @@ int SRACommand::execute(){
 //**********************************************************************************************************************
 int SRACommand::readFile(vector<string>& files){
        try {
 //**********************************************************************************************************************
 int SRACommand::readFile(vector<string>& files){
        try {
+        files.clear();
+        
+        ifstream in;
+        m->openInputFile(file, in);
+        
+        while(!in.eof()) {
+            
+            if (m->control_pressed) { break; }
+            
+            string filename;
+            in >> filename; m->gobble(in);
+            files.push_back(filename);
+        }
+        in.close();
+        
+        if (!m->control_pressed) {
+            if (files.size() > 0) {
+                int pos = files[0].find(".sff");
+                if (pos != string::npos) { isSFF = true; } //these files are sff files
+            }
+        }
         
         return 0;
     }
         
         return 0;
     }
@@ -288,6 +314,7 @@ int SRACommand::readFile(vector<string>& files){
 //**********************************************************************************************************************
 int SRACommand::parseSffFile(vector<string>& files){
        try {
 //**********************************************************************************************************************
 int SRACommand::parseSffFile(vector<string>& files){
        try {
+        isSFF = true;
         //run sffinfo to parse sff file into individual sampled sff files
         string commandString = "sff=" + sfffile;
         if (groupfile != "") { commandString += ", group=" + groupfile; }
         //run sffinfo to parse sff file into individual sampled sff files
         string commandString = "sff=" + sfffile;
         if (groupfile != "") { commandString += ", group=" + groupfile; }
index 7bf764b640534d11b5d40242d210bc77064a00de..3f7f1560530f8a8903db79b7a6f98b474ce50102 100644 (file)
@@ -34,7 +34,7 @@ public:
     void help() { m->mothurOut(getHelpString()); }
     
 private:
     void help() { m->mothurOut(getHelpString()); }
     
 private:
-    bool abort;
+    bool abort, isSFF;
     int tdiffs, bdiffs, pdiffs, sdiffs, ldiffs;
     string sfffile, fastqfile, platform, outputDir, groupfile, file, oligosfile;
     vector<string> outputNames;
     int tdiffs, bdiffs, pdiffs, sdiffs, ldiffs;
     string sfffile, fastqfile, platform, outputDir, groupfile, file, oligosfile;
     vector<string> outputNames;