git.donarmstrong.com Git - mothur.git/blobdiff - normalizesharedcommand.cpp
sffinfo bug with flow grams right index when clipQualRight=0
[mothur.git] / normalizesharedcommand.cpp
index a4bdd47099c350e77cfe238bcf34e254d1db9341..acd8208ea7ebb20a3d3810d254d0b24f5386d4d3 100644 (file)
 //**********************************************************************************************************************
 vector<string> NormalizeSharedCommand::setParameters(){        
        try {
-               CommandParameter pshared("shared", "InputTypes", "", "", "LRSS", "LRSS", "none",false,false); parameters.push_back(pshared);    
-               CommandParameter prelabund("relabund", "InputTypes", "", "", "LRSS", "LRSS", "none",false,false); parameters.push_back(prelabund);
-               CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
-               CommandParameter pmethod("method", "Multiple", "totalgroup-zscore", "totalgroup", "", "", "",false,false); parameters.push_back(pmethod);
-               CommandParameter pnorm("norm", "Number", "", "0", "", "", "",false,false); parameters.push_back(pnorm);
-               CommandParameter pmakerelabund("makerelabund", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pmakerelabund);
-               CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
-               CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
-               CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
+               CommandParameter pshared("shared", "InputTypes", "", "", "LRSS", "LRSS", "none","shared",false,false,true); parameters.push_back(pshared);      
+               CommandParameter prelabund("relabund", "InputTypes", "", "", "LRSS", "LRSS", "none","shared",false,false,true); parameters.push_back(prelabund);
+               CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups);
+               CommandParameter pmethod("method", "Multiple", "totalgroup-zscore", "totalgroup", "", "", "","",false,false,true); parameters.push_back(pmethod);
+               CommandParameter pnorm("norm", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pnorm);
+               CommandParameter pmakerelabund("makerelabund", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pmakerelabund);
+               CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
+               CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
+               CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
                
                vector<string> myArray;
                for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
@@ -53,8 +53,21 @@ string NormalizeSharedCommand::getHelpString(){
                exit(1);
        }
 }
-
-
+//**********************************************************************************************************************
+string NormalizeSharedCommand::getOutputPattern(string type) { // Returns the output filename pattern registered for output type 'type'.
+    try {
+        string pattern = ""; // stays empty when 'type' is not recognized
+        // Only "shared" has a pattern; normalize() fills [filename] and [distance] (the label) before calling getOutputFileName("shared", ...).
+        if (type == "shared") {  pattern = "[filename],[distance],norm.shared"; } 
+        else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  } // report the unknown type and raise the control_pressed flag
+        // NOTE(review): an unknown type still returns normally (with the empty pattern) -- callers presumably check m->control_pressed; confirm.
+        return pattern;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "NormalizeSharedCommand", "getOutputPattern"); // log the exception with class and method name
+        exit(1); // any exception here terminates the process
+    }
+}
 //**********************************************************************************************************************
 NormalizeSharedCommand::NormalizeSharedCommand(){      
        try {
@@ -166,7 +179,7 @@ NormalizeSharedCommand::NormalizeSharedCommand(string option) {
                        else { 
                                pickedGroups = true;
                                m->splitAtDash(groups, Groups);
-                               m->Groups = Groups;
+                               m->setGroups(Groups);
                        }
                        
                        method = validParameter.validFile(parameters, "method", false);                         if (method == "not found") { method = "totalgroup"; }
@@ -176,7 +189,7 @@ NormalizeSharedCommand::NormalizeSharedCommand(string option) {
                        if (temp == "not found") {  
                                norm = 0;  //once you have read, set norm to smallest group number
                        }else { 
-                               convert(temp, norm);
+                               m->mothurConvert(temp, norm);
                                if (norm < 0) { m->mothurOut("norm must be positive."); m->mothurOutEndLine(); abort=true; }
                        }
                        
@@ -197,10 +210,6 @@ int NormalizeSharedCommand::execute(){
        
                if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
                
-               string outputFileName = outputDir + m->getRootName(m->getSimpleName(inputfile)) + "norm.shared";
-               ofstream out;
-               m->openOutputFile(outputFileName, out);
-               
                input = new InputData(inputfile, format);
                
                //you are reading a sharedfile and you do not want to make relabund
@@ -210,18 +219,20 @@ int NormalizeSharedCommand::execute(){
                        
                        //look for groups whose numseqs is below norm and remove them, warning the user
                        if (norm != 0) { 
-                               m->Groups.clear();
+                               m->clearGroups();
+                               vector<string> mGroups;
                                vector<SharedRAbundVector*> temp;
                                for (int i = 0; i < lookup.size(); i++) {
                                        if (lookup[i]->getNumSeqs() < norm) { 
                                                m->mothurOut(lookup[i]->getGroup() + " contains " + toString(lookup[i]->getNumSeqs()) + ". Eliminating."); m->mothurOutEndLine();
                                                delete lookup[i];
                                        }else { 
-                                               m->Groups.push_back(lookup[i]->getGroup()); 
+                                               mGroups.push_back(lookup[i]->getGroup()); 
                                                temp.push_back(lookup[i]);
                                        }
                                } 
                                lookup = temp;
+                               m->setGroups(mGroups);
                        }
                        
                        //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
@@ -244,13 +255,12 @@ int NormalizeSharedCommand::execute(){
                        //as long as you are not at the end of the file or done with the lines you want
                        while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
                                
-                               if (m->control_pressed) { outputTypes.clear();  for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } m->Groups.clear();   out.close(); remove(outputFileName.c_str()); return 0; }
+                               if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);        } outputTypes.clear();  for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } m->clearGroups();   return 0; }
                                
                                if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){                  
                                        
                                        m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
-                                       if (!m->printedHeaders) { lookup[0]->printHeaders(out); }
-                                       normalize(lookup, out);
+                                       normalize(lookup);
                                        
                                        processedLabels.insert(lookup[0]->getLabel());
                                        userLabels.erase(lookup[0]->getLabel());
@@ -262,8 +272,8 @@ int NormalizeSharedCommand::execute(){
                                        for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }  
                                        lookup = input->getSharedRAbundVectors(lastLabel);
                                        m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
-                                       if (!m->printedHeaders) { lookup[0]->printHeaders(out); }
-                                       normalize(lookup, out);
+                                       
+                                       normalize(lookup);
                                        
                                        processedLabels.insert(lookup[0]->getLabel());
                                        userLabels.erase(lookup[0]->getLabel());
@@ -276,13 +286,13 @@ int NormalizeSharedCommand::execute(){
                                //prevent memory leak
                                for (int i = 0; i < lookup.size(); i++) {  delete lookup[i]; lookup[i] = NULL; }
                                
-                               if (m->control_pressed) {  outputTypes.clear(); m->Groups.clear();  out.close(); remove(outputFileName.c_str()); return 0; }
+                               if (m->control_pressed) {  for (int i = 0; i < outputNames.size(); i++) {       m->mothurRemove(outputNames[i]);        } outputTypes.clear(); m->clearGroups();  return 0; }
                                
                                //get next line to process
                                lookup = input->getSharedRAbundVectors();                               
                        }
                        
-                       if (m->control_pressed) { outputTypes.clear(); m->Groups.clear();  out.close(); remove(outputFileName.c_str());  return 0; }
+                       if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);        } outputTypes.clear(); m->clearGroups();   return 0; }
                        
                        //output error messages about any remaining user labels
                        set<string>::iterator it;
@@ -303,8 +313,8 @@ int NormalizeSharedCommand::execute(){
                                lookup = input->getSharedRAbundVectors(lastLabel);
                                
                                m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
-                               if (!m->printedHeaders) { lookup[0]->printHeaders(out); }
-                               normalize(lookup, out);
+                               
+                               normalize(lookup);
                                
                                for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
                        }
@@ -319,18 +329,20 @@ int NormalizeSharedCommand::execute(){
                        
                        //look for groups whose numseqs is below norm and remove them, warning the user
                        if (norm != 0) { 
-                               m->Groups.clear();
+                               m->clearGroups();
+                               vector<string> mGroups;
                                vector<SharedRAbundFloatVector*> temp;
                                for (int i = 0; i < lookupFloat.size(); i++) {
                                        if (lookupFloat[i]->getNumSeqs() < norm) { 
                                                m->mothurOut(lookupFloat[i]->getGroup() + " contains " + toString(lookupFloat[i]->getNumSeqs()) + ". Eliminating."); m->mothurOutEndLine();
                                                delete lookupFloat[i];
                                        }else { 
-                                               m->Groups.push_back(lookupFloat[i]->getGroup()); 
+                                               mGroups.push_back(lookupFloat[i]->getGroup()); 
                                                temp.push_back(lookupFloat[i]);
                                        }
                                } 
                                lookupFloat = temp;
+                               m->setGroups(mGroups);
                        }
                        
                        //set norm to smallest group number
@@ -348,14 +360,13 @@ int NormalizeSharedCommand::execute(){
                        //as long as you are not at the end of the file or done with the lines you want
                        while((lookupFloat[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
                                
-                               if (m->control_pressed) { outputTypes.clear();  for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  } m->Groups.clear();   out.close(); remove(outputFileName.c_str()); return 0; }
+                               if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);        } outputTypes.clear();  for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  } m->clearGroups();  return 0; }
                                
                                if(allLines == 1 || labels.count(lookupFloat[0]->getLabel()) == 1){                     
                                        
                                        m->mothurOut(lookupFloat[0]->getLabel()); m->mothurOutEndLine();
                                        
-                                       if (!m->printedHeaders) { lookupFloat[0]->printHeaders(out); }
-                                       normalize(lookupFloat, out);
+                                       normalize(lookupFloat);
                                        
                                        processedLabels.insert(lookupFloat[0]->getLabel());
                                        userLabels.erase(lookupFloat[0]->getLabel());
@@ -368,8 +379,8 @@ int NormalizeSharedCommand::execute(){
                                        lookupFloat = input->getSharedRAbundFloatVectors(lastLabel);
                                        
                                        m->mothurOut(lookupFloat[0]->getLabel()); m->mothurOutEndLine();
-                                       if (!m->printedHeaders) { lookupFloat[0]->printHeaders(out); }
-                                       normalize(lookupFloat, out);
+               
+                                       normalize(lookupFloat);
                                        
                                        processedLabels.insert(lookupFloat[0]->getLabel());
                                        userLabels.erase(lookupFloat[0]->getLabel());
@@ -382,13 +393,13 @@ int NormalizeSharedCommand::execute(){
                                //prevent memory leak
                                for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i]; lookupFloat[i] = NULL; }
                                
-                               if (m->control_pressed) {  outputTypes.clear(); m->Groups.clear();  out.close(); remove(outputFileName.c_str()); return 0; }
+                               if (m->control_pressed) {  for (int i = 0; i < outputNames.size(); i++) {       m->mothurRemove(outputNames[i]);        } outputTypes.clear(); m->clearGroups();   return 0; }
                                
                                //get next line to process
                                lookupFloat = input->getSharedRAbundFloatVectors();                             
                        }
                        
-                       if (m->control_pressed) { outputTypes.clear(); m->Groups.clear();  out.close(); remove(outputFileName.c_str());  return 0; }
+                       if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);        } outputTypes.clear(); m->clearGroups();   return 0; }
                        
                        //output error messages about any remaining user labels
                        set<string>::iterator it;
@@ -410,23 +421,22 @@ int NormalizeSharedCommand::execute(){
                                
                                m->mothurOut(lookupFloat[0]->getLabel()); m->mothurOutEndLine();
                                
-                               if (!m->printedHeaders) { lookupFloat[0]->printHeaders(out); }
-                               normalize(lookupFloat, out);
+                               normalize(lookupFloat);
                                
                                for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  }
                        }
                        
                }
                //reset groups parameter
-               m->Groups.clear();  
+               m->clearGroups();  
                delete input;
-               out.close();
                
-               if (m->control_pressed) { outputTypes.clear(); remove(outputFileName.c_str()); return 0;}
+               if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);        } outputTypes.clear(); return 0;}
                
                m->mothurOutEndLine();
                m->mothurOut("Output File Names: "); m->mothurOutEndLine();
-               m->mothurOut(outputFileName); m->mothurOutEndLine(); outputNames.push_back(outputFileName); outputTypes["shared"].push_back(outputFileName);
+               //m->mothurOut(outputFileName); m->mothurOutEndLine(); outputNames.push_back(outputFileName); outputTypes["shared"].push_back(outputFileName);
+               for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
                m->mothurOutEndLine();
                
                //set shared file as new current sharedfile
@@ -445,21 +455,37 @@ int NormalizeSharedCommand::execute(){
 }
 //**********************************************************************************************************************
 
-int NormalizeSharedCommand::normalize(vector<SharedRAbundVector*>& thisLookUp, ofstream& out){
+int NormalizeSharedCommand::normalize(vector<SharedRAbundVector*>& thisLookUp){
        try {
+               //save mothurOut's binLabels to restore for next label
+               vector<string> saveBinLabels = m->currentBinLabels;
+               
                if (pickedGroups) { eliminateZeroOTUS(thisLookUp); }
+               
+        map<string, string> variables; 
+        variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(inputfile));
+        variables["[distance]"] = thisLookUp[0]->getLabel();
+               string outputFileName = getOutputFileName("shared",variables);
+        
+               ofstream out;
+               m->openOutputFile(outputFileName, out);
+               outputNames.push_back(outputFileName); outputTypes["shared"].push_back(outputFileName);
                                
                if (method == "totalgroup") { 
+                       
+                       //save numSeqs since they will change as the data is normalized
+                       vector<int> sizes;
+                       for (int i = 0; i < thisLookUp.size(); i++) {  sizes.push_back(thisLookUp[i]->getNumSeqs()); }
                                        
                        for (int j = 0; j < thisLookUp[0]->getNumBins(); j++) {
                                                
                                        for (int i = 0; i < thisLookUp.size(); i++) {
                                                        
-                                               if (m->control_pressed) { return 0; }
+                                               if (m->control_pressed) { out.close(); return 0; }
                                                        
                                                int abund = thisLookUp[i]->getAbundance(j);
                                                        
-                                               float relabund = abund / (float) thisLookUp[i]->getNumSeqs();
+                                               float relabund = abund / (float) sizes[i];
                                                float newNorm = relabund * norm;
                                                
                                                //round to nearest int
@@ -473,7 +499,7 @@ int NormalizeSharedCommand::normalize(vector<SharedRAbundVector*>& thisLookUp, o
                        
                        for (int j = 0; j < thisLookUp[0]->getNumBins(); j++) {
                                
-                               if (m->control_pressed) { return 0; }
+                               if (m->control_pressed) { out.close(); return 0; }
                                
                                //calc mean
                                float mean = 0.0;
@@ -504,12 +530,18 @@ int NormalizeSharedCommand::normalize(vector<SharedRAbundVector*>& thisLookUp, o
                                
                                                
                eliminateZeroOTUS(thisLookUp);
+               
+               thisLookUp[0]->printHeaders(out); 
                 
                for (int i = 0; i < thisLookUp.size(); i++) {
                        out << thisLookUp[i]->getLabel() << '\t' << thisLookUp[i]->getGroup() << '\t';
                        thisLookUp[i]->print(out);
                }
                
+               out.close();
+               
+               m->currentBinLabels = saveBinLabels;
+               
                return 0;
        }
        catch(exception& e) {
@@ -519,21 +551,38 @@ int NormalizeSharedCommand::normalize(vector<SharedRAbundVector*>& thisLookUp, o
 }
 //**********************************************************************************************************************
 
-int NormalizeSharedCommand::normalize(vector<SharedRAbundFloatVector*>& thisLookUp, ofstream& out){
+int NormalizeSharedCommand::normalize(vector<SharedRAbundFloatVector*>& thisLookUp){
        try {
+               
+               //save mothurOut's binLabels to restore for next label
+               vector<string> saveBinLabels = m->currentBinLabels;
+               
+        map<string, string> variables; 
+        variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(inputfile));
+        variables["[distance]"] = thisLookUp[0]->getLabel();
+               string outputFileName = getOutputFileName("shared",variables);
+               ofstream out;
+               m->openOutputFile(outputFileName, out);
+               outputNames.push_back(outputFileName); outputTypes["shared"].push_back(outputFileName);
+               
+               
                if (pickedGroups) { eliminateZeroOTUS(thisLookUp); }
                
                if (method == "totalgroup") { 
                        
+                       //save numSeqs since they will change as the data is normalized
+                       vector<float> sizes;
+                       for (int i = 0; i < thisLookUp.size(); i++) {  sizes.push_back(thisLookUp[i]->getNumSeqs()); }
+                       
                        for (int j = 0; j < thisLookUp[0]->getNumBins(); j++) {
                                
                                for (int i = 0; i < thisLookUp.size(); i++) {
                                        
-                                       if (m->control_pressed) { return 0; }
+                                       if (m->control_pressed) { out.close(); return 0; }
                                        
                                        float abund = thisLookUp[i]->getAbundance(j);
                                        
-                                       float relabund = abund / (float) thisLookUp[i]->getNumSeqs();
+                                       float relabund = abund / (float) sizes[i];
                                        float newNorm = relabund * norm;
                                        
                                        thisLookUp[i]->set(j, newNorm, thisLookUp[i]->getGroup());
@@ -543,7 +592,7 @@ int NormalizeSharedCommand::normalize(vector<SharedRAbundFloatVector*>& thisLook
                }else if (method == "zscore") {
                        for (int j = 0; j < thisLookUp[0]->getNumBins(); j++) {
                                
-                               if (m->control_pressed) { return 0; }
+                               if (m->control_pressed) { out.close(); return 0; }
                                
                                //calc mean
                                float mean = 0.0;
@@ -571,11 +620,17 @@ int NormalizeSharedCommand::normalize(vector<SharedRAbundFloatVector*>& thisLook
                
                eliminateZeroOTUS(thisLookUp);
                
+               thisLookUp[0]->printHeaders(out); 
+               
                for (int i = 0; i < thisLookUp.size(); i++) {
                        out << thisLookUp[i]->getLabel() << '\t' << thisLookUp[i]->getGroup() << '\t';
                        thisLookUp[i]->print(out);
                }
                
+               out.close();
+               
+               m->currentBinLabels = saveBinLabels;
+               
                return 0;
        }
        catch(exception& e) {
@@ -596,6 +651,8 @@ int NormalizeSharedCommand::eliminateZeroOTUS(vector<SharedRAbundVector*>& thisl
                }
                
                //for each bin
+               vector<string> newBinLabels;
+               string snumBins = toString(thislookup[0]->getNumBins());
                for (int i = 0; i < thislookup[0]->getNumBins(); i++) {
                        if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) {  delete newLookup[j];  } return 0; }
                
@@ -610,12 +667,24 @@ int NormalizeSharedCommand::eliminateZeroOTUS(vector<SharedRAbundVector*>& thisl
                                for (int j = 0; j < thislookup.size(); j++) {
                                        newLookup[j]->push_back(thislookup[j]->getAbundance(i), thislookup[j]->getGroup());
                                }
+                               //if there is a bin label use it otherwise make one
+                               string binLabel = "Otu";
+                               string sbinNumber = toString(i+1);
+                               if (sbinNumber.length() < snumBins.length()) { 
+                                       int diff = snumBins.length() - sbinNumber.length();
+                                       for (int h = 0; h < diff; h++) { binLabel += "0"; }
+                               }
+                               binLabel += sbinNumber; 
+                               if (i < m->currentBinLabels.size()) {  binLabel = m->currentBinLabels[i]; }
+                               
+                               newBinLabels.push_back(binLabel);
                        }
                }
 
                for (int j = 0; j < thislookup.size(); j++) {  delete thislookup[j];  }
 
                thislookup = newLookup;
+               m->currentBinLabels = newBinLabels;
                
                return 0;
  
@@ -638,6 +707,8 @@ int NormalizeSharedCommand::eliminateZeroOTUS(vector<SharedRAbundFloatVector*>&
                }
                
                //for each bin
+               vector<string> newBinLabels;
+               string snumBins = toString(thislookup[0]->getNumBins());
                for (int i = 0; i < thislookup[0]->getNumBins(); i++) {
                        if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) {  delete newLookup[j];  } return 0; }
                        
@@ -652,12 +723,24 @@ int NormalizeSharedCommand::eliminateZeroOTUS(vector<SharedRAbundFloatVector*>&
                                for (int j = 0; j < thislookup.size(); j++) {
                                        newLookup[j]->push_back(thislookup[j]->getAbundance(i), thislookup[j]->getGroup());
                                }
+                               //if there is a bin label use it otherwise make one
+                               string binLabel = "Otu";
+                               string sbinNumber = toString(i+1);
+                               if (sbinNumber.length() < snumBins.length()) { 
+                                       int diff = snumBins.length() - sbinNumber.length();
+                                       for (int h = 0; h < diff; h++) { binLabel += "0"; }
+                               }
+                               binLabel += sbinNumber; 
+                               if (i < m->currentBinLabels.size()) {  binLabel = m->currentBinLabels[i]; }
+                               
+                               newBinLabels.push_back(binLabel);
                        }
                }
                
                for (int j = 0; j < thislookup.size(); j++) {  delete thislookup[j];  }
                
                thislookup = newLookup;
+               m->currentBinLabels = newBinLabels;
                
                return 0;