]> git.donarmstrong.com Git - mothur.git/blobdiff - indicatorcommand.cpp
added classify.shared command and random forest files. added count file to pcr.seqs...
[mothur.git] / indicatorcommand.cpp
index 905076b7dcd56b62102a3dbdbcef5de9024d0fef..dc9f121a0e83758d4ecf4977294e8221b3da0712 100644 (file)
@@ -42,7 +42,7 @@ string IndicatorCommand::getHelpString(){
                helpString += "The indicator command outputs a .indicator.summary file and a .indicator.tre if a tree is given. \n";
                helpString += "The new tree contains labels at each internal node.  The label is the node number so you can relate the tree to the summary file.\n";
                helpString += "The summary file lists the indicator value for each OTU for each node.\n";
-               helpString += "The indicator command parameters are tree, groups, shared, relabund, design and label. The tree parameter is required as well as either shared or relabund.\n";
+               helpString += "The indicator command parameters are tree, groups, shared, relabund, design and label. \n";
                helpString += "The design parameter allows you to relate the tree to the shared or relabund file, if your tree contains the grouping names, or if no tree is provided to group your groups into groupings.\n";                  
                helpString += "The groups parameter allows you to specify which of the groups in your shared or relabund you would like analyzed, or if you provide a design file the groups in your design file.  The groups may be entered separated by dashes.\n";
                helpString += "The label parameter indicates at what distance your tree relates to the shared or relabund.\n";
@@ -57,7 +57,27 @@ string IndicatorCommand::getHelpString(){
                exit(1);
        }
 }
-
+//**********************************************************************************************************************
+string IndicatorCommand::getOutputFileNameTag(string type, string inputName=""){       
+       try {
+        string outputFileName = "";
+               map<string, vector<string> >::iterator it;
+        
+        //is this a type this command creates
+        it = outputTypes.find(type);
+        if (it == outputTypes.end()) {  m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
+        else {
+            if (type == "tree")       {   outputFileName =  "indicator.tre";   }
+            else if (type == "summary")    {   outputFileName =  "indicator.summary";   }
+            else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
+        }
+        return outputFileName;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "IndicatorCommand", "getOutputFileNameTag");
+               exit(1);
+       }
+}
 //**********************************************************************************************************************
 IndicatorCommand::IndicatorCommand(){  
        try {
@@ -96,10 +116,9 @@ IndicatorCommand::IndicatorCommand(string option)  {
                        }
                        
                        m->runParse = true;
-                       m->Groups.clear();
-                       m->namesOfGroups.clear();
+                       m->clearGroups();
+                       m->clearAllGroups();
                        m->Treenames.clear();
-                       m->names.clear();
                        
                        vector<string> tempOutNames;
                        outputTypes["tree"] = tempOutNames;
@@ -169,17 +188,17 @@ IndicatorCommand::IndicatorCommand(string option)  {
                        groups = validParameter.validFile(parameters, "groups", false);                 
                        if (groups == "not found") { groups = "";  Groups.push_back("all"); }
                        else { m->splitAtDash(groups, Groups);  }                       
-                       m->Groups = Groups;
+                       m->setGroups(Groups);
                        
                        label = validParameter.validFile(parameters, "label", false);                   
                        if (label == "not found") { label = ""; m->mothurOut("You did not provide a label, I will use the first label in your inputfile."); m->mothurOutEndLine(); label=""; }  
                        
                        string temp = validParameter.validFile(parameters, "iters", false);             if (temp == "not found") { temp = "1000"; }
-                       convert(temp, iters); 
+                       m->mothurConvert(temp, iters); 
                        
                        temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
                        m->setProcessors(temp);
-                       convert(temp, processors); 
+                       m->mothurConvert(temp, processors); 
                        
                        if ((relabundfile == "") && (sharedfile == "")) { 
                                //is there are current file available for either of these?
@@ -236,12 +255,13 @@ int IndicatorCommand::execute(){
                        designMap->readDesignMap();
                        
                        //fill Groups - checks for "all" and for any typo groups
-                       SharedUtil* util = new SharedUtil();
-                       util->setGroups(Groups, designMap->namesOfGroups);
-                       delete util;
+                       SharedUtil util;
+                       vector<string> nameGroups = designMap->getNamesOfGroups();
+                       util.setGroups(Groups, nameGroups);
+                       designMap->setNamesOfGroups(nameGroups);
                        
-                       //loop through the Groups and fill Globaldata's Groups with the design file info
-                       m->Groups = designMap->getNamesSeqs(Groups);
+                       vector<string> namesSeqs = designMap->getNamesSeqs(Groups);
+                       m->setGroups(namesSeqs);
                }
        
                /***************************************************/
@@ -258,7 +278,7 @@ int IndicatorCommand::execute(){
                }
                
                //reset groups if needed
-               if (designfile != "") { m->Groups = Groups; }
+               if (designfile != "") { m->setGroups(Groups); }
                        
                /***************************************************/
                //    reading tree info                                                    //
@@ -267,30 +287,36 @@ int IndicatorCommand::execute(){
                        string groupfile = ""; 
                        m->setTreeFile(treefile);
                        Tree* tree = new Tree(treefile); delete tree;  //extracts names from tree to make faked out groupmap
-                       treeMap = new TreeMap();
+                       ct = new CountTable();
                        bool mismatch = false;
-                               
-                       for (int i = 0; i < m->Treenames.size(); i++) { 
-                               //sanity check - is this a group that is not in the sharedfile?
+            
+            set<string> nameMap;
+            map<string, string> groupMap;
+            set<string> gps;
+            for (int i = 0; i < m->Treenames.size(); i++) { 
+                nameMap.insert(m->Treenames[i]); 
+                //sanity check - is this a group that is not in the sharedfile?
                                if (designfile == "") {
-                                       if (!(m->inUsersGroups(m->Treenames[i], m->namesOfGroups))) {
+                    if (i == 0) { gps.insert("Group1"); }
+                                       if (!(m->inUsersGroups(m->Treenames[i], m->getAllGroups()))) {
                                                m->mothurOut("[ERROR]: " + m->Treenames[i] + " is not a group in your shared or relabund file."); m->mothurOutEndLine();
                                                mismatch = true;
                                        }
-                                       treeMap->addSeq(m->Treenames[i], "Group1")
+                                       groupMap[m->Treenames[i]] = "Group1"
                                }else{
                                        vector<string> myGroups; myGroups.push_back(m->Treenames[i]);
                                        vector<string> myNames = designMap->getNamesSeqs(myGroups);
                                        
                                        for(int k = 0; k < myNames.size(); k++) {
-                                               if (!(m->inUsersGroups(myNames[k], m->namesOfGroups))) {
+                                               if (!(m->inUsersGroups(myNames[k], m->getAllGroups()))) {
                                                        m->mothurOut("[ERROR]: " + myNames[k] + " is not a group in your shared or relabund file."); m->mothurOutEndLine();
                                                        mismatch = true;
                                                }
                                        }
-                                       treeMap->addSeq(m->Treenames[i], "Group1");
+                                       groupMap[m->Treenames[i]] = "Group1";
                                }
-                       }
+            }
+            ct->createTable(nameMap, groupMap, gps);
                        
                        if ((designfile != "") && (m->Treenames.size() != Groups.size())) { cout << Groups.size() << '\t' << m->Treenames.size() << endl; m->mothurOut("[ERROR]: You design file does not match your tree, aborting."); m->mothurOutEndLine(); mismatch = true; }
                                        
@@ -298,14 +324,14 @@ int IndicatorCommand::execute(){
                                if (designfile != "") { delete designMap; }
                                if (sharedfile != "") {  for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } }
                                else { for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  } }
-                               delete treeMap;
+                               delete ct;
                                return 0;
                        }
                 
                        read = new ReadNewickTree(treefile);
-                       int readOk = read->read(treeMap); 
+                       int readOk = read->read(ct); 
                        
-                       if (readOk != 0) { m->mothurOut("Read Terminated."); m->mothurOutEndLine(); delete treeMap; delete read; return 0; }
+                       if (readOk != 0) { m->mothurOut("Read Terminated."); m->mothurOutEndLine(); delete ct; delete read; return 0; }
                        
                        vector<Tree*> T = read->getTrees();
                        
@@ -315,35 +341,34 @@ int IndicatorCommand::execute(){
                                if (designfile != "") { delete designMap; }
                                if (sharedfile != "") {  for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } }
                                else { for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  } }
-                               for (int i = 0; i < T.size(); i++) {  delete T[i];  }  delete treeMap; return 0; 
+                               for (int i = 0; i < T.size(); i++) {  delete T[i];  }  delete ct; return 0; 
                        }
-                               
+            
                        T[0]->assembleTree();
                                        
                        /***************************************************/
                        //    create ouptut tree - respecting pickedGroups //
                        /***************************************************/
-                       Tree* outputTree = new Tree(m->Groups.size(), treeMap); 
+                       Tree* outputTree = new Tree(m->getNumGroups(), ct); 
                        
-                       outputTree->getSubTree(T[0], m->Groups);
+                       outputTree->getSubTree(T[0], m->getGroups());
                        outputTree->assembleTree();
                                
                        //no longer need original tree, we have output tree to use and label
                        for (int i = 0; i < T.size(); i++) {  delete T[i];  } 
                        
-                                       
                        if (m->control_pressed) { 
                                if (designfile != "") { delete designMap; }
                                if (sharedfile != "") {  for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } }
                                else { for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  } }
-                               delete outputTree; delete treeMap;  return 0; 
+                               delete outputTree; delete ct;  return 0; 
                        }
                        
                        /***************************************************/
                        //              get indicator species values                       //
                        /***************************************************/
                        GetIndicatorSpecies(outputTree);
-                       delete outputTree; delete treeMap;
+                       delete outputTree; delete ct;
                        
                }else { //run with design file only
                        //get indicator species
@@ -385,7 +410,7 @@ int IndicatorCommand::GetIndicatorSpecies(){
        try {
                string thisOutputDir = outputDir;
                if (outputDir == "") {  thisOutputDir += m->hasPath(inputFileName);  }
-               string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(inputFileName)) + "indicator.summary";
+               string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(inputFileName)) + getOutputFileNameTag("summary");
                outputNames.push_back(outputFileName); outputTypes["summary"].push_back(outputFileName);
                
                ofstream out;
@@ -413,11 +438,11 @@ int IndicatorCommand::GetIndicatorSpecies(){
                        vector<SharedRAbundVector*> subset;
                        
                        //for each grouping
-                       for (int i = 0; i < designMap->namesOfGroups.size(); i++) {
+                       for (int i = 0; i < (designMap->getNamesOfGroups()).size(); i++) {
                                
                                for (int k = 0; k < lookup.size(); k++) {
                                        //are you from this grouping?
-                                       if (designMap->getGroup(lookup[k]->getGroup()) == designMap->namesOfGroups[i]) {
+                                       if (designMap->getGroup(lookup[k]->getGroup()) == (designMap->getNamesOfGroups())[i]) {
                                                subset.push_back(lookup[k]);
                                                groupsAlreadyAdded.insert(lookup[k]->getGroup());
                                        }
@@ -437,10 +462,10 @@ int IndicatorCommand::GetIndicatorSpecies(){
                        vector<SharedRAbundFloatVector*> subset;
                        
                        //for each grouping
-                       for (int i = 0; i < designMap->namesOfGroups.size(); i++) {
+                       for (int i = 0; i < (designMap->getNamesOfGroups()).size(); i++) {
                                for (int k = 0; k < lookupFloat.size(); k++) {
                                        //are you from this grouping?
-                                       if (designMap->getGroup(lookupFloat[k]->getGroup()) == designMap->namesOfGroups[i]) {
+                                       if (designMap->getGroup(lookupFloat[k]->getGroup()) == (designMap->getNamesOfGroups())[i]) {
                                                subset.push_back(lookupFloat[k]);
                                                groupsAlreadyAdded.insert(lookupFloat[k]->getGroup());
                                        }
@@ -467,17 +492,17 @@ int IndicatorCommand::GetIndicatorSpecies(){
                                
                        if (m->control_pressed) { out.close(); return 0; }
                        
-                       out << (j+1) << '\t' << indicatorValues[j] << '\t'; 
+                       out << m->currentBinLabels[j] << '\t' << indicatorValues[j] << '\t'; 
                        
                        if (pValues[j] > (1/(float)iters)) { out << pValues[j] << endl; } 
                        else { out << "<" << (1/(float)iters) << endl; }
                        
                        if (pValues[j] <= 0.05) {
-                               cout << "OTU" << j+1 << '\t' << indicatorValues[j]  << '\t';
+                               cout << m->currentBinLabels[j] << '\t' << indicatorValues[j]  << '\t';
                                string pValueString = "<" + toString((1/(float)iters)); 
                                if (pValues[j] > (1/(float)iters)) { pValueString = toString(pValues[j]); cout << pValues[j];} 
                                else { cout << "<" << (1/(float)iters); }
-                               m->mothurOutJustToLog("OTU" + toString(j+1) + "\t" + toString(indicatorValues[j]) + "\t" + pValueString); 
+                               m->mothurOutJustToLog(m->currentBinLabels[j] + "\t" + toString(indicatorValues[j]) + "\t" + pValueString); 
                                m->mothurOutEndLine(); 
                        }
                }
@@ -500,7 +525,7 @@ int IndicatorCommand::GetIndicatorSpecies(Tree*& T){
                
                string thisOutputDir = outputDir;
                if (outputDir == "") {  thisOutputDir += m->hasPath(inputFileName);  }
-               string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(inputFileName)) + "indicator.summary";
+               string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(inputFileName)) + getOutputFileNameTag("summary");
                outputNames.push_back(outputFileName); outputTypes["summary"].push_back(outputFileName);
                
                ofstream out;
@@ -513,14 +538,14 @@ int IndicatorCommand::GetIndicatorSpecies(Tree*& T){
                
                //print headings
                out << "TreeNode\t";
-               for (int i = 0; i < numBins; i++) { out << "OTU" << (i+1) << "_IndValue" << '\t' << "pValue" << '\t'; }
+               for (int i = 0; i < numBins; i++) { out << m->currentBinLabels[i] << "_IndValue" << '\t' << "pValue" << '\t'; }
                out << endl;
                
                m->mothurOutEndLine(); m->mothurOut("Node\tSpecies\tIndicatorValue\tpValue\n");
                
                string treeOutputDir = outputDir;
                if (outputDir == "") {  treeOutputDir += m->hasPath(treefile);  }
-               string outputTreeFileName = treeOutputDir + m->getRootName(m->getSimpleName(treefile)) + "indicator.tre";
+               string outputTreeFileName = treeOutputDir + m->getRootName(m->getSimpleName(treefile)) + getOutputFileNameTag("tree");
                
                
                //create a map from tree node index to names of descendants, save time later to know which sharedRabund you need
@@ -670,11 +695,11 @@ int IndicatorCommand::GetIndicatorSpecies(Tree*& T){
                                }
                                
                                if (pValues[j] <= 0.05) {
-                                       cout << i+1 << "\tOTU" << j+1 << '\t' << indicatorValues[j]  << '\t';
+                                       cout << i+1 << '\t' << m->currentBinLabels[j] << '\t' << indicatorValues[j]  << '\t';
                                        string pValueString = "<" + toString((1/(float)iters)); 
                                        if (pValues[j] > (1/(float)iters)) { pValueString = toString(pValues[j]); cout << pValues[j];} 
                                        else { cout << "<" << (1/(float)iters); }
-                                       m->mothurOutJustToLog(toString(i) + "\tOTU" + toString(j+1) + "\t" + toString(indicatorValues[j]) + "\t" + pValueString); 
+                                       m->mothurOutJustToLog(toString(i) + "\t" + m->currentBinLabels[j] + "\t" + toString(indicatorValues[j]) + "\t" + pValueString); 
                                        m->mothurOutEndLine(); 
                                }
                        }
@@ -1117,7 +1142,7 @@ vector<float> IndicatorCommand::getPValues(vector< vector<SharedRAbundFloatVecto
        try {
                vector<float> pvalues;
                
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                if(processors == 1){
                        pvalues = driver(groupings, groupingsMap, num, indicatorValues, iters);
                        for (int i = 0; i < pvalues.size(); i++) { pvalues[i] /= (double)iters; }
@@ -1231,7 +1256,7 @@ vector<float> IndicatorCommand::getPValues(vector< vector<SharedRAbundVector*> >
        try {
                vector<float> pvalues;
                
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                if(processors == 1){
                        pvalues = driver(groupings, groupingsMap, num, indicatorValues, iters);
                        for (int i = 0; i < pvalues.size(); i++) { pvalues[i] /= (double)iters; }