git.donarmstrong.com Git - mothur.git/blobdiff - getoturepcommand.cpp
added count file to cluster.classic and cluster.split. modified splitting classes...
[mothur.git] / getoturepcommand.cpp
index 06a633e446d907cb7e6a5a5caa615e901a8c37c7..4967f245fb11c2ff37195a83baf39d29667fc582 100644 (file)
@@ -40,7 +40,7 @@ inline bool compareGroup(repStruct left, repStruct right){
 vector<string> GetOTURepCommand::setParameters(){      
        try {
                CommandParameter plist("list", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(plist);
-               CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
+               CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pfasta);
                CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup);
                CommandParameter pphylip("phylip", "InputTypes", "", "", "PhylipColumn", "PhylipColumn", "none",false,false); parameters.push_back(pphylip);
                CommandParameter pname("name", "InputTypes", "", "", "none", "none", "ColumnName",false,false); parameters.push_back(pname);
@@ -68,7 +68,7 @@ vector<string> GetOTURepCommand::setParameters(){
 string GetOTURepCommand::getHelpString(){      
        try {
                string helpString = "";
-               helpString += "The get.oturep command parameters are phylip, column, list, fasta, name, group, large, weighted, cutoff, precision, groups, sorted and label.  The fasta and list parameters are required, as well as phylip or column and name, unless you have valid current files.\n";
+               helpString += "The get.oturep command parameters are phylip, column, list, fasta, name, group, large, weighted, cutoff, precision, groups, sorted and label.  The list parameter is required, as well as phylip or column and name, unless you have valid current files.\n";
                helpString += "The label parameter allows you to select what distance levels you would like a output files created for, and is separated by dashes.\n";
                helpString += "The phylip or column parameter is required, but only one may be used.  If you use a column file the name filename is required. \n";
                helpString += "If you do not provide a cutoff value 10.00 is assumed. If you do not provide a precision value then 100 is assumed.\n";
@@ -95,6 +95,27 @@ string GetOTURepCommand::getHelpString(){
        }
 }
 //**********************************************************************************************************************
+string GetOTURepCommand::getOutputFileNameTag(string type, string inputName=""){       // maps an output type to its file-name tag; returns "" when no tag applies
+       try {
+        string outputFileName = ""; // stays empty on every error path below
+               map<string, vector<string> >::iterator it;
+        // inputName is not referenced anywhere in this function body
+        // is this a type this command creates, i.e. a key of outputTypes?
+        it = outputTypes.find(type);
+        if (it == outputTypes.end()) {  m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); } // logged only; control_pressed is left untouched on this path
+        else {
+            if (type == "fasta")            {   outputFileName =  "rep.fasta";   } // tag has no leading '.'; the call sites visible in this diff prepend it
+            else if (type == "name")        {   outputFileName =  "rep.names";   }
+            else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  } // type is a key of outputTypes but has no tag here: log and set control_pressed
+        }
+        return outputFileName;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "GetOTURepCommand", "getOutputFileNameTag"); // report the caught exception with class and method name
+               exit(1); // then terminate the process with status 1
+       }
+}
+//**********************************************************************************************************************
 GetOTURepCommand::GetOTURepCommand(){  
        try {
                abort = true; calledHelp = true; 
@@ -117,6 +138,7 @@ GetOTURepCommand::GetOTURepCommand(string option)  {
                //allow user to run help
                if (option == "help") { 
                        help(); abort = true; calledHelp = true;
+               }else if(option == "citation") { citation(); abort = true; calledHelp = true;
                } else {
                        vector<string> myArray = setParameters();
                        
@@ -196,12 +218,9 @@ GetOTURepCommand::GetOTURepCommand(string option)  {
                        
                        //check for required parameters
                        fastafile = validParameter.validFile(parameters, "fasta", true);
-                       if (fastafile == "not found") {                                 
-                               fastafile = m->getFastaFile(); 
-                               if (fastafile != "") { m->mothurOut("Using " + fastafile + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
-                               else {  m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
-                       }
+                       if (fastafile == "not found") { fastafile = ""; }
                        else if (fastafile == "not open") { abort = true; }     
+                       else { m->setFastaFile(fastafile); }
                
                        listfile = validParameter.validFile(parameters, "list", true);
                        if (listfile == "not found") {                  
@@ -210,28 +229,30 @@ GetOTURepCommand::GetOTURepCommand(string option)  {
                                else {  m->mothurOut("You have no current list file and the list parameter is required."); m->mothurOutEndLine(); abort = true; }
                        }
                        else if (listfile == "not open") { abort = true; }      
+                       else { m->setListFile(listfile); }
                        
                        phylipfile = validParameter.validFile(parameters, "phylip", true);
                        if (phylipfile == "not found") { phylipfile = "";  }
                        else if (phylipfile == "not open") { abort = true; }    
-                       else { distFile = phylipfile; format = "phylip";   }
+                       else { distFile = phylipfile; format = "phylip"; m->setPhylipFile(phylipfile);   }
                        
                        columnfile = validParameter.validFile(parameters, "column", true);
                        if (columnfile == "not found") { columnfile = ""; }
                        else if (columnfile == "not open") { abort = true; }    
-                       else { distFile = columnfile; format = "column";   }
+                       else { distFile = columnfile; format = "column";  m->setColumnFile(columnfile); }
                        
                        namefile = validParameter.validFile(parameters, "name", true);
                        if (namefile == "not open") { abort = true; }   
                        else if (namefile == "not found") { namefile = ""; }
+                       else { m->setNameFile(namefile); }
                        
                        if ((phylipfile == "") && (columnfile == "")) { //is there a current file available for either of these?
                                //give priority to column, then phylip
                                columnfile = m->getColumnFile(); 
-                               if (columnfile != "") {  m->mothurOut("Using " + columnfile + " as input file for the column parameter."); m->mothurOutEndLine(); }
+                               if (columnfile != "") {  distFile = columnfile; format = "column"; m->mothurOut("Using " + columnfile + " as input file for the column parameter."); m->mothurOutEndLine(); }
                                else { 
                                        phylipfile = m->getPhylipFile(); 
-                                       if (phylipfile != "") {  m->mothurOut("Using " + phylipfile + " as input file for the phylip parameter."); m->mothurOutEndLine(); }
+                                       if (phylipfile != "") {  distFile = phylipfile; format = "phylip"; m->mothurOut("Using " + phylipfile + " as input file for the phylip parameter."); m->mothurOutEndLine(); }
                                        else { 
                                                m->mothurOut("No valid current files. You must provide a phylip or column file before you can use the get.oturep command."); m->mothurOutEndLine(); 
                                                abort = true;
@@ -262,7 +283,8 @@ GetOTURepCommand::GetOTURepCommand(string option)  {
                        groupfile = validParameter.validFile(parameters, "group", true);
                        if (groupfile == "not open") { groupfile = ""; abort = true; }
                        else if (groupfile == "not found") { groupfile = ""; }
-                                               
+                       else { m->setGroupFile(groupfile); }
+                       
                        sorted = validParameter.validFile(parameters, "sorted", false);         if (sorted == "not found"){     sorted = "";    }
                        if (sorted == "none") { sorted=""; }
                        if ((sorted != "") && (sorted != "name") && (sorted != "bin") && (sorted != "size") && (sorted != "group")) {
@@ -285,7 +307,7 @@ GetOTURepCommand::GetOTURepCommand(string option)  {
                                        m->splitAtDash(groups, Groups);
                                }
                        }
-                       m->Groups = Groups;
+                       m->setGroups(Groups);
                        
                        string temp = validParameter.validFile(parameters, "large", false);             if (temp == "not found") {      temp = "F";     }
                        large = m->isTrue(temp);
@@ -296,10 +318,10 @@ GetOTURepCommand::GetOTURepCommand(string option)  {
                        if ((weighted) && (namefile == "")) { m->mothurOut("You cannot set weighted to true unless you provide a namesfile."); m->mothurOutEndLine(); abort = true; }
                        
                        temp = validParameter.validFile(parameters, "precision", false);                        if (temp == "not found") { temp = "100"; }
-                       convert(temp, precision); 
+                       m->mothurConvert(temp, precision); 
                        
                        temp = validParameter.validFile(parameters, "cutoff", false);                   if (temp == "not found") { temp = "10.0"; }
-                       convert(temp, cutoff); 
+                       m->mothurConvert(temp, cutoff); 
                        cutoff += (5 / (precision * 10.0));
                }
        }
@@ -337,16 +359,19 @@ int GetOTURepCommand::execute(){
 
                        list = readMatrix->getListVector();
 
-                       SparseMatrix* matrix = readMatrix->getMatrix();
+                       SparseDistanceMatrix* matrix = readMatrix->getDMatrix();
                        
                        // Create a data structure to quickly access the distance information.
                        // It consists of a vector of distance maps, where each map contains
                        // all distances of a certain sequence. Vector and maps are accessed
                        // via the index of a sequence in the distance matrix
                        seqVec = vector<SeqMap>(list->size()); 
-                       for (MatData currentCell = matrix->begin(); currentCell != matrix->end(); currentCell++) {
-                               if (m->control_pressed) { delete readMatrix; return 0; }
-                               seqVec[currentCell->row][currentCell->column] = currentCell->dist;
+            for (int i = 0; i < matrix->seqVec.size(); i++) {
+                for (int j = 0; j < matrix->seqVec[i].size(); j++) {
+                    if (m->control_pressed) { delete readMatrix; return 0; }
+                    //already added everyone else in row
+                    if (i < matrix->seqVec[i][j].index) {  seqVec[i][matrix->seqVec[i][j].index] = matrix->seqVec[i][j].dist;  }
+                }
                        }
                        //add dummy map for unweighted calc
                        SeqMap dummy;
@@ -389,7 +414,7 @@ int GetOTURepCommand::execute(){
                        //openfile for getMap to use
                        m->openInputFile(distFile, inRow);
                        
-                       if (m->control_pressed) { inRow.close(); remove(distFile.c_str()); return 0; }
+                       if (m->control_pressed) { inRow.close(); m->mothurRemove(distFile); return 0; }
                }
                
                
@@ -412,7 +437,7 @@ int GetOTURepCommand::execute(){
                
                                
                if (m->control_pressed) { 
-                       if (large) {  inRow.close(); remove(distFile.c_str());  }
+                       if (large) {  inRow.close(); m->mothurRemove(distFile);  }
                        return 0; 
                }
                
@@ -424,7 +449,9 @@ int GetOTURepCommand::execute(){
                        
                        if (Groups.size() != 0) {
                                SharedUtil* util = new SharedUtil();
-                               util->setGroups(Groups, groupMap->namesOfGroups, "getoturep");
+                               vector<string> gNamesOfGroups = groupMap->getNamesOfGroups();
+                               util->setGroups(Groups, gNamesOfGroups, "getoturep");
+                               groupMap->setNamesOfGroups(gNamesOfGroups);
                                delete util;
                        }
                }
@@ -441,7 +468,7 @@ int GetOTURepCommand::execute(){
                set<string> userLabels = labels;
                
                if (m->control_pressed) { 
-                       if (large) {  inRow.close(); remove(distFile.c_str());  }
+                       if (large) {  inRow.close(); m->mothurRemove(distFile);  }
                        delete input; delete list; return 0; 
                }
                
@@ -455,8 +482,8 @@ int GetOTURepCommand::execute(){
                                        if (error == 1) { return 0; } //there is an error in the input files, abort command
                                        
                                        if (m->control_pressed) { 
-                                               if (large) {  inRow.close(); remove(distFile.c_str());  }
-                                               for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str());  } outputTypes.clear();
+                                               if (large) {  inRow.close(); m->mothurRemove(distFile);  }
+                                               for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]);  } outputTypes.clear();
                                                delete input; delete list; return 0; 
                                        }
                                        
@@ -474,8 +501,8 @@ int GetOTURepCommand::execute(){
                                        if (error == 1) { return 0; } //there is an error in the input files, abort command
                                        
                                        if (m->control_pressed) { 
-                                               if (large) {  inRow.close(); remove(distFile.c_str());  }
-                                               for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str());  } outputTypes.clear();
+                                               if (large) {  inRow.close(); m->mothurRemove(distFile);  }
+                                               for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]);  } outputTypes.clear();
                                                delete input; delete list; return 0; 
                                        }
                                        
@@ -514,8 +541,8 @@ int GetOTURepCommand::execute(){
                        if (error == 1) { return 0; } //there is an error in the input files, abort command
                        
                        if (m->control_pressed) { 
-                                       if (large) {  inRow.close(); remove(distFile.c_str());  }
-                                       for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str());  } outputTypes.clear();
+                                       if (large) {  inRow.close(); m->mothurRemove(distFile);  }
+                                       for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]);  } outputTypes.clear();
                                        delete input; delete list; return 0; 
                        }
                }
@@ -523,28 +550,37 @@ int GetOTURepCommand::execute(){
                //close and remove formatted matrix file
                if (large) {
                        inRow.close();
-                       remove(distFile.c_str());
+                       m->mothurRemove(distFile);
                }
                
                delete input;  
                
                if (!weighted) { nameFileMap.clear(); }
                
-               //read fastafile
-               fasta = new FastaMap();
-               fasta->readFastaFile(fastafile);
-               
-               //if user gave a namesfile then use it
-               if (namefile != "") {   readNamesFile();        }
-               
-               //output create and output the .rep.fasta files
-               map<string, string>::iterator itNameFile;
-               for (itNameFile = outputNameFiles.begin(); itNameFile != outputNameFiles.end(); itNameFile++) {
-                       processNames(itNameFile->first, itNameFile->second);
+                               
+               if (fastafile != "") {
+                       //read fastafile
+                       fasta = new FastaMap();
+                       fasta->readFastaFile(fastafile);
+                       
+                       //if user gave a namesfile then use it
+                       if (namefile != "") {   readNamesFile();        }
+                       
+                       //create and output the .rep.fasta files
+                       map<string, string>::iterator itNameFile;
+                       for (itNameFile = outputNameFiles.begin(); itNameFile != outputNameFiles.end(); itNameFile++) {
+                               processFastaNames(itNameFile->first, itNameFile->second);
+                       }
+               }else {
+                       //no fasta file given: just rewrite each .rep.names file via processNames
+                       map<string, string>::iterator itNameFile;
+                       for (itNameFile = outputNameFiles.begin(); itNameFile != outputNameFiles.end(); itNameFile++) {
+                               processNames(itNameFile->first, itNameFile->second);
+                       }
                }
                
-               delete fasta;
-               if (groupfile != "") { delete groupMap;  }
+                               
+               if (groupfile != "") { delete groupMap; }
                
                if (m->control_pressed) {  return 0; }
                
@@ -759,7 +795,7 @@ int GetOTURepCommand::process(ListVector* processList) {
                map<string, ofstream*> filehandles;
                
                if (Groups.size() == 0) { //you don't want to use groups
-                       outputNamesFile  = outputDir + m->getRootName(m->getSimpleName(listfile)) + processList->getLabel() + ".rep.names";
+                       outputNamesFile  = outputDir + m->getRootName(m->getSimpleName(listfile)) + processList->getLabel() + "." + getOutputFileNameTag("name");
                        m->openOutputFile(outputNamesFile, newNamesOutput);
                        outputNames.push_back(outputNamesFile); outputTypes["name"].push_back(outputNamesFile); 
                        outputNameFiles[outputNamesFile] = processList->getLabel();
@@ -768,7 +804,7 @@ int GetOTURepCommand::process(ListVector* processList) {
                        for (int i=0; i<Groups.size(); i++) {
                                temp = new ofstream;
                                filehandles[Groups[i]] = temp;
-                               outputNamesFile = outputDir + m->getRootName(m->getSimpleName(listfile)) + processList->getLabel() + "." + Groups[i] + ".rep.names";
+                               outputNamesFile = outputDir + m->getRootName(m->getSimpleName(listfile)) + processList->getLabel() + "." + Groups[i] + "." + getOutputFileNameTag("name");
                                
                                m->openOutputFile(outputNamesFile, *(temp));
                                outputNames.push_back(outputNamesFile); outputTypes["name"].push_back(outputNamesFile);
@@ -851,12 +887,12 @@ int GetOTURepCommand::process(ListVector* processList) {
        }
 }
 //**********************************************************************************************************************
-int GetOTURepCommand::processNames(string filename, string label) {
+int GetOTURepCommand::processFastaNames(string filename, string label) {
        try{
 
                //create output file
                if (outputDir == "") { outputDir += m->hasPath(listfile); }
-               string outputFileName = outputDir + m->getRootName(m->getSimpleName(listfile)) + label + ".rep.fasta";
+               string outputFileName = outputDir + m->getRootName(m->getSimpleName(listfile)) + label + "." + getOutputFileNameTag("fasta");
                m->openOutputFile(outputFileName, out);
                vector<repStruct> reps;
                outputNames.push_back(outputFileName); outputTypes["fasta"].push_back(outputFileName);
@@ -949,12 +985,46 @@ int GetOTURepCommand::processNames(string filename, string label) {
                out.close();
                out2.close();
                
-               remove(filename.c_str());
+               m->mothurRemove(filename);
                rename(tempNameFile.c_str(), filename.c_str());
                
                return 0;
 
        }
+       catch(exception& e) {
+               m->errorOut(e, "GetOTURepCommand", "processFastaNames");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+int GetOTURepCommand::processNames(string filename, string label) {
+       try{
+               
+               //create output file
+               if (outputDir == "") { outputDir += m->hasPath(listfile); }
+               
+               ofstream out2;
+               string tempNameFile = filename + ".temp";
+               m->openOutputFile(tempNameFile, out2);
+               
+               ifstream in;
+               m->openInputFile(filename, in);
+               
+               int i = 0;
+               string rep, binnames;
+               while (!in.eof()) {
+                       if (m->control_pressed) { break; }
+                       in >> i >> rep >> binnames; m->gobble(in);
+                       out2 << rep << '\t' << binnames << endl;
+               }
+               in.close();
+               out2.close();
+               
+               m->mothurRemove(filename);
+               rename(tempNameFile.c_str(), filename.c_str());
+               
+               return 0;
+       }
        catch(exception& e) {
                m->errorOut(e, "GetOTURepCommand", "processNames");
                exit(1);