git.donarmstrong.com Git - mothur.git/blobdiff - readdistcommand.cpp
added MPI code, broke up chimera.seqs into 5 separated commands, added parse.sff...
[mothur.git] / readdistcommand.cpp
index 579f848bdb75b10b1350d1eb61b589de8f543ee0..bcecb7844e8a0348180884fbdb54993c286c53ac 100644 (file)
@@ -12,7 +12,7 @@
 #include "readcolumn.h"
 #include "readmatrix.hpp"
 
-ReadDistCommand::ReadDistCommand(string option){
+ReadDistCommand::ReadDistCommand(string option) {
        try {
                globaldata = GlobalData::getInstance();
                abort = false;
@@ -22,21 +22,63 @@ ReadDistCommand::ReadDistCommand(string option){
                
                else {
                        //valid paramters for this command
-                       string Array[] =  {"phylip", "column", "name", "cutoff", "precision", "group"};
+                       string Array[] =  {"phylip", "column", "name", "cutoff", "precision", "group","outputdir","inputdir","sim"};
                        vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
                        
                        OptionParser parser(option);
                        map<string, string> parameters = parser.getParameters();
                        
                        ValidParameters validParameter;
+                       map<string,string>::iterator it;
                
                        //check to make sure all parameters are valid for command
-                       for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
+                       for (it = parameters.begin(); it != parameters.end(); it++) { 
                                if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
                        }
                        
                        globaldata->newRead();
                        
+                       //if the user changes the input directory, the command factory will send this info to us in the inputdir parameter
+                       string inputDir = validParameter.validFile(parameters, "inputdir", false);              
+                       if (inputDir == "not found"){   inputDir = "";          }
+                       else {
+                               string path;
+                               it = parameters.find("phylip");
+                               //user has given a phylip file
+                               if(it != parameters.end()){ 
+                                       path = hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["phylip"] = inputDir + it->second;           }
+                               }
+                               
+                               it = parameters.find("column");
+                               //user has given a column file
+                               if(it != parameters.end()){ 
+                                       path = hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["column"] = inputDir + it->second;           }
+                               }
+                               
+                               it = parameters.find("name");
+                               //user has given a name file
+                               if(it != parameters.end()){ 
+                                       path = hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["name"] = inputDir + it->second;             }
+                               }
+                               
+                               it = parameters.find("group");
+                               //user has given a group file
+                               if(it != parameters.end()){ 
+                                       path = hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["group"] = inputDir + it->second;            }
+                               }
+                       }
+
+                       //if the user changes the output directory, the command factory will send this info to us in the outputdir parameter
+                       outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
+
                        //check for required parameters
                        phylipfile = validParameter.validFile(parameters, "phylip", true);
                        if (phylipfile == "not open") { abort = true; }
@@ -66,20 +108,24 @@ ReadDistCommand::ReadDistCommand(string option){
                        if ((phylipfile != "") && (groupfile != "")) { 
                        globaldata->setFormat("matrix"); }
                        
-                       if ((phylipfile == "") && (columnfile == "")) { mothurOut("When executing a read.dist command you must enter a phylip or a column."); mothurOutEndLine(); abort = true; }
-                       else if ((phylipfile != "") && (columnfile != "")) { mothurOut("When executing a read.dist command you must enter ONLY ONE of the following: phylip or column."); mothurOutEndLine(); abort = true; }
+                       if ((phylipfile == "") && (columnfile == "")) { m->mothurOut("When executing a read.dist command you must enter a phylip or a column."); m->mothurOutEndLine(); abort = true; }
+                       else if ((phylipfile != "") && (columnfile != "")) { m->mothurOut("When executing a read.dist command you must enter ONLY ONE of the following: phylip or column."); m->mothurOutEndLine(); abort = true; }
                
                        if (columnfile != "") {
                                if (namefile == "") {  cout << "You need to provide a namefile if you are going to use the column format." << endl; abort = true; }
                        }
-               
+                       
                        //check for optional parameter and set defaults
                        // ...at some point should added some additional type checking...
                        //get user cutoff and precision or use defaults
                        string temp;
-                       temp = validParameter.validFile(parameters, "precision", false);                        if (temp == "not found") { temp = "100"; }
+                       temp = validParameter.validFile(parameters, "precision", false);                if (temp == "not found") { temp = "100"; }
                        convert(temp, precision); 
                        
+                       temp = validParameter.validFile(parameters, "sim", false);                              if (temp == "not found") { temp = "F"; }
+                       sim = isTrue(temp); 
+                       globaldata->sim = sim;
+                       
                        temp = validParameter.validFile(parameters, "cutoff", false);                   if (temp == "not found") { temp = "10"; }
                        convert(temp, cutoff); 
                        cutoff += (5 / (precision * 10.0));
@@ -92,9 +138,12 @@ ReadDistCommand::ReadDistCommand(string option){
                                else if (format == "phylip") { read = new ReadPhylipMatrix(distFileName); }
                                else if (format == "matrix") { 
                                        groupMap = new GroupMap(groupfile);
-                                       groupMap->readMap();
-                                       if (globaldata->gGroupmap != NULL) { delete globaldata->gGroupmap;  }
-                                       globaldata->gGroupmap = groupMap;
+                                       int error = groupMap->readMap();
+                                       if (error == 1) { delete groupMap; abort = true; }
+                                       else {
+                                               if (globaldata->gGroupmap != NULL) { delete globaldata->gGroupmap;  }
+                                               globaldata->gGroupmap = groupMap;
+                                       }
                                }
                
                                if (format != "matrix" ) {
@@ -113,7 +162,7 @@ ReadDistCommand::ReadDistCommand(string option){
 
        }
        catch(exception& e) {
-               errorOut(e, "ReadDistCommand", "ReadDistCommand");
+               m->errorOut(e, "ReadDistCommand", "ReadDistCommand");
                exit(1);
        }
 }
@@ -121,19 +170,20 @@ ReadDistCommand::ReadDistCommand(string option){
 
 void ReadDistCommand::help(){
        try {
-               mothurOut("The read.dist command parameter options are phylip or column, group, name, cutoff and precision\n");
-               mothurOut("The read.dist command can be used in two ways.  The first is to read a phylip or column and run the cluster command\n");
-               mothurOut("For this use the read.dist command should be in the following format: \n");
-               mothurOut("read.dist(phylip=yourDistFile, name=yourNameFile, cutoff=yourCutoff, precision=yourPrecision) \n");
-               mothurOut("The phylip or column parameter is required, but only one may be used.  If you use a column file the name filename is required. \n");
-               mothurOut("If you do not provide a cutoff value 10.00 is assumed. If you do not provide a precision value then 100 is assumed.\n");
-               mothurOut("The second way to use the read.dist command is to read a phylip or column and a group, so you can use the libshuff command.\n");
-               mothurOut("For this use the read.dist command should be in the following format: \n");
-               mothurOut("read.dist(phylip=yourPhylipfile, group=yourGroupFile). The cutoff and precision parameters are not valid with this use.  \n");
-               mothurOut("Note: No spaces between parameter labels (i.e. phylip), '=' and parameters (i.e.yourPhylipfile).\n\n");
+               m->mothurOut("The read.dist command parameter options are phylip or column, group, name, sim, cutoff and precision\n");
+               m->mothurOut("The read.dist command can be used in two ways.  The first is to read a phylip or column and run the cluster command\n");
+               m->mothurOut("For this use the read.dist command should be in the following format: \n");
+               m->mothurOut("read.dist(phylip=yourDistFile, name=yourNameFile, cutoff=yourCutoff, precision=yourPrecision) \n");
+               m->mothurOut("The phylip or column parameter is required, but only one may be used.  If you use a column file the name filename is required. \n");
+               m->mothurOut("The sim parameter is used to indicate that your distance file contains similiarity values instead of distance values. The default is false, if sim=true then mothur will convert the similairity values to distances. \n");
+               m->mothurOut("If you do not provide a cutoff value 10.00 is assumed. If you do not provide a precision value then 100 is assumed.\n");
+               m->mothurOut("The second way to use the read.dist command is to read a phylip or column and a group, so you can use the libshuff command.\n");
+               m->mothurOut("For this use the read.dist command should be in the following format: \n");
+               m->mothurOut("read.dist(phylip=yourPhylipfile, group=yourGroupFile). The cutoff and precision parameters are not valid with this use.  \n");
+               m->mothurOut("Note: No spaces between parameter labels (i.e. phylip), '=' and parameters (i.e.yourPhylipfile).\n\n");
        }
        catch(exception& e) {
-               errorOut(e, "ReadDistCommand", "help");
+               m->errorOut(e, "ReadDistCommand", "help");
                exit(1);
        }
 }
@@ -142,7 +192,10 @@ void ReadDistCommand::help(){
 
 ReadDistCommand::~ReadDistCommand(){
        if (abort == false) {
-               if (format != "matrix") { delete read; delete nameMap; }
+               if (format != "matrix") { 
+                       delete read; 
+                       delete nameMap; 
+               }
        }
 }
 
@@ -155,11 +208,50 @@ int ReadDistCommand::execute(){
                time_t start = time(NULL);
                size_t numDists = 0;
                
+               vector<string> outputNames;
+               
                if (format == "matrix") {
                        ifstream in;
                        openInputFile(distFileName, in);
                        matrix = new FullMatrix(in); //reads the matrix file
                        in.close();
+                       
+                       if (m->control_pressed) { delete groupMap; delete matrix; return 0; }
+                       
+                       //if files don't match...
+                       if (matrix->getNumSeqs() < groupMap->getNumSeqs()) {  
+                               m->mothurOut("Your distance file contains " + toString(matrix->getNumSeqs()) + " sequences, and your group file contains " + toString(groupMap->getNumSeqs()) + " sequences.");  m->mothurOutEndLine();                         
+                               //create new group file
+                               if(outputDir == "") { outputDir += hasPath(groupfile); }
+                               
+                               string newGroupFile = outputDir + getRootName(getSimpleName(groupfile)) + "editted.groups";
+                               outputNames.push_back(newGroupFile);
+                               ofstream outGroups;
+                               openOutputFile(newGroupFile, outGroups);
+                               
+                               for (int i = 0; i < matrix->getNumSeqs(); i++) {
+                                       if (m->control_pressed) { delete groupMap; delete matrix; outGroups.close(); remove(newGroupFile.c_str()); return 0; }
+                                       
+                                       Names temp = matrix->getRowInfo(i);
+                                       outGroups << temp.seqName << '\t' << temp.groupName << endl;
+                               }
+                               outGroups.close();
+                               
+                               m->mothurOut(newGroupFile + " is a new group file containing only the sequence that are in your distance file. I will read this file instead."); m->mothurOutEndLine();
+                               
+                               //read new groupfile
+                               delete groupMap; groupMap = NULL;
+                               groupfile = newGroupFile;
+                               globaldata->setGroupFile(groupfile); 
+                               
+                               groupMap = new GroupMap(groupfile);
+                               groupMap->readMap();
+                               
+                               if (m->control_pressed) { delete groupMap; delete matrix; remove(newGroupFile.c_str()); return 0; }
+       
+                               globaldata->gGroupmap = groupMap;
+                       }
+                       
                        //memory leak prevention
                        if (globaldata->gMatrix != NULL) { delete globaldata->gMatrix;  }
                        globaldata->gMatrix = matrix; //save matrix for coverage commands
@@ -167,45 +259,32 @@ int ReadDistCommand::execute(){
                } else {
                        read->read(nameMap);
                        //to prevent memory leak
-
+                       
+                       if (m->control_pressed) {  return 0; }
+               
                        if (globaldata->gListVector != NULL) {  delete globaldata->gListVector;  }
                        globaldata->gListVector = read->getListVector();
 
                        if (globaldata->gSparseMatrix != NULL) { delete globaldata->gSparseMatrix;  }
                        globaldata->gSparseMatrix = read->getMatrix();
                        numDists = globaldata->gSparseMatrix->getNNodes();
-      int lines = cutoff / (1.0/precision);
-      vector<float> dist_cutoff(lines+1,0);
-                       for (int i = 0; i <= lines;i++) {       
-       dist_cutoff[i] = (i + 0.5) / precision; 
-      } 
-      vector<int> dist_count(lines+1,0);
-      list<PCell>::iterator currentCell;
-      SparseMatrix* smatrix = globaldata->gSparseMatrix;
-               for (currentCell = smatrix->begin(); currentCell != smatrix->end(); currentCell++) {
-                               for (int i = 0; i <= lines;i++) {       
-                                       if (currentCell->dist < dist_cutoff[i]) {
-                                               dist_count[i]++;
-            break;
-          }
-        }
-                       }
+               }
+               
+               if (m->control_pressed) {  return 0; }
 
-      string dist_string = "Dist:";
-      string count_string = "Count: ";
-                       for (int i = 0; i <= lines;i++) {       
-       dist_string = dist_string.append("\t").append(toString(dist_cutoff[i]));
-       count_string = count_string.append("\t").append(toString(dist_count[i]));
-                       }
-      mothurOut(dist_string); mothurOutEndLine(); mothurOut(count_string); mothurOutEndLine();
+               if (outputNames.size() != 0) {
+                       m->mothurOutEndLine();
+                       m->mothurOut("Output File Name: "); m->mothurOutEndLine();
+                       for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
+                       m->mothurOutEndLine();
                }
-               mothurOut("It took " + toString(time(NULL) - start) + " secs to read " + toString(numDists) + " distances (cutoff: " + toString(cutoff) + ")"); mothurOutEndLine();
+               
+               m->mothurOut("It took " + toString(time(NULL) - start) + " secs to read "); m->mothurOutEndLine();
                return 0;
                
        }
        catch(exception& e) {
-               errorOut(e, "ReadDistCommand", "execute");
+               m->errorOut(e, "ReadDistCommand", "execute");
                exit(1);
        }
 }