X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=readdistcommand.cpp;h=0445eec63328e2cf7a646f1cc662bb36fdd3bb98;hb=260ae19c36cb11a53ddc5a75b5e507f8dd8b31d6;hp=719740cf7589e121029197bb46e30bbdd62e6bda;hpb=3c856e629e20261496b0433c2587f27b2c8ba3f6;p=mothur.git diff --git a/readdistcommand.cpp b/readdistcommand.cpp index 719740c..0445eec 100644 --- a/readdistcommand.cpp +++ b/readdistcommand.cpp @@ -12,7 +12,7 @@ #include "readcolumn.h" #include "readmatrix.hpp" -ReadDistCommand::ReadDistCommand(string option){ +ReadDistCommand::ReadDistCommand(string option) { try { globaldata = GlobalData::getInstance(); abort = false; @@ -22,21 +22,63 @@ ReadDistCommand::ReadDistCommand(string option){ else { //valid paramters for this command - string Array[] = {"phylip", "column", "name", "cutoff", "precision", "group"}; + string Array[] = {"phylip", "column", "name", "cutoff", "precision", "group","outputdir","inputdir","sim"}; vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); OptionParser parser(option); map parameters = parser.getParameters(); ValidParameters validParameter; + map::iterator it; //check to make sure all parameters are valid for command - for (map::iterator it = parameters.begin(); it != parameters.end(); it++) { + for (it = parameters.begin(); it != parameters.end(); it++) { if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; } } globaldata->newRead(); + //if the user changes the input directory command factory will send this info to us in the output parameter + string inputDir = validParameter.validFile(parameters, "inputdir", false); + if (inputDir == "not found"){ inputDir = ""; } + else { + string path; + it = parameters.find("phylip"); + //user has given a template file + if(it != parameters.end()){ + path = m->hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["phylip"] = inputDir + it->second; } + } + + it = parameters.find("column"); + //user has given a template file + if(it != parameters.end()){ + path = m->hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["column"] = inputDir + it->second; } + } + + it = parameters.find("name"); + //user has given a template file + if(it != parameters.end()){ + path = m->hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["name"] = inputDir + it->second; } + } + + it = parameters.find("group"); + //user has given a template file + if(it != parameters.end()){ + path = m->hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["group"] = inputDir + it->second; } + } + } + + //if the user changes the output directory command factory will send this info to us in the output parameter + outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; } + //check for required parameters phylipfile = validParameter.validFile(parameters, "phylip", true); if (phylipfile == "not open") { abort = true; } @@ -66,23 +108,27 @@ ReadDistCommand::ReadDistCommand(string option){ if ((phylipfile != "") && (groupfile != "")) { globaldata->setFormat("matrix"); } - if ((phylipfile == "") && (columnfile == "")) { mothurOut("When executing a read.dist command you must enter a phylip or a column."); mothurOutEndLine(); abort = true; } - else if ((phylipfile != "") && (columnfile != "")) { mothurOut("When executing a read.dist command you must enter ONLY ONE of the following: phylip or column."); mothurOutEndLine(); abort = true; } + if ((phylipfile == "") && (columnfile == "")) { m->mothurOut("When executing a read.dist command you must enter a phylip or a column."); m->mothurOutEndLine(); abort = true; } + else if ((phylipfile != "") && (columnfile != "")) { m->mothurOut("When executing a read.dist command you must enter ONLY ONE of the following: phylip or column."); m->mothurOutEndLine(); abort = true; } if (columnfile != "") { if (namefile == "") { cout << "You need to provide a namefile if you are going to use the column format." << endl; abort = true; } } - + //check for optional parameter and set defaults // ...at some point should added some additional type checking... //get user cutoff and precision or use defaults string temp; - temp = validParameter.validFile(parameters, "precision", false); if (temp == "not found") { temp = "100"; } + temp = validParameter.validFile(parameters, "precision", false); if (temp == "not found") { temp = "100"; } convert(temp, precision); + temp = validParameter.validFile(parameters, "sim", false); if (temp == "not found") { temp = "F"; } + sim = m->isTrue(temp); + globaldata->sim = sim; + temp = validParameter.validFile(parameters, "cutoff", false); if (temp == "not found") { temp = "10"; } convert(temp, cutoff); - cutoff += (5 / (precision * 10.0)); + cutoff += (5 / (precision * 10.0)); if (abort == false) { distFileName = globaldata->inputFileName; @@ -92,9 +138,12 @@ ReadDistCommand::ReadDistCommand(string option){ else if (format == "phylip") { read = new ReadPhylipMatrix(distFileName); } else if (format == "matrix") { groupMap = new GroupMap(groupfile); - groupMap->readMap(); - if (globaldata->gGroupmap != NULL) { delete globaldata->gGroupmap; } - globaldata->gGroupmap = groupMap; + int error = groupMap->readMap(); + if (error == 1) { delete groupMap; abort = true; } + else { + if (globaldata->gGroupmap != NULL) { delete globaldata->gGroupmap; } + globaldata->gGroupmap = groupMap; + } } if (format != "matrix" ) { @@ -113,7 +162,7 @@ ReadDistCommand::ReadDistCommand(string option){ } catch(exception& e) { - errorOut(e, "ReadDistCommand", "ReadDistCommand"); + m->errorOut(e, "ReadDistCommand", "ReadDistCommand"); exit(1); } } @@ -121,19 +170,20 @@ ReadDistCommand::ReadDistCommand(string option){ void ReadDistCommand::help(){ try { - mothurOut("The read.dist command parameter options are phylip or column, group, name, cutoff and precision\n"); - mothurOut("The read.dist command can be used in two ways. The first is to read a phylip or column and run the cluster command\n"); - mothurOut("For this use the read.dist command should be in the following format: \n"); - mothurOut("read.dist(phylip=yourDistFile, name=yourNameFile, cutoff=yourCutoff, precision=yourPrecision) \n"); - mothurOut("The phylip or column parameter is required, but only one may be used. If you use a column file the name filename is required. \n"); - mothurOut("If you do not provide a cutoff value 10.00 is assumed. If you do not provide a precision value then 100 is assumed.\n"); - mothurOut("The second way to use the read.dist command is to read a phylip or column and a group, so you can use the libshuff command.\n"); - mothurOut("For this use the read.dist command should be in the following format: \n"); - mothurOut("read.dist(phylip=yourPhylipfile, group=yourGroupFile). The cutoff and precision parameters are not valid with this use. \n"); - mothurOut("Note: No spaces between parameter labels (i.e. phylip), '=' and parameters (i.e.yourPhylipfile).\n\n"); + m->mothurOut("The read.dist command parameter options are phylip or column, group, name, sim, cutoff and precision\n"); + m->mothurOut("The read.dist command can be used in two ways. The first is to read a phylip or column and run the cluster command\n"); + m->mothurOut("For this use the read.dist command should be in the following format: \n"); + m->mothurOut("read.dist(phylip=yourDistFile, name=yourNameFile, cutoff=yourCutoff, precision=yourPrecision) \n"); + m->mothurOut("The phylip or column parameter is required, but only one may be used. If you use a column file the name filename is required. \n"); + m->mothurOut("The sim parameter is used to indicate that your distance file contains similarity values instead of distance values. The default is false, if sim=true then mothur will convert the similarity values to distances. \n"); + m->mothurOut("If you do not provide a cutoff value 10.00 is assumed. If you do not provide a precision value then 100 is assumed.\n"); + m->mothurOut("The second way to use the read.dist command is to read a phylip or column and a group, so you can use the libshuff command.\n"); + m->mothurOut("For this use the read.dist command should be in the following format: \n"); + m->mothurOut("read.dist(phylip=yourPhylipfile, group=yourGroupFile). The cutoff and precision parameters are not valid with this use. \n"); + m->mothurOut("Note: No spaces between parameter labels (i.e. phylip), '=' and parameters (i.e.yourPhylipfile).\n\n"); } catch(exception& e) { - errorOut(e, "ReadDistCommand", "help"); + m->errorOut(e, "ReadDistCommand", "help"); exit(1); } } @@ -158,11 +208,50 @@ int ReadDistCommand::execute(){ time_t start = time(NULL); size_t numDists = 0; + vector outputNames; + if (format == "matrix") { ifstream in; - openInputFile(distFileName, in); + m->openInputFile(distFileName, in); matrix = new FullMatrix(in); //reads the matrix file in.close(); + + if (m->control_pressed) { delete groupMap; delete matrix; return 0; } + + //if files don't match... + if (matrix->getNumSeqs() < groupMap->getNumSeqs()) { + m->mothurOut("Your distance file contains " + toString(matrix->getNumSeqs()) + " sequences, and your group file contains " + toString(groupMap->getNumSeqs()) + " sequences."); m->mothurOutEndLine(); + //create new group file + if(outputDir == "") { outputDir += m->hasPath(groupfile); } + + string newGroupFile = outputDir + m->getRootName(m->getSimpleName(groupfile)) + "editted.groups"; + outputNames.push_back(newGroupFile); + ofstream outGroups; + m->openOutputFile(newGroupFile, outGroups); + + for (int i = 0; i < matrix->getNumSeqs(); i++) { + if (m->control_pressed) { delete groupMap; delete matrix; outGroups.close(); remove(newGroupFile.c_str()); return 0; } + + Names temp = matrix->getRowInfo(i); + outGroups << temp.seqName << '\t' << temp.groupName << endl; + } + outGroups.close(); + + m->mothurOut(newGroupFile + " is a new group file containing only the sequence that are in your distance file. I will read this file instead."); m->mothurOutEndLine(); + + //read new groupfile + delete groupMap; groupMap = NULL; + groupfile = newGroupFile; + globaldata->setGroupFile(groupfile); + + groupMap = new GroupMap(groupfile); + groupMap->readMap(); + + if (m->control_pressed) { delete groupMap; delete matrix; remove(newGroupFile.c_str()); return 0; } + + globaldata->gGroupmap = groupMap; + } + //memory leak prevention if (globaldata->gMatrix != NULL) { delete globaldata->gMatrix; } globaldata->gMatrix = matrix; //save matrix for coverage commands @@ -170,45 +259,32 @@ int ReadDistCommand::execute(){ } else { read->read(nameMap); //to prevent memory leak - + + if (m->control_pressed) { return 0; } + if (globaldata->gListVector != NULL) { delete globaldata->gListVector; } globaldata->gListVector = read->getListVector(); if (globaldata->gSparseMatrix != NULL) { delete globaldata->gSparseMatrix; } globaldata->gSparseMatrix = read->getMatrix(); numDists = globaldata->gSparseMatrix->getNNodes(); - - int lines = cutoff / (1.0/precision); - vector dist_cutoff(lines+1,0); - for (int i = 0; i <= lines;i++) { - dist_cutoff[i] = (i + 0.5) / precision; - } - vector dist_count(lines+1,0); - list::iterator currentCell; - SparseMatrix* smatrix = globaldata->gSparseMatrix; - for (currentCell = smatrix->begin(); currentCell != smatrix->end(); currentCell++) { - for (int i = 0; i <= lines;i++) { - if (currentCell->dist < dist_cutoff[i]) { - dist_count[i]++; - break; - } - } - } + } + + if (m->control_pressed) { return 0; } - // string dist_string = "Dist:"; - // string count_string = "Count: "; - //for (int i = 0; i <= lines;i++) { - //dist_string = dist_string.append("\t").append(toString(dist_cutoff[i])); - // count_string = count_string.append("\t").append(toString(dist_count[i])); - // } - //mothurOut(dist_string); mothurOutEndLine(); mothurOut(count_string); mothurOutEndLine(); + if (outputNames.size() != 0) { + m->mothurOutEndLine(); + m->mothurOut("Output File Name: "); m->mothurOutEndLine(); + for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } + m->mothurOutEndLine(); } - mothurOut("It took " + toString(time(NULL) - start) + " secs to read "); mothurOutEndLine(); + + m->mothurOut("It took " + toString(time(NULL) - start) + " secs to read "); m->mothurOutEndLine(); return 0; } catch(exception& e) { - errorOut(e, "ReadDistCommand", "execute"); + m->errorOut(e, "ReadDistCommand", "execute"); exit(1); } }