#include "readcolumn.h"
#include "readmatrix.hpp"
// Constructor for the read.dist command.
// Visible steps: parse the option string into name/value pairs, check each name against the
// accepted list, resolve inputdir/outputdir, read the file and numeric parameters, and, when
// nothing has set abort, build the matrix reader / group map / name map for the chosen format.
// NOTE(review): this is a diff view with unchanged lines elided, so some branches (for example
// the `if` that pairs with the first `else`) are not shown here.
-ReadDistCommand::ReadDistCommand(string option){
+ReadDistCommand::ReadDistCommand(string option) {
try {
globaldata = GlobalData::getInstance();
abort = false;
else {
//valid parameters for this command
- string Array[] = {"phylip", "column", "name", "cutoff", "precision", "group"};
+ string Array[] = {"phylip", "column", "name", "cutoff","hard", "precision", "group","outputdir","inputdir","sim"};
vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
OptionParser parser(option);
map<string, string> parameters = parser.getParameters();
ValidParameters validParameter;
+ map<string,string>::iterator it;
//check to make sure all parameters are valid for command
//a single parameter that fails isValidParameter marks the whole command as aborted
- for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) {
+ for (it = parameters.begin(); it != parameters.end(); it++) {
if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
}
globaldata->newRead();
+ //if the user changes the input directory command factory will send this info to us in the output parameter
+ string inputDir = validParameter.validFile(parameters, "inputdir", false);
+ if (inputDir == "not found"){ inputDir = ""; }
+ else {
+ string path;
+ it = parameters.find("phylip");
+ //user has given a phylip file
+ if(it != parameters.end()){
+ path = hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["phylip"] = inputDir + it->second; }
+ }
+
+ it = parameters.find("column");
+ //user has given a column file
+ if(it != parameters.end()){
+ path = hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["column"] = inputDir + it->second; }
+ }
+
+ it = parameters.find("name");
+ //user has given a name file
+ if(it != parameters.end()){
+ path = hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["name"] = inputDir + it->second; }
+ }
+
+ it = parameters.find("group");
+ //user has given a group file
+ if(it != parameters.end()){
+ path = hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["group"] = inputDir + it->second; }
+ }
+ }
+
+ //if the user changes the output directory command factory will send this info to us in the output parameter
+ outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
+
//check for required parameters
phylipfile = validParameter.validFile(parameters, "phylip", true);
if (phylipfile == "not open") { abort = true; }
if (namefile == "not open") { abort = true; }
else if (namefile == "not found") { namefile = ""; }
else { globaldata->setNameFile(namefile); }
-
//a phylip file together with a group file selects the "matrix" format
if ((phylipfile != "") && (groupfile != "")) {
globaldata->setFormat("matrix"); }
//exactly one of phylip / column must be supplied: neither, or both, aborts the command
- if ((phylipfile == "") && (columnfile == "")) { cout << "When executing a read.dist command you must enter a phylip or a column." << endl; abort = true; }
- else if ((phylipfile != "") && (columnfile != "")) { cout << "When executing a read.dist command you must enter ONLY ONE of the following: phylip or column." << endl; abort = true; }
+ if ((phylipfile == "") && (columnfile == "")) { m->mothurOut("When executing a read.dist command you must enter a phylip or a column."); m->mothurOutEndLine(); abort = true; }
+ else if ((phylipfile != "") && (columnfile != "")) { m->mothurOut("When executing a read.dist command you must enter ONLY ONE of the following: phylip or column."); m->mothurOutEndLine(); abort = true; }
//NOTE(review): the message below still goes through cout while the neighbouring messages use m->mothurOut - confirm whether that is intended
if (columnfile != "") {
if (namefile == "") { cout << "You need to provide a namefile if you are going to use the column format." << endl; abort = true; }
}
-
+
//check for optional parameter and set defaults
// ...at some point should add some additional type checking...
//get user cutoff and precision or use defaults
string temp;
- temp = validParameter.validFile(parameters, "precision", false); if (temp == "not found") { temp = "100"; }
+ temp = validParameter.validFile(parameters, "precision", false); if (temp == "not found") { temp = "100"; }
convert(temp, precision);
+ temp = validParameter.validFile(parameters, "sim", false); if (temp == "not found") { temp = "F"; }
+ sim = isTrue(temp);
+ globaldata->sim = sim;
+
+ temp = validParameter.validFile(parameters, "hard", false); if (temp == "not found") { temp = "F"; }
+ hard = isTrue(temp);
+
temp = validParameter.validFile(parameters, "cutoff", false); if (temp == "not found") { temp = "10"; }
convert(temp, cutoff);
//unless hard is set, the cutoff is raised by 5 / (precision * 10), i.e. half of one precision step
- cutoff += (5 / (precision * 10.0));
+ if (!hard) { cutoff += (5 / (precision * 10.0)); }
if (abort == false) {
distFileName = globaldata->inputFileName;
else if (format == "phylip") { read = new ReadPhylipMatrix(distFileName); }
else if (format == "matrix") {
groupMap = new GroupMap(groupfile);
//a readMap() result of 1 is treated as failure: the new map is freed and the command aborts;
//otherwise the map replaces whatever globaldata->gGroupmap held before
- groupMap->readMap();
- if (globaldata->gGroupmap != NULL) { delete globaldata->gGroupmap; }
- globaldata->gGroupmap = groupMap;
+ int error = groupMap->readMap();
+ if (error == 1) { delete groupMap; abort = true; }
+ else {
+ if (globaldata->gGroupmap != NULL) { delete globaldata->gGroupmap; }
+ globaldata->gGroupmap = groupMap;
+ }
}
//for every non-matrix format a name map is built only when a name file was given, else nameMap is NULL
if (format != "matrix" ) {
if(namefile != ""){
nameMap = new NameAssignment(namefile);
- nameMap->readMap(1,2);
+ nameMap->readMap();
}else{
nameMap = NULL;
}
}
//a std::exception is reported through m->errorOut and the process exits with status 1
catch(exception& e) {
- cout << "Standard Error: " << e.what() << " has occurred in the ReadDistCommand class Function ReadDistCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
- exit(1);
- }
- catch(...) {
- cout << "An unknown error has occurred in the ReadDistCommand class function ReadDistCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+ m->errorOut(e, "ReadDistCommand", "ReadDistCommand");
exit(1);
}
}
// Prints the usage text for the read.dist command.
// The new lines emit the text through m->mothurOut instead of cout; a std::exception is
// reported through m->errorOut and the process exits with status 1.
// NOTE(review): the constructor's accepted-parameter list also contains "hard", "inputdir" and
// "outputdir", none of which are described in this text - confirm whether "hard" should be.
void ReadDistCommand::help(){
try {
- cout << "The read.dist command parameter options are phylip or column, group, name, cutoff and precision" << "\n";
- cout << "The read.dist command can be used in two ways. The first is to read a phylip or column and run the cluster command" << "\n";
- cout << "For this use the read.dist command should be in the following format: " << "\n";
- cout << "read.dist(phylip=yourDistFile, name=yourNameFile, cutoff=yourCutoff, precision=yourPrecision) " << "\n";
- cout << "The phylip or column parameter is required, but only one may be used. If you use a column file the name filename is required. " << "\n";
- cout << "If you do not provide a cutoff value 10.00 is assumed. If you do not provide a precision value then 100 is assumed." << "\n";
- cout << "The second way to use the read.dist command is to read a phylip or column and a group, so you can use the libshuff command." << "\n";
- cout << "For this use the read.dist command should be in the following format: " << "\n";
- cout << "read.dist(phylip=yourPhylipfile, group=yourGroupFile). The cutoff and precision parameters are not valid with this use. " << "\n";
- cout << "Note: No spaces between parameter labels (i.e. phylip), '=' and parameters (i.e.yourPhylipfile)." << "\n" << "\n";
+ m->mothurOut("The read.dist command parameter options are phylip or column, group, name, sim, cutoff and precision\n");
+ m->mothurOut("The read.dist command can be used in two ways. The first is to read a phylip or column and run the cluster command\n");
+ m->mothurOut("For this use the read.dist command should be in the following format: \n");
+ m->mothurOut("read.dist(phylip=yourDistFile, name=yourNameFile, cutoff=yourCutoff, precision=yourPrecision) \n");
+ m->mothurOut("The phylip or column parameter is required, but only one may be used. If you use a column file the name filename is required. \n");
+ m->mothurOut("The sim parameter is used to indicate that your distance file contains similarity values instead of distance values. The default is false, if sim=true then mothur will convert the similarity values to distances. \n");
+ m->mothurOut("If you do not provide a cutoff value 10.00 is assumed. If you do not provide a precision value then 100 is assumed.\n");
+ m->mothurOut("The second way to use the read.dist command is to read a phylip or column and a group, so you can use the libshuff command.\n");
+ m->mothurOut("For this use the read.dist command should be in the following format: \n");
+ m->mothurOut("read.dist(phylip=yourPhylipfile, group=yourGroupFile). The cutoff and precision parameters are not valid with this use. \n");
+ m->mothurOut("Note: No spaces between parameter labels (i.e. phylip), '=' and parameters (i.e.yourPhylipfile).\n\n");
}
catch(exception& e) {
- cout << "Standard Error: " << e.what() << " has occurred in the ReadDistCommand class Function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+ m->errorOut(e, "ReadDistCommand", "help");
exit(1);
}
- catch(...) {
- cout << "An unknown error has occurred in the ReadDistCommand class function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
- exit(1);
- }
}
//**********************************************************************************************************************
// Destructor: frees read and nameMap, but only when the command was not aborted and the
// format is not "matrix". The change shown here only reflows the single-line form.
// NOTE(review): groupMap and matrix are not freed here; elsewhere in this file they are stored
// in globaldata (gGroupmap / gMatrix), so presumably globaldata owns them - confirm.
ReadDistCommand::~ReadDistCommand(){
if (abort == false) {
- if (format != "matrix") { delete read; delete nameMap; }
+ if (format != "matrix") {
+ delete read;
+ delete nameMap;
+ }
}
}
try {
if (abort == true) { return 0; }
+
+ time_t start = time(NULL);
+ size_t numDists = 0;
+
+ vector<string> outputNames;
if (format == "matrix") {
ifstream in;
openInputFile(distFileName, in);
matrix = new FullMatrix(in); //reads the matrix file
in.close();
+
+ if (m->control_pressed) { delete groupMap; delete matrix; return 0; }
+
+ //if files don't match...
+ if (matrix->getNumSeqs() < groupMap->getNumSeqs()) {
+ m->mothurOut("Your distance file contains " + toString(matrix->getNumSeqs()) + " sequences, and your group file contains " + toString(groupMap->getNumSeqs()) + " sequences."); m->mothurOutEndLine();
+ //create new group file
+ if(outputDir == "") { outputDir += hasPath(groupfile); }
+
+ string newGroupFile = outputDir + getRootName(getSimpleName(groupfile)) + "editted.groups";
+ outputNames.push_back(newGroupFile);
+ ofstream outGroups;
+ openOutputFile(newGroupFile, outGroups);
+
+ for (int i = 0; i < matrix->getNumSeqs(); i++) {
+ if (m->control_pressed) { delete groupMap; delete matrix; outGroups.close(); remove(newGroupFile.c_str()); return 0; }
+
+ Names temp = matrix->getRowInfo(i);
+ outGroups << temp.seqName << '\t' << temp.groupName << endl;
+ }
+ outGroups.close();
+
+ m->mothurOut(newGroupFile + " is a new group file containing only the sequence that are in your distance file. I will read this file instead."); m->mothurOutEndLine();
+
+ //read new groupfile
+ delete groupMap; groupMap = NULL;
+ groupfile = newGroupFile;
+ globaldata->setGroupFile(groupfile);
+
+ groupMap = new GroupMap(groupfile);
+ groupMap->readMap();
+
+ if (m->control_pressed) { delete groupMap; delete matrix; remove(newGroupFile.c_str()); return 0; }
+
+ globaldata->gGroupmap = groupMap;
+ }
+
//memory leak prevention
if (globaldata->gMatrix != NULL) { delete globaldata->gMatrix; }
globaldata->gMatrix = matrix; //save matrix for coverage commands
- }else {
+ numDists = matrix->getSizes()[1];
+ } else {
read->read(nameMap);
//to prevent memory leak
-
+
+ if (m->control_pressed) { return 0; }
+
if (globaldata->gListVector != NULL) { delete globaldata->gListVector; }
globaldata->gListVector = read->getListVector();
if (globaldata->gSparseMatrix != NULL) { delete globaldata->gSparseMatrix; }
globaldata->gSparseMatrix = read->getMatrix();
+ numDists = globaldata->gSparseMatrix->getNNodes();
+ }
+
+ if (m->control_pressed) { return 0; }
+ if (outputNames.size() != 0) {
+ m->mothurOutEndLine();
+ m->mothurOut("Output File Name: "); m->mothurOutEndLine();
+ for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
+ m->mothurOutEndLine();
}
+
+ m->mothurOut("It took " + toString(time(NULL) - start) + " secs to read "); m->mothurOutEndLine();
return 0;
+
}
catch(exception& e) {
- cout << "Standard Error: " << e.what() << " has occurred in the ReadDistCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
- exit(1);
- }
- catch(...) {
- cout << "An unknown error has occurred in the ReadDistCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+ m->errorOut(e, "ReadDistCommand", "execute");
exit(1);
}
}