5 * Created by Sarah Westcott on 1/20/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10 #include "readdistcommand.h"
11 #include "readphylip.h"
12 #include "readcolumn.h"
13 #include "readmatrix.hpp"
15 //**********************************************************************************************************************
// Lists every parameter name the read.dist command accepts.
// The constructor builds an identical list for its own validation pass; keep the two in sync.
16 vector<string> ReadDistCommand::getValidParameters(){
18 string Array[] = {"phylip", "column", "name", "cutoff", "precision", "group","outputdir","inputdir","sim"};
// sizeof(Array)/sizeof(string) is the element count, so the whole array is copied into the vector.
19 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
// Hands the exception e to m->errorOut, tagged with this class and method name.
// NOTE(review): the catch clause that binds e is not visible next to this call -- confirm.
23 m->errorOut(e, "ReadDistCommand", "getValidParameters");
27 //**********************************************************************************************************************
// Lists the parameters of which the user has to supply one: phylip or column.
// NOTE(review): the trailing "or" entry presumably tells the consumer of this list that the
// preceding names are alternatives rather than all mandatory -- confirm against the caller.
28 vector<string> ReadDistCommand::getRequiredParameters(){
30 string Array[] = {"phylip","column","or"};
// sizeof(Array)/sizeof(string) is the element count, so the whole array is copied into the vector.
31 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
// Hands the exception e to m->errorOut, tagged with this class and method name.
// NOTE(review): the catch clause that binds e is not visible next to this call -- confirm.
35 m->errorOut(e, "ReadDistCommand", "getRequiredParameters");
39 //**********************************************************************************************************************
// Lists files this command requires; the visible code adds no entries, so the list stays empty.
40 vector<string> ReadDistCommand::getRequiredFiles(){
// Default-constructed, i.e. empty.
42 vector<string> myArray;
// Hands the exception e to m->errorOut, tagged with this class and method name.
// NOTE(review): the catch clause that binds e is not visible next to this call -- confirm.
46 m->errorOut(e, "ReadDistCommand", "getRequiredFiles");
50 //**********************************************************************************************************************
// Constructor: parses the read.dist option string, validates the parameters, records the
// chosen files and format in GlobalData and, unless abort was set, allocates the matrix
// reader (column / phylip) or the GroupMap (matrix format).
// option: raw option string; the literal "help" prints the help text and sets abort.
51 ReadDistCommand::ReadDistCommand(string option) {
53 globaldata = GlobalData::getInstance();
54 abort = false; calledHelp = false;
56 //allow user to run help
57 if(option == "help") { help(); abort = true; calledHelp = true; }
60 //valid parameters for this command (same list as getValidParameters(); keep the two in sync)
61 string Array[] = {"phylip", "column", "name", "cutoff", "precision", "group","outputdir","inputdir","sim"};
62 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
// Split the option string into a name -> value map.
64 OptionParser parser(option);
65 map<string, string> parameters = parser.getParameters();
67 ValidParameters validParameter;
68 map<string,string>::iterator it;
70 //check to make sure all parameters are valid for command
71 for (it = parameters.begin(); it != parameters.end(); it++) {
72 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
// NOTE(review): presumably resets GlobalData state left over from an earlier read -- confirm.
75 globaldata->newRead();
77 //if the user changes the input directory command factory will send this info to us in the output parameter
78 string inputDir = validParameter.validFile(parameters, "inputdir", false);
79 if (inputDir == "not found"){ inputDir = ""; }
// For each file parameter below: when the supplied name has no path component
// (m->hasPath returns ""), inputDir is prepended to it.
82 it = parameters.find("phylip");
83 //user has given a phylip file
84 if(it != parameters.end()){
85 path = m->hasPath(it->second);
86 //if the user has not given a path then, add inputdir. else leave path alone.
87 if (path == "") { parameters["phylip"] = inputDir + it->second; }
90 it = parameters.find("column");
91 //user has given a column file
92 if(it != parameters.end()){
93 path = m->hasPath(it->second);
94 //if the user has not given a path then, add inputdir. else leave path alone.
95 if (path == "") { parameters["column"] = inputDir + it->second; }
98 it = parameters.find("name");
99 //user has given a name file
100 if(it != parameters.end()){
101 path = m->hasPath(it->second);
102 //if the user has not given a path then, add inputdir. else leave path alone.
103 if (path == "") { parameters["name"] = inputDir + it->second; }
106 it = parameters.find("group");
107 //user has given a group file
108 if(it != parameters.end()){
109 path = m->hasPath(it->second);
110 //if the user has not given a path then, add inputdir. else leave path alone.
111 if (path == "") { parameters["group"] = inputDir + it->second; }
115 //if the user changes the output directory command factory will send this info to us in the output parameter
116 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
118 //check for required parameters
// validFile results are compared against two sentinel strings: "not open" aborts the command,
// "not found" means the parameter was not supplied and the member is cleared.
119 phylipfile = validParameter.validFile(parameters, "phylip", true);
120 if (phylipfile == "not open") { abort = true; }
121 else if (phylipfile == "not found") { phylipfile = ""; }
122 else { globaldata->setPhylipFile(phylipfile); globaldata->setFormat("phylip"); }
124 columnfile = validParameter.validFile(parameters, "column", true);
125 if (columnfile == "not open") { abort = true; }
126 else if (columnfile == "not found") { columnfile = ""; }
127 else { globaldata->setColumnFile(columnfile); globaldata->setFormat("column"); }
129 groupfile = validParameter.validFile(parameters, "group", true);
130 if (groupfile == "not open") { abort = true; }
131 else if (groupfile == "not found") { groupfile = ""; }
// Record the group file in GlobalData; the GroupMap itself is built further down, for the matrix format.
133 globaldata->setGroupFile(groupfile);
134 //groupMap = new GroupMap(groupfile);
135 //groupMap->readMap();
138 namefile = validParameter.validFile(parameters, "name", true);
139 if (namefile == "not open") { abort = true; }
140 else if (namefile == "not found") { namefile = ""; }
141 else { globaldata->setNameFile(namefile); }
143 //a phylip file together with a group file switches the format to "matrix"
144 if ((phylipfile != "") && (groupfile != "")) {
145 globaldata->setFormat("matrix"); }
// Exactly one of phylip / column has to be supplied.
147 if ((phylipfile == "") && (columnfile == "")) { m->mothurOut("When executing a read.dist command you must enter a phylip or a column."); m->mothurOutEndLine(); abort = true; }
148 else if ((phylipfile != "") && (columnfile != "")) { m->mothurOut("When executing a read.dist command you must enter ONLY ONE of the following: phylip or column."); m->mothurOutEndLine(); abort = true; }
// The column format additionally needs a name file.
// NOTE(review): this is the only message here written to cout instead of m->mothurOut, so it
// presumably bypasses whatever m->mothurOut does besides printing -- consider making it consistent.
150 if (columnfile != "") {
151 if (namefile == "") { cout << "You need to provide a namefile if you are going to use the column format." << endl; abort = true; }
154 //check for optional parameter and set defaults
155 // ...at some point we should add some additional type checking...
156 //get user cutoff and precision or use defaults
// precision defaults to "100".
158 temp = validParameter.validFile(parameters, "precision", false); if (temp == "not found") { temp = "100"; }
159 convert(temp, precision);
// sim defaults to "F"; the parsed flag is also published through globaldata->sim.
161 temp = validParameter.validFile(parameters, "sim", false); if (temp == "not found") { temp = "F"; }
162 sim = m->isTrue(temp);
163 globaldata->sim = sim;
// cutoff defaults to "10".
165 temp = validParameter.validFile(parameters, "cutoff", false); if (temp == "not found") { temp = "10"; }
166 convert(temp, cutoff);
// Pads the cutoff by 5 / (precision * 10), e.g. 0.005 for the default precision of 100;
// presumably so that distances which round to the cutoff are still kept -- confirm.
// NOTE(review): precision is not range-checked here; a value of 0 would make the divisor zero.
167 cutoff += (5 / (precision * 10.0));
// Allocation happens only when nothing above set abort.
169 if (abort == false) {
170 distFileName = globaldata->inputFileName;
171 format = globaldata->getFormat();
173 if (format == "column") { read = new ReadColumnMatrix(distFileName); }
174 else if (format == "phylip") { read = new ReadPhylipMatrix(distFileName); }
175 else if (format == "matrix") {
176 groupMap = new GroupMap(groupfile);
177 int error = groupMap->readMap();
// A readMap() result of 1 is treated as failure: the map is released and the command aborts.
178 if (error == 1) { delete groupMap; abort = true; }
// Release any GroupMap GlobalData already holds, then store the new one there.
// NOTE(review): this must only run when readMap() succeeded, otherwise the pointer deleted
// just above would be stored -- confirm the guarding else is in place.
180 if (globaldata->gGroupmap != NULL) { delete globaldata->gGroupmap; }
181 globaldata->gGroupmap = groupMap;
// Column / phylip readers get the padded cutoff and a NameAssignment built from namefile.
185 if (format != "matrix" ) {
186 read->setCutoff(cutoff);
// NOTE(review): namefile can be "" for phylip input; presumably this is guarded -- confirm.
189 nameMap = new NameAssignment(namefile);
// Any exception is reported through m->errorOut, tagged with class and method name.
200 catch(exception& e) {
201 m->errorOut(e, "ReadDistCommand", "ReadDistCommand");
205 //**********************************************************************************************************************
// Prints the usage text for read.dist through m->mothurOut: the accepted parameters, the two
// supported invocations (phylip/column for clustering, phylip/column plus group for libshuff)
// and the defaults for sim (false), cutoff (10.00) and precision (100).
207 void ReadDistCommand::help(){
209 m->mothurOut("The read.dist command parameter options are phylip or column, group, name, sim, cutoff and precision\n");
210 m->mothurOut("The read.dist command can be used in two ways. The first is to read a phylip or column and run the cluster command\n");
211 m->mothurOut("For this use the read.dist command should be in the following format: \n");
212 m->mothurOut("read.dist(phylip=yourDistFile, name=yourNameFile, cutoff=yourCutoff, precision=yourPrecision) \n");
213 m->mothurOut("The phylip or column parameter is required, but only one may be used. If you use a column file the name filename is required. \n");
214 m->mothurOut("The sim parameter is used to indicate that your distance file contains similarity values instead of distance values. The default is false, if sim=true then mothur will convert the similarity values to distances. \n");
215 m->mothurOut("If you do not provide a cutoff value 10.00 is assumed. If you do not provide a precision value then 100 is assumed.\n");
216 m->mothurOut("The second way to use the read.dist command is to read a phylip or column and a group, so you can use the libshuff command.\n");
217 m->mothurOut("For this use the read.dist command should be in the following format: \n");
218 m->mothurOut("read.dist(phylip=yourPhylipfile, group=yourGroupFile). The cutoff and precision parameters are not valid with this use. \n");
219 m->mothurOut("Note: No spaces between parameter labels (i.e. phylip), '=' and parameters (i.e.yourPhylipfile).\n\n");
// Any exception is reported through m->errorOut, tagged with class and method name.
221 catch(exception& e) {
222 m->errorOut(e, "ReadDistCommand", "help");
227 //**********************************************************************************************************************
// Destructor: cleanup is considered only when construction did not abort and the format is
// not "matrix", i.e. the cases in which the constructor allocated a reader.
// NOTE(review): the statements inside the inner condition are not visible here; presumably they
// release the reader and the name map created by the constructor -- confirm.
229 ReadDistCommand::~ReadDistCommand(){
230 if (abort == false) {
231 if (format != "matrix") {
238 //**********************************************************************************************************************
// Runs the read.dist command. For the "matrix" format it loads a FullMatrix, rewrites the group
// file when it lists more sequences than the distance file, and stores matrix and group map in
// GlobalData. For the other formats it stores the reader's list vector and sparse matrix in
// GlobalData. Several checks of m->control_pressed return 0 early.
// Returns 0 when the command was aborted only because help was requested, 2 for any other abort.
239 int ReadDistCommand::execute(){
242 if (abort == true) { if (calledHelp) { return 0; } return 2; }
// Wall-clock start, used for the timing message at the end.
244 time_t start = time(NULL);
247 if (format == "matrix") {
249 m->openInputFile(distFileName, in);
250 matrix = new FullMatrix(in); //reads the matrix file
// NOTE(review): the constructor also stores this groupMap in globaldata->gGroupmap; deleting it
// here without resetting that pointer appears to leave it dangling -- confirm.
253 if (m->control_pressed) { delete groupMap; delete matrix; return 0; }
255 //if the group file lists more sequences than the distance file, write a trimmed group file
256 if (matrix->getNumSeqs() < groupMap->getNumSeqs()) {
257 m->mothurOut("Your distance file contains " + toString(matrix->getNumSeqs()) + " sequences, and your group file contains " + toString(groupMap->getNumSeqs()) + " sequences."); m->mothurOutEndLine();
258 //create new group file
// With no output directory set, fall back to the path component of the group file.
259 if(outputDir == "") { outputDir += m->hasPath(groupfile); }
// The "editted.groups" suffix (spelling included) is part of the produced file name.
261 string newGroupFile = outputDir + m->getRootName(m->getSimpleName(groupfile)) + "editted.groups";
262 outputNames.push_back(newGroupFile);
264 m->openOutputFile(newGroupFile, outGroups);
// One "name<TAB>group" line per row of the matrix.
266 for (int i = 0; i < matrix->getNumSeqs(); i++) {
// On cancel the partially written file is closed and removed.
267 if (m->control_pressed) { delete groupMap; delete matrix; outGroups.close(); remove(newGroupFile.c_str()); return 0; }
269 Names temp = matrix->getRowInfo(i);
270 outGroups << temp.seqName << '\t' << temp.groupName << endl;
274 m->mothurOut(newGroupFile + " is a new group file containing only the sequence that are in your distance file. I will read this file instead."); m->mothurOutEndLine();
// Swap the old group map for one built from the new file and point GlobalData at that file.
277 delete groupMap; groupMap = NULL;
278 groupfile = newGroupFile;
279 globaldata->setGroupFile(groupfile);
281 groupMap = new GroupMap(groupfile);
284 if (m->control_pressed) { delete groupMap; delete matrix; remove(newGroupFile.c_str()); return 0; }
286 globaldata->gGroupmap = groupMap;
289 //memory leak prevention
290 if (globaldata->gMatrix != NULL) { delete globaldata->gMatrix; }
291 globaldata->gMatrix = matrix; //save matrix for coverage commands
// numDists is taken from element 1 of matrix->getSizes().
292 numDists = matrix->getSizes()[1];
295 //to prevent memory leak
297 if (m->control_pressed) { return 0; }
// Column / phylip path: replace the list vector and sparse matrix held by GlobalData with the
// ones obtained from the reader, deleting the previous objects first.
299 if (globaldata->gListVector != NULL) { delete globaldata->gListVector; }
300 globaldata->gListVector = read->getListVector();
302 if (globaldata->gSparseMatrix != NULL) { delete globaldata->gSparseMatrix; }
303 globaldata->gSparseMatrix = read->getMatrix();
304 numDists = globaldata->gSparseMatrix->getNNodes();
307 if (m->control_pressed) { return 0; }
// List every file written by this run, if any.
309 if (outputNames.size() != 0) {
310 m->mothurOutEndLine();
311 m->mothurOut("Output File Name: "); m->mothurOutEndLine();
312 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
313 m->mothurOutEndLine();
// Elapsed wall-clock seconds since start.
316 m->mothurOut("It took " + toString(time(NULL) - start) + " secs to read "); m->mothurOutEndLine();
// Any exception is reported through m->errorOut, tagged with class and method name.
320 catch(exception& e) {
321 m->errorOut(e, "ReadDistCommand", "execute");