5 * Created by Sarah Westcott on 1/20/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10 #include "readdistcommand.h"
11 #include "readphylip.h"
12 #include "readcolumn.h"
13 #include "readmatrix.hpp"
// Builds a read.dist command from its raw option string.
//
// Visible steps: handle the "help" request, validate every supplied parameter
// name against the allowed list, record the phylip/column/group/name files in
// GlobalData, enforce that exactly one of phylip/column was given (and that a
// column file comes with a name file), read cutoff/precision, and finally
// create either a distance-matrix reader or a GroupMap depending on the format.
// Any failure along the way sets `abort`.
//
// option: the text the user typed for this command, or the literal "help".
15 ReadDistCommand::ReadDistCommand(string option){
17 globaldata = GlobalData::getInstance();
20 //allow user to run help
21 if(option == "help") { help(); abort = true; }
24 //valid parameters for this command
25 string Array[] = {"phylip", "column", "name", "cutoff", "precision", "group"};
// copy the C array into a vector; sizeof ratio yields the element count
26 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
28 OptionParser parser(option);
29 map<string, string> parameters = parser.getParameters();
31 ValidParameters validParameter;
33 //check to make sure all parameters are valid for command
34 for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) {
35 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
38 globaldata->newRead();
40 //check for required parameters
// validFile() results are compared against the sentinels "not open" (abort)
// and "not found" (treated as "parameter not supplied", stored as "").
41 phylipfile = validParameter.validFile(parameters, "phylip", true);
42 if (phylipfile == "not open") { abort = true; }
43 else if (phylipfile == "not found") { phylipfile = ""; }
44 else { globaldata->setPhylipFile(phylipfile); globaldata->setFormat("phylip"); }
46 columnfile = validParameter.validFile(parameters, "column", true);
47 if (columnfile == "not open") { abort = true; }
48 else if (columnfile == "not found") { columnfile = ""; }
49 else { globaldata->setColumnFile(columnfile); globaldata->setFormat("column"); }
51 groupfile = validParameter.validFile(parameters, "group", true);
52 if (groupfile == "not open") { abort = true; }
53 else if (groupfile == "not found") { groupfile = ""; }
55 globaldata->setGroupFile(groupfile);
56 //groupMap = new GroupMap(groupfile);
57 //groupMap->readMap();
60 namefile = validParameter.validFile(parameters, "name", true);
61 if (namefile == "not open") { abort = true; }
62 else if (namefile == "not found") { namefile = ""; }
63 else { globaldata->setNameFile(namefile); }
65 //a phylip file together with a group file overrides the format to "matrix"
66 if ((phylipfile != "") && (groupfile != "")) {
67 globaldata->setFormat("matrix"); }
// exactly one of phylip / column must have been supplied
69 if ((phylipfile == "") && (columnfile == "")) { mothurOut("When executing a read.dist command you must enter a phylip or a column."); mothurOutEndLine(); abort = true; }
70 else if ((phylipfile != "") && (columnfile != "")) { mothurOut("When executing a read.dist command you must enter ONLY ONE of the following: phylip or column."); mothurOutEndLine(); abort = true; }
72 if (columnfile != "") {
// NOTE(review): this error is written with cout, whereas the two errors above
// go through mothurOut/mothurOutEndLine - confirm whether that is intentional.
73 if (namefile == "") { cout << "You need to provide a namefile if you are going to use the column format." << endl; abort = true; }
76 //check for optional parameter and set defaults
77 // ...at some point we should add some additional type checking...
78 //get user cutoff and precision or use defaults
// NOTE(review): no declaration of `temp` is visible in this constructor - confirm where it is declared.
// precision defaults to "100" and cutoff to "10" when the parameter is absent
80 temp = validParameter.validFile(parameters, "precision", false); if (temp == "not found") { temp = "100"; }
81 convert(temp, precision);
83 temp = validParameter.validFile(parameters, "cutoff", false); if (temp == "not found") { temp = "10"; }
84 convert(temp, cutoff);
// widens the cutoff by 0.5/precision; presumably a rounding allowance - TODO confirm
85 cutoff += (5 / (precision * 10.0));
88 distFileName = globaldata->inputFileName;
89 format = globaldata->getFormat();
// pick the reader matching the format; "matrix" builds a GroupMap instead of a reader
91 if (format == "column") { read = new ReadColumnMatrix(distFileName); }
92 else if (format == "phylip") { read = new ReadPhylipMatrix(distFileName); }
93 else if (format == "matrix") {
94 groupMap = new GroupMap(groupfile);
// replace any GroupMap previously stored in GlobalData, deleting the old one first
97 if (globaldata->gGroupmap != NULL) { delete globaldata->gGroupmap; }
98 globaldata->gGroupmap = groupMap;
101 if (format != "matrix" ) {
102 read->setCutoff(cutoff);
105 nameMap = new NameAssignment(namefile);
// exceptions are reported through errorOut with the class and method name
116 catch(exception& e) {
117 errorOut(e, "ReadDistCommand", "ReadDistCommand");
121 //**********************************************************************************************************************
// Prints the usage text for the read.dist command, one mothurOut call per
// line of help. Exceptions are reported through errorOut.
123 void ReadDistCommand::help(){
125 mothurOut("The read.dist command parameter options are phylip or column, group, name, cutoff and precision\n");
126 mothurOut("The read.dist command can be used in two ways. The first is to read a phylip or column and run the cluster command\n");
127 mothurOut("For this use the read.dist command should be in the following format: \n");
128 mothurOut("read.dist(phylip=yourDistFile, name=yourNameFile, cutoff=yourCutoff, precision=yourPrecision) \n");
129 mothurOut("The phylip or column parameter is required, but only one may be used. If you use a column file the name filename is required. \n");
130 mothurOut("If you do not provide a cutoff value 10.00 is assumed. If you do not provide a precision value then 100 is assumed.\n");
// NOTE(review): the text below offers "phylip or column" with a group file, but the
// constructor only switches to the matrix format for phylip + group - confirm the wording.
131 mothurOut("The second way to use the read.dist command is to read a phylip or column and a group, so you can use the libshuff command.\n");
132 mothurOut("For this use the read.dist command should be in the following format: \n");
133 mothurOut("read.dist(phylip=yourPhylipfile, group=yourGroupFile). The cutoff and precision parameters are not valid with this use. \n");
134 mothurOut("Note: No spaces between parameter labels (i.e. phylip), '=' and parameters (i.e.yourPhylipfile).\n\n");
136 catch(exception& e) {
137 errorOut(e, "ReadDistCommand", "help");
142 //**********************************************************************************************************************
// Destructor. Cleanup is guarded twice: it only happens when the command was
// not aborted and the format is not "matrix".
// NOTE(review): the statements inside the inner guard are not visible here;
// presumably they release the reader-related objects - confirm.
144 ReadDistCommand::~ReadDistCommand(){
145 if (abort == false) {
146 if (format != "matrix") {
153 //**********************************************************************************************************************
// Runs the read.dist command and publishes the result through GlobalData.
//
// For the "matrix" format it loads a FullMatrix from distFileName, reconciles
// it with the group map (writing a reduced group file when the matrix holds
// fewer sequences than the group map) and stores it in globaldata->gMatrix.
// The remaining statements store the reader's list vector and sparse matrix
// in GlobalData and set up per-threshold distance bins.
//
// Returns 0 immediately when the constructor flagged `abort`.
154 int ReadDistCommand::execute(){
157 if (abort == true) { return 0; }
// start time for the elapsed-seconds message at the end
159 time_t start = time(NULL);
162 if (format == "matrix") {
// NOTE(review): declarations of `in` (and `outGroups` below) are not visible here - confirm.
164 openInputFile(distFileName, in);
165 matrix = new FullMatrix(in); //reads the matrix file
168 //if files don't match...
169 if (matrix->getNumSeqs() < groupMap->getNumSeqs()) {
170 mothurOut("Your distance file contains " + toString(matrix->getNumSeqs()) + " sequences, and your group file contains " + toString(groupMap->getNumSeqs()) + " sequences."); mothurOutEndLine();
171 //create new group file
172 string newGroupFile = getRootName(groupfile) + "editted.groups";
174 openOutputFile(newGroupFile, outGroups);
// one "<seqName>\t<groupName>" line per row of the matrix
176 for (int i = 0; i < matrix->getNumSeqs(); i++) {
177 Names temp = matrix->getRowInfo(i);
178 outGroups << temp.seqName << '\t' << temp.groupName << endl;
182 mothurOut(newGroupFile + " is a new group file containing only the sequence that are in your distance file. I will read this file instead."); mothurOutEndLine();
// NOTE(review): the constructor stored this same pointer in globaldata->gGroupmap, so
// that global dangles until it is reassigned below (e.g. if GroupMap construction throws) - confirm.
185 delete groupMap; groupMap = NULL;
186 groupfile = newGroupFile;
187 globaldata->setGroupFile(groupfile);
189 groupMap = new GroupMap(groupfile);
192 globaldata->gGroupmap = groupMap;
195 //memory leak prevention
196 if (globaldata->gMatrix != NULL) { delete globaldata->gMatrix; }
197 globaldata->gMatrix = matrix; //save matrix for coverage commands
198 numDists = matrix->getSizes()[1];
201 //to prevent memory leak
// replace the list vector and sparse matrix held by GlobalData with the reader's results
203 if (globaldata->gListVector != NULL) { delete globaldata->gListVector; }
204 globaldata->gListVector = read->getListVector();
206 if (globaldata->gSparseMatrix != NULL) { delete globaldata->gSparseMatrix; }
207 globaldata->gSparseMatrix = read->getMatrix();
208 numDists = globaldata->gSparseMatrix->getNNodes();
209 //cout << "matrix contains " << numDists << " distances." << endl;
// number of bins: cutoff divided by the step 1/precision, truncated to int
211 int lines = cutoff / (1.0/precision);
212 vector<float> dist_cutoff(lines+1,0);
// bin i has threshold (i + 0.5) / precision
213 for (int i = 0; i <= lines;i++) {
214 dist_cutoff[i] = (i + 0.5) / precision;
216 vector<int> dist_count(lines+1,0);
217 list<PCell>::iterator currentCell;
218 SparseMatrix* smatrix = globaldata->gSparseMatrix;
// compare every stored distance against the thresholds in ascending order
// NOTE(review): the action taken on a match is not visible here; presumably it
// increments dist_count[i] - confirm.
219 for (currentCell = smatrix->begin(); currentCell != smatrix->end(); currentCell++) {
220 for (int i = 0; i <= lines;i++) {
221 if (currentCell->dist < dist_cutoff[i]) {
228 // string dist_string = "Dist:";
229 // string count_string = "Count: ";
230 //for (int i = 0; i <= lines;i++) {
231 //dist_string = dist_string.append("\t").append(toString(dist_cutoff[i]));
232 // count_string = count_string.append("\t").append(toString(dist_count[i]));
234 //mothurOut(dist_string); mothurOutEndLine(); mothurOut(count_string); mothurOutEndLine();
// report wall-clock seconds elapsed since `start`
236 mothurOut("It took " + toString(time(NULL) - start) + " secs to read "); mothurOutEndLine();
240 catch(exception& e) {
241 errorOut(e, "ReadDistCommand", "execute");