]> git.donarmstrong.com Git - mothur.git/blob - readdistcommand.cpp
630f6493fef9b542dfd8fea434f935987b6ac98f
[mothur.git] / readdistcommand.cpp
1 /*
2  *  readdistcommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 1/20/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "readdistcommand.h"
11 #include "readphylip.h"
12 #include "readcolumn.h"
13 #include "readmatrix.hpp"
14
15 //**********************************************************************************************************************
16 vector<string> ReadDistCommand::getValidParameters(){   
17         try {
18                 string Array[] =  {"phylip", "column", "name", "cutoff", "precision", "group","outputdir","inputdir","sim"};
19                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
20                 return myArray;
21         }
22         catch(exception& e) {
23                 m->errorOut(e, "ReadDistCommand", "getValidParameters");
24                 exit(1);
25         }
26 }
27 //**********************************************************************************************************************
28 vector<string> ReadDistCommand::getRequiredParameters(){        
29         try {
30                 string Array[] =  {"phylip","column","or"};
31                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
32                 return myArray;
33         }
34         catch(exception& e) {
35                 m->errorOut(e, "ReadDistCommand", "getRequiredParameters");
36                 exit(1);
37         }
38 }
39 //**********************************************************************************************************************
40 vector<string> ReadDistCommand::getRequiredFiles(){     
41         try {
42                 vector<string> myArray;
43                 return myArray;
44         }
45         catch(exception& e) {
46                 m->errorOut(e, "ReadDistCommand", "getRequiredFiles");
47                 exit(1);
48         }
49 }
50 //**********************************************************************************************************************
51 ReadDistCommand::ReadDistCommand(string option) {
52         try {
53                 globaldata = GlobalData::getInstance();
54                 abort = false; calledHelp = false;   
55                 
56                 //allow user to run help
57                 if(option == "help") { help(); abort = true; calledHelp = true; }
58                 
59                 else {
60                         //valid paramters for this command
61                         string Array[] =  {"phylip", "column", "name", "cutoff", "precision", "group","outputdir","inputdir","sim"};
62                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
63                         
64                         OptionParser parser(option);
65                         map<string, string> parameters = parser.getParameters();
66                         
67                         ValidParameters validParameter;
68                         map<string,string>::iterator it;
69                 
70                         //check to make sure all parameters are valid for command
71                         for (it = parameters.begin(); it != parameters.end(); it++) { 
72                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
73                         }
74                         
75                         globaldata->newRead();
76                         
77                         //if the user changes the input directory command factory will send this info to us in the output parameter 
78                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
79                         if (inputDir == "not found"){   inputDir = "";          }
80                         else {
81                                 string path;
82                                 it = parameters.find("phylip");
83                                 //user has given a template file
84                                 if(it != parameters.end()){ 
85                                         path = m->hasPath(it->second);
86                                         //if the user has not given a path then, add inputdir. else leave path alone.
87                                         if (path == "") {       parameters["phylip"] = inputDir + it->second;           }
88                                 }
89                                 
90                                 it = parameters.find("column");
91                                 //user has given a template file
92                                 if(it != parameters.end()){ 
93                                         path = m->hasPath(it->second);
94                                         //if the user has not given a path then, add inputdir. else leave path alone.
95                                         if (path == "") {       parameters["column"] = inputDir + it->second;           }
96                                 }
97                                 
98                                 it = parameters.find("name");
99                                 //user has given a template file
100                                 if(it != parameters.end()){ 
101                                         path = m->hasPath(it->second);
102                                         //if the user has not given a path then, add inputdir. else leave path alone.
103                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
104                                 }
105                                 
106                                 it = parameters.find("group");
107                                 //user has given a template file
108                                 if(it != parameters.end()){ 
109                                         path = m->hasPath(it->second);
110                                         //if the user has not given a path then, add inputdir. else leave path alone.
111                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
112                                 }
113                         }
114
115                         //if the user changes the output directory command factory will send this info to us in the output parameter 
116                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
117
118                         //check for required parameters
119                         phylipfile = validParameter.validFile(parameters, "phylip", true);
120                         if (phylipfile == "not open") { abort = true; }
121                         else if (phylipfile == "not found") { phylipfile = ""; }        
122                         else {  globaldata->setPhylipFile(phylipfile);  globaldata->setFormat("phylip");        }
123                         
124                         columnfile = validParameter.validFile(parameters, "column", true);
125                         if (columnfile == "not open") { abort = true; } 
126                         else if (columnfile == "not found") { columnfile = ""; }
127                         else {  globaldata->setColumnFile(columnfile); globaldata->setFormat("column"); }
128                         
129                         groupfile = validParameter.validFile(parameters, "group", true);
130                         if (groupfile == "not open") { abort = true; }  
131                         else if (groupfile == "not found") { groupfile = ""; }
132                         else {  
133                                 globaldata->setGroupFile(groupfile); 
134                                 //groupMap = new GroupMap(groupfile);
135                                 //groupMap->readMap();
136                         }
137
138                         namefile = validParameter.validFile(parameters, "name", true);
139                         if (namefile == "not open") { abort = true; }   
140                         else if (namefile == "not found") { namefile = ""; }
141                         else {  globaldata->setNameFile(namefile);      }
142                         
143                         //you are doing a list and group shared
144                         if ((phylipfile != "") && (groupfile != "")) { 
145                         globaldata->setFormat("matrix"); }
146                         
147                         if ((phylipfile == "") && (columnfile == "")) { m->mothurOut("When executing a read.dist command you must enter a phylip or a column."); m->mothurOutEndLine(); abort = true; }
148                         else if ((phylipfile != "") && (columnfile != "")) { m->mothurOut("When executing a read.dist command you must enter ONLY ONE of the following: phylip or column."); m->mothurOutEndLine(); abort = true; }
149                 
150                         if (columnfile != "") {
151                                 if (namefile == "") {  cout << "You need to provide a namefile if you are going to use the column format." << endl; abort = true; }
152                         }
153                         
154                         //check for optional parameter and set defaults
155                         // ...at some point should added some additional type checking...
156                         //get user cutoff and precision or use defaults
157                         string temp;
158                         temp = validParameter.validFile(parameters, "precision", false);                if (temp == "not found") { temp = "100"; }
159                         convert(temp, precision); 
160                         
161                         temp = validParameter.validFile(parameters, "sim", false);                              if (temp == "not found") { temp = "F"; }
162                         sim = m->isTrue(temp); 
163                         globaldata->sim = sim;
164                         
165                         temp = validParameter.validFile(parameters, "cutoff", false);                   if (temp == "not found") { temp = "10"; }
166                         convert(temp, cutoff); 
167                         cutoff += (5 / (precision * 10.0)); 
168                         
169                         if (abort == false) {
170                                 distFileName = globaldata->inputFileName;
171                                 format = globaldata->getFormat();       
172                 
173                                 if (format == "column") { read = new ReadColumnMatrix(distFileName); }  
174                                 else if (format == "phylip") { read = new ReadPhylipMatrix(distFileName); }
175                                 else if (format == "matrix") { 
176                                         groupMap = new GroupMap(groupfile);
177                                         int error = groupMap->readMap();
178                                         if (error == 1) { delete groupMap; abort = true; }
179                                         else {
180                                                 if (globaldata->gGroupmap != NULL) { delete globaldata->gGroupmap;  }
181                                                 globaldata->gGroupmap = groupMap;
182                                         }
183                                 }
184                 
185                                 if (format != "matrix" ) {
186                                         read->setCutoff(cutoff);
187         
188                                         if(namefile != ""){     
189                                                 nameMap = new NameAssignment(namefile);
190                                                 nameMap->readMap();
191                                         }else{
192                                                 nameMap = NULL;
193                                         }
194                                 }
195                         }
196
197                 }
198
199         }
200         catch(exception& e) {
201                 m->errorOut(e, "ReadDistCommand", "ReadDistCommand");
202                 exit(1);
203         }
204 }
205 //**********************************************************************************************************************
206
207 void ReadDistCommand::help(){
208         try {
209                 m->mothurOut("The read.dist command parameter options are phylip or column, group, name, sim, cutoff and precision\n");
210                 m->mothurOut("The read.dist command can be used in two ways.  The first is to read a phylip or column and run the cluster command\n");
211                 m->mothurOut("For this use the read.dist command should be in the following format: \n");
212                 m->mothurOut("read.dist(phylip=yourDistFile, name=yourNameFile, cutoff=yourCutoff, precision=yourPrecision) \n");
213                 m->mothurOut("The phylip or column parameter is required, but only one may be used.  If you use a column file the name filename is required. \n");
214                 m->mothurOut("The sim parameter is used to indicate that your distance file contains similarity values instead of distance values. The default is false, if sim=true then mothur will convert the similarity values to distances. \n");
215                 m->mothurOut("If you do not provide a cutoff value 10.00 is assumed. If you do not provide a precision value then 100 is assumed.\n");
216                 m->mothurOut("The second way to use the read.dist command is to read a phylip or column and a group, so you can use the libshuff command.\n");
217                 m->mothurOut("For this use the read.dist command should be in the following format: \n");
218                 m->mothurOut("read.dist(phylip=yourPhylipfile, group=yourGroupFile). The cutoff and precision parameters are not valid with this use.  \n");
219                 m->mothurOut("Note: No spaces between parameter labels (i.e. phylip), '=' and parameters (i.e.yourPhylipfile).\n\n");
220         }
221         catch(exception& e) {
222                 m->errorOut(e, "ReadDistCommand", "help");
223                 exit(1);
224         }
225 }
226
227 //**********************************************************************************************************************
228
229 ReadDistCommand::~ReadDistCommand(){
230         if (abort == false) {
231                 if (format != "matrix") { 
232                         delete read; 
233                         delete nameMap; 
234                 }
235         }
236 }
237
238 //**********************************************************************************************************************
239 int ReadDistCommand::execute(){
240         try {
241                 
242                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
243
244                 time_t start = time(NULL);
245                 size_t numDists = 0;
246                 
247                 if (format == "matrix") {
248                         ifstream in;
249                         m->openInputFile(distFileName, in);
250                         matrix = new FullMatrix(in); //reads the matrix file
251                         in.close();
252                         
253                         if (m->control_pressed) { delete groupMap; delete matrix; return 0; }
254                         
255                         //if files don't match...
256                         if (matrix->getNumSeqs() < groupMap->getNumSeqs()) {  
257                                 m->mothurOut("Your distance file contains " + toString(matrix->getNumSeqs()) + " sequences, and your group file contains " + toString(groupMap->getNumSeqs()) + " sequences.");  m->mothurOutEndLine();                         
258                                 //create new group file
259                                 if(outputDir == "") { outputDir += m->hasPath(groupfile); }
260                                 
261                                 string newGroupFile = outputDir + m->getRootName(m->getSimpleName(groupfile)) + "editted.groups";
262                                 outputNames.push_back(newGroupFile);
263                                 ofstream outGroups;
264                                 m->openOutputFile(newGroupFile, outGroups);
265                                 
266                                 for (int i = 0; i < matrix->getNumSeqs(); i++) {
267                                         if (m->control_pressed) { delete groupMap; delete matrix; outGroups.close(); remove(newGroupFile.c_str()); return 0; }
268                                         
269                                         Names temp = matrix->getRowInfo(i);
270                                         outGroups << temp.seqName << '\t' << temp.groupName << endl;
271                                 }
272                                 outGroups.close();
273                                 
274                                 m->mothurOut(newGroupFile + " is a new group file containing only the sequence that are in your distance file. I will read this file instead."); m->mothurOutEndLine();
275                                 
276                                 //read new groupfile
277                                 delete groupMap; groupMap = NULL;
278                                 groupfile = newGroupFile;
279                                 globaldata->setGroupFile(groupfile); 
280                                 
281                                 groupMap = new GroupMap(groupfile);
282                                 groupMap->readMap();
283                                 
284                                 if (m->control_pressed) { delete groupMap; delete matrix; remove(newGroupFile.c_str()); return 0; }
285         
286                                 globaldata->gGroupmap = groupMap;
287                         }
288                         
289                         //memory leak prevention
290                         if (globaldata->gMatrix != NULL) { delete globaldata->gMatrix;  }
291                         globaldata->gMatrix = matrix; //save matrix for coverage commands
292                         numDists = matrix->getSizes()[1];
293                 } else {
294                         read->read(nameMap);
295                         //to prevent memory leak
296                         
297                         if (m->control_pressed) {  return 0; }
298                 
299                         if (globaldata->gListVector != NULL) {  delete globaldata->gListVector;  }
300                         globaldata->gListVector = read->getListVector();
301
302                         if (globaldata->gSparseMatrix != NULL) { delete globaldata->gSparseMatrix;  }
303                         globaldata->gSparseMatrix = read->getMatrix();
304                         numDists = globaldata->gSparseMatrix->getNNodes();
305                 }
306                 
307                 if (m->control_pressed) {  return 0; }
308
309                 if (outputNames.size() != 0) {
310                         m->mothurOutEndLine();
311                         m->mothurOut("Output File Name: "); m->mothurOutEndLine();
312                         for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
313                         m->mothurOutEndLine();
314                 }
315                 
316                 m->mothurOut("It took " + toString(time(NULL) - start) + " secs to read "); m->mothurOutEndLine();
317                 return 0;
318                 
319         }
320         catch(exception& e) {
321                 m->errorOut(e, "ReadDistCommand", "execute");
322                 exit(1);
323         }
324 }