]> git.donarmstrong.com Git - mothur.git/blob - readdistcommand.cpp
c83ed8a9a6031663869176a3dda21923229da03a
[mothur.git] / readdistcommand.cpp
1 /*
2  *  readdistcommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 1/20/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "readdistcommand.h"
11 #include "readphylip.h"
12 #include "readcolumn.h"
13 #include "readmatrix.hpp"
14
15 ReadDistCommand::ReadDistCommand(string option) {
16         try {
17                 globaldata = GlobalData::getInstance();
18                 abort = false;
19                 
20                 //allow user to run help
21                 if(option == "help") { help(); abort = true; }
22                 
23                 else {
24                         //valid paramters for this command
25                         string Array[] =  {"phylip", "column", "name", "cutoff", "precision", "group","outputdir","inputdir"};
26                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
27                         
28                         OptionParser parser(option);
29                         map<string, string> parameters = parser.getParameters();
30                         
31                         ValidParameters validParameter;
32                         map<string,string>::iterator it;
33                 
34                         //check to make sure all parameters are valid for command
35                         for (it = parameters.begin(); it != parameters.end(); it++) { 
36                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
37                         }
38                         
39                         globaldata->newRead();
40                         
41                         //if the user changes the input directory command factory will send this info to us in the output parameter 
42                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
43                         if (inputDir == "not found"){   inputDir = "";          }
44                         else {
45                                 string path;
46                                 it = parameters.find("phylip");
47                                 //user has given a template file
48                                 if(it != parameters.end()){ 
49                                         path = hasPath(it->second);
50                                         //if the user has not given a path then, add inputdir. else leave path alone.
51                                         if (path == "") {       parameters["phylip"] = inputDir + it->second;           }
52                                 }
53                                 
54                                 it = parameters.find("column");
55                                 //user has given a template file
56                                 if(it != parameters.end()){ 
57                                         path = hasPath(it->second);
58                                         //if the user has not given a path then, add inputdir. else leave path alone.
59                                         if (path == "") {       parameters["column"] = inputDir + it->second;           }
60                                 }
61                                 
62                                 it = parameters.find("name");
63                                 //user has given a template file
64                                 if(it != parameters.end()){ 
65                                         path = hasPath(it->second);
66                                         //if the user has not given a path then, add inputdir. else leave path alone.
67                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
68                                 }
69                                 
70                                 it = parameters.find("group");
71                                 //user has given a template file
72                                 if(it != parameters.end()){ 
73                                         path = hasPath(it->second);
74                                         //if the user has not given a path then, add inputdir. else leave path alone.
75                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
76                                 }
77                         }
78
79                         //if the user changes the output directory command factory will send this info to us in the output parameter 
80                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
81
82                         //check for required parameters
83                         phylipfile = validParameter.validFile(parameters, "phylip", true);
84                         if (phylipfile == "not open") { abort = true; }
85                         else if (phylipfile == "not found") { phylipfile = ""; }        
86                         else {  globaldata->setPhylipFile(phylipfile);  globaldata->setFormat("phylip");        }
87                         
88                         columnfile = validParameter.validFile(parameters, "column", true);
89                         if (columnfile == "not open") { abort = true; } 
90                         else if (columnfile == "not found") { columnfile = ""; }
91                         else {  globaldata->setColumnFile(columnfile); globaldata->setFormat("column"); }
92                         
93                         groupfile = validParameter.validFile(parameters, "group", true);
94                         if (groupfile == "not open") { abort = true; }  
95                         else if (groupfile == "not found") { groupfile = ""; }
96                         else {  
97                                 globaldata->setGroupFile(groupfile); 
98                                 //groupMap = new GroupMap(groupfile);
99                                 //groupMap->readMap();
100                         }
101
102                         namefile = validParameter.validFile(parameters, "name", true);
103                         if (namefile == "not open") { abort = true; }   
104                         else if (namefile == "not found") { namefile = ""; }
105                         else {  globaldata->setNameFile(namefile);      }
106                         
107                         //you are doing a list and group shared
108                         if ((phylipfile != "") && (groupfile != "")) { 
109                         globaldata->setFormat("matrix"); }
110                         
111                         if ((phylipfile == "") && (columnfile == "")) { m->mothurOut("When executing a read.dist command you must enter a phylip or a column."); m->mothurOutEndLine(); abort = true; }
112                         else if ((phylipfile != "") && (columnfile != "")) { m->mothurOut("When executing a read.dist command you must enter ONLY ONE of the following: phylip or column."); m->mothurOutEndLine(); abort = true; }
113                 
114                         if (columnfile != "") {
115                                 if (namefile == "") {  cout << "You need to provide a namefile if you are going to use the column format." << endl; abort = true; }
116                         }
117                         
118                         //check for optional parameter and set defaults
119                         // ...at some point should added some additional type checking...
120                         //get user cutoff and precision or use defaults
121                         string temp;
122                         temp = validParameter.validFile(parameters, "precision", false);                        if (temp == "not found") { temp = "100"; }
123                         convert(temp, precision); 
124                         
125                         temp = validParameter.validFile(parameters, "cutoff", false);                   if (temp == "not found") { temp = "10"; }
126                         convert(temp, cutoff); 
127                         cutoff += (5 / (precision * 10.0));
128                         
129                         if (abort == false) {
130                                 distFileName = globaldata->inputFileName;
131                                 format = globaldata->getFormat();       
132                 
133                                 if (format == "column") { read = new ReadColumnMatrix(distFileName); }  
134                                 else if (format == "phylip") { read = new ReadPhylipMatrix(distFileName); }
135                                 else if (format == "matrix") { 
136                                         groupMap = new GroupMap(groupfile);
137                                         int error = groupMap->readMap();
138                                         if (error == 1) { delete groupMap; abort = true; }
139                                         else {
140                                                 if (globaldata->gGroupmap != NULL) { delete globaldata->gGroupmap;  }
141                                                 globaldata->gGroupmap = groupMap;
142                                         }
143                                 }
144                 
145                                 if (format != "matrix" ) {
146                                         read->setCutoff(cutoff);
147         
148                                         if(namefile != ""){     
149                                                 nameMap = new NameAssignment(namefile);
150                                                 nameMap->readMap();
151                                         }else{
152                                                 nameMap = NULL;
153                                         }
154                                 }
155                         }
156
157                 }
158
159         }
160         catch(exception& e) {
161                 m->errorOut(e, "ReadDistCommand", "ReadDistCommand");
162                 exit(1);
163         }
164 }
165 //**********************************************************************************************************************
166
167 void ReadDistCommand::help(){
168         try {
169                 m->mothurOut("The read.dist command parameter options are phylip or column, group, name, cutoff and precision\n");
170                 m->mothurOut("The read.dist command can be used in two ways.  The first is to read a phylip or column and run the cluster command\n");
171                 m->mothurOut("For this use the read.dist command should be in the following format: \n");
172                 m->mothurOut("read.dist(phylip=yourDistFile, name=yourNameFile, cutoff=yourCutoff, precision=yourPrecision) \n");
173                 m->mothurOut("The phylip or column parameter is required, but only one may be used.  If you use a column file the name filename is required. \n");
174                 m->mothurOut("If you do not provide a cutoff value 10.00 is assumed. If you do not provide a precision value then 100 is assumed.\n");
175                 m->mothurOut("The second way to use the read.dist command is to read a phylip or column and a group, so you can use the libshuff command.\n");
176                 m->mothurOut("For this use the read.dist command should be in the following format: \n");
177                 m->mothurOut("read.dist(phylip=yourPhylipfile, group=yourGroupFile). The cutoff and precision parameters are not valid with this use.  \n");
178                 m->mothurOut("Note: No spaces between parameter labels (i.e. phylip), '=' and parameters (i.e.yourPhylipfile).\n\n");
179         }
180         catch(exception& e) {
181                 m->errorOut(e, "ReadDistCommand", "help");
182                 exit(1);
183         }
184 }
185
186 //**********************************************************************************************************************
187
188 ReadDistCommand::~ReadDistCommand(){
189         if (abort == false) {
190                 if (format != "matrix") { 
191                         delete read; 
192                         delete nameMap; 
193                 }
194         }
195 }
196
197 //**********************************************************************************************************************
198 int ReadDistCommand::execute(){
199         try {
200                 
201                 if (abort == true) {    return 0;       }
202
203                 time_t start = time(NULL);
204                 size_t numDists = 0;
205                 
206                 vector<string> outputNames;
207                 
208                 if (format == "matrix") {
209                         ifstream in;
210                         openInputFile(distFileName, in);
211                         matrix = new FullMatrix(in); //reads the matrix file
212                         in.close();
213                         
214                         //if files don't match...
215                         if (matrix->getNumSeqs() < groupMap->getNumSeqs()) {  
216                                 m->mothurOut("Your distance file contains " + toString(matrix->getNumSeqs()) + " sequences, and your group file contains " + toString(groupMap->getNumSeqs()) + " sequences.");  m->mothurOutEndLine();                         
217                                 //create new group file
218                                 if(outputDir == "") { outputDir += hasPath(groupfile); }
219                                 
220                                 string newGroupFile = outputDir + getRootName(getSimpleName(groupfile)) + "editted.groups";
221                                 outputNames.push_back(newGroupFile);
222                                 ofstream outGroups;
223                                 openOutputFile(newGroupFile, outGroups);
224                                 
225                                 for (int i = 0; i < matrix->getNumSeqs(); i++) {
226                                         Names temp = matrix->getRowInfo(i);
227                                         outGroups << temp.seqName << '\t' << temp.groupName << endl;
228                                 }
229                                 outGroups.close();
230                                 
231                                 m->mothurOut(newGroupFile + " is a new group file containing only the sequence that are in your distance file. I will read this file instead."); m->mothurOutEndLine();
232                                 
233                                 //read new groupfile
234                                 delete groupMap; groupMap = NULL;
235                                 groupfile = newGroupFile;
236                                 globaldata->setGroupFile(groupfile); 
237                                 
238                                 groupMap = new GroupMap(groupfile);
239                                 groupMap->readMap();
240                                 
241                                 globaldata->gGroupmap = groupMap;
242                         }
243                         
244                         //memory leak prevention
245                         if (globaldata->gMatrix != NULL) { delete globaldata->gMatrix;  }
246                         globaldata->gMatrix = matrix; //save matrix for coverage commands
247                         numDists = matrix->getSizes()[1];
248                 } else {
249                         read->read(nameMap);
250                         //to prevent memory leak
251
252                         if (globaldata->gListVector != NULL) {  delete globaldata->gListVector;  }
253                         globaldata->gListVector = read->getListVector();
254
255                         if (globaldata->gSparseMatrix != NULL) { delete globaldata->gSparseMatrix;  }
256                         globaldata->gSparseMatrix = read->getMatrix();
257                         numDists = globaldata->gSparseMatrix->getNNodes();
258         //cout << "matrix contains " << numDists << " distances." << endl;
259                         
260     /*  int lines = cutoff / (1.0/precision);
261       vector<float> dist_cutoff(lines+1,0);
262                         for (int i = 0; i <= lines;i++) {       
263         dist_cutoff[i] = (i + 0.5) / precision; 
264       } 
265       vector<int> dist_count(lines+1,0);
266       list<PCell>::iterator currentCell;
267       SparseMatrix* smatrix = globaldata->gSparseMatrix;
268                 for (currentCell = smatrix->begin(); currentCell != smatrix->end(); currentCell++) {
269                                 for (int i = 0; i <= lines;i++) {       
270                                         if (currentCell->dist < dist_cutoff[i]) {
271                                                 dist_count[i]++;
272             break;
273           }
274         }
275                         }
276 */
277      // string dist_string = "Dist:";
278     //  string count_string = "Count: ";
279                         //for (int i = 0; i <= lines;i++) {     
280         //dist_string = dist_string.append("\t").append(toString(dist_cutoff[i]));
281       //        count_string = count_string.append("\t").append(toString(dist_count[i]));
282                 //      }
283       //m->mothurOut(dist_string); m->mothurOutEndLine(); m->mothurOut(count_string); m->mothurOutEndLine();
284                 }
285                 
286                 if (outputNames.size() != 0) {
287                         m->mothurOutEndLine();
288                         m->mothurOut("Output File Name: "); m->mothurOutEndLine();
289                         for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
290                         m->mothurOutEndLine();
291                 }
292                 
293                 m->mothurOut("It took " + toString(time(NULL) - start) + " secs to read "); m->mothurOutEndLine();
294                 return 0;
295                 
296         }
297         catch(exception& e) {
298                 m->errorOut(e, "ReadDistCommand", "execute");
299                 exit(1);
300         }
301 }