]> git.donarmstrong.com Git - mothur.git/blob - getdistscommand.cpp
changing command name classify.shared to classifyrf.shared
[mothur.git] / getdistscommand.cpp
1 //
2 //  getdistscommand.cpp
3 //  Mothur
4 //
5 //  Created by Sarah Westcott on 1/28/13.
6 //  Copyright (c) 2013 Schloss Lab. All rights reserved.
7 //
8
9 #include "getdistscommand.h"
10
11 //**********************************************************************************************************************
12 vector<string> GetDistsCommand::setParameters(){        
13         try {
14                 CommandParameter pphylip("phylip", "InputTypes", "", "", "none", "PhylipColumn", "none","phylip",false,false,true); parameters.push_back(pphylip);
15         CommandParameter pcolumn("column", "InputTypes", "", "", "none", "PhylipColumn", "none","column",false,false,true); parameters.push_back(pcolumn);      
16                 CommandParameter paccnos("accnos", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(paccnos);
17                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
18                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
19                 
20                 vector<string> myArray;
21                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
22                 return myArray;
23         }
24         catch(exception& e) {
25                 m->errorOut(e, "GetDistsCommand", "setParameters");
26                 exit(1);
27         }
28 }
29 //**********************************************************************************************************************
30 string GetDistsCommand::getHelpString(){        
31         try {
32                 string helpString = "";
33                 helpString += "The get.dists command selects distances from a phylip or column file related to groups or sequences listed in an accnos file.\n";
34                 helpString += "The get.dists command parameters are accnos, phylip and column.\n";
35                 helpString += "The get.dists command should be in the following format: get.dists(accnos=yourAccnos, phylip=yourPhylip).\n";
36                 helpString += "Example get.dists(accnos=final.accnos, phylip=final.an.thetayc.0.03.lt.ave.dist).\n";
37                 helpString += "Note: No spaces between parameter labels (i.e. accnos), '=' and parameters (i.e.final.accnos).\n";
38                 return helpString;
39         }
40         catch(exception& e) {
41                 m->errorOut(e, "GetDistsCommand", "getHelpString");
42                 exit(1);
43         }
44 }
45 //**********************************************************************************************************************
46 string GetDistsCommand::getOutputPattern(string type) {
47     try {
48         string pattern = "";
49         
50         if (type == "phylip")           {   pattern = "[filename],pick,[extension]";    }
51         else if (type == "column")      {   pattern = "[filename],pick,[extension]";    }
52         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
53         
54         return pattern;
55     }
56     catch(exception& e) {
57         m->errorOut(e, "GetDistsCommand", "getOutputPattern");
58         exit(1);
59     }
60 }
61 //**********************************************************************************************************************
62 GetDistsCommand::GetDistsCommand(){     
63         try {
64                 abort = true; calledHelp = true;
65                 setParameters();
66                 vector<string> tempOutNames;
67                 outputTypes["phylip"] = tempOutNames;
68                 outputTypes["column"] = tempOutNames;
69         }
70         catch(exception& e) {
71                 m->errorOut(e, "GetDistsCommand", "GetDistsCommand");
72                 exit(1);
73         }
74 }
75 //**********************************************************************************************************************
76 GetDistsCommand::GetDistsCommand(string option)  {
77         try {
78                 abort = false; calledHelp = false;   
79                 
80                 //allow user to run help
81                 if(option == "help") { help(); abort = true; calledHelp = true; }
82                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
83                 
84                 else {
85                         vector<string> myArray = setParameters();
86                         
87                         OptionParser parser(option);
88                         map<string,string> parameters = parser.getParameters();
89                         
90                         ValidParameters validParameter;
91                         map<string,string>::iterator it;
92                         
93                         //check to make sure all parameters are valid for command
94                         for (it = parameters.begin(); it != parameters.end(); it++) { 
95                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
96                         }
97                         
98                         //initialize outputTypes
99                         vector<string> tempOutNames;
100                         outputTypes["column"] = tempOutNames;
101                         outputTypes["phylip"] = tempOutNames;
102                         
103                         //if the user changes the output directory command factory will send this info to us in the output parameter 
104                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
105                         
106                         //if the user changes the input directory command factory will send this info to us in the output parameter 
107                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
108                         if (inputDir == "not found"){   inputDir = "";          }
109                         else {
110                                 string path;
111                                 it = parameters.find("phylip");
112                                 //user has given a template file
113                                 if(it != parameters.end()){ 
114                                         path = m->hasPath(it->second);
115                                         //if the user has not given a path then, add inputdir. else leave path alone.
116                                         if (path == "") {       parameters["phylip"] = inputDir + it->second;           }
117                                 }
118                                 
119                                 it = parameters.find("column");
120                                 //user has given a template file
121                                 if(it != parameters.end()){ 
122                                         path = m->hasPath(it->second);
123                                         //if the user has not given a path then, add inputdir. else leave path alone.
124                                         if (path == "") {       parameters["column"] = inputDir + it->second;           }
125                                 }
126                                 
127                 it = parameters.find("accnos");
128                                 //user has given a template file
129                                 if(it != parameters.end()){ 
130                                         path = m->hasPath(it->second);
131                                         //if the user has not given a path then, add inputdir. else leave path alone.
132                                         if (path == "") {       parameters["accnos"] = inputDir + it->second;           }
133                                 }
134             }
135                         
136                         
137                         //check for required parameters
138                         accnosfile = validParameter.validFile(parameters, "accnos", true);
139                         if (accnosfile == "not open") { abort = true; }
140                         else if (accnosfile == "not found") {  
141                                 accnosfile = m->getAccnosFile(); 
142                                 if (accnosfile != "") {  m->mothurOut("Using " + accnosfile + " as input file for the accnos parameter."); m->mothurOutEndLine(); }
143                                 else { 
144                                         m->mothurOut("You have no valid accnos file and accnos is required."); m->mothurOutEndLine(); 
145                                         abort = true;
146                                 } 
147                         }else { m->setAccnosFile(accnosfile); } 
148                         
149                         phylipfile = validParameter.validFile(parameters, "phylip", true);
150                         if (phylipfile == "not open") { phylipfile = ""; abort = true; }
151                         else if (phylipfile == "not found") { phylipfile = ""; }        
152                         else {  m->setPhylipFile(phylipfile); }
153                         
154                         columnfile = validParameter.validFile(parameters, "column", true);
155                         if (columnfile == "not open") { columnfile = ""; abort = true; }        
156                         else if (columnfile == "not found") { columnfile = ""; }
157                         else {  m->setColumnFile(columnfile);   }
158                         
159                         if ((phylipfile == "") && (columnfile == "")) { 
160                                 //is there are current file available for either of these?
161                                 //give priority to column, then phylip
162                                 columnfile = m->getColumnFile(); 
163                                 if (columnfile != "") {  m->mothurOut("Using " + columnfile + " as input file for the column parameter."); m->mothurOutEndLine(); }
164                                 else { 
165                                         phylipfile = m->getPhylipFile(); 
166                                         if (phylipfile != "") {  m->mothurOut("Using " + phylipfile + " as input file for the phylip parameter."); m->mothurOutEndLine(); }
167                                         else { 
168                                                 m->mothurOut("No valid current files. You must provide a phylip or column file."); m->mothurOutEndLine(); 
169                                                 abort = true;
170                                         }
171                                 }
172                         }
173                 }
174                 
175         }
176         catch(exception& e) {
177                 m->errorOut(e, "GetDistsCommand", "GetDistsCommand");
178                 exit(1);
179         }
180 }
181 //**********************************************************************************************************************
182
183 int GetDistsCommand::execute(){
184         try {
185                 
186                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
187                 
188                 //get names you want to keep
189                 names = m->readAccnos(accnosfile);
190                 
191                 if (m->control_pressed) { return 0; }
192                 
193                 //read through the correct file and output lines you want to keep
194                 if (phylipfile != "")           {               readPhylip();           }
195                 if (columnfile != "")           {               readColumn();       }
196                 
197                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]); } return 0; }
198                 
199                 
200                 if (outputNames.size() != 0) {
201                         m->mothurOutEndLine();
202                         m->mothurOut("Output File names: "); m->mothurOutEndLine();
203                         for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
204                         m->mothurOutEndLine();
205                         
206                         //set fasta file as new current fastafile
207                         string current = "";
208                         itTypes = outputTypes.find("phylip");
209                         if (itTypes != outputTypes.end()) {
210                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setPhylipFile(current); }
211                         }
212                         
213                         itTypes = outputTypes.find("column");
214                         if (itTypes != outputTypes.end()) {
215                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setColumnFile(current); }
216                         }
217         }
218                 
219                 return 0;               
220         }
221         
222         catch(exception& e) {
223                 m->errorOut(e, "GetDistsCommand", "execute");
224                 exit(1);
225         }
226 }
227
228 //**********************************************************************************************************************
229 int GetDistsCommand::readPhylip(){
230         try {
231                 string thisOutputDir = outputDir;
232                 if (outputDir == "") {  thisOutputDir += m->hasPath(phylipfile);  }
233         map<string, string> variables; 
234         variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(phylipfile));
235         variables["[extension]"] = m->getExtension(phylipfile);
236                 string outputFileName = getOutputFileName("phylip", variables);
237                 
238         ifstream in;
239         m->openInputFile(phylipfile, in);
240         
241         float distance;
242         int square, nseqs; 
243         string name;
244         unsigned int row;
245         set<unsigned int> rows; //converts names in names to a index
246         row = 0;
247         
248         string numTest;
249         in >> numTest >> name;
250         
251         if (!m->isContainingOnlyDigits(numTest)) { m->mothurOut("[ERROR]: expected a number and got " + numTest + ", quitting."); m->mothurOutEndLine(); exit(1); }
252         else { convert(numTest, nseqs); }
253         
254         if (names.count(name) != 0) { rows.insert(row); }
255         row++;
256         
257         //is the matrix square?
258         char d;
259         while((d=in.get()) != EOF){
260             
261             if(isalnum(d)){
262                 square = 1;
263                 in.putback(d);
264                 for(int i=0;i<nseqs;i++){
265                     in >> distance;
266                 }
267                 break;
268             }
269             if(d == '\n'){
270                 square = 0;
271                 break;
272             }
273         }
274         
275         //map name to row/column        
276         if(square == 0){
277             for(int i=1;i<nseqs;i++){
278                 in >> name;  
279                 if (names.count(name) != 0) { rows.insert(row); }
280                 row++;
281                 
282                 for(int j=0;j<i;j++){
283                     if (m->control_pressed) {  in.close(); return 0;  }
284                     in >> distance;
285                 }
286             }
287         }
288         else{
289              for(int i=1;i<nseqs;i++){
290                  in >> name;  
291                  if (names.count(name) != 0) { rows.insert(row); }
292                  row++;
293                  for(int j=0;j<nseqs;j++){
294                      if (m->control_pressed) {  in.close(); return 0;  }
295                      in >> distance;
296                  }
297              }
298         }
299         in.close();
300         
301         if (m->control_pressed) {  return 0; }
302         
303         //read through file only printing rows and columns of seqs in names
304         ifstream inPhylip;
305         m->openInputFile(phylipfile, inPhylip);
306         
307         inPhylip >> numTest;
308         
309                 ofstream out;
310                 m->openOutputFile(outputFileName, out);
311         outputTypes["phylip"].push_back(outputFileName);  outputNames.push_back(outputFileName);
312         out << names.size() << endl;
313         
314         unsigned int count = 0;
315                 if(square == 0){
316             for(int i=0;i<nseqs;i++){
317                 inPhylip >> name;  
318                 bool ignoreRow = false;
319                 
320                 if (names.count(name) == 0) { ignoreRow = true; }
321                 else{ out << name << '\t'; count++; }
322                 
323                 for(int j=0;j<i;j++){
324                     if (m->control_pressed) {  inPhylip.close(); out.close();  return 0;  }
325                     inPhylip >> distance;
326                     if (!ignoreRow) {
327                         //is this a column we want
328                         if(rows.count(j) != 0) {  out << distance << '\t';  }
329                     }
330                 }
331                 if (!ignoreRow) { out << endl; }
332             }
333         }
334         else{
335             for(int i=0;i<nseqs;i++){
336                 inPhylip >> name; 
337                 
338                 bool ignoreRow = false;
339                 
340                 if (names.count(name) == 0) { ignoreRow = true; }
341                 else{ out << name << '\t'; count++; }
342                 
343                 for(int j=0;j<nseqs;j++){
344                     if (m->control_pressed) {  inPhylip.close(); out.close(); return 0;  }
345                     inPhylip >> distance;
346                     if (!ignoreRow) {
347                         //is this a column we want
348                         if(rows.count(j) != 0) {  out << distance << '\t';  }
349                     }
350                 }
351                 if (!ignoreRow) { out << endl; }
352             }
353         }
354         inPhylip.close();
355                 out.close();
356                 
357                 if (count == 0) {  m->mothurOut("Your file does NOT contain distances related to groups or sequences listed in the accnos file."); m->mothurOutEndLine();  }
358         else if (count != names.size()) {
359             m->mothurOut("[WARNING]: Your accnos file contains " + toString(names.size()) + " groups or sequences, but I only found " + toString(count) + " of them in the phylip file."); m->mothurOutEndLine();
360             //rewrite with new number
361             m->renameFile(outputFileName, outputFileName+".temp");
362             ofstream out2;
363             m->openOutputFile(outputFileName, out2);
364             out2 << count << endl;
365             
366             ifstream in3;
367             m->openInputFile(outputFileName+".temp", in3);
368             in3 >> nseqs; m->gobble(in3);
369             char buffer[4096];        
370             while (!in3.eof()) {
371                 in3.read(buffer, 4096);
372                 out2.write(buffer, in3.gcount());
373             }
374             in3.close();
375             out2.close();
376             m->mothurRemove(outputFileName+".temp");
377         }
378                 
379                 m->mothurOut("Selected " + toString(count) + " groups or sequences from your phylip file."); m->mothurOutEndLine();
380                 
381                 return 0;
382                 
383         }
384         catch(exception& e) {
385                 m->errorOut(e, "GetDistsCommand", "readPhylip");
386                 exit(1);
387         }
388 }
389 //**********************************************************************************************************************
390 int GetDistsCommand::readColumn(){
391         try {
392                 string thisOutputDir = outputDir;
393                 if (outputDir == "") {  thisOutputDir += m->hasPath(columnfile);  }
394         map<string, string> variables; 
395         variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(columnfile));
396         variables["[extension]"] = m->getExtension(columnfile);
397                 string outputFileName = getOutputFileName("column", variables);
398         outputTypes["column"].push_back(outputFileName);  outputNames.push_back(outputFileName);
399                 
400                 ofstream out;
401                 m->openOutputFile(outputFileName, out);
402         
403         ifstream in;
404         m->openInputFile(columnfile, in);
405         
406         set<string> foundNames;
407         string firstName, secondName;
408         float distance;
409         while (!in.eof()) {
410             
411             if (m->control_pressed) { out.close(); in.close(); return 0; }
412             
413             in >> firstName >> secondName >> distance; m->gobble(in);
414             
415             //are both names in the accnos file
416             if ((names.count(firstName) != 0) && (names.count(secondName) != 0)) {
417                 out << firstName << '\t' << secondName << '\t' << distance << endl;
418                 foundNames.insert(firstName);
419                 foundNames.insert(secondName);
420             }
421         }
422                 in.close();
423                 out.close();
424         
425         if (foundNames.size() == 0) {  m->mothurOut("Your file does NOT contain distances related to groups or sequences listed in the accnos file."); m->mothurOutEndLine();  }
426         else if (foundNames.size() != names.size()) {
427             m->mothurOut("[WARNING]: Your accnos file contains " + toString(names.size()) + " groups or sequences, but I only found " + toString(foundNames.size()) + " of them in the column file."); m->mothurOutEndLine();
428         }
429                 
430                 m->mothurOut("Selected " + toString(foundNames.size()) + " groups or sequences from your column file."); m->mothurOutEndLine();
431         
432                 return 0;
433                 
434         }
435         catch(exception& e) {
436                 m->errorOut(e, "GetDistsCommand", "readColumn");
437                 exit(1);
438         }
439 }
440 //**********************************************************************************************************************
441
442