]> git.donarmstrong.com Git - mothur.git/blob - removedistscommand.cpp
sffinfo bug with flow grams right index when clipQualRight=0
[mothur.git] / removedistscommand.cpp
1 //
2 //  removedistscommand.cpp
3 //  Mothur
4 //
5 //  Created by Sarah Westcott on 1/29/13.
6 //  Copyright (c) 2013 Schloss Lab. All rights reserved.
7 //
8
9 #include "removedistscommand.h"
10
11 //**********************************************************************************************************************
12 vector<string> RemoveDistsCommand::setParameters(){     
13         try {
14                 CommandParameter pphylip("phylip", "InputTypes", "", "", "none", "PhylipColumn", "none","phylip",false,false,true); parameters.push_back(pphylip);
15         CommandParameter pcolumn("column", "InputTypes", "", "", "none", "PhylipColumn", "none","column",false,false,true); parameters.push_back(pcolumn);      
16                 CommandParameter paccnos("accnos", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(paccnos);
17                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
18                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
19                 
20                 vector<string> myArray;
21                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
22                 return myArray;
23         }
24         catch(exception& e) {
25                 m->errorOut(e, "RemoveDistsCommand", "setParameters");
26                 exit(1);
27         }
28 }
29 //**********************************************************************************************************************
30 string RemoveDistsCommand::getHelpString(){     
31         try {
32                 string helpString = "";
33                 helpString += "The remove.dists command removes distances from a phylip or column file related to groups or sequences listed in an accnos file.\n";
34                 helpString += "The remove.dists command parameters are accnos, phylip and column.\n";
35                 helpString += "The remove.dists command should be in the following format: get.dists(accnos=yourAccnos, phylip=yourPhylip).\n";
36                 helpString += "Example remove.dists(accnos=final.accnos, phylip=final.an.thetayc.0.03.lt.ave.dist).\n";
37                 helpString += "Note: No spaces between parameter labels (i.e. accnos), '=' and parameters (i.e.final.accnos).\n";
38                 return helpString;
39         }
40         catch(exception& e) {
41                 m->errorOut(e, "RemoveDistsCommand", "getHelpString");
42                 exit(1);
43         }
44 }
45 //**********************************************************************************************************************
46 string RemoveDistsCommand::getOutputPattern(string type) {
47     try {
48         string pattern = "";
49         
50         if (type == "phylip")           {   pattern = "[filename],pick,[extension]";    }
51         else if (type == "column")      {   pattern = "[filename],pick,[extension]";    }
52         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
53         
54         return pattern;
55     }
56     catch(exception& e) {
57         m->errorOut(e, "RemoveDistsCommand", "getOutputPattern");
58         exit(1);
59     }
60 }
61 //**********************************************************************************************************************
62 RemoveDistsCommand::RemoveDistsCommand(){       
63         try {
64                 abort = true; calledHelp = true;
65                 setParameters();
66                 vector<string> tempOutNames;
67                 outputTypes["phylip"] = tempOutNames;
68                 outputTypes["column"] = tempOutNames;
69         }
70         catch(exception& e) {
71                 m->errorOut(e, "RemoveDistsCommand", "RemoveDistsCommand");
72                 exit(1);
73         }
74 }
75 //**********************************************************************************************************************
76 RemoveDistsCommand::RemoveDistsCommand(string option)  {
77         try {
78                 abort = false; calledHelp = false;   
79                 
80                 //allow user to run help
81                 if(option == "help") { help(); abort = true; calledHelp = true; }
82                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
83                 
84                 else {
85                         vector<string> myArray = setParameters();
86                         
87                         OptionParser parser(option);
88                         map<string,string> parameters = parser.getParameters();
89                         
90                         ValidParameters validParameter;
91                         map<string,string>::iterator it;
92                         
93                         //check to make sure all parameters are valid for command
94                         for (it = parameters.begin(); it != parameters.end(); it++) { 
95                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
96                         }
97                         
98                         //initialize outputTypes
99                         vector<string> tempOutNames;
100                         outputTypes["column"] = tempOutNames;
101                         outputTypes["phylip"] = tempOutNames;
102                         
103                         //if the user changes the output directory command factory will send this info to us in the output parameter 
104                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
105                         
106                         //if the user changes the input directory command factory will send this info to us in the output parameter 
107                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
108                         if (inputDir == "not found"){   inputDir = "";          }
109                         else {
110                                 string path;
111                                 it = parameters.find("phylip");
112                                 //user has given a template file
113                                 if(it != parameters.end()){ 
114                                         path = m->hasPath(it->second);
115                                         //if the user has not given a path then, add inputdir. else leave path alone.
116                                         if (path == "") {       parameters["phylip"] = inputDir + it->second;           }
117                                 }
118                                 
119                                 it = parameters.find("column");
120                                 //user has given a template file
121                                 if(it != parameters.end()){ 
122                                         path = m->hasPath(it->second);
123                                         //if the user has not given a path then, add inputdir. else leave path alone.
124                                         if (path == "") {       parameters["column"] = inputDir + it->second;           }
125                                 }
126                                 
127                 it = parameters.find("accnos");
128                                 //user has given a template file
129                                 if(it != parameters.end()){ 
130                                         path = m->hasPath(it->second);
131                                         //if the user has not given a path then, add inputdir. else leave path alone.
132                                         if (path == "") {       parameters["accnos"] = inputDir + it->second;           }
133                                 }
134             }
135                         
136                         
137                         //check for required parameters
138                         accnosfile = validParameter.validFile(parameters, "accnos", true);
139                         if (accnosfile == "not open") { abort = true; }
140                         else if (accnosfile == "not found") {  
141                                 accnosfile = m->getAccnosFile(); 
142                                 if (accnosfile != "") {  m->mothurOut("Using " + accnosfile + " as input file for the accnos parameter."); m->mothurOutEndLine(); }
143                                 else { 
144                                         m->mothurOut("You have no valid accnos file and accnos is required."); m->mothurOutEndLine(); 
145                                         abort = true;
146                                 } 
147                         }else { m->setAccnosFile(accnosfile); } 
148                         
149                         phylipfile = validParameter.validFile(parameters, "phylip", true);
150                         if (phylipfile == "not open") { phylipfile = ""; abort = true; }
151                         else if (phylipfile == "not found") { phylipfile = ""; }        
152                         else {  m->setPhylipFile(phylipfile); }
153                         
154                         columnfile = validParameter.validFile(parameters, "column", true);
155                         if (columnfile == "not open") { columnfile = ""; abort = true; }        
156                         else if (columnfile == "not found") { columnfile = ""; }
157                         else {  m->setColumnFile(columnfile);   }
158                         
159                         if ((phylipfile == "") && (columnfile == "")) { 
160                                 //is there are current file available for either of these?
161                                 //give priority to column, then phylip
162                                 columnfile = m->getColumnFile(); 
163                                 if (columnfile != "") {  m->mothurOut("Using " + columnfile + " as input file for the column parameter."); m->mothurOutEndLine(); }
164                                 else { 
165                                         phylipfile = m->getPhylipFile(); 
166                                         if (phylipfile != "") {  m->mothurOut("Using " + phylipfile + " as input file for the phylip parameter."); m->mothurOutEndLine(); }
167                                         else { 
168                                                 m->mothurOut("No valid current files. You must provide a phylip or column file."); m->mothurOutEndLine(); 
169                                                 abort = true;
170                                         }
171                                 }
172                         }
173                 }
174                 
175         }
176         catch(exception& e) {
177                 m->errorOut(e, "RemoveDistsCommand", "RemoveDistsCommand");
178                 exit(1);
179         }
180 }
181 //**********************************************************************************************************************
182
183 int RemoveDistsCommand::execute(){
184         try {
185                 
186                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
187                 
188                 //get names you want to keep
189                 names = m->readAccnos(accnosfile);
190                 
191                 if (m->control_pressed) { return 0; }
192                 
193                 //read through the correct file and output lines you want to keep
194                 if (phylipfile != "")           {               readPhylip();           }
195                 if (columnfile != "")           {               readColumn();       }
196                 
197                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]); } return 0; }
198                 
199                 
200                 if (outputNames.size() != 0) {
201                         m->mothurOutEndLine();
202                         m->mothurOut("Output File names: "); m->mothurOutEndLine();
203                         for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
204                         m->mothurOutEndLine();
205                         
206                         //set fasta file as new current fastafile
207                         string current = "";
208                         itTypes = outputTypes.find("phylip");
209                         if (itTypes != outputTypes.end()) {
210                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setPhylipFile(current); }
211                         }
212                         
213                         itTypes = outputTypes.find("column");
214                         if (itTypes != outputTypes.end()) {
215                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setColumnFile(current); }
216                         }
217         }
218                 
219                 return 0;               
220         }
221         
222         catch(exception& e) {
223                 m->errorOut(e, "RemoveDistsCommand", "execute");
224                 exit(1);
225         }
226 }
227
228 //**********************************************************************************************************************
229 int RemoveDistsCommand::readPhylip(){
230         try {
231                 string thisOutputDir = outputDir;
232                 if (outputDir == "") {  thisOutputDir += m->hasPath(phylipfile);  }
233         map<string, string> variables; 
234         variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(phylipfile));
235         variables["[extension]"] = m->getExtension(phylipfile);
236                 string outputFileName = getOutputFileName("phylip", variables);
237                 
238         ifstream in;
239         m->openInputFile(phylipfile, in);
240         
241         float distance;
242         int square, nseqs; 
243         string name;
244         unsigned int row;
245         set<unsigned int> rows; //converts names in names to a index
246         row = 0;
247         
248         string numTest;
249         in >> numTest >> name;
250         
251         if (!m->isContainingOnlyDigits(numTest)) { m->mothurOut("[ERROR]: expected a number and got " + numTest + ", quitting."); m->mothurOutEndLine(); exit(1); }
252         else { convert(numTest, nseqs); }
253         
254         //not one we want to remove
255         if (names.count(name) == 0) { rows.insert(row); }
256         row++;
257         
258         //is the matrix square?
259         char d;
260         while((d=in.get()) != EOF){
261             
262             if(isalnum(d)){
263                 square = 1;
264                 in.putback(d);
265                 for(int i=0;i<nseqs;i++){
266                     in >> distance;
267                 }
268                 break;
269             }
270             if(d == '\n'){
271                 square = 0;
272                 break;
273             }
274         }
275         
276         //map name to row/column        
277         if(square == 0){
278             for(int i=1;i<nseqs;i++){
279                 in >> name;  
280                 if (names.count(name) == 0) { rows.insert(row); }
281                 row++;
282                 
283                 for(int j=0;j<i;j++){
284                     if (m->control_pressed) {  in.close(); return 0;  }
285                     in >> distance;
286                 }
287             }
288         }
289         else{
290             for(int i=1;i<nseqs;i++){
291                 in >> name;  
292                 if (names.count(name) == 0) { rows.insert(row);  }
293                 row++;
294                 for(int j=0;j<nseqs;j++){
295                     if (m->control_pressed) {  in.close(); return 0;  }
296                     in >> distance;
297                 }
298             }
299         }
300         in.close();
301         
302         if (m->control_pressed) {  return 0; }
303         
304         //read through file only printing rows and columns of seqs in names
305         ifstream inPhylip;
306         m->openInputFile(phylipfile, inPhylip);
307         
308         inPhylip >> numTest;
309         
310                 ofstream out;
311                 m->openOutputFile(outputFileName, out);
312         outputTypes["phylip"].push_back(outputFileName);  outputNames.push_back(outputFileName);
313         out << names.size() << endl;
314             
315         unsigned int count = 0;
316         unsigned int keptCount = 0;
317                 if(square == 0){
318             for(int i=0;i<nseqs;i++){
319                 inPhylip >> name;  
320                 bool ignoreRow = false;
321                 
322                 if (names.count(name) != 0) { ignoreRow = true; count++; }
323                 else{ out << name << '\t'; keptCount++; }
324                 
325                 for(int j=0;j<i;j++){
326                     if (m->control_pressed) {  inPhylip.close(); out.close();  return 0;  }
327                     inPhylip >> distance;
328                     if (!ignoreRow) {
329                         //is this a column we want
330                         if(rows.count(j) != 0) {  out << distance << '\t';  }
331                     }
332                 }
333                 if (!ignoreRow) { out << endl; }
334             }
335         }
336         else{
337             for(int i=0;i<nseqs;i++){
338                 inPhylip >> name; 
339                 
340                 bool ignoreRow = false;
341                 
342                 if (names.count(name) != 0) { ignoreRow = true; count++; }
343                 else{ out << name << '\t'; keptCount++; }
344                 
345                 for(int j=0;j<nseqs;j++){
346                     if (m->control_pressed) {  inPhylip.close(); out.close(); return 0;  }
347                     inPhylip >> distance;
348                     if (!ignoreRow) {
349                         //is this a column we want
350                         if(rows.count(j) != 0) {  out << distance << '\t';  }
351                     }
352                 }
353                 if (!ignoreRow) { out << endl; }
354             }
355         }
356         inPhylip.close();
357                 out.close();
358                 
359                 if (keptCount == 0) {  m->mothurOut("Your file contains ONLY distances related to groups or sequences listed in the accnos file."); m->mothurOutEndLine();  }
360         else if (count != names.size()) {
361             m->mothurOut("[WARNING]: Your accnos file contains " + toString(names.size()) + " groups or sequences, but I only found " + toString(count) + " of them in the phylip file."); m->mothurOutEndLine();
362             //rewrite with new number
363             m->renameFile(outputFileName, outputFileName+".temp");
364             ofstream out2;
365             m->openOutputFile(outputFileName, out2);
366             out2 << keptCount << endl;
367             
368             ifstream in3;
369             m->openInputFile(outputFileName+".temp", in3);
370             in3 >> nseqs; m->gobble(in3);
371             char buffer[4096];        
372             while (!in3.eof()) {
373                 in3.read(buffer, 4096);
374                 out2.write(buffer, in3.gcount());
375             }
376             in3.close();
377             out2.close();
378             m->mothurRemove(outputFileName+".temp");
379         }
380                 
381                 m->mothurOut("Removed " + toString(count) + " groups or sequences from your phylip file."); m->mothurOutEndLine();
382                 
383                 return 0;
384                 
385         }
386         catch(exception& e) {
387                 m->errorOut(e, "RemoveDistsCommand", "readPhylip");
388                 exit(1);
389         }
390 }
391 //**********************************************************************************************************************
392 int RemoveDistsCommand::readColumn(){
393         try {
394                 string thisOutputDir = outputDir;
395                 if (outputDir == "") {  thisOutputDir += m->hasPath(columnfile);  }
396         map<string, string> variables; 
397         variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(columnfile));
398         variables["[extension]"] = m->getExtension(columnfile);
399                 string outputFileName = getOutputFileName("column", variables);
400         outputTypes["column"].push_back(outputFileName);  outputNames.push_back(outputFileName);
401                 
402                 ofstream out;
403                 m->openOutputFile(outputFileName, out);
404         
405         ifstream in;
406         m->openInputFile(columnfile, in);
407         
408         set<string> removeNames;
409         string firstName, secondName;
410         float distance;
411         bool wrote = false;
412         while (!in.eof()) {
413             
414             if (m->control_pressed) { out.close(); in.close(); return 0; }
415             
416             in >> firstName >> secondName >> distance; m->gobble(in);
417             
418             //is either names in the accnos file
419             if (names.count(firstName) != 0)       { 
420                 removeNames.insert(firstName);  
421                 if (names.count(secondName) != 0)  { removeNames.insert(secondName);      }   }
422             else if (names.count(secondName) != 0) { 
423                 removeNames.insert(secondName); 
424                 if (names.count(firstName) != 0)   { removeNames.insert(firstName);     }   }
425             else {
426                 wrote = true;
427                 out << firstName << '\t' << secondName << '\t' << distance << endl;
428             }
429         }
430                 in.close();
431                 out.close();
432         
433         if (!wrote) {  m->mothurOut("Your file contains ONLY distances related to groups or sequences listed in the accnos file."); m->mothurOutEndLine();  }
434         else if (removeNames.size() != names.size()) {
435             m->mothurOut("[WARNING]: Your accnos file contains " + toString(names.size()) + " groups or sequences, but I only found " + toString(removeNames.size()) + " of them in the column file."); m->mothurOutEndLine();
436         }
437                 
438                 m->mothurOut("Removed " + toString(removeNames.size()) + " groups or sequences from your column file."); m->mothurOutEndLine();
439         
440                 return 0;
441                 
442         }
443         catch(exception& e) {
444                 m->errorOut(e, "RemoveDistsCommand", "readColumn");
445                 exit(1);
446         }
447 }
448 //**********************************************************************************************************************
449
450