2 // removedistscommand.cpp
5 // Created by Sarah Westcott on 1/29/13.
6 // Copyright (c) 2013 Schloss Lab. All rights reserved.
9 #include "removedistscommand.h"
11 //**********************************************************************************************************************
12 vector<string> RemoveDistsCommand::setParameters(){
14 CommandParameter pphylip("phylip", "InputTypes", "", "", "none", "PhylipColumn", "none","phylip",false,false,true); parameters.push_back(pphylip);
15 CommandParameter pcolumn("column", "InputTypes", "", "", "none", "PhylipColumn", "none","column",false,false,true); parameters.push_back(pcolumn);
16 CommandParameter paccnos("accnos", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(paccnos);
17 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
18 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
20 vector<string> myArray;
21 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
25 m->errorOut(e, "RemoveDistsCommand", "setParameters");
29 //**********************************************************************************************************************
30 string RemoveDistsCommand::getHelpString(){
32 string helpString = "";
33 helpString += "The remove.dists command removes distances from a phylip or column file related to groups or sequences listed in an accnos file.\n";
34 helpString += "The remove.dists command parameters are accnos, phylip and column.\n";
35 helpString += "The remove.dists command should be in the following format: get.dists(accnos=yourAccnos, phylip=yourPhylip).\n";
36 helpString += "Example remove.dists(accnos=final.accnos, phylip=final.an.thetayc.0.03.lt.ave.dist).\n";
37 helpString += "Note: No spaces between parameter labels (i.e. accnos), '=' and parameters (i.e.final.accnos).\n";
41 m->errorOut(e, "RemoveDistsCommand", "getHelpString");
45 //**********************************************************************************************************************
46 string RemoveDistsCommand::getOutputPattern(string type) {
50 if (type == "phylip") { pattern = "[filename],pick,[extension]"; }
51 else if (type == "column") { pattern = "[filename],pick,[extension]"; }
52 else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }
57 m->errorOut(e, "RemoveDistsCommand", "getOutputPattern");
61 //**********************************************************************************************************************
62 RemoveDistsCommand::RemoveDistsCommand(){
64 abort = true; calledHelp = true;
66 vector<string> tempOutNames;
67 outputTypes["phylip"] = tempOutNames;
68 outputTypes["column"] = tempOutNames;
71 m->errorOut(e, "RemoveDistsCommand", "RemoveDistsCommand");
75 //**********************************************************************************************************************
76 RemoveDistsCommand::RemoveDistsCommand(string option) {
78 abort = false; calledHelp = false;
80 //allow user to run help
81 if(option == "help") { help(); abort = true; calledHelp = true; }
82 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
85 vector<string> myArray = setParameters();
87 OptionParser parser(option);
88 map<string,string> parameters = parser.getParameters();
90 ValidParameters validParameter;
91 map<string,string>::iterator it;
93 //check to make sure all parameters are valid for command
94 for (it = parameters.begin(); it != parameters.end(); it++) {
95 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
98 //initialize outputTypes
99 vector<string> tempOutNames;
100 outputTypes["column"] = tempOutNames;
101 outputTypes["phylip"] = tempOutNames;
103 //if the user changes the output directory command factory will send this info to us in the output parameter
104 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
106 //if the user changes the input directory command factory will send this info to us in the output parameter
107 string inputDir = validParameter.validFile(parameters, "inputdir", false);
108 if (inputDir == "not found"){ inputDir = ""; }
111 it = parameters.find("phylip");
112 //user has given a template file
113 if(it != parameters.end()){
114 path = m->hasPath(it->second);
115 //if the user has not given a path then, add inputdir. else leave path alone.
116 if (path == "") { parameters["phylip"] = inputDir + it->second; }
119 it = parameters.find("column");
120 //user has given a template file
121 if(it != parameters.end()){
122 path = m->hasPath(it->second);
123 //if the user has not given a path then, add inputdir. else leave path alone.
124 if (path == "") { parameters["column"] = inputDir + it->second; }
127 it = parameters.find("accnos");
128 //user has given a template file
129 if(it != parameters.end()){
130 path = m->hasPath(it->second);
131 //if the user has not given a path then, add inputdir. else leave path alone.
132 if (path == "") { parameters["accnos"] = inputDir + it->second; }
137 //check for required parameters
138 accnosfile = validParameter.validFile(parameters, "accnos", true);
139 if (accnosfile == "not open") { abort = true; }
140 else if (accnosfile == "not found") {
141 accnosfile = m->getAccnosFile();
142 if (accnosfile != "") { m->mothurOut("Using " + accnosfile + " as input file for the accnos parameter."); m->mothurOutEndLine(); }
144 m->mothurOut("You have no valid accnos file and accnos is required."); m->mothurOutEndLine();
147 }else { m->setAccnosFile(accnosfile); }
149 phylipfile = validParameter.validFile(parameters, "phylip", true);
150 if (phylipfile == "not open") { phylipfile = ""; abort = true; }
151 else if (phylipfile == "not found") { phylipfile = ""; }
152 else { m->setPhylipFile(phylipfile); }
154 columnfile = validParameter.validFile(parameters, "column", true);
155 if (columnfile == "not open") { columnfile = ""; abort = true; }
156 else if (columnfile == "not found") { columnfile = ""; }
157 else { m->setColumnFile(columnfile); }
159 if ((phylipfile == "") && (columnfile == "")) {
160 //is there are current file available for either of these?
161 //give priority to column, then phylip
162 columnfile = m->getColumnFile();
163 if (columnfile != "") { m->mothurOut("Using " + columnfile + " as input file for the column parameter."); m->mothurOutEndLine(); }
165 phylipfile = m->getPhylipFile();
166 if (phylipfile != "") { m->mothurOut("Using " + phylipfile + " as input file for the phylip parameter."); m->mothurOutEndLine(); }
168 m->mothurOut("No valid current files. You must provide a phylip or column file."); m->mothurOutEndLine();
176 catch(exception& e) {
177 m->errorOut(e, "RemoveDistsCommand", "RemoveDistsCommand");
181 //**********************************************************************************************************************
183 int RemoveDistsCommand::execute(){
186 if (abort == true) { if (calledHelp) { return 0; } return 2; }
188 //get names you want to keep
189 names = m->readAccnos(accnosfile);
191 if (m->control_pressed) { return 0; }
193 //read through the correct file and output lines you want to keep
194 if (phylipfile != "") { readPhylip(); }
195 if (columnfile != "") { readColumn(); }
197 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
200 if (outputNames.size() != 0) {
201 m->mothurOutEndLine();
202 m->mothurOut("Output File names: "); m->mothurOutEndLine();
203 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
204 m->mothurOutEndLine();
206 //set fasta file as new current fastafile
208 itTypes = outputTypes.find("phylip");
209 if (itTypes != outputTypes.end()) {
210 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setPhylipFile(current); }
213 itTypes = outputTypes.find("column");
214 if (itTypes != outputTypes.end()) {
215 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setColumnFile(current); }
222 catch(exception& e) {
223 m->errorOut(e, "RemoveDistsCommand", "execute");
228 //**********************************************************************************************************************
229 int RemoveDistsCommand::readPhylip(){
231 string thisOutputDir = outputDir;
232 if (outputDir == "") { thisOutputDir += m->hasPath(phylipfile); }
233 map<string, string> variables;
234 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(phylipfile));
235 variables["[extension]"] = m->getExtension(phylipfile);
236 string outputFileName = getOutputFileName("phylip", variables);
239 m->openInputFile(phylipfile, in);
245 set<unsigned int> rows; //converts names in names to a index
249 in >> numTest >> name;
251 if (!m->isContainingOnlyDigits(numTest)) { m->mothurOut("[ERROR]: expected a number and got " + numTest + ", quitting."); m->mothurOutEndLine(); exit(1); }
252 else { convert(numTest, nseqs); }
254 //not one we want to remove
255 if (names.count(name) == 0) { rows.insert(row); }
258 //is the matrix square?
260 while((d=in.get()) != EOF){
265 for(int i=0;i<nseqs;i++){
276 //map name to row/column
278 for(int i=1;i<nseqs;i++){
280 if (names.count(name) == 0) { rows.insert(row); }
283 for(int j=0;j<i;j++){
284 if (m->control_pressed) { in.close(); return 0; }
290 for(int i=1;i<nseqs;i++){
292 if (names.count(name) == 0) { rows.insert(row); }
294 for(int j=0;j<nseqs;j++){
295 if (m->control_pressed) { in.close(); return 0; }
302 if (m->control_pressed) { return 0; }
304 //read through file only printing rows and columns of seqs in names
306 m->openInputFile(phylipfile, inPhylip);
311 m->openOutputFile(outputFileName, out);
312 outputTypes["phylip"].push_back(outputFileName); outputNames.push_back(outputFileName);
313 out << names.size() << endl;
315 unsigned int count = 0;
316 unsigned int keptCount = 0;
318 for(int i=0;i<nseqs;i++){
320 bool ignoreRow = false;
322 if (names.count(name) != 0) { ignoreRow = true; count++; }
323 else{ out << name << '\t'; keptCount++; }
325 for(int j=0;j<i;j++){
326 if (m->control_pressed) { inPhylip.close(); out.close(); return 0; }
327 inPhylip >> distance;
329 //is this a column we want
330 if(rows.count(j) != 0) { out << distance << '\t'; }
333 if (!ignoreRow) { out << endl; }
337 for(int i=0;i<nseqs;i++){
340 bool ignoreRow = false;
342 if (names.count(name) != 0) { ignoreRow = true; count++; }
343 else{ out << name << '\t'; keptCount++; }
345 for(int j=0;j<nseqs;j++){
346 if (m->control_pressed) { inPhylip.close(); out.close(); return 0; }
347 inPhylip >> distance;
349 //is this a column we want
350 if(rows.count(j) != 0) { out << distance << '\t'; }
353 if (!ignoreRow) { out << endl; }
359 if (keptCount == 0) { m->mothurOut("Your file contains ONLY distances related to groups or sequences listed in the accnos file."); m->mothurOutEndLine(); }
360 else if (count != names.size()) {
361 m->mothurOut("[WARNING]: Your accnos file contains " + toString(names.size()) + " groups or sequences, but I only found " + toString(count) + " of them in the phylip file."); m->mothurOutEndLine();
362 //rewrite with new number
363 m->renameFile(outputFileName, outputFileName+".temp");
365 m->openOutputFile(outputFileName, out2);
366 out2 << keptCount << endl;
369 m->openInputFile(outputFileName+".temp", in3);
370 in3 >> nseqs; m->gobble(in3);
373 in3.read(buffer, 4096);
374 out2.write(buffer, in3.gcount());
378 m->mothurRemove(outputFileName+".temp");
381 m->mothurOut("Removed " + toString(count) + " groups or sequences from your phylip file."); m->mothurOutEndLine();
386 catch(exception& e) {
387 m->errorOut(e, "RemoveDistsCommand", "readPhylip");
391 //**********************************************************************************************************************
392 int RemoveDistsCommand::readColumn(){
394 string thisOutputDir = outputDir;
395 if (outputDir == "") { thisOutputDir += m->hasPath(columnfile); }
396 map<string, string> variables;
397 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(columnfile));
398 variables["[extension]"] = m->getExtension(columnfile);
399 string outputFileName = getOutputFileName("column", variables);
400 outputTypes["column"].push_back(outputFileName); outputNames.push_back(outputFileName);
403 m->openOutputFile(outputFileName, out);
406 m->openInputFile(columnfile, in);
408 set<string> removeNames;
409 string firstName, secondName;
414 if (m->control_pressed) { out.close(); in.close(); return 0; }
416 in >> firstName >> secondName >> distance; m->gobble(in);
418 //is either names in the accnos file
419 if (names.count(firstName) != 0) {
420 removeNames.insert(firstName);
421 if (names.count(secondName) != 0) { removeNames.insert(secondName); } }
422 else if (names.count(secondName) != 0) {
423 removeNames.insert(secondName);
424 if (names.count(firstName) != 0) { removeNames.insert(firstName); } }
427 out << firstName << '\t' << secondName << '\t' << distance << endl;
433 if (!wrote) { m->mothurOut("Your file contains ONLY distances related to groups or sequences listed in the accnos file."); m->mothurOutEndLine(); }
434 else if (removeNames.size() != names.size()) {
435 m->mothurOut("[WARNING]: Your accnos file contains " + toString(names.size()) + " groups or sequences, but I only found " + toString(removeNames.size()) + " of them in the column file."); m->mothurOutEndLine();
438 m->mothurOut("Removed " + toString(removeNames.size()) + " groups or sequences from your column file."); m->mothurOutEndLine();
443 catch(exception& e) {
444 m->errorOut(e, "RemoveDistsCommand", "readColumn");
448 //**********************************************************************************************************************