5 // Created by Sarah Westcott on 1/28/13.
6 // Copyright (c) 2013 Schloss Lab. All rights reserved.
9 #include "getdistscommand.h"
11 //**********************************************************************************************************************
12 vector<string> GetDistsCommand::setParameters(){
14 CommandParameter pphylip("phylip", "InputTypes", "", "", "none", "PhylipColumn", "none","phylip",false,false,true); parameters.push_back(pphylip);
15 CommandParameter pcolumn("column", "InputTypes", "", "", "none", "PhylipColumn", "none","column",false,false,true); parameters.push_back(pcolumn);
16 CommandParameter paccnos("accnos", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(paccnos);
17 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
18 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
20 vector<string> myArray;
21 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
25 m->errorOut(e, "GetDistsCommand", "setParameters");
29 //**********************************************************************************************************************
30 string GetDistsCommand::getHelpString(){
32 string helpString = "";
33 helpString += "The get.dists command selects distances from a phylip or column file related to groups or sequences listed in an accnos file.\n";
34 helpString += "The get.dists command parameters are accnos, phylip and column.\n";
35 helpString += "The get.dists command should be in the following format: get.dists(accnos=yourAccnos, phylip=yourPhylip).\n";
36 helpString += "Example get.dists(accnos=final.accnos, phylip=final.an.thetayc.0.03.lt.ave.dist).\n";
37 helpString += "Note: No spaces between parameter labels (i.e. accnos), '=' and parameters (i.e.final.accnos).\n";
41 m->errorOut(e, "GetDistsCommand", "getHelpString");
45 //**********************************************************************************************************************
46 string GetDistsCommand::getOutputPattern(string type) {
50 if (type == "phylip") { pattern = "[filename],pick,[extension]"; }
51 else if (type == "column") { pattern = "[filename],pick,[extension]"; }
52 else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }
57 m->errorOut(e, "GetDistsCommand", "getOutputPattern");
61 //**********************************************************************************************************************
62 GetDistsCommand::GetDistsCommand(){
64 abort = true; calledHelp = true;
66 vector<string> tempOutNames;
67 outputTypes["phylip"] = tempOutNames;
68 outputTypes["column"] = tempOutNames;
71 m->errorOut(e, "GetDistsCommand", "GetDistsCommand");
75 //**********************************************************************************************************************
76 GetDistsCommand::GetDistsCommand(string option) {
78 abort = false; calledHelp = false;
80 //allow user to run help
81 if(option == "help") { help(); abort = true; calledHelp = true; }
82 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
85 vector<string> myArray = setParameters();
87 OptionParser parser(option);
88 map<string,string> parameters = parser.getParameters();
90 ValidParameters validParameter;
91 map<string,string>::iterator it;
93 //check to make sure all parameters are valid for command
94 for (it = parameters.begin(); it != parameters.end(); it++) {
95 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
98 //initialize outputTypes
99 vector<string> tempOutNames;
100 outputTypes["column"] = tempOutNames;
101 outputTypes["phylip"] = tempOutNames;
103 //if the user changes the output directory command factory will send this info to us in the output parameter
104 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
106 //if the user changes the input directory command factory will send this info to us in the output parameter
107 string inputDir = validParameter.validFile(parameters, "inputdir", false);
108 if (inputDir == "not found"){ inputDir = ""; }
111 it = parameters.find("phylip");
112 //user has given a template file
113 if(it != parameters.end()){
114 path = m->hasPath(it->second);
115 //if the user has not given a path then, add inputdir. else leave path alone.
116 if (path == "") { parameters["phylip"] = inputDir + it->second; }
119 it = parameters.find("column");
120 //user has given a template file
121 if(it != parameters.end()){
122 path = m->hasPath(it->second);
123 //if the user has not given a path then, add inputdir. else leave path alone.
124 if (path == "") { parameters["column"] = inputDir + it->second; }
127 it = parameters.find("accnos");
128 //user has given a template file
129 if(it != parameters.end()){
130 path = m->hasPath(it->second);
131 //if the user has not given a path then, add inputdir. else leave path alone.
132 if (path == "") { parameters["accnos"] = inputDir + it->second; }
137 //check for required parameters
138 accnosfile = validParameter.validFile(parameters, "accnos", true);
139 if (accnosfile == "not open") { abort = true; }
140 else if (accnosfile == "not found") {
141 accnosfile = m->getAccnosFile();
142 if (accnosfile != "") { m->mothurOut("Using " + accnosfile + " as input file for the accnos parameter."); m->mothurOutEndLine(); }
144 m->mothurOut("You have no valid accnos file and accnos is required."); m->mothurOutEndLine();
147 }else { m->setAccnosFile(accnosfile); }
149 phylipfile = validParameter.validFile(parameters, "phylip", true);
150 if (phylipfile == "not open") { phylipfile = ""; abort = true; }
151 else if (phylipfile == "not found") { phylipfile = ""; }
152 else { m->setPhylipFile(phylipfile); }
154 columnfile = validParameter.validFile(parameters, "column", true);
155 if (columnfile == "not open") { columnfile = ""; abort = true; }
156 else if (columnfile == "not found") { columnfile = ""; }
157 else { m->setColumnFile(columnfile); }
159 if ((phylipfile == "") && (columnfile == "")) {
160 //is there are current file available for either of these?
161 //give priority to column, then phylip
162 columnfile = m->getColumnFile();
163 if (columnfile != "") { m->mothurOut("Using " + columnfile + " as input file for the column parameter."); m->mothurOutEndLine(); }
165 phylipfile = m->getPhylipFile();
166 if (phylipfile != "") { m->mothurOut("Using " + phylipfile + " as input file for the phylip parameter."); m->mothurOutEndLine(); }
168 m->mothurOut("No valid current files. You must provide a phylip or column file."); m->mothurOutEndLine();
176 catch(exception& e) {
177 m->errorOut(e, "GetDistsCommand", "GetDistsCommand");
181 //**********************************************************************************************************************
183 int GetDistsCommand::execute(){
186 if (abort == true) { if (calledHelp) { return 0; } return 2; }
188 //get names you want to keep
189 names = m->readAccnos(accnosfile);
191 if (m->control_pressed) { return 0; }
193 //read through the correct file and output lines you want to keep
194 if (phylipfile != "") { readPhylip(); }
195 if (columnfile != "") { readColumn(); }
197 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
200 if (outputNames.size() != 0) {
201 m->mothurOutEndLine();
202 m->mothurOut("Output File names: "); m->mothurOutEndLine();
203 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
204 m->mothurOutEndLine();
206 //set fasta file as new current fastafile
208 itTypes = outputTypes.find("phylip");
209 if (itTypes != outputTypes.end()) {
210 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setPhylipFile(current); }
213 itTypes = outputTypes.find("column");
214 if (itTypes != outputTypes.end()) {
215 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setColumnFile(current); }
222 catch(exception& e) {
223 m->errorOut(e, "GetDistsCommand", "execute");
228 //**********************************************************************************************************************
229 int GetDistsCommand::readPhylip(){
231 string thisOutputDir = outputDir;
232 if (outputDir == "") { thisOutputDir += m->hasPath(phylipfile); }
233 map<string, string> variables;
234 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(phylipfile));
235 variables["[extension]"] = m->getExtension(phylipfile);
236 string outputFileName = getOutputFileName("phylip", variables);
239 m->openInputFile(phylipfile, in);
245 set<unsigned int> rows; //converts names in names to a index
249 in >> numTest >> name;
251 if (!m->isContainingOnlyDigits(numTest)) { m->mothurOut("[ERROR]: expected a number and got " + numTest + ", quitting."); m->mothurOutEndLine(); exit(1); }
252 else { convert(numTest, nseqs); }
254 if (names.count(name) != 0) { rows.insert(row); }
257 //is the matrix square?
259 while((d=in.get()) != EOF){
264 for(int i=0;i<nseqs;i++){
275 //map name to row/column
277 for(int i=1;i<nseqs;i++){
279 if (names.count(name) != 0) { rows.insert(row); }
282 for(int j=0;j<i;j++){
283 if (m->control_pressed) { in.close(); return 0; }
289 for(int i=1;i<nseqs;i++){
291 if (names.count(name) != 0) { rows.insert(row); }
293 for(int j=0;j<nseqs;j++){
294 if (m->control_pressed) { in.close(); return 0; }
301 if (m->control_pressed) { return 0; }
303 //read through file only printing rows and columns of seqs in names
305 m->openInputFile(phylipfile, inPhylip);
310 m->openOutputFile(outputFileName, out);
311 outputTypes["phylip"].push_back(outputFileName); outputNames.push_back(outputFileName);
312 out << names.size() << endl;
314 unsigned int count = 0;
316 for(int i=0;i<nseqs;i++){
318 bool ignoreRow = false;
320 if (names.count(name) == 0) { ignoreRow = true; }
321 else{ out << name << '\t'; count++; }
323 for(int j=0;j<i;j++){
324 if (m->control_pressed) { inPhylip.close(); out.close(); return 0; }
325 inPhylip >> distance;
327 //is this a column we want
328 if(rows.count(j) != 0) { out << distance << '\t'; }
331 if (!ignoreRow) { out << endl; }
335 for(int i=0;i<nseqs;i++){
338 bool ignoreRow = false;
340 if (names.count(name) == 0) { ignoreRow = true; }
341 else{ out << name << '\t'; count++; }
343 for(int j=0;j<nseqs;j++){
344 if (m->control_pressed) { inPhylip.close(); out.close(); return 0; }
345 inPhylip >> distance;
347 //is this a column we want
348 if(rows.count(j) != 0) { out << distance << '\t'; }
351 if (!ignoreRow) { out << endl; }
357 if (count == 0) { m->mothurOut("Your file does NOT contain distances related to groups or sequences listed in the accnos file."); m->mothurOutEndLine(); }
358 else if (count != names.size()) {
359 m->mothurOut("[WARNING]: Your accnos file contains " + toString(names.size()) + " groups or sequences, but I only found " + toString(count) + " of them in the phylip file."); m->mothurOutEndLine();
360 //rewrite with new number
361 m->renameFile(outputFileName, outputFileName+".temp");
363 m->openOutputFile(outputFileName, out2);
364 out2 << count << endl;
367 m->openInputFile(outputFileName+".temp", in3);
368 in3 >> nseqs; m->gobble(in3);
371 in3.read(buffer, 4096);
372 out2.write(buffer, in3.gcount());
376 m->mothurRemove(outputFileName+".temp");
379 m->mothurOut("Selected " + toString(count) + " groups or sequences from your phylip file."); m->mothurOutEndLine();
384 catch(exception& e) {
385 m->errorOut(e, "GetDistsCommand", "readPhylip");
389 //**********************************************************************************************************************
390 int GetDistsCommand::readColumn(){
392 string thisOutputDir = outputDir;
393 if (outputDir == "") { thisOutputDir += m->hasPath(columnfile); }
394 map<string, string> variables;
395 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(columnfile));
396 variables["[extension]"] = m->getExtension(columnfile);
397 string outputFileName = getOutputFileName("column", variables);
398 outputTypes["column"].push_back(outputFileName); outputNames.push_back(outputFileName);
401 m->openOutputFile(outputFileName, out);
404 m->openInputFile(columnfile, in);
406 set<string> foundNames;
407 string firstName, secondName;
411 if (m->control_pressed) { out.close(); in.close(); return 0; }
413 in >> firstName >> secondName >> distance; m->gobble(in);
415 //are both names in the accnos file
416 if ((names.count(firstName) != 0) && (names.count(secondName) != 0)) {
417 out << firstName << '\t' << secondName << '\t' << distance << endl;
418 foundNames.insert(firstName);
419 foundNames.insert(secondName);
425 if (foundNames.size() == 0) { m->mothurOut("Your file does NOT contain distances related to groups or sequences listed in the accnos file."); m->mothurOutEndLine(); }
426 else if (foundNames.size() != names.size()) {
427 m->mothurOut("[WARNING]: Your accnos file contains " + toString(names.size()) + " groups or sequences, but I only found " + toString(foundNames.size()) + " of them in the column file."); m->mothurOutEndLine();
430 m->mothurOut("Selected " + toString(foundNames.size()) + " groups or sequences from your column file."); m->mothurOutEndLine();
435 catch(exception& e) {
436 m->errorOut(e, "GetDistsCommand", "readColumn");
440 //**********************************************************************************************************************