2 * matrixoutputcommand.cpp
5 * Created by Sarah Westcott on 5/20/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10 #include "matrixoutputcommand.h"
11 #include "sharedjabund.h"
12 #include "sharedsorabund.h"
13 #include "sharedjclass.h"
14 #include "sharedsorclass.h"
15 #include "sharedjest.h"
16 #include "sharedsorest.h"
17 #include "sharedthetayc.h"
18 #include "sharedthetan.h"
19 #include "sharedmorisitahorn.h"
20 #include "sharedbraycurtis.h"
23 //**********************************************************************************************************************
25 MatrixOutputCommand::MatrixOutputCommand(string option){
27 globaldata = GlobalData::getInstance();
35 //allow user to run help
36 if(option == "help") { validCalculator = new ValidCalculators(); help(); abort = true; }
39 //valid paramters for this command
40 string Array[] = {"line","label","calc","groups"};
41 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
43 OptionParser parser(option);
44 map<string,string> parameters = parser.getParameters();
46 ValidParameters validParameter;
48 //check to make sure all parameters are valid for command
49 for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) {
50 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
53 //make sure the user has already run the read.otu command
54 if (globaldata->getSharedFile() == "") {
55 if (globaldata->getListFile() == "") { cout << "You must read a list and a group, or a shared before you can use the dist.shared command." << endl; abort = true; }
56 else if (globaldata->getGroupFile() == "") { cout << "You must read a list and a group, or a shared before you can use the dist.shared command." << endl; abort = true; }
59 //check for optional parameter and set defaults
60 // ...at some point should added some additional type checking...
61 line = validParameter.validFile(parameters, "line", false);
62 if (line == "not found") { line = ""; }
64 if(line != "all") { splitAtDash(line, lines); allLines = 0; }
65 else { allLines = 1; }
68 label = validParameter.validFile(parameters, "label", false);
69 if (label == "not found") { label = ""; }
71 if(label != "all") { splitAtDash(label, labels); allLines = 0; }
72 else { allLines = 1; }
75 //make sure user did not use both the line and label parameters
76 if ((line != "") && (label != "")) { cout << "You cannot use both the line and label parameters at the same time. " << endl; abort = true; }
77 //if the user has not specified any line or labels use the ones from read.otu
78 else if((line == "") && (label == "")) {
79 allLines = globaldata->allLines;
80 labels = globaldata->labels;
81 lines = globaldata->lines;
84 groups = validParameter.validFile(parameters, "groups", false);
85 if (groups == "not found") { groups = ""; }
87 splitAtDash(groups, Groups);
88 globaldata->Groups = Groups;
91 calc = validParameter.validFile(parameters, "calc", false);
92 if (calc == "not found") { calc = "jclass-thetayc"; }
94 if (calc == "default") { calc = "jclass-thetayc"; }
96 splitAtDash(calc, Estimators);
100 validCalculator = new ValidCalculators();
103 for (i=0; i<Estimators.size(); i++) {
104 if (validCalculator->isValidCalculator("matrix", Estimators[i]) == true) {
105 if (Estimators[i] == "jabund") {
106 matrixCalculators.push_back(new JAbund());
107 }else if (Estimators[i] == "sorabund") {
108 matrixCalculators.push_back(new SorAbund());
109 }else if (Estimators[i] == "jclass") {
110 matrixCalculators.push_back(new Jclass());
111 }else if (Estimators[i] == "sorclass") {
112 matrixCalculators.push_back(new SorClass());
113 }else if (Estimators[i] == "jest") {
114 matrixCalculators.push_back(new Jest());
115 }else if (Estimators[i] == "sorest") {
116 matrixCalculators.push_back(new SorEst());
117 }else if (Estimators[i] == "thetayc") {
118 matrixCalculators.push_back(new ThetaYC());
119 }else if (Estimators[i] == "thetan") {
120 matrixCalculators.push_back(new ThetaN());
121 }else if (Estimators[i] == "morisitahorn") {
122 matrixCalculators.push_back(new MorHorn());
123 }else if (Estimators[i] == "braycurtis") {
124 matrixCalculators.push_back(new BrayCurtis());
133 catch(exception& e) {
134 cout << "Standard Error: " << e.what() << " has occurred in the MatrixOutputCommand class Function MatrixOutputCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
138 cout << "An unknown error has occurred in the MatrixOutputCommand class function MatrixOutputCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
143 //**********************************************************************************************************************
145 void MatrixOutputCommand::help(){
147 cout << "The dist.shared command can only be executed after a successful read.otu command." << "\n";
148 cout << "The dist.shared command parameters are groups, calc, line and label. The calc parameter is required, and you may not use line and label at the same time." << "\n";
149 cout << "The groups parameter allows you to specify which of the groups in your groupfile you would like included used." << "\n";
150 cout << "The group names are separated by dashes. The line and label allow you to select what distance levels you would like distance matrices created for, and are also separated by dashes." << "\n";
151 cout << "The dist.shared command should be in the following format: dist.shared(groups=yourGroups, calc=yourCalcs, line=yourLines, label=yourLabels)." << "\n";
152 cout << "Example dist.shared(groups=A-B-C, line=1-3-5, calc=jabund-sorabund)." << "\n";
153 cout << "The default value for groups is all the groups in your groupfile." << "\n";
154 cout << "The default value for calc is jclass and thetayc." << "\n";
155 validCalculator->printCalc("matrix", cout);
156 cout << "The dist.shared command outputs a .dist file for each calculator you specify at each distance you choose." << "\n";
157 cout << "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups)." << "\n" << "\n";
159 catch(exception& e) {
160 cout << "Standard Error: " << e.what() << " has occurred in the MatrixOutputCommand class Function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
164 cout << "An unknown error has occurred in the MatrixOutputCommand class function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
170 //**********************************************************************************************************************
172 MatrixOutputCommand::~MatrixOutputCommand(){
175 delete validCalculator;
178 //**********************************************************************************************************************
180 int MatrixOutputCommand::execute(){
183 if (abort == true) { return 0; }
187 //if the users entered no valid calculators don't execute command
188 if (matrixCalculators.size() == 0) { cout << "No valid calculators." << endl; return 0; }
191 read = new ReadOTUFile(globaldata->inputFileName);
192 read->read(&*globaldata);
194 input = globaldata->ginput;
195 lookup = input->getSharedRAbundVectors();
196 vector<SharedRAbundVector*> lastLookup = lookup;
198 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
199 set<string> processedLabels;
200 set<string> userLabels = labels;
201 set<int> userLines = lines;
203 if (lookup.size() < 2) { cout << "You have not provided enough valid groups. I cannot run the command." << endl; return 0;}
205 numGroups = lookup.size();
207 //as long as you are not at the end of the file or done wih the lines you want
208 while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0) || (userLines.size() != 0))) {
210 if(allLines == 1 || lines.count(count) == 1 || labels.count(lookup[0]->getLabel()) == 1){
211 cout << lookup[0]->getLabel() << '\t' << count << endl;
214 processedLabels.insert(lookup[0]->getLabel());
215 userLabels.erase(lookup[0]->getLabel());
216 userLines.erase(count);
219 if ((anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLookup[0]->getLabel()) != 1)) {
220 cout << lastLookup[0]->getLabel() << '\t' << count << endl;
223 processedLabels.insert(lastLookup[0]->getLabel());
224 userLabels.erase(lastLookup[0]->getLabel());
227 //prevent memory leak
228 if (count != 1) { for (int i = 0; i < lastLookup.size(); i++) { delete lastLookup[i]; } }
231 //get next line to process
232 lookup = input->getSharedRAbundVectors();
236 //output error messages about any remaining user labels
237 set<string>::iterator it;
238 bool needToRun = false;
239 for (it = userLabels.begin(); it != userLabels.end(); it++) {
240 cout << "Your file does not include the label "<< *it;
241 if (processedLabels.count(lastLookup[0]->getLabel()) != 1) {
242 cout << ". I will use " << lastLookup[0]->getLabel() << "." << endl;
245 cout << ". Please refer to " << lastLookup[0]->getLabel() << "." << endl;
249 //run last line if you need to
250 if (needToRun == true) {
251 cout << lastLookup[0]->getLabel() << '\t' << count << endl;
255 for (int i = 0; i < lastLookup.size(); i++) { delete lastLookup[i]; }
258 //reset groups parameter
259 globaldata->Groups.clear();
263 catch(exception& e) {
264 cout << "Standard Error: " << e.what() << " has occurred in the MatrixOutputCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
268 cout << "An unknown error has occurred in the MatrixOutputCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
272 /***********************************************************/
273 void MatrixOutputCommand::printSims(ostream& out) {
276 //output column headers
277 out << simMatrix.size() << endl;
279 for (int m = 0; m < simMatrix.size(); m++) {
280 out << lookup[m]->getGroup() << '\t';
281 for (int n = 0; n < m; n++) {
282 out << simMatrix[m][n] << '\t';
288 catch(exception& e) {
289 cout << "Standard Error: " << e.what() << " has occurred in the MatrixOutputCommand class Function printSims. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
293 cout << "An unknown error has occurred in the MatrixOutputCommand class function printSims. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
297 /***********************************************************/
298 void MatrixOutputCommand::process(vector<SharedRAbundVector*> thisLookup){
302 vector<SharedRAbundVector*> subset;
304 //for each calculator
305 for(int i = 0 ; i < matrixCalculators.size(); i++) {
307 //initialize simMatrix
309 simMatrix.resize(numGroups);
310 for (int m = 0; m < simMatrix.size(); m++) {
311 for (int j = 0; j < simMatrix.size(); j++) {
312 simMatrix[m].push_back(0.0);
316 for (int k = 0; k < thisLookup.size(); k++) {
317 for (int l = k; l < thisLookup.size(); l++) {
318 if (k != l) { //we dont need to similiarity of a groups to itself
319 //get estimated similarity between 2 groups
321 subset.clear(); //clear out old pair of sharedrabunds
322 //add new pair of sharedrabunds
323 subset.push_back(thisLookup[k]); subset.push_back(thisLookup[l]);
325 data = matrixCalculators[i]->getValues(subset); //saves the calculator outputs
326 //save values in similarity matrix
327 simMatrix[k][l] = 1.0 - data[0]; //convert similiarity to distance
328 simMatrix[l][k] = 1.0 - data[0]; //convert similiarity to distance
333 exportFileName = getRootName(globaldata->inputFileName) + matrixCalculators[i]->getName() + "." + thisLookup[0]->getLabel() + ".dist";
334 openOutputFile(exportFileName, out);
343 catch(exception& e) {
344 cout << "Standard Error: " << e.what() << " has occurred in the MatrixOutputCommand class Function process. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
348 cout << "An unknown error has occurred in the MatrixOutputCommand class function process. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
352 /***********************************************************/