2 * matrixoutputcommand.cpp
5 * Created by Sarah Westcott on 5/20/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10 #include "matrixoutputcommand.h"
11 #include "sharedjabund.h"
12 #include "sharedsorabund.h"
13 #include "sharedjclass.h"
14 #include "sharedsorclass.h"
15 #include "sharedjest.h"
16 #include "sharedsorest.h"
17 #include "sharedthetayc.h"
18 #include "sharedthetan.h"
19 #include "sharedmorisitahorn.h"
20 #include "sharedbraycurtis.h"
23 //**********************************************************************************************************************
25 MatrixOutputCommand::MatrixOutputCommand(string option){
27 globaldata = GlobalData::getInstance();
35 //allow user to run help
36 if(option == "help") { validCalculator = new ValidCalculators(); help(); abort = true; }
39 //valid paramters for this command
40 string Array[] = {"line","label","calc","groups"};
41 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
43 parser = new OptionParser();
44 parser->parse(option, parameters); delete parser;
46 ValidParameters* validParameter = new ValidParameters();
48 //check to make sure all parameters are valid for command
49 for (it = parameters.begin(); it != parameters.end(); it++) {
50 if (validParameter->isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
53 //make sure the user has already run the read.otu command
54 if (globaldata->getSharedFile() == "") {
55 if (globaldata->getListFile() == "") { cout << "You must read a list and a group, or a shared before you can use the dist.shared command." << endl; abort = true; }
56 else if (globaldata->getGroupFile() == "") { cout << "You must read a list and a group, or a shared before you can use the dist.shared command." << endl; abort = true; }
59 //check for optional parameter and set defaults
60 // ...at some point should added some additional type checking...
61 line = validParameter->validFile(parameters, "line", false);
62 if (line == "not found") { line = ""; }
64 if(line != "all") { splitAtDash(line, lines); allLines = 0; }
65 else { allLines = 1; }
68 label = validParameter->validFile(parameters, "label", false);
69 if (label == "not found") { label = ""; }
71 if(label != "all") { splitAtDash(label, labels); allLines = 0; }
72 else { allLines = 1; }
75 //make sure user did not use both the line and label parameters
76 if ((line != "") && (label != "")) { cout << "You cannot use both the line and label parameters at the same time. " << endl; abort = true; }
77 //if the user has not specified any line or labels use the ones from read.otu
78 else if((line == "") && (label == "")) {
79 allLines = globaldata->allLines;
80 labels = globaldata->labels;
81 lines = globaldata->lines;
84 groups = validParameter->validFile(parameters, "groups", false);
85 if (groups == "not found") { groups = ""; }
87 splitAtDash(groups, Groups);
88 globaldata->Groups = Groups;
91 calc = validParameter->validFile(parameters, "calc", false);
92 if (calc == "not found") { calc = "jclass-thetayc"; }
94 if (calc == "default") { calc = "jclass-thetayc"; }
96 splitAtDash(calc, Estimators);
98 delete validParameter;
101 if (abort == false) {
103 validCalculator = new ValidCalculators();
106 for (i=0; i<Estimators.size(); i++) {
107 if (validCalculator->isValidCalculator("matrix", Estimators[i]) == true) {
108 if (Estimators[i] == "jabund") {
109 matrixCalculators.push_back(new JAbund());
110 }else if (Estimators[i] == "sorabund") {
111 matrixCalculators.push_back(new SorAbund());
112 }else if (Estimators[i] == "jclass") {
113 matrixCalculators.push_back(new Jclass());
114 }else if (Estimators[i] == "sorclass") {
115 matrixCalculators.push_back(new SorClass());
116 }else if (Estimators[i] == "jest") {
117 matrixCalculators.push_back(new Jest());
118 }else if (Estimators[i] == "sorest") {
119 matrixCalculators.push_back(new SorEst());
120 }else if (Estimators[i] == "thetayc") {
121 matrixCalculators.push_back(new ThetaYC());
122 }else if (Estimators[i] == "thetan") {
123 matrixCalculators.push_back(new ThetaN());
124 }else if (Estimators[i] == "morisitahorn") {
125 matrixCalculators.push_back(new MorHorn());
126 }else if (Estimators[i] == "braycurtis") {
127 matrixCalculators.push_back(new BrayCurtis());
136 catch(exception& e) {
137 cout << "Standard Error: " << e.what() << " has occurred in the MatrixOutputCommand class Function MatrixOutputCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
141 cout << "An unknown error has occurred in the MatrixOutputCommand class function MatrixOutputCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
146 //**********************************************************************************************************************
148 void MatrixOutputCommand::help(){
150 cout << "The dist.shared command can only be executed after a successful read.otu command." << "\n";
151 cout << "The dist.shared command parameters are groups, calc, line and label. The calc parameter is required, and you may not use line and label at the same time." << "\n";
152 cout << "The groups parameter allows you to specify which of the groups in your groupfile you would like included used." << "\n";
153 cout << "The group names are separated by dashes. The line and label allow you to select what distance levels you would like distance matrices created for, and are also separated by dashes." << "\n";
154 cout << "The dist.shared command should be in the following format: dist.shared(groups=yourGroups, calc=yourCalcs, line=yourLines, label=yourLabels)." << "\n";
155 cout << "Example dist.shared(groups=A-B-C, line=1-3-5, calc=jabund-sorabund)." << "\n";
156 cout << "The default value for groups is all the groups in your groupfile." << "\n";
157 cout << "The default value for calc is jclass and thetayc." << "\n";
158 validCalculator->printCalc("matrix", cout);
159 cout << "The dist.shared command outputs a .dist file for each calculator you specify at each distance you choose." << "\n";
160 cout << "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups)." << "\n" << "\n";
162 catch(exception& e) {
163 cout << "Standard Error: " << e.what() << " has occurred in the MatrixOutputCommand class Function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
167 cout << "An unknown error has occurred in the MatrixOutputCommand class function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
173 //**********************************************************************************************************************
175 MatrixOutputCommand::~MatrixOutputCommand(){
178 delete validCalculator;
181 //**********************************************************************************************************************
183 int MatrixOutputCommand::execute(){
186 if (abort == true) { return 0; }
190 //if the users entered no valid calculators don't execute command
191 if (matrixCalculators.size() == 0) { cout << "No valid calculators." << endl; return 0; }
194 read = new ReadOTUFile(globaldata->inputFileName);
195 read->read(&*globaldata);
197 input = globaldata->ginput;
198 lookup = input->getSharedRAbundVectors();
199 vector<SharedRAbundVector*> lastLookup = lookup;
201 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
202 set<string> processedLabels;
203 set<string> userLabels = labels;
204 set<int> userLines = lines;
206 if (lookup.size() < 2) { cout << "You have not provided enough valid groups. I cannot run the command." << endl; return 0;}
208 numGroups = lookup.size();
210 //as long as you are not at the end of the file or done wih the lines you want
211 while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0) || (userLines.size() != 0))) {
213 if(allLines == 1 || lines.count(count) == 1 || labels.count(lookup[0]->getLabel()) == 1){
214 cout << lookup[0]->getLabel() << '\t' << count << endl;
217 processedLabels.insert(lookup[0]->getLabel());
218 userLabels.erase(lookup[0]->getLabel());
219 userLines.erase(count);
222 if ((anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLookup[0]->getLabel()) != 1)) {
223 cout << lastLookup[0]->getLabel() << '\t' << count << endl;
226 processedLabels.insert(lastLookup[0]->getLabel());
227 userLabels.erase(lastLookup[0]->getLabel());
230 //prevent memory leak
231 if (count != 1) { for (int i = 0; i < lastLookup.size(); i++) { delete lastLookup[i]; } }
234 //get next line to process
235 lookup = input->getSharedRAbundVectors();
239 //output error messages about any remaining user labels
240 set<string>::iterator it;
241 bool needToRun = false;
242 for (it = userLabels.begin(); it != userLabels.end(); it++) {
243 cout << "Your file does not include the label "<< *it;
244 if (processedLabels.count(lastLookup[0]->getLabel()) != 1) {
245 cout << ". I will use " << lastLookup[0]->getLabel() << "." << endl;
248 cout << ". Please refer to " << lastLookup[0]->getLabel() << "." << endl;
252 //run last line if you need to
253 if (needToRun == true) {
254 cout << lastLookup[0]->getLabel() << '\t' << count << endl;
258 for (int i = 0; i < lastLookup.size(); i++) { delete lastLookup[i]; }
261 //reset groups parameter
262 globaldata->Groups.clear();
266 catch(exception& e) {
267 cout << "Standard Error: " << e.what() << " has occurred in the MatrixOutputCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
271 cout << "An unknown error has occurred in the MatrixOutputCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
275 /***********************************************************/
276 void MatrixOutputCommand::printSims(ostream& out) {
279 //output column headers
280 out << simMatrix.size() << endl;
282 for (int m = 0; m < simMatrix.size(); m++) {
283 out << lookup[m]->getGroup() << '\t';
284 for (int n = 0; n < m; n++) {
285 out << simMatrix[m][n] << '\t';
291 catch(exception& e) {
292 cout << "Standard Error: " << e.what() << " has occurred in the MatrixOutputCommand class Function printSims. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
296 cout << "An unknown error has occurred in the MatrixOutputCommand class function printSims. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
300 /***********************************************************/
301 void MatrixOutputCommand::process(vector<SharedRAbundVector*> thisLookup){
305 vector<SharedRAbundVector*> subset;
307 //for each calculator
308 for(int i = 0 ; i < matrixCalculators.size(); i++) {
310 //initialize simMatrix
312 simMatrix.resize(numGroups);
313 for (int m = 0; m < simMatrix.size(); m++) {
314 for (int j = 0; j < simMatrix.size(); j++) {
315 simMatrix[m].push_back(0.0);
319 for (int k = 0; k < thisLookup.size(); k++) {
320 for (int l = k; l < thisLookup.size(); l++) {
321 if (k != l) { //we dont need to similiarity of a groups to itself
322 //get estimated similarity between 2 groups
324 subset.clear(); //clear out old pair of sharedrabunds
325 //add new pair of sharedrabunds
326 subset.push_back(thisLookup[k]); subset.push_back(thisLookup[l]);
328 data = matrixCalculators[i]->getValues(subset); //saves the calculator outputs
329 //save values in similarity matrix
330 simMatrix[k][l] = 1.0 - data[0]; //convert similiarity to distance
331 simMatrix[l][k] = 1.0 - data[0]; //convert similiarity to distance
336 exportFileName = getRootName(globaldata->inputFileName) + matrixCalculators[i]->getName() + "." + thisLookup[0]->getLabel() + ".dist";
337 openOutputFile(exportFileName, out);
346 catch(exception& e) {
347 cout << "Standard Error: " << e.what() << " has occurred in the MatrixOutputCommand class Function process. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
351 cout << "An unknown error has occurred in the MatrixOutputCommand class function process. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
355 /***********************************************************/