1 #ifndef TREEGROUPCOMMAND_H
2 #define TREEGROUPCOMMAND_H
8 * Created by Sarah Westcott on 4/8/09.
9 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
13 #include "command.hpp"
14 #include "inputdata.h"
16 #include "validcalculator.h"
18 #include "counttable.h"
19 #include "readmatrix.hpp"
20 #include "readcolumn.h"
21 #include "readphylip.h"
22 #include "sharedsobscollectsummary.h"
23 #include "sharedchao1.h"
24 #include "sharedace.h"
25 #include "sharednseqs.h"
26 #include "sharedjabund.h"
27 #include "sharedsorabund.h"
28 #include "sharedjclass.h"
29 #include "sharedsorclass.h"
30 #include "sharedjest.h"
31 #include "sharedsorest.h"
32 #include "sharedthetayc.h"
33 #include "sharedthetan.h"
34 #include "sharedkstest.h"
35 #include "whittaker.h"
36 #include "sharedochiai.h"
37 #include "sharedanderbergs.h"
38 #include "sharedkulczynski.h"
39 #include "sharedkulczynskicody.h"
40 #include "sharedlennon.h"
41 #include "sharedmorisitahorn.h"
42 #include "sharedbraycurtis.h"
43 #include "sharedjackknife.h"
44 #include "whittaker.h"
47 #include "structeuclidean.h"
48 #include "structchord.h"
49 #include "hellinger.h"
50 #include "manhattan.h"
51 #include "structpearson.h"
54 #include "structkulczynski.h"
55 #include "structchi2.h"
56 #include "speciesprofile.h"
61 #include "memeuclidean.h"
62 #include "mempearson.h"
66 /* This command creates a tree file for each similarity calculator at distance level, using various calculators to find the similarity between groups.
67 The user can select the lines or labels they wish to use as well as the groups they would like included.
68 They can also use as many or as few calculators as they wish. */
// Command subclass registered under the name "tree.shared" (see getCommandName()).
// Per its own description string it generates a tree file that describes the
// dissimilarity among groups. Only declarations are visible here; the
// non-inline members are defined outside this header.
71 class TreeGroupCommand : public Command {
// Constructor taking one string.
// NOTE(review): presumably the user-supplied option string -- confirm against the definition.
74 TreeGroupCommand(string);
// --- Command metadata: name, category, help, output pattern, citation, description ---
78 vector<string> setParameters();
79 string getCommandName() { return "tree.shared"; }
80 string getCommandCategory() { return "OTU-Based Approaches"; }
82 string getHelpString();
83 string getOutputPattern(string);
84 string getCitation() { return "http://www.mothur.org/wiki/Tree.shared"; }
85 string getDescription() { return "generate a tree file that describes the dissimilarity among groups"; }
// Emits the text returned by getHelpString() through m->mothurOut().
89 void help() { m->mothurOut(getHelpString()); }
// NOTE(review): presumably the index ranges handed to worker threads/processes -- confirm where it is filled.
97 vector<linePair> lines;
// --- Helper routines (declared here, defined elsewhere) ---
// createTree/printSims take a matrix of doubles; makeSimsDist produces such a
// matrix from a SparseDistanceMatrix; writeTree takes a string and a Tree*.
99 Tree* createTree(vector< vector<double> >&);
100 void printSims(ostream&, vector< vector<double> >&);
101 int makeSimsShared();
102 vector< vector<double> > makeSimsDist(SparseDistanceMatrix*);
103 int writeTree(string, Tree*);
// driver: takes a lookup vector, two ints and an output vector of seqDist lists.
// NOTE(review): the two ints look like a start/end range matching treeSharedData -- confirm.
104 int driver(vector<SharedRAbundVector*>, int, int, vector< vector<seqDist> >&);
// --- Command state ---
106 NameAssignment* nameMap;
// Calculator objects held by raw pointer.
// NOTE(review): ownership/cleanup is not visible in this header -- confirm in the .cpp.
111 vector<Calculator*> treeCalculators;
112 vector<SharedRAbundVector*> lookup;
114 string format, groupNames, filename, sharedfile, countfile, inputfile;
115 int numGroups, subsampleSize, iters, processors;
117 float precision, cutoff;
119 bool abort, allLines, subsample;
120 set<string> labels; //holds labels to be used
121 string phylipfile, columnfile, namefile, calc, groups, label, outputDir;
122 vector<string> Estimators, Groups, outputNames; //holds estimators to be used
124 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
125 int process(vector<SharedRAbundVector*>);
131 /**************************************************************************************************/
132 //custom data structure for threads to use.
133 // This is passed by void pointer so it can be any data type
134 // that can be passed using a single void pointer (LPVOID).
135 struct treeSharedData {
// Groups to compare; MyTreeSharedThreadFunction indexes this vector with its pair-loop variables.
136 vector<SharedRAbundVector*> thisLookup;
// Output: MyTreeSharedThreadFunction resizes this to one inner list per calculator
// and appends one seqDist per compared pair.
137 vector< vector<seqDist> > calcDists;
// Calculator names; MyTreeSharedThreadFunction matches each against string literals
// to decide which calculator object to create.
138 vector<string> Estimators;
// Bounds of the outer pair loop in MyTreeSharedThreadFunction: k runs from start while k < end.
139 unsigned long long start;
140 unsigned long long end;
// Constructor: receives a MothurOut handle, the start/end bounds, the estimator
// names and the lookup vector (both vectors are taken by value). Its body is not
// visible in this view.
// NOTE(review): the thread function reads pDataArray->m, but no declaration of an
// `m` member is visible here -- confirm it is declared and set from `mout`.
144 treeSharedData(MothurOut* mout, unsigned long long st, unsigned long long en, vector<string> est, vector<SharedRAbundVector*> lu) {
152 /**************************************************************************************************/
153 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
// Thread entry point with the Win32 thread-procedure signature (DWORD WINAPI f(LPVOID)).
// lpParam is cast to treeSharedData*. The function
//   1. creates one calculator per accepted name in pDataArray->Estimators,
//   2. for every pair (l, k) with start <= k < end and l < k, runs each calculator on
//      the pair and appends seqDist(l, k, 1 - value) to pDataArray->calcDists[i],
//   3. deletes the calculators.
// Returns 1 when control_pressed is observed mid-loop. The normal return value and
// the opening of the try block are not visible in this view.
155 static DWORD WINAPI MyTreeSharedThreadFunction(LPVOID lpParam){
156 treeSharedData* pDataArray;
157 pDataArray = (treeSharedData*)lpParam;
// Calculators owned by this function (raw pointers, released by the delete loop near the end).
161 vector<Calculator*> treeCalculators;
162 ValidCalculators validCalculator;
// Map each estimator name that is valid for the "matrix" category to its calculator class.
// Names that pass validation but match none of the literals below add no calculator.
// NOTE(review): int index compared against size() (signed/unsigned mix).
163 for (int i=0; i<pDataArray->Estimators.size(); i++) {
164 if (validCalculator.isValidCalculator("matrix", pDataArray->Estimators[i]) == true) {
165 if (pDataArray->Estimators[i] == "sharedsobs") {
166 treeCalculators.push_back(new SharedSobsCS());
167 }else if (pDataArray->Estimators[i] == "sharedchao") {
168 treeCalculators.push_back(new SharedChao1());
169 }else if (pDataArray->Estimators[i] == "sharedace") {
170 treeCalculators.push_back(new SharedAce());
171 }else if (pDataArray->Estimators[i] == "jabund") {
172 treeCalculators.push_back(new JAbund());
173 }else if (pDataArray->Estimators[i] == "sorabund") {
174 treeCalculators.push_back(new SorAbund());
175 }else if (pDataArray->Estimators[i] == "jclass") {
176 treeCalculators.push_back(new Jclass());
177 }else if (pDataArray->Estimators[i] == "sorclass") {
178 treeCalculators.push_back(new SorClass());
179 }else if (pDataArray->Estimators[i] == "jest") {
180 treeCalculators.push_back(new Jest());
181 }else if (pDataArray->Estimators[i] == "sorest") {
182 treeCalculators.push_back(new SorEst());
183 }else if (pDataArray->Estimators[i] == "thetayc") {
184 treeCalculators.push_back(new ThetaYC());
185 }else if (pDataArray->Estimators[i] == "thetan") {
186 treeCalculators.push_back(new ThetaN());
187 }else if (pDataArray->Estimators[i] == "kstest") {
188 treeCalculators.push_back(new KSTest());
189 }else if (pDataArray->Estimators[i] == "sharednseqs") {
190 treeCalculators.push_back(new SharedNSeqs());
191 }else if (pDataArray->Estimators[i] == "ochiai") {
192 treeCalculators.push_back(new Ochiai());
193 }else if (pDataArray->Estimators[i] == "anderberg") {
194 treeCalculators.push_back(new Anderberg());
195 }else if (pDataArray->Estimators[i] == "kulczynski") {
196 treeCalculators.push_back(new Kulczynski());
197 }else if (pDataArray->Estimators[i] == "kulczynskicody") {
198 treeCalculators.push_back(new KulczynskiCody());
199 }else if (pDataArray->Estimators[i] == "lennon") {
200 treeCalculators.push_back(new Lennon());
201 }else if (pDataArray->Estimators[i] == "morisitahorn") {
202 treeCalculators.push_back(new MorHorn());
203 }else if (pDataArray->Estimators[i] == "braycurtis") {
204 treeCalculators.push_back(new BrayCurtis());
205 }else if (pDataArray->Estimators[i] == "whittaker") {
206 treeCalculators.push_back(new Whittaker());
207 }else if (pDataArray->Estimators[i] == "odum") {
208 treeCalculators.push_back(new Odum());
209 }else if (pDataArray->Estimators[i] == "canberra") {
210 treeCalculators.push_back(new Canberra());
211 }else if (pDataArray->Estimators[i] == "structeuclidean") {
212 treeCalculators.push_back(new StructEuclidean());
213 }else if (pDataArray->Estimators[i] == "structchord") {
214 treeCalculators.push_back(new StructChord());
215 }else if (pDataArray->Estimators[i] == "hellinger") {
216 treeCalculators.push_back(new Hellinger());
217 }else if (pDataArray->Estimators[i] == "manhattan") {
218 treeCalculators.push_back(new Manhattan());
219 }else if (pDataArray->Estimators[i] == "structpearson") {
220 treeCalculators.push_back(new StructPearson());
221 }else if (pDataArray->Estimators[i] == "soergel") {
222 treeCalculators.push_back(new Soergel());
223 }else if (pDataArray->Estimators[i] == "spearman") {
224 treeCalculators.push_back(new Spearman());
225 }else if (pDataArray->Estimators[i] == "structkulczynski") {
226 treeCalculators.push_back(new StructKulczynski());
227 }else if (pDataArray->Estimators[i] == "speciesprofile") {
228 treeCalculators.push_back(new SpeciesProfile());
229 }else if (pDataArray->Estimators[i] == "hamming") {
230 treeCalculators.push_back(new Hamming());
231 }else if (pDataArray->Estimators[i] == "structchi2") {
232 treeCalculators.push_back(new StructChi2());
233 }else if (pDataArray->Estimators[i] == "gower") {
234 treeCalculators.push_back(new Gower());
235 }else if (pDataArray->Estimators[i] == "memchi2") {
236 treeCalculators.push_back(new MemChi2());
237 }else if (pDataArray->Estimators[i] == "memchord") {
238 treeCalculators.push_back(new MemChord());
239 }else if (pDataArray->Estimators[i] == "memeuclidean") {
240 treeCalculators.push_back(new MemEuclidean());
241 }else if (pDataArray->Estimators[i] == "mempearson") {
242 treeCalculators.push_back(new MemPearson());
// One result list per calculator; index i below lines up with treeCalculators[i].
247 pDataArray->calcDists.resize(treeCalculators.size());
// Scratch vector reused for every pair; the first two entries are always the pair itself.
249 vector<SharedRAbundVector*> subset;
// NOTE(review): start/end are unsigned long long but k is int (narrowing conversion).
250 for (int k = pDataArray->start; k < pDataArray->end; k++) { // pass cdd each set of groups to compare
// l < k, so only pairs below the diagonal are visited.
252 for (int l = 0; l < k; l++) {
// The loop bound l < k already guarantees k != l, so this test is always true here.
254 if (k != l) { //we don't need the similarity of a group to itself
255 subset.clear(); //clear out old pair of sharedrabunds
256 //add new pair of sharedrabunds
257 subset.push_back(pDataArray->thisLookup[k]); subset.push_back(pDataArray->thisLookup[l]);
259 for(int i=0;i<treeCalculators.size();i++) {
261 //if this calc needs all groups to calculate the pair load all groups
262 if (treeCalculators[i]->getNeedsAll()) {
263 //load subset with rest of lookup for those calcs that need everyone to calc for a pair
// NOTE(review): in the visible lines subset is cleared once per pair, not once per
// calculator, so groups appended here appear to remain in subset for the calculators
// that follow (and to be appended again by a second needs-all calculator) -- verify.
264 for (int w = 0; w < pDataArray->thisLookup.size(); w++) {
265 if ((w != k) && (w != l)) { subset.push_back(pDataArray->thisLookup[w]); }
269 vector<double> tempdata = treeCalculators[i]->getValues(subset); //saves the calculator outputs
// Cancellation check.
// NOTE(review): this early return appears to skip the calculator delete loop below,
// leaking every object created above -- verify and release before returning.
271 if (pDataArray->m->control_pressed) { return 1; }
// -(x - 1.0) equals 1.0 - x: the first calculator output is stored as its complement.
273 seqDist temp(l, k, -(tempdata[0]-1.0));
274 pDataArray->calcDists[i].push_back(temp);
// Release the calculators allocated at the top of the function.
280 for(int i=0;i<treeCalculators.size();i++){ delete treeCalculators[i]; }
// Exceptions are reported through errorOut with this class/function label
// (the matching try is not visible in this view).
285 catch(exception& e) {
286 pDataArray->m->errorOut(e, "TreeGroupsCommand", "MyTreeSharedThreadFunction");