1 #ifndef TREEGROUPCOMMAND_H
2 #define TREEGROUPCOMMAND_H
8 * Created by Sarah Westcott on 4/8/09.
9 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
13 #include "command.hpp"
14 #include "inputdata.h"
16 #include "validcalculator.h"
18 #include "counttable.h"
19 #include "readmatrix.hpp"
20 #include "readcolumn.h"
21 #include "readphylip.h"
22 #include "sharedsobscollectsummary.h"
23 #include "sharedchao1.h"
24 #include "sharedace.h"
25 #include "sharednseqs.h"
26 #include "sharedjabund.h"
27 #include "sharedsorabund.h"
28 #include "sharedjclass.h"
29 #include "sharedsorclass.h"
30 #include "sharedjest.h"
31 #include "sharedsorest.h"
32 #include "sharedthetayc.h"
33 #include "sharedthetan.h"
34 #include "sharedkstest.h"
35 #include "whittaker.h"
36 #include "sharedochiai.h"
37 #include "sharedanderbergs.h"
38 #include "sharedkulczynski.h"
39 #include "sharedkulczynskicody.h"
40 #include "sharedlennon.h"
41 #include "sharedmorisitahorn.h"
42 #include "sharedbraycurtis.h"
43 #include "sharedjackknife.h"
44 #include "whittaker.h"
47 #include "structeuclidean.h"
48 #include "structchord.h"
49 #include "hellinger.h"
50 #include "manhattan.h"
51 #include "structpearson.h"
54 #include "structkulczynski.h"
55 #include "structchi2.h"
56 #include "speciesprofile.h"
61 #include "memeuclidean.h"
62 #include "mempearson.h"
66 /* This command creates a tree file for each similarity calculator at distance level, using various calculators to find the similarity between groups.
67 The user can select the lines or labels they wish to use as well as the groups they would like included.
68 They can also use as many or as few calculators as they wish. */
// TreeGroupCommand: the Command subclass whose getCommandName() is "tree.shared".
// Per getDescription(), it generates a tree file that describes the
// dissimilarity among groups.
// NOTE(review): this listing omits some lines of the class (access specifiers,
// possibly other declarations, and the closing brace are not visible), so the
// comments below cover only the members that are shown.
71 class TreeGroupCommand : public Command {
// Constructor taking one string; presumably the user's option string -- confirm in the implementation file.
74 TreeGroupCommand(string);
// Declared here, defined elsewhere.
78 vector<string> setParameters();
// Fixed identification strings reported by this command.
79 string getCommandName() { return "tree.shared"; }
80 string getCommandCategory() { return "OTU-Based Approaches"; }
// Declared here, defined elsewhere.
82 string getHelpString();
83 string getOutputPattern(string);
84 string getCitation() { return "http://www.mothur.org/wiki/Tree.shared"; }
85 string getDescription() { return "generate a tree file that describes the dissimilarity among groups"; }
// Writes the help text through m->mothurOut(). `m` is not declared in the
// visible lines; presumably it is inherited from Command -- confirm.
89 void help() { m->mothurOut(getHelpString()); }
97 vector<linePair> lines;
// Helper declarations; their definitions are not part of this file excerpt.
99 Tree* createTree(vector< vector<double> >&);
100 void printSims(ostream&, vector< vector<double> >&);
101 int makeSimsShared();
102 vector< vector<double> > makeSimsDist(SparseDistanceMatrix*);
103 int writeTree(string, Tree*);
// NOTE(review): the two ints are presumably the start/end bounds that
// treeSharedData carries for the threaded variant -- confirm.
104 int driver(vector<SharedRAbundVector*>, int, int, vector< vector<seqDist> >&);
106 NameAssignment* nameMap;
// Raw Calculator pointers; ownership and cleanup are not visible in this header.
111 vector<Calculator*> treeCalculators;
112 vector<SharedRAbundVector*> lookup;
// Option/state fields; their values are assigned in the implementation file,
// which is not shown here.
114 string format, groupNames, filename, sharedfile, countfile, inputfile;
115 int numGroups, subsampleSize, iters, processors;
117 float precision, cutoff;
119 bool abort, allLines, subsample;
120 set<string> labels; //holds labels to be used
121 string phylipfile, columnfile, namefile, calc, groups, label, outputDir;
122 vector<string> Estimators, Groups, outputNames; //holds estimators to be used
124 //if the user enters label "0.06" and there is no "0.06" in their file, use the next lowest label.
125 int process(vector<SharedRAbundVector*>);
131 /**************************************************************************************************/
132 //custom data structure for threads to use.
133 // This is passed by void pointer so it can be any data type
134 // that can be passed using a single void pointer (LPVOID).
// Argument bundle handed to MyTreeSharedThreadFunction through its LPVOID parameter.
// NOTE(review): the constructor body, the closing brace and at least one member
// are missing from this listing (the thread function reads pDataArray->m, which
// is not declared in the visible lines).
135 struct treeSharedData {
// Indexed by k and l in the thread function to pick the pair being compared.
136 vector<SharedRAbundVector*> thisLookup;
// Output: the thread function resizes this to one list per calculator and
// appends one seqDist per compared pair.
137 vector< vector<seqDist> > calcDists;
// Calculator names; the thread function compares them against literals such as "sharedsobs".
138 vector<string> Estimators;
// Half-open range [start, end) walked by the thread function's outer loop.
139 unsigned long long start;
140 unsigned long long end;
// Takes a MothurOut pointer, the start/end bounds, the estimator names and the
// lookup vector (the vectors are passed by value). The body is not visible here;
// presumably it stores each argument in the matching member -- confirm.
145 treeSharedData(MothurOut* mout, unsigned long long st, unsigned long long en, vector<string> est, vector<SharedRAbundVector*> lu) {
154 /**************************************************************************************************/
155 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
// Thread entry point with the Win32 thread-procedure shape (DWORD WINAPI f(LPVOID)).
// lpParam is cast to treeSharedData*; the function then
//   1. builds one Calculator for each name in pDataArray->Estimators that
//      ValidCalculators accepts for "matrix" and that matches a branch below,
//   2. for every k in [start, end) and every l < k, runs each calculator on the
//      pair (thisLookup[k], thisLookup[l]) and appends
//      seqDist(l, k, 1.0 - value) to pDataArray->calcDists[i],
//   3. deletes the calculators.
// Returns 1 when pDataArray->m->control_pressed is set. The opening of the try
// block, the normal return and several closing braces are on lines that are not
// visible in this listing, so the nesting described below is inferred from
// statement order -- confirm against the full file.
157 static DWORD WINAPI MyTreeSharedThreadFunction(LPVOID lpParam){
158 treeSharedData* pDataArray;
159 pDataArray = (treeSharedData*)lpParam;
// Calculators are allocated with new here and released by the delete loop near the end.
163 vector<Calculator*> treeCalculators;
164 ValidCalculators validCalculator;
// Map each requested estimator name to its calculator class.
165 for (int i=0; i<pDataArray->Estimators.size(); i++) {
166 if (validCalculator.isValidCalculator("matrix", pDataArray->Estimators[i]) == true) {
167 if (pDataArray->Estimators[i] == "sharedsobs") {
168 treeCalculators.push_back(new SharedSobsCS());
169 }else if (pDataArray->Estimators[i] == "sharedchao") {
170 treeCalculators.push_back(new SharedChao1());
171 }else if (pDataArray->Estimators[i] == "sharedace") {
172 treeCalculators.push_back(new SharedAce());
173 }else if (pDataArray->Estimators[i] == "jabund") {
174 treeCalculators.push_back(new JAbund());
175 }else if (pDataArray->Estimators[i] == "sorabund") {
176 treeCalculators.push_back(new SorAbund());
177 }else if (pDataArray->Estimators[i] == "jclass") {
178 treeCalculators.push_back(new Jclass());
179 }else if (pDataArray->Estimators[i] == "sorclass") {
180 treeCalculators.push_back(new SorClass());
181 }else if (pDataArray->Estimators[i] == "jest") {
182 treeCalculators.push_back(new Jest());
183 }else if (pDataArray->Estimators[i] == "sorest") {
184 treeCalculators.push_back(new SorEst());
185 }else if (pDataArray->Estimators[i] == "thetayc") {
186 treeCalculators.push_back(new ThetaYC());
187 }else if (pDataArray->Estimators[i] == "thetan") {
188 treeCalculators.push_back(new ThetaN());
189 }else if (pDataArray->Estimators[i] == "kstest") {
190 treeCalculators.push_back(new KSTest());
191 }else if (pDataArray->Estimators[i] == "sharednseqs") {
192 treeCalculators.push_back(new SharedNSeqs());
193 }else if (pDataArray->Estimators[i] == "ochiai") {
194 treeCalculators.push_back(new Ochiai());
195 }else if (pDataArray->Estimators[i] == "anderberg") {
196 treeCalculators.push_back(new Anderberg());
197 }else if (pDataArray->Estimators[i] == "kulczynski") {
198 treeCalculators.push_back(new Kulczynski());
199 }else if (pDataArray->Estimators[i] == "kulczynskicody") {
200 treeCalculators.push_back(new KulczynskiCody());
201 }else if (pDataArray->Estimators[i] == "lennon") {
202 treeCalculators.push_back(new Lennon());
203 }else if (pDataArray->Estimators[i] == "morisitahorn") {
204 treeCalculators.push_back(new MorHorn());
205 }else if (pDataArray->Estimators[i] == "braycurtis") {
206 treeCalculators.push_back(new BrayCurtis());
207 }else if (pDataArray->Estimators[i] == "whittaker") {
208 treeCalculators.push_back(new Whittaker());
209 }else if (pDataArray->Estimators[i] == "odum") {
210 treeCalculators.push_back(new Odum());
211 }else if (pDataArray->Estimators[i] == "canberra") {
212 treeCalculators.push_back(new Canberra());
213 }else if (pDataArray->Estimators[i] == "structeuclidean") {
214 treeCalculators.push_back(new StructEuclidean());
215 }else if (pDataArray->Estimators[i] == "structchord") {
216 treeCalculators.push_back(new StructChord());
217 }else if (pDataArray->Estimators[i] == "hellinger") {
218 treeCalculators.push_back(new Hellinger());
219 }else if (pDataArray->Estimators[i] == "manhattan") {
220 treeCalculators.push_back(new Manhattan());
221 }else if (pDataArray->Estimators[i] == "structpearson") {
222 treeCalculators.push_back(new StructPearson());
223 }else if (pDataArray->Estimators[i] == "soergel") {
224 treeCalculators.push_back(new Soergel());
225 }else if (pDataArray->Estimators[i] == "spearman") {
226 treeCalculators.push_back(new Spearman());
227 }else if (pDataArray->Estimators[i] == "structkulczynski") {
228 treeCalculators.push_back(new StructKulczynski());
229 }else if (pDataArray->Estimators[i] == "speciesprofile") {
230 treeCalculators.push_back(new SpeciesProfile());
231 }else if (pDataArray->Estimators[i] == "hamming") {
232 treeCalculators.push_back(new Hamming());
233 }else if (pDataArray->Estimators[i] == "structchi2") {
234 treeCalculators.push_back(new StructChi2());
235 }else if (pDataArray->Estimators[i] == "gower") {
236 treeCalculators.push_back(new Gower());
237 }else if (pDataArray->Estimators[i] == "memchi2") {
238 treeCalculators.push_back(new MemChi2());
239 }else if (pDataArray->Estimators[i] == "memchord") {
240 treeCalculators.push_back(new MemChord());
241 }else if (pDataArray->Estimators[i] == "memeuclidean") {
242 treeCalculators.push_back(new MemEuclidean());
243 }else if (pDataArray->Estimators[i] == "mempearson") {
244 treeCalculators.push_back(new MemPearson());
// One result list per constructed calculator: calcDists[i] pairs with treeCalculators[i].
249 pDataArray->calcDists.resize(treeCalculators.size());
251 vector<SharedRAbundVector*> subset;
// NOTE(review): start/end are unsigned long long in treeSharedData but k is an
// int, so the bounds are narrowed/compared across signedness here.
252 for (int k = pDataArray->start; k < pDataArray->end; k++) { // pass cdd each set of groups to compare
// Only l < k is visited, so each unordered pair is computed once.
256 for (int l = 0; l < k; l++) {
// NOTE(review): l < k already guarantees k != l, so this test is always true.
258 if (k != l) { //we don't need the similarity of a group to itself
259 subset.clear(); //clear out old pair of sharedrabunds
260 //add new pair of sharedrabunds
261 subset.push_back(pDataArray->thisLookup[k]); subset.push_back(pDataArray->thisLookup[l]);
263 for(int i=0;i<treeCalculators.size();i++) {
265 //if this calc needs all groups to calculate the pair load all groups
// NOTE(review): subset is cleared before this calculator loop, not inside it.
// If more than one calculator reports getNeedsAll(), the remaining groups look
// like they are appended again for each of them, and calculators that run
// afterwards see the enlarged subset -- confirm whether that is intended.
266 if (treeCalculators[i]->getNeedsAll()) {
267 //load subset with rest of lookup for those calcs that need everyone to calc for a pair
268 for (int w = 0; w < pDataArray->thisLookup.size(); w++) {
269 if ((w != k) && (w != l)) { subset.push_back(pDataArray->thisLookup[w]); }
273 vector<double> tempdata = treeCalculators[i]->getValues(subset); //saves the calculator outputs
// NOTE(review): in the lines shown, this early return happens before the delete
// loop below, so the calculators allocated above would not be freed on this path.
275 if (pDataArray->m->control_pressed) { return 1; }
// -(tempdata[0]-1.0) equals 1.0 - tempdata[0]; only the first calculator output is used.
277 seqDist temp(l, k, -(tempdata[0]-1.0));
278 pDataArray->calcDists[i].push_back(temp);
// Release the calculators allocated at the top of the function.
284 for(int i=0;i<treeCalculators.size();i++){ delete treeCalculators[i]; }
// NOTE(review): the class name in this message is spelled "TreeGroupsCommand",
// while the class declared in this header is TreeGroupCommand.
289 catch(exception& e) {
290 pDataArray->m->errorOut(e, "TreeGroupsCommand", "MyTreeSharedThreadFunction");