1 #ifndef TREEGROUPCOMMAND_H
2 #define TREEGROUPCOMMAND_H
8 * Created by Sarah Westcott on 4/8/09.
9 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
13 #include "command.hpp"
14 #include "inputdata.h"
16 #include "validcalculator.h"
18 #include "counttable.h"
19 #include "readmatrix.hpp"
20 #include "readcolumn.h"
21 #include "readphylip.h"
22 #include "sharedsobscollectsummary.h"
23 #include "sharedchao1.h"
24 #include "sharedace.h"
25 #include "sharednseqs.h"
26 #include "sharedjabund.h"
27 #include "sharedsorabund.h"
28 #include "sharedjclass.h"
29 #include "sharedsorclass.h"
30 #include "sharedjest.h"
31 #include "sharedsorest.h"
32 #include "sharedthetayc.h"
33 #include "sharedthetan.h"
34 #include "sharedkstest.h"
35 #include "whittaker.h"
36 #include "sharedochiai.h"
37 #include "sharedanderbergs.h"
38 #include "sharedkulczynski.h"
39 #include "sharedkulczynskicody.h"
40 #include "sharedlennon.h"
41 #include "sharedmorisitahorn.h"
42 #include "sharedbraycurtis.h"
43 #include "sharedjackknife.h"
44 #include "whittaker.h"
47 #include "structeuclidean.h"
48 #include "structchord.h"
49 #include "hellinger.h"
50 #include "manhattan.h"
51 #include "structpearson.h"
54 #include "structkulczynski.h"
55 #include "structchi2.h"
56 #include "speciesprofile.h"
61 #include "memeuclidean.h"
62 #include "mempearson.h"
66 /* This command creates a tree file for each similarity calculator at each distance level, using various calculators to find the similarity between groups.
67 The user can select the lines or labels they wish to use as well as the groups they would like included.
68 They can also use as many or as few calculators as they wish. */
// Command object registered under the name "tree.shared" (see getCommandName()).
// Its own description string says it generates a tree file that describes the
// dissimilarity among groups.
// NOTE(review): access specifiers, some declarations and the closing brace of this
// class are not visible in this excerpt; only the members shown are documented.
71 class TreeGroupCommand : public Command {
// Constructor taking a single string; presumably the user's option string -- TODO confirm.
74 TreeGroupCommand(string);
// --- Command metadata -------------------------------------------------------
78 vector<string> setParameters();
// Name the command is invoked by.
79 string getCommandName() { return "tree.shared"; }
// Category label this command is listed under.
80 string getCommandCategory() { return "OTU-Based Approaches"; }
81 string getOutputFileNameTag(string, string);
82 string getHelpString();
// URL of the wiki page documenting this command.
83 string getCitation() { return "http://www.mothur.org/wiki/Tree.shared"; }
// One-line summary of what the command does.
84 string getDescription() { return "generate a tree file that describes the dissimilarity among groups"; }
// Prints the help text through the MothurOut handle `m`
// (`m` is not declared here; presumably inherited from Command -- verify).
88 void help() { m->mothurOut(getHelpString()); }
// NOTE(review): looks like per-process work ranges -- confirm against the .cpp.
96 vector<linePair> lines;
// --- Helpers operating on a matrix of doubles / a Tree ----------------------
// Returns a Tree* built from the supplied matrix; ownership of the returned
// pointer is not visible from this header -- check the implementation.
98 Tree* createTree(vector< vector<double> >&);
99 void printSims(ostream&, vector< vector<double> >&);
100 int makeSimsShared();
101 vector< vector<double> > makeSimsDist(SparseDistanceMatrix*);
102 int writeTree(string, Tree*);
// NOTE(review): the two ints are presumably a start/end range and the last
// argument an output of per-calculator distances, mirroring the threaded
// function later in this file -- confirm against the .cpp.
103 int driver(vector<SharedRAbundVector*>, int, int, vector< vector<seqDist> >&);
// --- State ------------------------------------------------------------------
105 NameAssignment* nameMap;
// Calculator instances, stored as raw pointers.
110 vector<Calculator*> treeCalculators;
111 vector<SharedRAbundVector*> lookup;
113 string format, groupNames, filename, sharedfile, countfile, inputfile;
114 int numGroups, subsampleSize, iters, processors;
116 float precision, cutoff;
118 bool abort, allLines, subsample;
119 set<string> labels; //holds labels to be used
120 string phylipfile, columnfile, namefile, calc, groups, label, outputDir;
121 vector<string> Estimators, Groups, outputNames; //holds estimators to be used
123 //if the user enters label "0.06" and there is no "0.06" in their file, use the next lowest label.
124 int process(vector<SharedRAbundVector*>);
130 /**************************************************************************************************/
131 //custom data structure for threads to use.
132 // This is passed by void pointer so it can be any data type
133 // that can be passed using a single void pointer (LPVOID).
// Work packet handed to MyTreeSharedThreadFunction through its void-pointer
// parameter; the thread function casts the pointer back to treeSharedData*.
134 struct treeSharedData {
// Group abundance vectors; the thread function indexes this by k and l to
// form each pair and, for calculators that need all groups, appends the rest.
135 vector<SharedRAbundVector*> thisLookup;
// Output: resized by the thread function to one inner vector per calculator,
// each filled with seqDist(l, k, value) entries.
136 vector< vector<seqDist> > calcDists;
// Calculator names; each valid "matrix" name is turned into a Calculator
// instance inside the thread function.
137 vector<string> Estimators;
// First value of the outer index k processed by this thread (inclusive).
138 unsigned long long start;
// Upper bound of the outer index k (exclusive: the loop runs while k < end).
139 unsigned long long end;
// NOTE(review): the constructor body and any further members (the thread
// function also reads a member named `m`) are not visible in this excerpt.
// The vector arguments are taken by value here.
143 treeSharedData(MothurOut* mout, unsigned long long st, unsigned long long en, vector<string> est, vector<SharedRAbundVector*> lu) {
151 /**************************************************************************************************/
152 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
154 static DWORD WINAPI MyTreeSharedThreadFunction(LPVOID lpParam){
155 treeSharedData* pDataArray;
156 pDataArray = (treeSharedData*)lpParam;
160 vector<Calculator*> treeCalculators;
161 ValidCalculators validCalculator;
162 for (int i=0; i<pDataArray->Estimators.size(); i++) {
163 if (validCalculator.isValidCalculator("matrix", pDataArray->Estimators[i]) == true) {
164 if (pDataArray->Estimators[i] == "sharedsobs") {
165 treeCalculators.push_back(new SharedSobsCS());
166 }else if (pDataArray->Estimators[i] == "sharedchao") {
167 treeCalculators.push_back(new SharedChao1());
168 }else if (pDataArray->Estimators[i] == "sharedace") {
169 treeCalculators.push_back(new SharedAce());
170 }else if (pDataArray->Estimators[i] == "jabund") {
171 treeCalculators.push_back(new JAbund());
172 }else if (pDataArray->Estimators[i] == "sorabund") {
173 treeCalculators.push_back(new SorAbund());
174 }else if (pDataArray->Estimators[i] == "jclass") {
175 treeCalculators.push_back(new Jclass());
176 }else if (pDataArray->Estimators[i] == "sorclass") {
177 treeCalculators.push_back(new SorClass());
178 }else if (pDataArray->Estimators[i] == "jest") {
179 treeCalculators.push_back(new Jest());
180 }else if (pDataArray->Estimators[i] == "sorest") {
181 treeCalculators.push_back(new SorEst());
182 }else if (pDataArray->Estimators[i] == "thetayc") {
183 treeCalculators.push_back(new ThetaYC());
184 }else if (pDataArray->Estimators[i] == "thetan") {
185 treeCalculators.push_back(new ThetaN());
186 }else if (pDataArray->Estimators[i] == "kstest") {
187 treeCalculators.push_back(new KSTest());
188 }else if (pDataArray->Estimators[i] == "sharednseqs") {
189 treeCalculators.push_back(new SharedNSeqs());
190 }else if (pDataArray->Estimators[i] == "ochiai") {
191 treeCalculators.push_back(new Ochiai());
192 }else if (pDataArray->Estimators[i] == "anderberg") {
193 treeCalculators.push_back(new Anderberg());
194 }else if (pDataArray->Estimators[i] == "kulczynski") {
195 treeCalculators.push_back(new Kulczynski());
196 }else if (pDataArray->Estimators[i] == "kulczynskicody") {
197 treeCalculators.push_back(new KulczynskiCody());
198 }else if (pDataArray->Estimators[i] == "lennon") {
199 treeCalculators.push_back(new Lennon());
200 }else if (pDataArray->Estimators[i] == "morisitahorn") {
201 treeCalculators.push_back(new MorHorn());
202 }else if (pDataArray->Estimators[i] == "braycurtis") {
203 treeCalculators.push_back(new BrayCurtis());
204 }else if (pDataArray->Estimators[i] == "whittaker") {
205 treeCalculators.push_back(new Whittaker());
206 }else if (pDataArray->Estimators[i] == "odum") {
207 treeCalculators.push_back(new Odum());
208 }else if (pDataArray->Estimators[i] == "canberra") {
209 treeCalculators.push_back(new Canberra());
210 }else if (pDataArray->Estimators[i] == "structeuclidean") {
211 treeCalculators.push_back(new StructEuclidean());
212 }else if (pDataArray->Estimators[i] == "structchord") {
213 treeCalculators.push_back(new StructChord());
214 }else if (pDataArray->Estimators[i] == "hellinger") {
215 treeCalculators.push_back(new Hellinger());
216 }else if (pDataArray->Estimators[i] == "manhattan") {
217 treeCalculators.push_back(new Manhattan());
218 }else if (pDataArray->Estimators[i] == "structpearson") {
219 treeCalculators.push_back(new StructPearson());
220 }else if (pDataArray->Estimators[i] == "soergel") {
221 treeCalculators.push_back(new Soergel());
222 }else if (pDataArray->Estimators[i] == "spearman") {
223 treeCalculators.push_back(new Spearman());
224 }else if (pDataArray->Estimators[i] == "structkulczynski") {
225 treeCalculators.push_back(new StructKulczynski());
226 }else if (pDataArray->Estimators[i] == "speciesprofile") {
227 treeCalculators.push_back(new SpeciesProfile());
228 }else if (pDataArray->Estimators[i] == "hamming") {
229 treeCalculators.push_back(new Hamming());
230 }else if (pDataArray->Estimators[i] == "structchi2") {
231 treeCalculators.push_back(new StructChi2());
232 }else if (pDataArray->Estimators[i] == "gower") {
233 treeCalculators.push_back(new Gower());
234 }else if (pDataArray->Estimators[i] == "memchi2") {
235 treeCalculators.push_back(new MemChi2());
236 }else if (pDataArray->Estimators[i] == "memchord") {
237 treeCalculators.push_back(new MemChord());
238 }else if (pDataArray->Estimators[i] == "memeuclidean") {
239 treeCalculators.push_back(new MemEuclidean());
240 }else if (pDataArray->Estimators[i] == "mempearson") {
241 treeCalculators.push_back(new MemPearson());
246 pDataArray->calcDists.resize(treeCalculators.size());
248 vector<SharedRAbundVector*> subset;
249 for (int k = pDataArray->start; k < pDataArray->end; k++) { // pass cdd each set of groups to compare
251 for (int l = 0; l < k; l++) {
253 if (k != l) { //we dont need to similiarity of a groups to itself
254 subset.clear(); //clear out old pair of sharedrabunds
255 //add new pair of sharedrabunds
256 subset.push_back(pDataArray->thisLookup[k]); subset.push_back(pDataArray->thisLookup[l]);
258 for(int i=0;i<treeCalculators.size();i++) {
260 //if this calc needs all groups to calculate the pair load all groups
261 if (treeCalculators[i]->getNeedsAll()) {
262 //load subset with rest of lookup for those calcs that need everyone to calc for a pair
263 for (int w = 0; w < pDataArray->thisLookup.size(); w++) {
264 if ((w != k) && (w != l)) { subset.push_back(pDataArray->thisLookup[w]); }
268 vector<double> tempdata = treeCalculators[i]->getValues(subset); //saves the calculator outputs
270 if (pDataArray->m->control_pressed) { return 1; }
272 seqDist temp(l, k, -(tempdata[0]-1.0));
273 pDataArray->calcDists[i].push_back(temp);
279 for(int i=0;i<treeCalculators.size();i++){ delete treeCalculators[i]; }
284 catch(exception& e) {
285 pDataArray->m->errorOut(e, "TreeGroupsCommand", "MyTreeSharedThreadFunction");