1 #ifndef TREEGROUPCOMMAND_H
2 #define TREEGROUPCOMMAND_H
8 * Created by Sarah Westcott on 4/8/09.
9 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
13 #include "command.hpp"
14 #include "inputdata.h"
16 #include "validcalculator.h"
18 #include "counttable.h"
19 #include "readmatrix.hpp"
20 #include "readcolumn.h"
21 #include "readphylip.h"
22 #include "sharedsobscollectsummary.h"
23 #include "sharedchao1.h"
24 #include "sharedace.h"
25 #include "sharednseqs.h"
26 #include "sharedjabund.h"
27 #include "sharedsorabund.h"
28 #include "sharedjclass.h"
29 #include "sharedsorclass.h"
30 #include "sharedjest.h"
31 #include "sharedsorest.h"
32 #include "sharedthetayc.h"
33 #include "sharedthetan.h"
34 #include "sharedkstest.h"
35 #include "whittaker.h"
36 #include "sharedochiai.h"
37 #include "sharedanderbergs.h"
38 #include "sharedkulczynski.h"
39 #include "sharedkulczynskicody.h"
40 #include "sharedlennon.h"
41 #include "sharedmorisitahorn.h"
42 #include "sharedbraycurtis.h"
43 #include "sharedjackknife.h"
44 #include "whittaker.h"
47 #include "structeuclidean.h"
48 #include "structchord.h"
49 #include "hellinger.h"
50 #include "manhattan.h"
51 #include "structpearson.h"
54 #include "structkulczynski.h"
55 #include "structchi2.h"
56 #include "speciesprofile.h"
61 #include "memeuclidean.h"
62 #include "mempearson.h"
63 #include "sharedrjsd.h"
64 #include "sharedjsd.h"
68 /* This command creates a tree file for each similarity calculator at each distance level, using various calculators to find the similarity between groups.
69 The user can select the lines or labels they wish to use as well as the groups they would like included.
70 They can also use as many or as few calculators as they wish. */
// Command class whose command name is "tree.shared" (see getCommandName()).
// Per getDescription(), it generates a tree file that describes the
// dissimilarity among groups.
// NOTE(review): this excerpt omits lines of the original declaration (access
// specifiers, the closing brace, and possibly further members) -- confirm
// against the full header before relying on the member list below.
73 class TreeGroupCommand : public Command {
// Constructor taking one string -- presumably the user's option string; TODO confirm.
76 TreeGroupCommand(string);
// --- command metadata ---
80 vector<string> setParameters();
81 string getCommandName() { return "tree.shared"; }
82 string getCommandCategory() { return "OTU-Based Approaches"; }
84 string getHelpString();
// Takes one string argument; its meaning is not visible here (presumably an output type -- TODO confirm).
85 string getOutputPattern(string);
// Returns the wiki URL for this command as its citation.
86 string getCitation() { return "http://www.mothur.org/wiki/Tree.shared"; }
87 string getDescription() { return "generate a tree file that describes the dissimilarity among groups"; }
// Prints the help string through m->mothurOut(); `m` is not declared in this
// excerpt -- presumably inherited from Command; TODO confirm.
91 void help() { m->mothurOut(getHelpString()); }
// NOTE(review): element type linePair is declared elsewhere; presumably the
// per-process work ranges -- verify against the implementation file.
99 vector<linePair> lines;
// --- helpers (defined elsewhere; roles below are inferred from signatures only) ---
// Returns a Tree* built from a matrix of doubles (presumably the similarity matrix -- TODO confirm).
101 Tree* createTree(vector< vector<double> >&);
// Takes an output stream and a matrix of doubles; presumably writes the matrix to the stream.
102 void printSims(ostream&, vector< vector<double> >&);
103 int makeSimsShared();
// Returns a matrix of doubles derived from a SparseDistanceMatrix.
104 vector< vector<double> > makeSimsDist(SparseDistanceMatrix*);
// Takes a string (presumably the output file name) and the tree to write; TODO confirm.
105 int writeTree(string, Tree*);
// Takes a set of SharedRAbundVector*, two ints and an output vector of seqDist
// lists; presumably the ints are a start/end range -- TODO confirm.
106 int driver(vector<SharedRAbundVector*>, int, int, vector< vector<seqDist> >&);
// --- state ---
108 NameAssignment* nameMap;
// Calculator objects held by pointer; ownership/cleanup is not visible in this excerpt.
113 vector<Calculator*> treeCalculators;
114 vector<SharedRAbundVector*> lookup;
116 string format, groupNames, filename, sharedfile, countfile, inputfile;
117 int numGroups, subsampleSize, iters, processors;
119 float precision, cutoff;
121 bool abort, allLines, subsample;
122 set<string> labels; //holds labels to be used
123 string phylipfile, columnfile, namefile, calc, groups, label, outputDir;
// Estimators holds the estimators to be used; Groups and outputNames are
// declared on the same line but are not described by the original comment.
124 vector<string> Estimators, Groups, outputNames; //holds estimators to be used
126 //if the user enters label "0.06" and there is no "0.06" in their file, use the next lowest label.
127 int process(vector<SharedRAbundVector*>);
133 /**************************************************************************************************/
134 //Custom data structure for worker threads to use.
135 // It is passed by void pointer so it can be any data type
136 // that can be passed using a single void pointer (LPVOID); MyTreeSharedThreadFunction casts the pointer back to treeSharedData*.
// NOTE(review): the constructor body and the closing brace are not visible in
// this excerpt. The thread function also reads pDataArray->m, so a MothurOut*
// member named m is presumably declared on a missing line -- confirm against
// the full header.
137 struct treeSharedData {
// Groups to compare; the thread function indexes this by group position (k, l) to build each pair.
138 vector<SharedRAbundVector*> thisLookup;
// Output: the thread function resizes this to one list per calculator and
// appends one seqDist per compared pair of groups to each list.
139 vector< vector<seqDist> > calcDists;
// Calculator names; the thread function creates one calculator object for each valid, recognized name.
140 vector<string> Estimators;
// Range of outer-loop group indices handled by this thread: start <= k < end.
141 unsigned long long start;
142 unsigned long long end;
// Constructor: presumably stores each argument in the corresponding member
// (mout -> m, st -> start, en -> end, est -> Estimators, lu -> thisLookup);
// the body is not visible here -- TODO confirm.
147 treeSharedData(MothurOut* mout, unsigned long long st, unsigned long long en, vector<string> est, vector<SharedRAbundVector*> lu) {
156 /**************************************************************************************************/
157 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
159 static DWORD WINAPI MyTreeSharedThreadFunction(LPVOID lpParam){
160 treeSharedData* pDataArray;
161 pDataArray = (treeSharedData*)lpParam;
165 vector<Calculator*> treeCalculators;
166 ValidCalculators validCalculator;
167 for (int i=0; i<pDataArray->Estimators.size(); i++) {
168 if (validCalculator.isValidCalculator("matrix", pDataArray->Estimators[i]) == true) {
169 if (pDataArray->Estimators[i] == "sharedsobs") {
170 treeCalculators.push_back(new SharedSobsCS());
171 }else if (pDataArray->Estimators[i] == "sharedchao") {
172 treeCalculators.push_back(new SharedChao1());
173 }else if (pDataArray->Estimators[i] == "sharedace") {
174 treeCalculators.push_back(new SharedAce());
175 }else if (pDataArray->Estimators[i] == "jabund") {
176 treeCalculators.push_back(new JAbund());
177 }else if (pDataArray->Estimators[i] == "sorabund") {
178 treeCalculators.push_back(new SorAbund());
179 }else if (pDataArray->Estimators[i] == "jclass") {
180 treeCalculators.push_back(new Jclass());
181 }else if (pDataArray->Estimators[i] == "sorclass") {
182 treeCalculators.push_back(new SorClass());
183 }else if (pDataArray->Estimators[i] == "jest") {
184 treeCalculators.push_back(new Jest());
185 }else if (pDataArray->Estimators[i] == "sorest") {
186 treeCalculators.push_back(new SorEst());
187 }else if (pDataArray->Estimators[i] == "thetayc") {
188 treeCalculators.push_back(new ThetaYC());
189 }else if (pDataArray->Estimators[i] == "thetan") {
190 treeCalculators.push_back(new ThetaN());
191 }else if (pDataArray->Estimators[i] == "kstest") {
192 treeCalculators.push_back(new KSTest());
193 }else if (pDataArray->Estimators[i] == "sharednseqs") {
194 treeCalculators.push_back(new SharedNSeqs());
195 }else if (pDataArray->Estimators[i] == "ochiai") {
196 treeCalculators.push_back(new Ochiai());
197 }else if (pDataArray->Estimators[i] == "anderberg") {
198 treeCalculators.push_back(new Anderberg());
199 }else if (pDataArray->Estimators[i] == "kulczynski") {
200 treeCalculators.push_back(new Kulczynski());
201 }else if (pDataArray->Estimators[i] == "kulczynskicody") {
202 treeCalculators.push_back(new KulczynskiCody());
203 }else if (pDataArray->Estimators[i] == "lennon") {
204 treeCalculators.push_back(new Lennon());
205 }else if (pDataArray->Estimators[i] == "morisitahorn") {
206 treeCalculators.push_back(new MorHorn());
207 }else if (pDataArray->Estimators[i] == "braycurtis") {
208 treeCalculators.push_back(new BrayCurtis());
209 }else if (pDataArray->Estimators[i] == "whittaker") {
210 treeCalculators.push_back(new Whittaker());
211 }else if (pDataArray->Estimators[i] == "odum") {
212 treeCalculators.push_back(new Odum());
213 }else if (pDataArray->Estimators[i] == "canberra") {
214 treeCalculators.push_back(new Canberra());
215 }else if (pDataArray->Estimators[i] == "structeuclidean") {
216 treeCalculators.push_back(new StructEuclidean());
217 }else if (pDataArray->Estimators[i] == "structchord") {
218 treeCalculators.push_back(new StructChord());
219 }else if (pDataArray->Estimators[i] == "hellinger") {
220 treeCalculators.push_back(new Hellinger());
221 }else if (pDataArray->Estimators[i] == "manhattan") {
222 treeCalculators.push_back(new Manhattan());
223 }else if (pDataArray->Estimators[i] == "structpearson") {
224 treeCalculators.push_back(new StructPearson());
225 }else if (pDataArray->Estimators[i] == "soergel") {
226 treeCalculators.push_back(new Soergel());
227 }else if (pDataArray->Estimators[i] == "spearman") {
228 treeCalculators.push_back(new Spearman());
229 }else if (pDataArray->Estimators[i] == "structkulczynski") {
230 treeCalculators.push_back(new StructKulczynski());
231 }else if (pDataArray->Estimators[i] == "speciesprofile") {
232 treeCalculators.push_back(new SpeciesProfile());
233 }else if (pDataArray->Estimators[i] == "hamming") {
234 treeCalculators.push_back(new Hamming());
235 }else if (pDataArray->Estimators[i] == "structchi2") {
236 treeCalculators.push_back(new StructChi2());
237 }else if (pDataArray->Estimators[i] == "gower") {
238 treeCalculators.push_back(new Gower());
239 }else if (pDataArray->Estimators[i] == "memchi2") {
240 treeCalculators.push_back(new MemChi2());
241 }else if (pDataArray->Estimators[i] == "memchord") {
242 treeCalculators.push_back(new MemChord());
243 }else if (pDataArray->Estimators[i] == "memeuclidean") {
244 treeCalculators.push_back(new MemEuclidean());
245 }else if (pDataArray->Estimators[i] == "mempearson") {
246 treeCalculators.push_back(new MemPearson());
251 pDataArray->calcDists.resize(treeCalculators.size());
253 vector<SharedRAbundVector*> subset;
254 for (int k = pDataArray->start; k < pDataArray->end; k++) { // pass cdd each set of groups to compare
258 for (int l = 0; l < k; l++) {
260 if (k != l) { //we dont need to similiarity of a groups to itself
261 subset.clear(); //clear out old pair of sharedrabunds
262 //add new pair of sharedrabunds
263 subset.push_back(pDataArray->thisLookup[k]); subset.push_back(pDataArray->thisLookup[l]);
265 for(int i=0;i<treeCalculators.size();i++) {
267 //if this calc needs all groups to calculate the pair load all groups
268 if (treeCalculators[i]->getNeedsAll()) {
269 //load subset with rest of lookup for those calcs that need everyone to calc for a pair
270 for (int w = 0; w < pDataArray->thisLookup.size(); w++) {
271 if ((w != k) && (w != l)) { subset.push_back(pDataArray->thisLookup[w]); }
275 vector<double> tempdata = treeCalculators[i]->getValues(subset); //saves the calculator outputs
277 if (pDataArray->m->control_pressed) { return 1; }
279 seqDist temp(l, k, -(tempdata[0]-1.0));
280 pDataArray->calcDists[i].push_back(temp);
286 for(int i=0;i<treeCalculators.size();i++){ delete treeCalculators[i]; }
291 catch(exception& e) {
292 pDataArray->m->errorOut(e, "TreeGroupsCommand", "MyTreeSharedThreadFunction");