1 #ifndef TREEGROUPCOMMAND_H
2 #define TREEGROUPCOMMAND_H
8 * Created by Sarah Westcott on 4/8/09.
9 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
13 #include "command.hpp"
14 #include "inputdata.h"
16 #include "validcalculator.h"
19 #include "readmatrix.hpp"
20 #include "readcolumn.h"
21 #include "readphylip.h"
22 #include "sparsematrix.hpp"
23 #include "sharedsobscollectsummary.h"
24 #include "sharedchao1.h"
25 #include "sharedace.h"
26 #include "sharednseqs.h"
27 #include "sharedjabund.h"
28 #include "sharedsorabund.h"
29 #include "sharedjclass.h"
30 #include "sharedsorclass.h"
31 #include "sharedjest.h"
32 #include "sharedsorest.h"
33 #include "sharedthetayc.h"
34 #include "sharedthetan.h"
35 #include "sharedkstest.h"
36 #include "whittaker.h"
37 #include "sharedochiai.h"
38 #include "sharedanderbergs.h"
39 #include "sharedkulczynski.h"
40 #include "sharedkulczynskicody.h"
41 #include "sharedlennon.h"
42 #include "sharedmorisitahorn.h"
43 #include "sharedbraycurtis.h"
44 #include "sharedjackknife.h"
45 #include "whittaker.h"
48 #include "structeuclidean.h"
49 #include "structchord.h"
50 #include "hellinger.h"
51 #include "manhattan.h"
52 #include "structpearson.h"
55 #include "structkulczynski.h"
56 #include "structchi2.h"
57 #include "speciesprofile.h"
62 #include "memeuclidean.h"
63 #include "mempearson.h"
67 /* This command creates a tree file for each similarity calculator at each distance level, using various calculators to find the similarity between groups.
68 The user can select the lines or labels they wish to use, as well as the groups they would like included.
69 They can also use as many or as few calculators as they wish. */
// MatData: shorthand for an iterator over a list of PCell entries.
72 typedef list<PCell>::iterator MatData;
// Command implementation registered under the name "tree.shared" (see getCommandName()).
// NOTE(review): access specifiers and the closing brace are not visible in this excerpt;
// the grouping comments below follow declaration order only.
74 class TreeGroupCommand : public Command {
// Constructor taking one string -- presumably the user's option string; confirm against the .cpp.
77 TreeGroupCommand(string);
// --- Command metadata: name, category, help text, citation and description ---
81 vector<string> setParameters();
82 string getCommandName() { return "tree.shared"; }
83 string getCommandCategory() { return "OTU-Based Approaches"; }
84 string getOutputFileNameTag(string, string);
85 string getHelpString();
86 string getCitation() { return "http://www.mothur.org/wiki/Tree.shared"; }
87 string getDescription() { return "generate a tree file that describes the dissimilarity among groups"; }
// Prints the help string through the MothurOut instance `m`.
91 void help() { m->mothurOut(getHelpString()); }
99 vector<linePair> lines;
// --- Helpers declared here and defined elsewhere (bodies not visible in this header excerpt) ---
101 Tree* createTree(vector< vector<double> >&);
102 void printSims(ostream&, vector< vector<double> >&);
103 int makeSimsShared();
104 vector< vector<double> > makeSimsDist(SparseDistanceMatrix*);
105 int writeTree(string, Tree*);
// NOTE(review): the two ints look like a start/end range, matching treeSharedData::start/end -- confirm in the .cpp.
106 int driver(vector<SharedRAbundVector*>, int, int, vector< vector<seqDist> >&);
108 NameAssignment* nameMap;
// Calculator instances and the shared abundance vectors they are applied to.
113 vector<Calculator*> treeCalculators;
114 vector<SharedRAbundVector*> lookup;
// --- Option values and run state ---
116 string format, groupNames, filename, sharedfile, inputfile;
117 int numGroups, subsampleSize, iters, processors;
119 float precision, cutoff;
121 bool abort, allLines, subsample;
122 set<string> labels; //holds the labels to be used
123 string phylipfile, columnfile, namefile, calc, groups, label, outputDir;
124 vector<string> Estimators, Groups, outputNames; //Estimators holds the names of the calculators to be used
126 //if the user enters label "0.06" and there is no "0.06" in their file, use the next lowest label.
127 int process(vector<SharedRAbundVector*>);
133 /**************************************************************************************************/
134 //custom data structure for threads to use.
135 // This is passed by void pointer so it can be any data type
136 // that can be passed using a single void pointer (LPVOID).
// Per-thread work packet read and written by MyTreeSharedThreadFunction.
// NOTE(review): the thread function also reads a member `m` (used for control_pressed
// and errorOut); its declaration is not visible in this excerpt.
137 struct treeSharedData {
// Shared abundance vectors; the thread function indexes this to build each pair of groups.
138 vector<SharedRAbundVector*> thisLookup;
// Output: one vector of seqDist results per calculator, filled by the thread function.
139 vector< vector<seqDist> > calcDists;
// Calculator names; the thread function creates one Calculator for each name it recognises.
140 vector<string> Estimators;
// Bounds of the outer loop index k handled by this thread: start <= k < end.
141 unsigned long long start;
142 unsigned long long end;
// Constructor body is not visible here -- presumably stores the arguments in the members above; confirm.
146 treeSharedData(MothurOut* mout, unsigned long long st, unsigned long long en, vector<string> est, vector<SharedRAbundVector*> lu) {
154 /**************************************************************************************************/
155 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
157 static DWORD WINAPI MyTreeSharedThreadFunction(LPVOID lpParam){
158 treeSharedData* pDataArray;
159 pDataArray = (treeSharedData*)lpParam;
163 vector<Calculator*> treeCalculators;
164 ValidCalculators validCalculator;
165 for (int i=0; i<pDataArray->Estimators.size(); i++) {
166 if (validCalculator.isValidCalculator("matrix", pDataArray->Estimators[i]) == true) {
167 if (pDataArray->Estimators[i] == "sharedsobs") {
168 treeCalculators.push_back(new SharedSobsCS());
169 }else if (pDataArray->Estimators[i] == "sharedchao") {
170 treeCalculators.push_back(new SharedChao1());
171 }else if (pDataArray->Estimators[i] == "sharedace") {
172 treeCalculators.push_back(new SharedAce());
173 }else if (pDataArray->Estimators[i] == "jabund") {
174 treeCalculators.push_back(new JAbund());
175 }else if (pDataArray->Estimators[i] == "sorabund") {
176 treeCalculators.push_back(new SorAbund());
177 }else if (pDataArray->Estimators[i] == "jclass") {
178 treeCalculators.push_back(new Jclass());
179 }else if (pDataArray->Estimators[i] == "sorclass") {
180 treeCalculators.push_back(new SorClass());
181 }else if (pDataArray->Estimators[i] == "jest") {
182 treeCalculators.push_back(new Jest());
183 }else if (pDataArray->Estimators[i] == "sorest") {
184 treeCalculators.push_back(new SorEst());
185 }else if (pDataArray->Estimators[i] == "thetayc") {
186 treeCalculators.push_back(new ThetaYC());
187 }else if (pDataArray->Estimators[i] == "thetan") {
188 treeCalculators.push_back(new ThetaN());
189 }else if (pDataArray->Estimators[i] == "kstest") {
190 treeCalculators.push_back(new KSTest());
191 }else if (pDataArray->Estimators[i] == "sharednseqs") {
192 treeCalculators.push_back(new SharedNSeqs());
193 }else if (pDataArray->Estimators[i] == "ochiai") {
194 treeCalculators.push_back(new Ochiai());
195 }else if (pDataArray->Estimators[i] == "anderberg") {
196 treeCalculators.push_back(new Anderberg());
197 }else if (pDataArray->Estimators[i] == "kulczynski") {
198 treeCalculators.push_back(new Kulczynski());
199 }else if (pDataArray->Estimators[i] == "kulczynskicody") {
200 treeCalculators.push_back(new KulczynskiCody());
201 }else if (pDataArray->Estimators[i] == "lennon") {
202 treeCalculators.push_back(new Lennon());
203 }else if (pDataArray->Estimators[i] == "morisitahorn") {
204 treeCalculators.push_back(new MorHorn());
205 }else if (pDataArray->Estimators[i] == "braycurtis") {
206 treeCalculators.push_back(new BrayCurtis());
207 }else if (pDataArray->Estimators[i] == "whittaker") {
208 treeCalculators.push_back(new Whittaker());
209 }else if (pDataArray->Estimators[i] == "odum") {
210 treeCalculators.push_back(new Odum());
211 }else if (pDataArray->Estimators[i] == "canberra") {
212 treeCalculators.push_back(new Canberra());
213 }else if (pDataArray->Estimators[i] == "structeuclidean") {
214 treeCalculators.push_back(new StructEuclidean());
215 }else if (pDataArray->Estimators[i] == "structchord") {
216 treeCalculators.push_back(new StructChord());
217 }else if (pDataArray->Estimators[i] == "hellinger") {
218 treeCalculators.push_back(new Hellinger());
219 }else if (pDataArray->Estimators[i] == "manhattan") {
220 treeCalculators.push_back(new Manhattan());
221 }else if (pDataArray->Estimators[i] == "structpearson") {
222 treeCalculators.push_back(new StructPearson());
223 }else if (pDataArray->Estimators[i] == "soergel") {
224 treeCalculators.push_back(new Soergel());
225 }else if (pDataArray->Estimators[i] == "spearman") {
226 treeCalculators.push_back(new Spearman());
227 }else if (pDataArray->Estimators[i] == "structkulczynski") {
228 treeCalculators.push_back(new StructKulczynski());
229 }else if (pDataArray->Estimators[i] == "speciesprofile") {
230 treeCalculators.push_back(new SpeciesProfile());
231 }else if (pDataArray->Estimators[i] == "hamming") {
232 treeCalculators.push_back(new Hamming());
233 }else if (pDataArray->Estimators[i] == "structchi2") {
234 treeCalculators.push_back(new StructChi2());
235 }else if (pDataArray->Estimators[i] == "gower") {
236 treeCalculators.push_back(new Gower());
237 }else if (pDataArray->Estimators[i] == "memchi2") {
238 treeCalculators.push_back(new MemChi2());
239 }else if (pDataArray->Estimators[i] == "memchord") {
240 treeCalculators.push_back(new MemChord());
241 }else if (pDataArray->Estimators[i] == "memeuclidean") {
242 treeCalculators.push_back(new MemEuclidean());
243 }else if (pDataArray->Estimators[i] == "mempearson") {
244 treeCalculators.push_back(new MemPearson());
249 pDataArray->calcDists.resize(treeCalculators.size());
251 vector<SharedRAbundVector*> subset;
252 for (int k = pDataArray->start; k < pDataArray->end; k++) { // pass cdd each set of groups to compare
254 for (int l = 0; l < k; l++) {
256 if (k != l) { //we dont need to similiarity of a groups to itself
257 subset.clear(); //clear out old pair of sharedrabunds
258 //add new pair of sharedrabunds
259 subset.push_back(pDataArray->thisLookup[k]); subset.push_back(pDataArray->thisLookup[l]);
261 for(int i=0;i<treeCalculators.size();i++) {
263 //if this calc needs all groups to calculate the pair load all groups
264 if (treeCalculators[i]->getNeedsAll()) {
265 //load subset with rest of lookup for those calcs that need everyone to calc for a pair
266 for (int w = 0; w < pDataArray->thisLookup.size(); w++) {
267 if ((w != k) && (w != l)) { subset.push_back(pDataArray->thisLookup[w]); }
271 vector<double> tempdata = treeCalculators[i]->getValues(subset); //saves the calculator outputs
273 if (pDataArray->m->control_pressed) { return 1; }
275 seqDist temp(l, k, -(tempdata[0]-1.0));
276 pDataArray->calcDists[i].push_back(temp);
282 for(int i=0;i<treeCalculators.size();i++){ delete treeCalculators[i]; }
287 catch(exception& e) {
288 pDataArray->m->errorOut(e, "TreeGroupsCommand", "MyTreeSharedThreadFunction");