1 #ifndef TREEGROUPCOMMAND_H
2 #define TREEGROUPCOMMAND_H
8 * Created by Sarah Westcott on 4/8/09.
9 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
13 #include "command.hpp"
14 #include "inputdata.h"
16 #include "validcalculator.h"
19 #include "readmatrix.hpp"
20 #include "readcolumn.h"
21 #include "readphylip.h"
22 #include "sparsematrix.hpp"
23 #include "sharedsobscollectsummary.h"
24 #include "sharedchao1.h"
25 #include "sharedace.h"
26 #include "sharednseqs.h"
27 #include "sharedjabund.h"
28 #include "sharedsorabund.h"
29 #include "sharedjclass.h"
30 #include "sharedsorclass.h"
31 #include "sharedjest.h"
32 #include "sharedsorest.h"
33 #include "sharedthetayc.h"
34 #include "sharedthetan.h"
35 #include "sharedkstest.h"
36 #include "whittaker.h"
37 #include "sharedochiai.h"
38 #include "sharedanderbergs.h"
39 #include "sharedkulczynski.h"
40 #include "sharedkulczynskicody.h"
41 #include "sharedlennon.h"
42 #include "sharedmorisitahorn.h"
43 #include "sharedbraycurtis.h"
44 #include "sharedjackknife.h"
45 #include "whittaker.h"
48 #include "structeuclidean.h"
49 #include "structchord.h"
50 #include "hellinger.h"
51 #include "manhattan.h"
52 #include "structpearson.h"
55 #include "structkulczynski.h"
56 #include "structchi2.h"
57 #include "speciesprofile.h"
62 #include "memeuclidean.h"
63 #include "mempearson.h"
67 /* This command creates a tree file for each similarity calculator at distance level, using various calculators to find the similarity between groups.
68 The user can select the lines or labels they wish to use as well as the groups they would like included.
69 They can also use as many or as few calculators as they wish. */
// Shorthand for an iterator over a std::list of PCell entries.
// NOTE(review): PCell is not declared in this header; presumably it comes from
// one of the matrix headers included above (e.g. sparsematrix.hpp) -- confirm.
72 typedef list<PCell>::iterator MatData;
// Command object for the "tree.shared" command. Per its own description string
// it generates a tree file that describes the dissimilarity among groups.
// Derives from Command; the members below are declared here and defined elsewhere
// unless an inline body is shown.
74 class TreeGroupCommand : public Command {
// Constructs the command from a single string argument
// (presumably the user's option string -- confirm against the implementation file).
77 TreeGroupCommand(string);
// Returns a list of strings; presumably the parameter names this command accepts -- TODO confirm.
81 vector<string> setParameters();
// Name under which the command is invoked.
82 string getCommandName() { return "tree.shared"; }
// Category label the command is listed under.
83 string getCommandCategory() { return "OTU-Based Approaches"; }
// Returns the help text; defined out of line.
84 string getHelpString();
// Reference URL for the command.
85 string getCitation() { return "http://www.mothur.org/wiki/Tree.shared"; }
// One-line summary of what the command does.
86 string getDescription() { return "generate a tree file that describes the dissimilarity among groups"; }
// Writes the help text through m->mothurOut().
// NOTE(review): m is not declared in the visible part of this class; presumably inherited from Command -- confirm.
90 void help() { m->mothurOut(getHelpString()); }
// Collection of linePair entries.
// NOTE(review): presumably the index ranges handed to each worker when the work is split -- confirm.
98 vector<linePair> lines;
// Builds a Tree from a matrix of doubles passed by reference.
// NOTE(review): ownership of the returned pointer is not visible here -- confirm who deletes it.
100 Tree* createTree(vector< vector<double> >&);
// Writes the given matrix of doubles to the given output stream.
101 void printSims(ostream&, vector< vector<double> >&);
102 int makeSimsShared();
// Returns a matrix of doubles.
// NOTE(review): presumably built from the phylip/column distance input -- confirm.
103 vector< vector<double> > makeSimsDist();
// Takes a string and a Tree*.
// NOTE(review): the string is presumably the output file name -- confirm.
104 int writeTree(string, Tree*);
// Takes a set of SharedRAbundVector pointers, two ints and an output vector of seqDist lists.
// NOTE(review): the two ints are presumably the start/end indices of the range to process -- confirm.
105 int driver(vector<SharedRAbundVector*>, int, int, vector< vector<seqDist> >&);
// Raw pointers to helper objects.
// NOTE(review): allocation and release are not visible in this header -- confirm ownership.
107 ReadMatrix* readMatrix;
108 SparseMatrix* matrix;
109 NameAssignment* nameMap;
// Calculator instances held by raw pointer.
114 vector<Calculator*> treeCalculators;
// SharedRAbundVector pointers.
// NOTE(review): presumably one per group at the current label -- confirm.
115 vector<SharedRAbundVector*> lookup;
117 string format, groupNames, filename, sharedfile, inputfile;
118 int numGroups, subsampleSize, iters, processors;
120 float precision, cutoff;
122 bool abort, allLines, subsample;
123 set<string> labels; //holds labels to be used
124 string phylipfile, columnfile, namefile, calc, groups, label, outputDir;
125 vector<string> Estimators, Groups, outputNames; //holds estimators to be used
127 //if the user enters label "0.06" and there is no "0.06" in their file, use the next lowest label.
128 int process(vector<SharedRAbundVector*>);
134 /**************************************************************************************************/
135 //custom data structure for threads to use.
136 // This is passed by void pointer so it can be any data type
137 // that can be passed using a single void pointer (LPVOID).
// Data bundle handed to MyTreeSharedThreadFunction through its LPVOID parameter;
// the thread function casts the pointer back to treeSharedData* and reads/writes
// the members below.
138 struct treeSharedData {
// Groups to compare; the thread function indexes this vector to form each pair.
139 vector<SharedRAbundVector*> thisLookup;
// Output: the thread function resizes this to one list per calculator and appends
// a seqDist for every pair it evaluates.
140 vector< vector<seqDist> > calcDists;
// Calculator names; the thread function instantiates one calculator per recognised name.
141 vector<string> Estimators;
// Bounds of the outer group index handled by the thread: it loops from start up to, but not including, end.
142 unsigned long long start;
143 unsigned long long end;
// Constructor taking a MothurOut*, the start/end bounds, the estimator names and the lookup vector.
// NOTE(review): the body is not visible here; presumably it stores each argument in the
// corresponding member (including the MothurOut* the thread function reaches as m) -- confirm.
147 treeSharedData(MothurOut* mout, unsigned long long st, unsigned long long en, vector<string> est, vector<SharedRAbundVector*> lu) {
155 /**************************************************************************************************/
156 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
158 static DWORD WINAPI MyTreeSharedThreadFunction(LPVOID lpParam){
159 treeSharedData* pDataArray;
160 pDataArray = (treeSharedData*)lpParam;
164 vector<Calculator*> treeCalculators;
165 ValidCalculators validCalculator;
166 for (int i=0; i<pDataArray->Estimators.size(); i++) {
167 if (validCalculator.isValidCalculator("matrix", pDataArray->Estimators[i]) == true) {
168 if (pDataArray->Estimators[i] == "sharedsobs") {
169 treeCalculators.push_back(new SharedSobsCS());
170 }else if (pDataArray->Estimators[i] == "sharedchao") {
171 treeCalculators.push_back(new SharedChao1());
172 }else if (pDataArray->Estimators[i] == "sharedace") {
173 treeCalculators.push_back(new SharedAce());
174 }else if (pDataArray->Estimators[i] == "jabund") {
175 treeCalculators.push_back(new JAbund());
176 }else if (pDataArray->Estimators[i] == "sorabund") {
177 treeCalculators.push_back(new SorAbund());
178 }else if (pDataArray->Estimators[i] == "jclass") {
179 treeCalculators.push_back(new Jclass());
180 }else if (pDataArray->Estimators[i] == "sorclass") {
181 treeCalculators.push_back(new SorClass());
182 }else if (pDataArray->Estimators[i] == "jest") {
183 treeCalculators.push_back(new Jest());
184 }else if (pDataArray->Estimators[i] == "sorest") {
185 treeCalculators.push_back(new SorEst());
186 }else if (pDataArray->Estimators[i] == "thetayc") {
187 treeCalculators.push_back(new ThetaYC());
188 }else if (pDataArray->Estimators[i] == "thetan") {
189 treeCalculators.push_back(new ThetaN());
190 }else if (pDataArray->Estimators[i] == "kstest") {
191 treeCalculators.push_back(new KSTest());
192 }else if (pDataArray->Estimators[i] == "sharednseqs") {
193 treeCalculators.push_back(new SharedNSeqs());
194 }else if (pDataArray->Estimators[i] == "ochiai") {
195 treeCalculators.push_back(new Ochiai());
196 }else if (pDataArray->Estimators[i] == "anderberg") {
197 treeCalculators.push_back(new Anderberg());
198 }else if (pDataArray->Estimators[i] == "kulczynski") {
199 treeCalculators.push_back(new Kulczynski());
200 }else if (pDataArray->Estimators[i] == "kulczynskicody") {
201 treeCalculators.push_back(new KulczynskiCody());
202 }else if (pDataArray->Estimators[i] == "lennon") {
203 treeCalculators.push_back(new Lennon());
204 }else if (pDataArray->Estimators[i] == "morisitahorn") {
205 treeCalculators.push_back(new MorHorn());
206 }else if (pDataArray->Estimators[i] == "braycurtis") {
207 treeCalculators.push_back(new BrayCurtis());
208 }else if (pDataArray->Estimators[i] == "whittaker") {
209 treeCalculators.push_back(new Whittaker());
210 }else if (pDataArray->Estimators[i] == "odum") {
211 treeCalculators.push_back(new Odum());
212 }else if (pDataArray->Estimators[i] == "canberra") {
213 treeCalculators.push_back(new Canberra());
214 }else if (pDataArray->Estimators[i] == "structeuclidean") {
215 treeCalculators.push_back(new StructEuclidean());
216 }else if (pDataArray->Estimators[i] == "structchord") {
217 treeCalculators.push_back(new StructChord());
218 }else if (pDataArray->Estimators[i] == "hellinger") {
219 treeCalculators.push_back(new Hellinger());
220 }else if (pDataArray->Estimators[i] == "manhattan") {
221 treeCalculators.push_back(new Manhattan());
222 }else if (pDataArray->Estimators[i] == "structpearson") {
223 treeCalculators.push_back(new StructPearson());
224 }else if (pDataArray->Estimators[i] == "soergel") {
225 treeCalculators.push_back(new Soergel());
226 }else if (pDataArray->Estimators[i] == "spearman") {
227 treeCalculators.push_back(new Spearman());
228 }else if (pDataArray->Estimators[i] == "structkulczynski") {
229 treeCalculators.push_back(new StructKulczynski());
230 }else if (pDataArray->Estimators[i] == "speciesprofile") {
231 treeCalculators.push_back(new SpeciesProfile());
232 }else if (pDataArray->Estimators[i] == "hamming") {
233 treeCalculators.push_back(new Hamming());
234 }else if (pDataArray->Estimators[i] == "structchi2") {
235 treeCalculators.push_back(new StructChi2());
236 }else if (pDataArray->Estimators[i] == "gower") {
237 treeCalculators.push_back(new Gower());
238 }else if (pDataArray->Estimators[i] == "memchi2") {
239 treeCalculators.push_back(new MemChi2());
240 }else if (pDataArray->Estimators[i] == "memchord") {
241 treeCalculators.push_back(new MemChord());
242 }else if (pDataArray->Estimators[i] == "memeuclidean") {
243 treeCalculators.push_back(new MemEuclidean());
244 }else if (pDataArray->Estimators[i] == "mempearson") {
245 treeCalculators.push_back(new MemPearson());
250 pDataArray->calcDists.resize(treeCalculators.size());
252 vector<SharedRAbundVector*> subset;
253 for (int k = pDataArray->start; k < pDataArray->end; k++) { // pass cdd each set of groups to compare
255 for (int l = 0; l < k; l++) {
257 if (k != l) { //we dont need to similiarity of a groups to itself
258 subset.clear(); //clear out old pair of sharedrabunds
259 //add new pair of sharedrabunds
260 subset.push_back(pDataArray->thisLookup[k]); subset.push_back(pDataArray->thisLookup[l]);
262 for(int i=0;i<treeCalculators.size();i++) {
264 //if this calc needs all groups to calculate the pair load all groups
265 if (treeCalculators[i]->getNeedsAll()) {
266 //load subset with rest of lookup for those calcs that need everyone to calc for a pair
267 for (int w = 0; w < pDataArray->thisLookup.size(); w++) {
268 if ((w != k) && (w != l)) { subset.push_back(pDataArray->thisLookup[w]); }
272 vector<double> tempdata = treeCalculators[i]->getValues(subset); //saves the calculator outputs
274 if (pDataArray->m->control_pressed) { return 1; }
276 seqDist temp(l, k, -(tempdata[0]-1.0));
277 pDataArray->calcDists[i].push_back(temp);
283 for(int i=0;i<treeCalculators.size();i++){ delete treeCalculators[i]; }
288 catch(exception& e) {
289 pDataArray->m->errorOut(e, "TreeGroupsCommand", "MyTreeSharedThreadFunction");