1 #ifndef TREEGROUPCOMMAND_H
2 #define TREEGROUPCOMMAND_H
8 * Created by Sarah Westcott on 4/8/09.
9 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
13 #include "command.hpp"
14 #include "inputdata.h"
16 #include "validcalculator.h"
19 #include "readmatrix.hpp"
20 #include "readcolumn.h"
21 #include "readphylip.h"
22 #include "sparsematrix.hpp"
23 #include "sharedsobscollectsummary.h"
24 #include "sharedchao1.h"
25 #include "sharedace.h"
26 #include "sharednseqs.h"
27 #include "sharedjabund.h"
28 #include "sharedsorabund.h"
29 #include "sharedjclass.h"
30 #include "sharedsorclass.h"
31 #include "sharedjest.h"
32 #include "sharedsorest.h"
33 #include "sharedthetayc.h"
34 #include "sharedthetan.h"
35 #include "sharedkstest.h"
36 #include "whittaker.h"
37 #include "sharedochiai.h"
38 #include "sharedanderbergs.h"
39 #include "sharedkulczynski.h"
40 #include "sharedkulczynskicody.h"
41 #include "sharedlennon.h"
42 #include "sharedmorisitahorn.h"
43 #include "sharedbraycurtis.h"
44 #include "sharedjackknife.h"
45 #include "whittaker.h"
48 #include "structeuclidean.h"
49 #include "structchord.h"
50 #include "hellinger.h"
51 #include "manhattan.h"
52 #include "structpearson.h"
55 #include "structkulczynski.h"
56 #include "structchi2.h"
57 #include "speciesprofile.h"
62 #include "memeuclidean.h"
63 #include "mempearson.h"
67 /* This command creates a tree file for each similarity calculator at distance level, using various calculators to find the similarity between groups.
68 The user can select the lines or labels they wish to use as well as the groups they would like included.
69 They can also use as many or as few calculators as they wish. */
// Shorthand for an iterator over a list<PCell>.
72 typedef list<PCell>::iterator MatData;
// Command subclass registered under the name "tree.shared". Per getDescription(),
// it generates a tree file that describes the dissimilarity among groups.
74 class TreeGroupCommand : public Command {
// Constructs the command from a single string argument.
// NOTE(review): presumably the user's option string -- confirm against the implementation file.
77 TreeGroupCommand(string);
// Command metadata accessors; the inline ones return fixed strings.
81 vector<string> setParameters();
82 string getCommandName() { return "tree.shared"; }
83 string getCommandCategory() { return "OTU-Based Approaches"; }
84 string getOutputFileNameTag(string, string);
85 string getHelpString();
86 string getCitation() { return "http://www.mothur.org/wiki/Tree.shared"; }
87 string getDescription() { return "generate a tree file that describes the dissimilarity among groups"; }
// Writes the text returned by getHelpString() through m->mothurOut().
91 void help() { m->mothurOut(getHelpString()); }
99 vector<linePair> lines;
// Helper routines; only their declarations appear here, the definitions live elsewhere.
101 Tree* createTree(vector< vector<double> >&);
102 void printSims(ostream&, vector< vector<double> >&);
103 int makeSimsShared();
104 vector< vector<double> > makeSimsDist();
105 int writeTree(string, Tree*);
106 int driver(vector<SharedRAbundVector*>, int, int, vector< vector<seqDist> >&);
// Raw pointers to collaborating objects.
// NOTE(review): ownership is not visible from this header -- confirm who deletes these.
108 ReadMatrix* readMatrix;
109 SparseMatrix* matrix;
110 NameAssignment* nameMap;
115 vector<Calculator*> treeCalculators;
116 vector<SharedRAbundVector*> lookup;
// Scalar settings and file names.
// NOTE(review): presumably filled in from the user's parameters -- confirm in the constructor.
118 string format, groupNames, filename, sharedfile, inputfile;
119 int numGroups, subsampleSize, iters, processors;
121 float precision, cutoff;
123 bool abort, allLines, subsample;
124 set<string> labels; //holds labels to be used
125 string phylipfile, columnfile, namefile, calc, groups, label, outputDir;
126 vector<string> Estimators, Groups, outputNames; //Estimators holds the estimators to be used
128 //if the user enters label "0.06" and there is no "0.06" in their file, use the next lowest label.
129 int process(vector<SharedRAbundVector*>);
135 /**************************************************************************************************/
136 //custom data structure for threads to use.
137 // This is passed by void pointer so it can be any data type
138 // that can be passed using a single void pointer (LPVOID).
// Per-thread work packet read and written by MyTreeSharedThreadFunction, which
// receives it as an LPVOID and casts it back to treeSharedData*.
139 struct treeSharedData {
// Group abundance vectors; the thread function pairs thisLookup[k] with thisLookup[l].
140 vector<SharedRAbundVector*> thisLookup;
// Output: the thread function resizes this to one inner vector per calculator
// and appends a seqDist(l, k, distance) for every pair it evaluates.
141 vector< vector<seqDist> > calcDists;
// Calculator names; each one is matched against a fixed list to instantiate a calculator.
142 vector<string> Estimators;
// Half-open range [start, end) of the outer index k handled by this thread.
143 unsigned long long start;
144 unsigned long long end;
// Constructor taking the MothurOut handle, the index range, the estimator names and the lookup vectors.
// NOTE(review): the body is not visible here; presumably it copies each argument into the matching member -- confirm.
148 treeSharedData(MothurOut* mout, unsigned long long st, unsigned long long en, vector<string> est, vector<SharedRAbundVector*> lu) {
156 /**************************************************************************************************/
157 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
159 static DWORD WINAPI MyTreeSharedThreadFunction(LPVOID lpParam){
160 treeSharedData* pDataArray;
161 pDataArray = (treeSharedData*)lpParam;
165 vector<Calculator*> treeCalculators;
166 ValidCalculators validCalculator;
167 for (int i=0; i<pDataArray->Estimators.size(); i++) {
168 if (validCalculator.isValidCalculator("matrix", pDataArray->Estimators[i]) == true) {
169 if (pDataArray->Estimators[i] == "sharedsobs") {
170 treeCalculators.push_back(new SharedSobsCS());
171 }else if (pDataArray->Estimators[i] == "sharedchao") {
172 treeCalculators.push_back(new SharedChao1());
173 }else if (pDataArray->Estimators[i] == "sharedace") {
174 treeCalculators.push_back(new SharedAce());
175 }else if (pDataArray->Estimators[i] == "jabund") {
176 treeCalculators.push_back(new JAbund());
177 }else if (pDataArray->Estimators[i] == "sorabund") {
178 treeCalculators.push_back(new SorAbund());
179 }else if (pDataArray->Estimators[i] == "jclass") {
180 treeCalculators.push_back(new Jclass());
181 }else if (pDataArray->Estimators[i] == "sorclass") {
182 treeCalculators.push_back(new SorClass());
183 }else if (pDataArray->Estimators[i] == "jest") {
184 treeCalculators.push_back(new Jest());
185 }else if (pDataArray->Estimators[i] == "sorest") {
186 treeCalculators.push_back(new SorEst());
187 }else if (pDataArray->Estimators[i] == "thetayc") {
188 treeCalculators.push_back(new ThetaYC());
189 }else if (pDataArray->Estimators[i] == "thetan") {
190 treeCalculators.push_back(new ThetaN());
191 }else if (pDataArray->Estimators[i] == "kstest") {
192 treeCalculators.push_back(new KSTest());
193 }else if (pDataArray->Estimators[i] == "sharednseqs") {
194 treeCalculators.push_back(new SharedNSeqs());
195 }else if (pDataArray->Estimators[i] == "ochiai") {
196 treeCalculators.push_back(new Ochiai());
197 }else if (pDataArray->Estimators[i] == "anderberg") {
198 treeCalculators.push_back(new Anderberg());
199 }else if (pDataArray->Estimators[i] == "kulczynski") {
200 treeCalculators.push_back(new Kulczynski());
201 }else if (pDataArray->Estimators[i] == "kulczynskicody") {
202 treeCalculators.push_back(new KulczynskiCody());
203 }else if (pDataArray->Estimators[i] == "lennon") {
204 treeCalculators.push_back(new Lennon());
205 }else if (pDataArray->Estimators[i] == "morisitahorn") {
206 treeCalculators.push_back(new MorHorn());
207 }else if (pDataArray->Estimators[i] == "braycurtis") {
208 treeCalculators.push_back(new BrayCurtis());
209 }else if (pDataArray->Estimators[i] == "whittaker") {
210 treeCalculators.push_back(new Whittaker());
211 }else if (pDataArray->Estimators[i] == "odum") {
212 treeCalculators.push_back(new Odum());
213 }else if (pDataArray->Estimators[i] == "canberra") {
214 treeCalculators.push_back(new Canberra());
215 }else if (pDataArray->Estimators[i] == "structeuclidean") {
216 treeCalculators.push_back(new StructEuclidean());
217 }else if (pDataArray->Estimators[i] == "structchord") {
218 treeCalculators.push_back(new StructChord());
219 }else if (pDataArray->Estimators[i] == "hellinger") {
220 treeCalculators.push_back(new Hellinger());
221 }else if (pDataArray->Estimators[i] == "manhattan") {
222 treeCalculators.push_back(new Manhattan());
223 }else if (pDataArray->Estimators[i] == "structpearson") {
224 treeCalculators.push_back(new StructPearson());
225 }else if (pDataArray->Estimators[i] == "soergel") {
226 treeCalculators.push_back(new Soergel());
227 }else if (pDataArray->Estimators[i] == "spearman") {
228 treeCalculators.push_back(new Spearman());
229 }else if (pDataArray->Estimators[i] == "structkulczynski") {
230 treeCalculators.push_back(new StructKulczynski());
231 }else if (pDataArray->Estimators[i] == "speciesprofile") {
232 treeCalculators.push_back(new SpeciesProfile());
233 }else if (pDataArray->Estimators[i] == "hamming") {
234 treeCalculators.push_back(new Hamming());
235 }else if (pDataArray->Estimators[i] == "structchi2") {
236 treeCalculators.push_back(new StructChi2());
237 }else if (pDataArray->Estimators[i] == "gower") {
238 treeCalculators.push_back(new Gower());
239 }else if (pDataArray->Estimators[i] == "memchi2") {
240 treeCalculators.push_back(new MemChi2());
241 }else if (pDataArray->Estimators[i] == "memchord") {
242 treeCalculators.push_back(new MemChord());
243 }else if (pDataArray->Estimators[i] == "memeuclidean") {
244 treeCalculators.push_back(new MemEuclidean());
245 }else if (pDataArray->Estimators[i] == "mempearson") {
246 treeCalculators.push_back(new MemPearson());
251 pDataArray->calcDists.resize(treeCalculators.size());
253 vector<SharedRAbundVector*> subset;
254 for (int k = pDataArray->start; k < pDataArray->end; k++) { // pass cdd each set of groups to compare
256 for (int l = 0; l < k; l++) {
258 if (k != l) { //we dont need to similiarity of a groups to itself
259 subset.clear(); //clear out old pair of sharedrabunds
260 //add new pair of sharedrabunds
261 subset.push_back(pDataArray->thisLookup[k]); subset.push_back(pDataArray->thisLookup[l]);
263 for(int i=0;i<treeCalculators.size();i++) {
265 //if this calc needs all groups to calculate the pair load all groups
266 if (treeCalculators[i]->getNeedsAll()) {
267 //load subset with rest of lookup for those calcs that need everyone to calc for a pair
268 for (int w = 0; w < pDataArray->thisLookup.size(); w++) {
269 if ((w != k) && (w != l)) { subset.push_back(pDataArray->thisLookup[w]); }
273 vector<double> tempdata = treeCalculators[i]->getValues(subset); //saves the calculator outputs
275 if (pDataArray->m->control_pressed) { return 1; }
277 seqDist temp(l, k, -(tempdata[0]-1.0));
278 pDataArray->calcDists[i].push_back(temp);
284 for(int i=0;i<treeCalculators.size();i++){ delete treeCalculators[i]; }
289 catch(exception& e) {
290 pDataArray->m->errorOut(e, "TreeGroupsCommand", "MyTreeSharedThreadFunction");