]> git.donarmstrong.com Git - mothur.git/blob - treegroupscommand.h
b0ae730d98aa8af6da4fecabe3cf0f7aa03df2d4
[mothur.git] / treegroupscommand.h
1 #ifndef TREEGROUPCOMMAND_H
2 #define TREEGROUPCOMMAND_H
3
4 /*
5  *  treegroupscommand.h
6  *  Mothur
7  *
8  *  Created by Sarah Westcott on 4/8/09.
9  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10  *
11  */
12  
13 #include "command.hpp"
14 #include "inputdata.h"
15 #include "groupmap.h"
16 #include "validcalculator.h"
17 #include "tree.h"
18 #include "treemap.h"
19 #include "readmatrix.hpp"
20 #include "readcolumn.h"
21 #include "readphylip.h"
22 #include "sparsematrix.hpp"
23 #include "sharedsobscollectsummary.h"
24 #include "sharedchao1.h"
25 #include "sharedace.h"
26 #include "sharednseqs.h"
27 #include "sharedjabund.h"
28 #include "sharedsorabund.h"
29 #include "sharedjclass.h"
30 #include "sharedsorclass.h"
31 #include "sharedjest.h"
32 #include "sharedsorest.h"
33 #include "sharedthetayc.h"
34 #include "sharedthetan.h"
35 #include "sharedkstest.h"
36 #include "whittaker.h"
37 #include "sharedochiai.h"
38 #include "sharedanderbergs.h"
39 #include "sharedkulczynski.h"
40 #include "sharedkulczynskicody.h"
41 #include "sharedlennon.h"
42 #include "sharedmorisitahorn.h"
43 #include "sharedbraycurtis.h"
44 #include "sharedjackknife.h"
45 #include "whittaker.h"
46 #include "odum.h"
47 #include "canberra.h"
48 #include "structeuclidean.h"
49 #include "structchord.h"
50 #include "hellinger.h"
51 #include "manhattan.h"
52 #include "structpearson.h"
53 #include "soergel.h"
54 #include "spearman.h"
55 #include "structkulczynski.h"
56 #include "structchi2.h"
57 #include "speciesprofile.h"
58 #include "hamming.h"
59 #include "gower.h"
60 #include "memchi2.h"
61 #include "memchord.h"
62 #include "memeuclidean.h"
63 #include "mempearson.h"
64
65
66
/* This command creates a tree file for each similarity calculator at distance level, using various calculators to find the similarity between groups.
        The user can select the lines or labels they wish to use as well as the groups they would like included.
        They can also use as many or as few calculators as they wish. */
70         
71
// Shorthand for an iterator over a list of PCell entries.
typedef list<PCell>::iterator MatData;
73
74 class TreeGroupCommand : public Command {
75         
76 public:
77         TreeGroupCommand(string);       
78         TreeGroupCommand();
79         ~TreeGroupCommand();
80         
81         vector<string> setParameters();
82         string getCommandName()                 { return "tree.shared";                         }
83         string getCommandCategory()             { return "OTU-Based Approaches";        }
84         string getOutputFileNameTag(string, string);
85         string getHelpString(); 
86         string getCitation() { return "http://www.mothur.org/wiki/Tree.shared"; }
87         string getDescription()         { return "generate a tree file that describes the dissimilarity among groups"; }
88
89         
90         int execute(); 
91         void help() { m->mothurOut(getHelpString()); }  
92         
93 private:
94     
95     struct linePair {
96                 int start;
97                 int end;
98         };
99         vector<linePair> lines;
100     
101         Tree* createTree(vector< vector<double> >&);
102         void printSims(ostream&, vector< vector<double> >&);
103         int makeSimsShared();
104         vector< vector<double> > makeSimsDist(SparseDistanceMatrix*);
105     int writeTree(string, Tree*);
106     int driver(vector<SharedRAbundVector*>, int, int, vector< vector<seqDist> >&);
107         
108         NameAssignment* nameMap;
109         ListVector* list;
110         TreeMap* tmap;
111         Tree* t;
112     InputData* input;
113         vector<Calculator*> treeCalculators;
114         vector<SharedRAbundVector*> lookup;
115         string lastLabel;
116         string format, groupNames, filename, sharedfile, inputfile;
117         int numGroups, subsampleSize, iters, processors;
118         ofstream out;
119         float precision, cutoff;
120
121         bool abort, allLines, subsample;
122         set<string> labels; //holds labels to be used
123         string phylipfile, columnfile, namefile, calc, groups, label, outputDir;
124         vector<string>  Estimators, Groups, outputNames; //holds estimators to be used
125         
126         //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
127         int process(vector<SharedRAbundVector*>);
128         
129         
130
131 };
132
133 /**************************************************************************************************/
134 //custom data structure for threads to use.
135 // This is passed by void pointer so it can be any data type
136 // that can be passed using a single void pointer (LPVOID).
137 struct treeSharedData {
138     vector<SharedRAbundVector*> thisLookup;
139     vector< vector<seqDist> > calcDists;
140     vector<string>  Estimators;
141         unsigned long long start;
142         unsigned long long end;
143         MothurOut* m;
144         
145         treeSharedData(){}
146         treeSharedData(MothurOut* mout, unsigned long long st, unsigned long long en, vector<string> est, vector<SharedRAbundVector*> lu) {
147                 m = mout;
148                 start = st;
149                 end = en;
150         Estimators = est;
151         thisLookup = lu;
152         }
153 };
154 /**************************************************************************************************/
155 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
156 #else
157 static DWORD WINAPI MyTreeSharedThreadFunction(LPVOID lpParam){ 
158         treeSharedData* pDataArray;
159         pDataArray = (treeSharedData*)lpParam;
160         
161         try {
162         
163         vector<Calculator*> treeCalculators;
164         ValidCalculators validCalculator;
165         for (int i=0; i<pDataArray->Estimators.size(); i++) {
166             if (validCalculator.isValidCalculator("matrix", pDataArray->Estimators[i]) == true) { 
167                 if (pDataArray->Estimators[i] == "sharedsobs") { 
168                     treeCalculators.push_back(new SharedSobsCS());
169                 }else if (pDataArray->Estimators[i] == "sharedchao") { 
170                     treeCalculators.push_back(new SharedChao1());
171                 }else if (pDataArray->Estimators[i] == "sharedace") { 
172                     treeCalculators.push_back(new SharedAce());
173                 }else if (pDataArray->Estimators[i] == "jabund") {      
174                     treeCalculators.push_back(new JAbund());
175                 }else if (pDataArray->Estimators[i] == "sorabund") { 
176                     treeCalculators.push_back(new SorAbund());
177                 }else if (pDataArray->Estimators[i] == "jclass") { 
178                     treeCalculators.push_back(new Jclass());
179                 }else if (pDataArray->Estimators[i] == "sorclass") { 
180                     treeCalculators.push_back(new SorClass());
181                 }else if (pDataArray->Estimators[i] == "jest") { 
182                     treeCalculators.push_back(new Jest());
183                 }else if (pDataArray->Estimators[i] == "sorest") { 
184                     treeCalculators.push_back(new SorEst());
185                 }else if (pDataArray->Estimators[i] == "thetayc") { 
186                     treeCalculators.push_back(new ThetaYC());
187                 }else if (pDataArray->Estimators[i] == "thetan") { 
188                     treeCalculators.push_back(new ThetaN());
189                 }else if (pDataArray->Estimators[i] == "kstest") { 
190                     treeCalculators.push_back(new KSTest());
191                 }else if (pDataArray->Estimators[i] == "sharednseqs") { 
192                     treeCalculators.push_back(new SharedNSeqs());
193                 }else if (pDataArray->Estimators[i] == "ochiai") { 
194                     treeCalculators.push_back(new Ochiai());
195                 }else if (pDataArray->Estimators[i] == "anderberg") { 
196                     treeCalculators.push_back(new Anderberg());
197                 }else if (pDataArray->Estimators[i] == "kulczynski") { 
198                     treeCalculators.push_back(new Kulczynski());
199                 }else if (pDataArray->Estimators[i] == "kulczynskicody") { 
200                     treeCalculators.push_back(new KulczynskiCody());
201                 }else if (pDataArray->Estimators[i] == "lennon") { 
202                     treeCalculators.push_back(new Lennon());
203                 }else if (pDataArray->Estimators[i] == "morisitahorn") { 
204                     treeCalculators.push_back(new MorHorn());
205                 }else if (pDataArray->Estimators[i] == "braycurtis") { 
206                     treeCalculators.push_back(new BrayCurtis());
207                 }else if (pDataArray->Estimators[i] == "whittaker") { 
208                     treeCalculators.push_back(new Whittaker());
209                 }else if (pDataArray->Estimators[i] == "odum") { 
210                     treeCalculators.push_back(new Odum());
211                 }else if (pDataArray->Estimators[i] == "canberra") { 
212                     treeCalculators.push_back(new Canberra());
213                 }else if (pDataArray->Estimators[i] == "structeuclidean") { 
214                     treeCalculators.push_back(new StructEuclidean());
215                 }else if (pDataArray->Estimators[i] == "structchord") { 
216                     treeCalculators.push_back(new StructChord());
217                 }else if (pDataArray->Estimators[i] == "hellinger") { 
218                     treeCalculators.push_back(new Hellinger());
219                 }else if (pDataArray->Estimators[i] == "manhattan") { 
220                     treeCalculators.push_back(new Manhattan());
221                 }else if (pDataArray->Estimators[i] == "structpearson") { 
222                     treeCalculators.push_back(new StructPearson());
223                 }else if (pDataArray->Estimators[i] == "soergel") { 
224                     treeCalculators.push_back(new Soergel());
225                 }else if (pDataArray->Estimators[i] == "spearman") { 
226                     treeCalculators.push_back(new Spearman());
227                 }else if (pDataArray->Estimators[i] == "structkulczynski") { 
228                     treeCalculators.push_back(new StructKulczynski());
229                 }else if (pDataArray->Estimators[i] == "speciesprofile") { 
230                     treeCalculators.push_back(new SpeciesProfile());
231                 }else if (pDataArray->Estimators[i] == "hamming") { 
232                     treeCalculators.push_back(new Hamming());
233                 }else if (pDataArray->Estimators[i] == "structchi2") { 
234                     treeCalculators.push_back(new StructChi2());
235                 }else if (pDataArray->Estimators[i] == "gower") { 
236                     treeCalculators.push_back(new Gower());
237                 }else if (pDataArray->Estimators[i] == "memchi2") { 
238                     treeCalculators.push_back(new MemChi2());
239                 }else if (pDataArray->Estimators[i] == "memchord") { 
240                     treeCalculators.push_back(new MemChord());
241                 }else if (pDataArray->Estimators[i] == "memeuclidean") { 
242                     treeCalculators.push_back(new MemEuclidean());
243                 }else if (pDataArray->Estimators[i] == "mempearson") { 
244                     treeCalculators.push_back(new MemPearson());
245                 }
246             }
247         }
248         
249         pDataArray->calcDists.resize(treeCalculators.size());
250         
251                 vector<SharedRAbundVector*> subset;
252                 for (int k = pDataArray->start; k < pDataArray->end; k++) { // pass cdd each set of groups to compare
253                         
254                         for (int l = 0; l < k; l++) {
255                                 
256                                 if (k != l) { //we dont need to similiarity of a groups to itself
257                                         subset.clear(); //clear out old pair of sharedrabunds
258                                         //add new pair of sharedrabunds
259                                         subset.push_back(pDataArray->thisLookup[k]); subset.push_back(pDataArray->thisLookup[l]); 
260                                         
261                                         for(int i=0;i<treeCalculators.size();i++) {
262                                                 
263                                                 //if this calc needs all groups to calculate the pair load all groups
264                                                 if (treeCalculators[i]->getNeedsAll()) { 
265                                                         //load subset with rest of lookup for those calcs that need everyone to calc for a pair
266                                                         for (int w = 0; w < pDataArray->thisLookup.size(); w++) {
267                                                                 if ((w != k) && (w != l)) { subset.push_back(pDataArray->thisLookup[w]); }
268                                                         }
269                                                 }
270                                                 
271                                                 vector<double> tempdata = treeCalculators[i]->getValues(subset); //saves the calculator outputs
272                                                 
273                                                 if (pDataArray->m->control_pressed) { return 1; }
274                                                 
275                                                 seqDist temp(l, k, -(tempdata[0]-1.0));
276                                                 pDataArray->calcDists[i].push_back(temp);
277                                         }
278                                 }
279                         }
280                 }
281         
282         for(int i=0;i<treeCalculators.size();i++){  delete treeCalculators[i]; }
283                 
284                 return 0;
285                 
286         }
287         catch(exception& e) {
288                 pDataArray->m->errorOut(e, "TreeGroupsCommand", "MyTreeSharedThreadFunction");
289                 exit(1);
290         }
291
292 #endif
293
294
295         
296 #endif
297
298