]> git.donarmstrong.com Git - mothur.git/blob - treegroupscommand.h
major change to the tree class to use the count table class instead of tree map....
[mothur.git] / treegroupscommand.h
1 #ifndef TREEGROUPCOMMAND_H
2 #define TREEGROUPCOMMAND_H
3
4 /*
5  *  treegroupscommand.h
6  *  Mothur
7  *
8  *  Created by Sarah Westcott on 4/8/09.
9  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10  *
11  */
12  
13 #include "command.hpp"
14 #include "inputdata.h"
15 #include "groupmap.h"
16 #include "validcalculator.h"
17 #include "tree.h"
18 #include "counttable.h"
19 #include "readmatrix.hpp"
20 #include "readcolumn.h"
21 #include "readphylip.h"
22 #include "sharedsobscollectsummary.h"
23 #include "sharedchao1.h"
24 #include "sharedace.h"
25 #include "sharednseqs.h"
26 #include "sharedjabund.h"
27 #include "sharedsorabund.h"
28 #include "sharedjclass.h"
29 #include "sharedsorclass.h"
30 #include "sharedjest.h"
31 #include "sharedsorest.h"
32 #include "sharedthetayc.h"
33 #include "sharedthetan.h"
34 #include "sharedkstest.h"
35 #include "whittaker.h"
36 #include "sharedochiai.h"
37 #include "sharedanderbergs.h"
38 #include "sharedkulczynski.h"
39 #include "sharedkulczynskicody.h"
40 #include "sharedlennon.h"
41 #include "sharedmorisitahorn.h"
42 #include "sharedbraycurtis.h"
43 #include "sharedjackknife.h"
44 #include "whittaker.h"
45 #include "odum.h"
46 #include "canberra.h"
47 #include "structeuclidean.h"
48 #include "structchord.h"
49 #include "hellinger.h"
50 #include "manhattan.h"
51 #include "structpearson.h"
52 #include "soergel.h"
53 #include "spearman.h"
54 #include "structkulczynski.h"
55 #include "structchi2.h"
56 #include "speciesprofile.h"
57 #include "hamming.h"
58 #include "gower.h"
59 #include "memchi2.h"
60 #include "memchord.h"
61 #include "memeuclidean.h"
62 #include "mempearson.h"
63
64
65
66 /* This command creates a tree file for each similarity calculator at each distance level, using various calculators to find the similarity between groups. 
67         The user can select the lines or labels they wish to use as well as the groups they would like included.
68         They can also use as many or as few calculators as they wish. */
69         
70
71 class TreeGroupCommand : public Command {
72         
73 public:
74         TreeGroupCommand(string);       
75         TreeGroupCommand();
76         ~TreeGroupCommand();
77         
78         vector<string> setParameters();
79         string getCommandName()                 { return "tree.shared";                         }
80         string getCommandCategory()             { return "OTU-Based Approaches";        }
81         string getOutputFileNameTag(string, string);
82         string getHelpString(); 
83         string getCitation() { return "http://www.mothur.org/wiki/Tree.shared"; }
84         string getDescription()         { return "generate a tree file that describes the dissimilarity among groups"; }
85
86         
87         int execute(); 
88         void help() { m->mothurOut(getHelpString()); }  
89         
90 private:
91     
92     struct linePair {
93                 int start;
94                 int end;
95         };
96         vector<linePair> lines;
97     
98         Tree* createTree(vector< vector<double> >&);
99         void printSims(ostream&, vector< vector<double> >&);
100         int makeSimsShared();
101         vector< vector<double> > makeSimsDist(SparseDistanceMatrix*);
102     int writeTree(string, Tree*);
103     int driver(vector<SharedRAbundVector*>, int, int, vector< vector<seqDist> >&);
104         
105         NameAssignment* nameMap;
106         ListVector* list;
107         CountTable* ct;
108         Tree* t;
109     InputData* input;
110         vector<Calculator*> treeCalculators;
111         vector<SharedRAbundVector*> lookup;
112         string lastLabel;
113         string format, groupNames, filename, sharedfile, inputfile;
114         int numGroups, subsampleSize, iters, processors;
115         ofstream out;
116         float precision, cutoff;
117
118         bool abort, allLines, subsample;
119         set<string> labels; //holds labels to be used
120         string phylipfile, columnfile, namefile, calc, groups, label, outputDir;
121         vector<string>  Estimators, Groups, outputNames; //holds estimators to be used
122         
123         //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
124         int process(vector<SharedRAbundVector*>);
125         
126         
127
128 };
129
130 /**************************************************************************************************/
131 //custom data structure for threads to use.
132 // This is passed by void pointer so it can be any data type
133 // that can be passed using a single void pointer (LPVOID).
134 struct treeSharedData {
135     vector<SharedRAbundVector*> thisLookup;
136     vector< vector<seqDist> > calcDists;
137     vector<string>  Estimators;
138         unsigned long long start;
139         unsigned long long end;
140         MothurOut* m;
141         
142         treeSharedData(){}
143         treeSharedData(MothurOut* mout, unsigned long long st, unsigned long long en, vector<string> est, vector<SharedRAbundVector*> lu) {
144                 m = mout;
145                 start = st;
146                 end = en;
147         Estimators = est;
148         thisLookup = lu;
149         }
150 };
151 /**************************************************************************************************/
152 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
153 #else
154 static DWORD WINAPI MyTreeSharedThreadFunction(LPVOID lpParam){ 
155         treeSharedData* pDataArray;
156         pDataArray = (treeSharedData*)lpParam;
157         
158         try {
159         
160         vector<Calculator*> treeCalculators;
161         ValidCalculators validCalculator;
162         for (int i=0; i<pDataArray->Estimators.size(); i++) {
163             if (validCalculator.isValidCalculator("matrix", pDataArray->Estimators[i]) == true) { 
164                 if (pDataArray->Estimators[i] == "sharedsobs") { 
165                     treeCalculators.push_back(new SharedSobsCS());
166                 }else if (pDataArray->Estimators[i] == "sharedchao") { 
167                     treeCalculators.push_back(new SharedChao1());
168                 }else if (pDataArray->Estimators[i] == "sharedace") { 
169                     treeCalculators.push_back(new SharedAce());
170                 }else if (pDataArray->Estimators[i] == "jabund") {      
171                     treeCalculators.push_back(new JAbund());
172                 }else if (pDataArray->Estimators[i] == "sorabund") { 
173                     treeCalculators.push_back(new SorAbund());
174                 }else if (pDataArray->Estimators[i] == "jclass") { 
175                     treeCalculators.push_back(new Jclass());
176                 }else if (pDataArray->Estimators[i] == "sorclass") { 
177                     treeCalculators.push_back(new SorClass());
178                 }else if (pDataArray->Estimators[i] == "jest") { 
179                     treeCalculators.push_back(new Jest());
180                 }else if (pDataArray->Estimators[i] == "sorest") { 
181                     treeCalculators.push_back(new SorEst());
182                 }else if (pDataArray->Estimators[i] == "thetayc") { 
183                     treeCalculators.push_back(new ThetaYC());
184                 }else if (pDataArray->Estimators[i] == "thetan") { 
185                     treeCalculators.push_back(new ThetaN());
186                 }else if (pDataArray->Estimators[i] == "kstest") { 
187                     treeCalculators.push_back(new KSTest());
188                 }else if (pDataArray->Estimators[i] == "sharednseqs") { 
189                     treeCalculators.push_back(new SharedNSeqs());
190                 }else if (pDataArray->Estimators[i] == "ochiai") { 
191                     treeCalculators.push_back(new Ochiai());
192                 }else if (pDataArray->Estimators[i] == "anderberg") { 
193                     treeCalculators.push_back(new Anderberg());
194                 }else if (pDataArray->Estimators[i] == "kulczynski") { 
195                     treeCalculators.push_back(new Kulczynski());
196                 }else if (pDataArray->Estimators[i] == "kulczynskicody") { 
197                     treeCalculators.push_back(new KulczynskiCody());
198                 }else if (pDataArray->Estimators[i] == "lennon") { 
199                     treeCalculators.push_back(new Lennon());
200                 }else if (pDataArray->Estimators[i] == "morisitahorn") { 
201                     treeCalculators.push_back(new MorHorn());
202                 }else if (pDataArray->Estimators[i] == "braycurtis") { 
203                     treeCalculators.push_back(new BrayCurtis());
204                 }else if (pDataArray->Estimators[i] == "whittaker") { 
205                     treeCalculators.push_back(new Whittaker());
206                 }else if (pDataArray->Estimators[i] == "odum") { 
207                     treeCalculators.push_back(new Odum());
208                 }else if (pDataArray->Estimators[i] == "canberra") { 
209                     treeCalculators.push_back(new Canberra());
210                 }else if (pDataArray->Estimators[i] == "structeuclidean") { 
211                     treeCalculators.push_back(new StructEuclidean());
212                 }else if (pDataArray->Estimators[i] == "structchord") { 
213                     treeCalculators.push_back(new StructChord());
214                 }else if (pDataArray->Estimators[i] == "hellinger") { 
215                     treeCalculators.push_back(new Hellinger());
216                 }else if (pDataArray->Estimators[i] == "manhattan") { 
217                     treeCalculators.push_back(new Manhattan());
218                 }else if (pDataArray->Estimators[i] == "structpearson") { 
219                     treeCalculators.push_back(new StructPearson());
220                 }else if (pDataArray->Estimators[i] == "soergel") { 
221                     treeCalculators.push_back(new Soergel());
222                 }else if (pDataArray->Estimators[i] == "spearman") { 
223                     treeCalculators.push_back(new Spearman());
224                 }else if (pDataArray->Estimators[i] == "structkulczynski") { 
225                     treeCalculators.push_back(new StructKulczynski());
226                 }else if (pDataArray->Estimators[i] == "speciesprofile") { 
227                     treeCalculators.push_back(new SpeciesProfile());
228                 }else if (pDataArray->Estimators[i] == "hamming") { 
229                     treeCalculators.push_back(new Hamming());
230                 }else if (pDataArray->Estimators[i] == "structchi2") { 
231                     treeCalculators.push_back(new StructChi2());
232                 }else if (pDataArray->Estimators[i] == "gower") { 
233                     treeCalculators.push_back(new Gower());
234                 }else if (pDataArray->Estimators[i] == "memchi2") { 
235                     treeCalculators.push_back(new MemChi2());
236                 }else if (pDataArray->Estimators[i] == "memchord") { 
237                     treeCalculators.push_back(new MemChord());
238                 }else if (pDataArray->Estimators[i] == "memeuclidean") { 
239                     treeCalculators.push_back(new MemEuclidean());
240                 }else if (pDataArray->Estimators[i] == "mempearson") { 
241                     treeCalculators.push_back(new MemPearson());
242                 }
243             }
244         }
245         
246         pDataArray->calcDists.resize(treeCalculators.size());
247         
248                 vector<SharedRAbundVector*> subset;
249                 for (int k = pDataArray->start; k < pDataArray->end; k++) { // pass cdd each set of groups to compare
250                         
251                         for (int l = 0; l < k; l++) {
252                                 
253                                 if (k != l) { //we dont need to similiarity of a groups to itself
254                                         subset.clear(); //clear out old pair of sharedrabunds
255                                         //add new pair of sharedrabunds
256                                         subset.push_back(pDataArray->thisLookup[k]); subset.push_back(pDataArray->thisLookup[l]); 
257                                         
258                                         for(int i=0;i<treeCalculators.size();i++) {
259                                                 
260                                                 //if this calc needs all groups to calculate the pair load all groups
261                                                 if (treeCalculators[i]->getNeedsAll()) { 
262                                                         //load subset with rest of lookup for those calcs that need everyone to calc for a pair
263                                                         for (int w = 0; w < pDataArray->thisLookup.size(); w++) {
264                                                                 if ((w != k) && (w != l)) { subset.push_back(pDataArray->thisLookup[w]); }
265                                                         }
266                                                 }
267                                                 
268                                                 vector<double> tempdata = treeCalculators[i]->getValues(subset); //saves the calculator outputs
269                                                 
270                                                 if (pDataArray->m->control_pressed) { return 1; }
271                                                 
272                                                 seqDist temp(l, k, -(tempdata[0]-1.0));
273                                                 pDataArray->calcDists[i].push_back(temp);
274                                         }
275                                 }
276                         }
277                 }
278         
279         for(int i=0;i<treeCalculators.size();i++){  delete treeCalculators[i]; }
280                 
281                 return 0;
282                 
283         }
284         catch(exception& e) {
285                 pDataArray->m->errorOut(e, "TreeGroupsCommand", "MyTreeSharedThreadFunction");
286                 exit(1);
287         }
288
289 #endif
290
291
292         
293 #endif
294
295