]> git.donarmstrong.com Git - mothur.git/blob - treegroupscommand.h
fixes while testing 1.33.0
[mothur.git] / treegroupscommand.h
1 #ifndef TREEGROUPCOMMAND_H
2 #define TREEGROUPCOMMAND_H
3
4 /*
5  *  treegroupscommand.h
6  *  Mothur
7  *
8  *  Created by Sarah Westcott on 4/8/09.
9  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10  *
11  */
12  
13 #include "command.hpp"
14 #include "inputdata.h"
15 #include "groupmap.h"
16 #include "validcalculator.h"
17 #include "tree.h"
18 #include "counttable.h"
19 #include "readmatrix.hpp"
20 #include "readcolumn.h"
21 #include "readphylip.h"
22 #include "sharedsobscollectsummary.h"
23 #include "sharedchao1.h"
24 #include "sharedace.h"
25 #include "sharednseqs.h"
26 #include "sharedjabund.h"
27 #include "sharedsorabund.h"
28 #include "sharedjclass.h"
29 #include "sharedsorclass.h"
30 #include "sharedjest.h"
31 #include "sharedsorest.h"
32 #include "sharedthetayc.h"
33 #include "sharedthetan.h"
34 #include "sharedkstest.h"
35 #include "whittaker.h"
36 #include "sharedochiai.h"
37 #include "sharedanderbergs.h"
38 #include "sharedkulczynski.h"
39 #include "sharedkulczynskicody.h"
40 #include "sharedlennon.h"
41 #include "sharedmorisitahorn.h"
42 #include "sharedbraycurtis.h"
43 #include "sharedjackknife.h"
44 #include "whittaker.h"
45 #include "odum.h"
46 #include "canberra.h"
47 #include "structeuclidean.h"
48 #include "structchord.h"
49 #include "hellinger.h"
50 #include "manhattan.h"
51 #include "structpearson.h"
52 #include "soergel.h"
53 #include "spearman.h"
54 #include "structkulczynski.h"
55 #include "structchi2.h"
56 #include "speciesprofile.h"
57 #include "hamming.h"
58 #include "gower.h"
59 #include "memchi2.h"
60 #include "memchord.h"
61 #include "memeuclidean.h"
62 #include "mempearson.h"
63
64
65
/* This command creates a tree file for each similarity calculator at distance level, using various calculators to find the similarity between groups.
        The user can select the lines or labels they wish to use as well as the groups they would like included.
        They can also use as many or as few calculators as they wish. */
69         
70
71 class TreeGroupCommand : public Command {
72         
73 public:
74         TreeGroupCommand(string);       
75         TreeGroupCommand();
76         ~TreeGroupCommand();
77         
78         vector<string> setParameters();
79         string getCommandName()                 { return "tree.shared";                         }
80         string getCommandCategory()             { return "OTU-Based Approaches";        }
81         
82         string getHelpString(); 
83     string getOutputPattern(string);    
84         string getCitation() { return "http://www.mothur.org/wiki/Tree.shared"; }
85         string getDescription()         { return "generate a tree file that describes the dissimilarity among groups"; }
86
87         
88         int execute(); 
89         void help() { m->mothurOut(getHelpString()); }  
90         
91 private:
92     
93     struct linePair {
94                 int start;
95                 int end;
96         };
97         vector<linePair> lines;
98     
99         Tree* createTree(vector< vector<double> >&);
100         void printSims(ostream&, vector< vector<double> >&);
101         int makeSimsShared();
102         vector< vector<double> > makeSimsDist(SparseDistanceMatrix*);
103     int writeTree(string, Tree*);
104     int driver(vector<SharedRAbundVector*>, int, int, vector< vector<seqDist> >&);
105         
106         NameAssignment* nameMap;
107         ListVector* list;
108         CountTable* ct;
109         Tree* t;
110     InputData* input;
111         vector<Calculator*> treeCalculators;
112         vector<SharedRAbundVector*> lookup;
113         string lastLabel;
114         string format, groupNames, filename, sharedfile, countfile, inputfile;
115         int numGroups, subsampleSize, iters, processors;
116         ofstream out;
117         float precision, cutoff;
118
119         bool abort, allLines, subsample;
120         set<string> labels; //holds labels to be used
121         string phylipfile, columnfile, namefile, calc, groups, label, outputDir;
122         vector<string>  Estimators, Groups, outputNames; //holds estimators to be used
123         
124         //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
125         int process(vector<SharedRAbundVector*>);
126         
127         
128
129 };
130
131 /**************************************************************************************************/
132 //custom data structure for threads to use.
133 // This is passed by void pointer so it can be any data type
134 // that can be passed using a single void pointer (LPVOID).
135 struct treeSharedData {
136     vector<SharedRAbundVector*> thisLookup;
137     vector< vector<seqDist> > calcDists;
138     vector<string>  Estimators;
139         unsigned long long start;
140         unsigned long long end;
141         MothurOut* m;
142     int count;
143         
144         treeSharedData(){}
145         treeSharedData(MothurOut* mout, unsigned long long st, unsigned long long en, vector<string> est, vector<SharedRAbundVector*> lu) {
146                 m = mout;
147                 start = st;
148                 end = en;
149         Estimators = est;
150         thisLookup = lu;
151         count=0;
152         }
153 };
154 /**************************************************************************************************/
155 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
156 #else
157 static DWORD WINAPI MyTreeSharedThreadFunction(LPVOID lpParam){ 
158         treeSharedData* pDataArray;
159         pDataArray = (treeSharedData*)lpParam;
160         
161         try {
162         
163         vector<Calculator*> treeCalculators;
164         ValidCalculators validCalculator;
165         for (int i=0; i<pDataArray->Estimators.size(); i++) {
166             if (validCalculator.isValidCalculator("matrix", pDataArray->Estimators[i]) == true) { 
167                 if (pDataArray->Estimators[i] == "sharedsobs") { 
168                     treeCalculators.push_back(new SharedSobsCS());
169                 }else if (pDataArray->Estimators[i] == "sharedchao") { 
170                     treeCalculators.push_back(new SharedChao1());
171                 }else if (pDataArray->Estimators[i] == "sharedace") { 
172                     treeCalculators.push_back(new SharedAce());
173                 }else if (pDataArray->Estimators[i] == "jabund") {      
174                     treeCalculators.push_back(new JAbund());
175                 }else if (pDataArray->Estimators[i] == "sorabund") { 
176                     treeCalculators.push_back(new SorAbund());
177                 }else if (pDataArray->Estimators[i] == "jclass") { 
178                     treeCalculators.push_back(new Jclass());
179                 }else if (pDataArray->Estimators[i] == "sorclass") { 
180                     treeCalculators.push_back(new SorClass());
181                 }else if (pDataArray->Estimators[i] == "jest") { 
182                     treeCalculators.push_back(new Jest());
183                 }else if (pDataArray->Estimators[i] == "sorest") { 
184                     treeCalculators.push_back(new SorEst());
185                 }else if (pDataArray->Estimators[i] == "thetayc") { 
186                     treeCalculators.push_back(new ThetaYC());
187                 }else if (pDataArray->Estimators[i] == "thetan") { 
188                     treeCalculators.push_back(new ThetaN());
189                 }else if (pDataArray->Estimators[i] == "kstest") { 
190                     treeCalculators.push_back(new KSTest());
191                 }else if (pDataArray->Estimators[i] == "sharednseqs") { 
192                     treeCalculators.push_back(new SharedNSeqs());
193                 }else if (pDataArray->Estimators[i] == "ochiai") { 
194                     treeCalculators.push_back(new Ochiai());
195                 }else if (pDataArray->Estimators[i] == "anderberg") { 
196                     treeCalculators.push_back(new Anderberg());
197                 }else if (pDataArray->Estimators[i] == "kulczynski") { 
198                     treeCalculators.push_back(new Kulczynski());
199                 }else if (pDataArray->Estimators[i] == "kulczynskicody") { 
200                     treeCalculators.push_back(new KulczynskiCody());
201                 }else if (pDataArray->Estimators[i] == "lennon") { 
202                     treeCalculators.push_back(new Lennon());
203                 }else if (pDataArray->Estimators[i] == "morisitahorn") { 
204                     treeCalculators.push_back(new MorHorn());
205                 }else if (pDataArray->Estimators[i] == "braycurtis") { 
206                     treeCalculators.push_back(new BrayCurtis());
207                 }else if (pDataArray->Estimators[i] == "whittaker") { 
208                     treeCalculators.push_back(new Whittaker());
209                 }else if (pDataArray->Estimators[i] == "odum") { 
210                     treeCalculators.push_back(new Odum());
211                 }else if (pDataArray->Estimators[i] == "canberra") { 
212                     treeCalculators.push_back(new Canberra());
213                 }else if (pDataArray->Estimators[i] == "structeuclidean") { 
214                     treeCalculators.push_back(new StructEuclidean());
215                 }else if (pDataArray->Estimators[i] == "structchord") { 
216                     treeCalculators.push_back(new StructChord());
217                 }else if (pDataArray->Estimators[i] == "hellinger") { 
218                     treeCalculators.push_back(new Hellinger());
219                 }else if (pDataArray->Estimators[i] == "manhattan") { 
220                     treeCalculators.push_back(new Manhattan());
221                 }else if (pDataArray->Estimators[i] == "structpearson") { 
222                     treeCalculators.push_back(new StructPearson());
223                 }else if (pDataArray->Estimators[i] == "soergel") { 
224                     treeCalculators.push_back(new Soergel());
225                 }else if (pDataArray->Estimators[i] == "spearman") { 
226                     treeCalculators.push_back(new Spearman());
227                 }else if (pDataArray->Estimators[i] == "structkulczynski") { 
228                     treeCalculators.push_back(new StructKulczynski());
229                 }else if (pDataArray->Estimators[i] == "speciesprofile") { 
230                     treeCalculators.push_back(new SpeciesProfile());
231                 }else if (pDataArray->Estimators[i] == "hamming") { 
232                     treeCalculators.push_back(new Hamming());
233                 }else if (pDataArray->Estimators[i] == "structchi2") { 
234                     treeCalculators.push_back(new StructChi2());
235                 }else if (pDataArray->Estimators[i] == "gower") { 
236                     treeCalculators.push_back(new Gower());
237                 }else if (pDataArray->Estimators[i] == "memchi2") { 
238                     treeCalculators.push_back(new MemChi2());
239                 }else if (pDataArray->Estimators[i] == "memchord") { 
240                     treeCalculators.push_back(new MemChord());
241                 }else if (pDataArray->Estimators[i] == "memeuclidean") { 
242                     treeCalculators.push_back(new MemEuclidean());
243                 }else if (pDataArray->Estimators[i] == "mempearson") { 
244                     treeCalculators.push_back(new MemPearson());
245                 }
246             }
247         }
248         
249         pDataArray->calcDists.resize(treeCalculators.size());
250         
251                 vector<SharedRAbundVector*> subset;
252                 for (int k = pDataArray->start; k < pDataArray->end; k++) { // pass cdd each set of groups to compare
253                         
254             pDataArray->count++;
255             
256                         for (int l = 0; l < k; l++) {
257                                 
258                                 if (k != l) { //we dont need to similiarity of a groups to itself
259                                         subset.clear(); //clear out old pair of sharedrabunds
260                                         //add new pair of sharedrabunds
261                                         subset.push_back(pDataArray->thisLookup[k]); subset.push_back(pDataArray->thisLookup[l]); 
262                                         
263                                         for(int i=0;i<treeCalculators.size();i++) {
264                                                 
265                                                 //if this calc needs all groups to calculate the pair load all groups
266                                                 if (treeCalculators[i]->getNeedsAll()) { 
267                                                         //load subset with rest of lookup for those calcs that need everyone to calc for a pair
268                                                         for (int w = 0; w < pDataArray->thisLookup.size(); w++) {
269                                                                 if ((w != k) && (w != l)) { subset.push_back(pDataArray->thisLookup[w]); }
270                                                         }
271                                                 }
272                                                 
273                                                 vector<double> tempdata = treeCalculators[i]->getValues(subset); //saves the calculator outputs
274                                                 
275                                                 if (pDataArray->m->control_pressed) { return 1; }
276                                                 
277                                                 seqDist temp(l, k, -(tempdata[0]-1.0));
278                                                 pDataArray->calcDists[i].push_back(temp);
279                                         }
280                                 }
281                         }
282                 }
283         
284         for(int i=0;i<treeCalculators.size();i++){  delete treeCalculators[i]; }
285                 
286                 return 0;
287                 
288         }
289         catch(exception& e) {
290                 pDataArray->m->errorOut(e, "TreeGroupsCommand", "MyTreeSharedThreadFunction");
291                 exit(1);
292         }
293
294 #endif
295
296
297         
298 #endif
299
300