]> git.donarmstrong.com Git - mothur.git/blob - treegroupscommand.h
fixed bug in phylo.diversity rooting. added filename patterns and create filename...
[mothur.git] / treegroupscommand.h
1 #ifndef TREEGROUPCOMMAND_H
2 #define TREEGROUPCOMMAND_H
3
4 /*
5  *  treegroupscommand.h
6  *  Mothur
7  *
8  *  Created by Sarah Westcott on 4/8/09.
9  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10  *
11  */
12  
13 #include "command.hpp"
14 #include "inputdata.h"
15 #include "groupmap.h"
16 #include "validcalculator.h"
17 #include "tree.h"
18 #include "counttable.h"
19 #include "readmatrix.hpp"
20 #include "readcolumn.h"
21 #include "readphylip.h"
22 #include "sharedsobscollectsummary.h"
23 #include "sharedchao1.h"
24 #include "sharedace.h"
25 #include "sharednseqs.h"
26 #include "sharedjabund.h"
27 #include "sharedsorabund.h"
28 #include "sharedjclass.h"
29 #include "sharedsorclass.h"
30 #include "sharedjest.h"
31 #include "sharedsorest.h"
32 #include "sharedthetayc.h"
33 #include "sharedthetan.h"
34 #include "sharedkstest.h"
35 #include "whittaker.h"
36 #include "sharedochiai.h"
37 #include "sharedanderbergs.h"
38 #include "sharedkulczynski.h"
39 #include "sharedkulczynskicody.h"
40 #include "sharedlennon.h"
41 #include "sharedmorisitahorn.h"
42 #include "sharedbraycurtis.h"
43 #include "sharedjackknife.h"
44 #include "whittaker.h"
45 #include "odum.h"
46 #include "canberra.h"
47 #include "structeuclidean.h"
48 #include "structchord.h"
49 #include "hellinger.h"
50 #include "manhattan.h"
51 #include "structpearson.h"
52 #include "soergel.h"
53 #include "spearman.h"
54 #include "structkulczynski.h"
55 #include "structchi2.h"
56 #include "speciesprofile.h"
57 #include "hamming.h"
58 #include "gower.h"
59 #include "memchi2.h"
60 #include "memchord.h"
61 #include "memeuclidean.h"
62 #include "mempearson.h"
63
64
65
66 /* This command creates a tree file for each similarity calculator at distance level, using various calculators to find the similarity between groups. 
67         The user can select the lines or labels they wish to use as well as the groups they would like included.
68         They can also use as many or as few calculators as they wish. */
69         
70
71 class TreeGroupCommand : public Command {
72         
73 public:
74         TreeGroupCommand(string);       
75         TreeGroupCommand();
76         ~TreeGroupCommand();
77         
78         vector<string> setParameters();
79         string getCommandName()                 { return "tree.shared";                         }
80         string getCommandCategory()             { return "OTU-Based Approaches";        }
81         
82         string getHelpString(); 
83     string getOutputPattern(string);    
84         string getCitation() { return "http://www.mothur.org/wiki/Tree.shared"; }
85         string getDescription()         { return "generate a tree file that describes the dissimilarity among groups"; }
86
87         
88         int execute(); 
89         void help() { m->mothurOut(getHelpString()); }  
90         
91 private:
92     
93     struct linePair {
94                 int start;
95                 int end;
96         };
97         vector<linePair> lines;
98     
99         Tree* createTree(vector< vector<double> >&);
100         void printSims(ostream&, vector< vector<double> >&);
101         int makeSimsShared();
102         vector< vector<double> > makeSimsDist(SparseDistanceMatrix*);
103     int writeTree(string, Tree*);
104     int driver(vector<SharedRAbundVector*>, int, int, vector< vector<seqDist> >&);
105         
106         NameAssignment* nameMap;
107         ListVector* list;
108         CountTable* ct;
109         Tree* t;
110     InputData* input;
111         vector<Calculator*> treeCalculators;
112         vector<SharedRAbundVector*> lookup;
113         string lastLabel;
114         string format, groupNames, filename, sharedfile, countfile, inputfile;
115         int numGroups, subsampleSize, iters, processors;
116         ofstream out;
117         float precision, cutoff;
118
119         bool abort, allLines, subsample;
120         set<string> labels; //holds labels to be used
121         string phylipfile, columnfile, namefile, calc, groups, label, outputDir;
122         vector<string>  Estimators, Groups, outputNames; //holds estimators to be used
123         
124         //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
125         int process(vector<SharedRAbundVector*>);
126         
127         
128
129 };
130
131 /**************************************************************************************************/
132 //custom data structure for threads to use.
133 // This is passed by void pointer so it can be any data type
134 // that can be passed using a single void pointer (LPVOID).
135 struct treeSharedData {
136     vector<SharedRAbundVector*> thisLookup;
137     vector< vector<seqDist> > calcDists;
138     vector<string>  Estimators;
139         unsigned long long start;
140         unsigned long long end;
141         MothurOut* m;
142         
143         treeSharedData(){}
144         treeSharedData(MothurOut* mout, unsigned long long st, unsigned long long en, vector<string> est, vector<SharedRAbundVector*> lu) {
145                 m = mout;
146                 start = st;
147                 end = en;
148         Estimators = est;
149         thisLookup = lu;
150         }
151 };
152 /**************************************************************************************************/
153 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
154 #else
155 static DWORD WINAPI MyTreeSharedThreadFunction(LPVOID lpParam){ 
156         treeSharedData* pDataArray;
157         pDataArray = (treeSharedData*)lpParam;
158         
159         try {
160         
161         vector<Calculator*> treeCalculators;
162         ValidCalculators validCalculator;
163         for (int i=0; i<pDataArray->Estimators.size(); i++) {
164             if (validCalculator.isValidCalculator("matrix", pDataArray->Estimators[i]) == true) { 
165                 if (pDataArray->Estimators[i] == "sharedsobs") { 
166                     treeCalculators.push_back(new SharedSobsCS());
167                 }else if (pDataArray->Estimators[i] == "sharedchao") { 
168                     treeCalculators.push_back(new SharedChao1());
169                 }else if (pDataArray->Estimators[i] == "sharedace") { 
170                     treeCalculators.push_back(new SharedAce());
171                 }else if (pDataArray->Estimators[i] == "jabund") {      
172                     treeCalculators.push_back(new JAbund());
173                 }else if (pDataArray->Estimators[i] == "sorabund") { 
174                     treeCalculators.push_back(new SorAbund());
175                 }else if (pDataArray->Estimators[i] == "jclass") { 
176                     treeCalculators.push_back(new Jclass());
177                 }else if (pDataArray->Estimators[i] == "sorclass") { 
178                     treeCalculators.push_back(new SorClass());
179                 }else if (pDataArray->Estimators[i] == "jest") { 
180                     treeCalculators.push_back(new Jest());
181                 }else if (pDataArray->Estimators[i] == "sorest") { 
182                     treeCalculators.push_back(new SorEst());
183                 }else if (pDataArray->Estimators[i] == "thetayc") { 
184                     treeCalculators.push_back(new ThetaYC());
185                 }else if (pDataArray->Estimators[i] == "thetan") { 
186                     treeCalculators.push_back(new ThetaN());
187                 }else if (pDataArray->Estimators[i] == "kstest") { 
188                     treeCalculators.push_back(new KSTest());
189                 }else if (pDataArray->Estimators[i] == "sharednseqs") { 
190                     treeCalculators.push_back(new SharedNSeqs());
191                 }else if (pDataArray->Estimators[i] == "ochiai") { 
192                     treeCalculators.push_back(new Ochiai());
193                 }else if (pDataArray->Estimators[i] == "anderberg") { 
194                     treeCalculators.push_back(new Anderberg());
195                 }else if (pDataArray->Estimators[i] == "kulczynski") { 
196                     treeCalculators.push_back(new Kulczynski());
197                 }else if (pDataArray->Estimators[i] == "kulczynskicody") { 
198                     treeCalculators.push_back(new KulczynskiCody());
199                 }else if (pDataArray->Estimators[i] == "lennon") { 
200                     treeCalculators.push_back(new Lennon());
201                 }else if (pDataArray->Estimators[i] == "morisitahorn") { 
202                     treeCalculators.push_back(new MorHorn());
203                 }else if (pDataArray->Estimators[i] == "braycurtis") { 
204                     treeCalculators.push_back(new BrayCurtis());
205                 }else if (pDataArray->Estimators[i] == "whittaker") { 
206                     treeCalculators.push_back(new Whittaker());
207                 }else if (pDataArray->Estimators[i] == "odum") { 
208                     treeCalculators.push_back(new Odum());
209                 }else if (pDataArray->Estimators[i] == "canberra") { 
210                     treeCalculators.push_back(new Canberra());
211                 }else if (pDataArray->Estimators[i] == "structeuclidean") { 
212                     treeCalculators.push_back(new StructEuclidean());
213                 }else if (pDataArray->Estimators[i] == "structchord") { 
214                     treeCalculators.push_back(new StructChord());
215                 }else if (pDataArray->Estimators[i] == "hellinger") { 
216                     treeCalculators.push_back(new Hellinger());
217                 }else if (pDataArray->Estimators[i] == "manhattan") { 
218                     treeCalculators.push_back(new Manhattan());
219                 }else if (pDataArray->Estimators[i] == "structpearson") { 
220                     treeCalculators.push_back(new StructPearson());
221                 }else if (pDataArray->Estimators[i] == "soergel") { 
222                     treeCalculators.push_back(new Soergel());
223                 }else if (pDataArray->Estimators[i] == "spearman") { 
224                     treeCalculators.push_back(new Spearman());
225                 }else if (pDataArray->Estimators[i] == "structkulczynski") { 
226                     treeCalculators.push_back(new StructKulczynski());
227                 }else if (pDataArray->Estimators[i] == "speciesprofile") { 
228                     treeCalculators.push_back(new SpeciesProfile());
229                 }else if (pDataArray->Estimators[i] == "hamming") { 
230                     treeCalculators.push_back(new Hamming());
231                 }else if (pDataArray->Estimators[i] == "structchi2") { 
232                     treeCalculators.push_back(new StructChi2());
233                 }else if (pDataArray->Estimators[i] == "gower") { 
234                     treeCalculators.push_back(new Gower());
235                 }else if (pDataArray->Estimators[i] == "memchi2") { 
236                     treeCalculators.push_back(new MemChi2());
237                 }else if (pDataArray->Estimators[i] == "memchord") { 
238                     treeCalculators.push_back(new MemChord());
239                 }else if (pDataArray->Estimators[i] == "memeuclidean") { 
240                     treeCalculators.push_back(new MemEuclidean());
241                 }else if (pDataArray->Estimators[i] == "mempearson") { 
242                     treeCalculators.push_back(new MemPearson());
243                 }
244             }
245         }
246         
247         pDataArray->calcDists.resize(treeCalculators.size());
248         
249                 vector<SharedRAbundVector*> subset;
250                 for (int k = pDataArray->start; k < pDataArray->end; k++) { // pass cdd each set of groups to compare
251                         
252                         for (int l = 0; l < k; l++) {
253                                 
254                                 if (k != l) { //we dont need to similiarity of a groups to itself
255                                         subset.clear(); //clear out old pair of sharedrabunds
256                                         //add new pair of sharedrabunds
257                                         subset.push_back(pDataArray->thisLookup[k]); subset.push_back(pDataArray->thisLookup[l]); 
258                                         
259                                         for(int i=0;i<treeCalculators.size();i++) {
260                                                 
261                                                 //if this calc needs all groups to calculate the pair load all groups
262                                                 if (treeCalculators[i]->getNeedsAll()) { 
263                                                         //load subset with rest of lookup for those calcs that need everyone to calc for a pair
264                                                         for (int w = 0; w < pDataArray->thisLookup.size(); w++) {
265                                                                 if ((w != k) && (w != l)) { subset.push_back(pDataArray->thisLookup[w]); }
266                                                         }
267                                                 }
268                                                 
269                                                 vector<double> tempdata = treeCalculators[i]->getValues(subset); //saves the calculator outputs
270                                                 
271                                                 if (pDataArray->m->control_pressed) { return 1; }
272                                                 
273                                                 seqDist temp(l, k, -(tempdata[0]-1.0));
274                                                 pDataArray->calcDists[i].push_back(temp);
275                                         }
276                                 }
277                         }
278                 }
279         
280         for(int i=0;i<treeCalculators.size();i++){  delete treeCalculators[i]; }
281                 
282                 return 0;
283                 
284         }
285         catch(exception& e) {
286                 pDataArray->m->errorOut(e, "TreeGroupsCommand", "MyTreeSharedThreadFunction");
287                 exit(1);
288         }
289
290 #endif
291
292
293         
294 #endif
295
296