]> git.donarmstrong.com Git - mothur.git/blob - treegroupscommand.h
update .gitignore
[mothur.git] / treegroupscommand.h
1 #ifndef TREEGROUPCOMMAND_H
2 #define TREEGROUPCOMMAND_H
3
4 /*
5  *  treegroupscommand.h
6  *  Mothur
7  *
8  *  Created by Sarah Westcott on 4/8/09.
9  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10  *
11  */
12  
13 #include "command.hpp"
14 #include "inputdata.h"
15 #include "groupmap.h"
16 #include "validcalculator.h"
17 #include "tree.h"
18 #include "counttable.h"
19 #include "readmatrix.hpp"
20 #include "readcolumn.h"
21 #include "readphylip.h"
22 #include "sharedsobscollectsummary.h"
23 #include "sharedchao1.h"
24 #include "sharedace.h"
25 #include "sharednseqs.h"
26 #include "sharedjabund.h"
27 #include "sharedsorabund.h"
28 #include "sharedjclass.h"
29 #include "sharedsorclass.h"
30 #include "sharedjest.h"
31 #include "sharedsorest.h"
32 #include "sharedthetayc.h"
33 #include "sharedthetan.h"
34 #include "sharedkstest.h"
35 #include "whittaker.h"
36 #include "sharedochiai.h"
37 #include "sharedanderbergs.h"
38 #include "sharedkulczynski.h"
39 #include "sharedkulczynskicody.h"
40 #include "sharedlennon.h"
41 #include "sharedmorisitahorn.h"
42 #include "sharedbraycurtis.h"
43 #include "sharedjackknife.h"
44 #include "whittaker.h"
45 #include "odum.h"
46 #include "canberra.h"
47 #include "structeuclidean.h"
48 #include "structchord.h"
49 #include "hellinger.h"
50 #include "manhattan.h"
51 #include "structpearson.h"
52 #include "soergel.h"
53 #include "spearman.h"
54 #include "structkulczynski.h"
55 #include "structchi2.h"
56 #include "speciesprofile.h"
57 #include "hamming.h"
58 #include "gower.h"
59 #include "memchi2.h"
60 #include "memchord.h"
61 #include "memeuclidean.h"
62 #include "mempearson.h"
63 #include "sharedrjsd.h"
64 #include "sharedjsd.h"
65
66
67
68 /* This command creates a tree file for each similarity calculator at distance level, using various calculators to find the similarity between groups. 
69         The user can select the lines or labels they wish to use as well as the groups they would like included.
70         They can also use as many or as few calculators as they wish. */
71         
72
73 class TreeGroupCommand : public Command {
74         
75 public:
76         TreeGroupCommand(string);       
77         TreeGroupCommand();
78         ~TreeGroupCommand();
79         
80         vector<string> setParameters();
81         string getCommandName()                 { return "tree.shared";                         }
82         string getCommandCategory()             { return "OTU-Based Approaches";        }
83         
84         string getHelpString(); 
85     string getOutputPattern(string);    
86         string getCitation() { return "http://www.mothur.org/wiki/Tree.shared"; }
87         string getDescription()         { return "generate a tree file that describes the dissimilarity among groups"; }
88
89         
90         int execute(); 
91         void help() { m->mothurOut(getHelpString()); }  
92         
93 private:
94     
95     struct linePair {
96                 int start;
97                 int end;
98         };
99         vector<linePair> lines;
100     
101         Tree* createTree(vector< vector<double> >&);
102         void printSims(ostream&, vector< vector<double> >&);
103         int makeSimsShared();
104         vector< vector<double> > makeSimsDist(SparseDistanceMatrix*);
105     int writeTree(string, Tree*);
106     int driver(vector<SharedRAbundVector*>, int, int, vector< vector<seqDist> >&);
107         
108         NameAssignment* nameMap;
109         ListVector* list;
110         CountTable* ct;
111         Tree* t;
112     InputData* input;
113         vector<Calculator*> treeCalculators;
114         vector<SharedRAbundVector*> lookup;
115         string lastLabel;
116         string format, groupNames, filename, sharedfile, countfile, inputfile;
117         int numGroups, subsampleSize, iters, processors;
118         ofstream out;
119         float precision, cutoff;
120
121         bool abort, allLines, subsample;
122         set<string> labels; //holds labels to be used
123         string phylipfile, columnfile, namefile, calc, groups, label, outputDir;
124         vector<string>  Estimators, Groups, outputNames; //holds estimators to be used
125         
126         //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
127         int process(vector<SharedRAbundVector*>);
128         
129         
130
131 };
132
133 /**************************************************************************************************/
134 //custom data structure for threads to use.
135 // This is passed by void pointer so it can be any data type
136 // that can be passed using a single void pointer (LPVOID).
137 struct treeSharedData {
138     vector<SharedRAbundVector*> thisLookup;
139     vector< vector<seqDist> > calcDists;
140     vector<string>  Estimators;
141         unsigned long long start;
142         unsigned long long end;
143         MothurOut* m;
144     int count;
145         
146         treeSharedData(){}
147         treeSharedData(MothurOut* mout, unsigned long long st, unsigned long long en, vector<string> est, vector<SharedRAbundVector*> lu) {
148                 m = mout;
149                 start = st;
150                 end = en;
151         Estimators = est;
152         thisLookup = lu;
153         count=0;
154         }
155 };
156 /**************************************************************************************************/
157 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
158 #else
159 static DWORD WINAPI MyTreeSharedThreadFunction(LPVOID lpParam){ 
160         treeSharedData* pDataArray;
161         pDataArray = (treeSharedData*)lpParam;
162         
163         try {
164         
165         vector<Calculator*> treeCalculators;
166         ValidCalculators validCalculator;
167         for (int i=0; i<pDataArray->Estimators.size(); i++) {
168             if (validCalculator.isValidCalculator("matrix", pDataArray->Estimators[i]) == true) { 
169                 if (pDataArray->Estimators[i] == "sharedsobs") { 
170                     treeCalculators.push_back(new SharedSobsCS());
171                 }else if (pDataArray->Estimators[i] == "sharedchao") { 
172                     treeCalculators.push_back(new SharedChao1());
173                 }else if (pDataArray->Estimators[i] == "sharedace") { 
174                     treeCalculators.push_back(new SharedAce());
175                 }else if (pDataArray->Estimators[i] == "jabund") {      
176                     treeCalculators.push_back(new JAbund());
177                 }else if (pDataArray->Estimators[i] == "sorabund") { 
178                     treeCalculators.push_back(new SorAbund());
179                 }else if (pDataArray->Estimators[i] == "jclass") { 
180                     treeCalculators.push_back(new Jclass());
181                 }else if (pDataArray->Estimators[i] == "sorclass") { 
182                     treeCalculators.push_back(new SorClass());
183                 }else if (pDataArray->Estimators[i] == "jest") { 
184                     treeCalculators.push_back(new Jest());
185                 }else if (pDataArray->Estimators[i] == "sorest") { 
186                     treeCalculators.push_back(new SorEst());
187                 }else if (pDataArray->Estimators[i] == "thetayc") { 
188                     treeCalculators.push_back(new ThetaYC());
189                 }else if (pDataArray->Estimators[i] == "thetan") { 
190                     treeCalculators.push_back(new ThetaN());
191                 }else if (pDataArray->Estimators[i] == "kstest") { 
192                     treeCalculators.push_back(new KSTest());
193                 }else if (pDataArray->Estimators[i] == "sharednseqs") { 
194                     treeCalculators.push_back(new SharedNSeqs());
195                 }else if (pDataArray->Estimators[i] == "ochiai") { 
196                     treeCalculators.push_back(new Ochiai());
197                 }else if (pDataArray->Estimators[i] == "anderberg") { 
198                     treeCalculators.push_back(new Anderberg());
199                 }else if (pDataArray->Estimators[i] == "kulczynski") { 
200                     treeCalculators.push_back(new Kulczynski());
201                 }else if (pDataArray->Estimators[i] == "kulczynskicody") { 
202                     treeCalculators.push_back(new KulczynskiCody());
203                 }else if (pDataArray->Estimators[i] == "lennon") { 
204                     treeCalculators.push_back(new Lennon());
205                 }else if (pDataArray->Estimators[i] == "morisitahorn") { 
206                     treeCalculators.push_back(new MorHorn());
207                 }else if (pDataArray->Estimators[i] == "braycurtis") { 
208                     treeCalculators.push_back(new BrayCurtis());
209                 }else if (pDataArray->Estimators[i] == "whittaker") { 
210                     treeCalculators.push_back(new Whittaker());
211                 }else if (pDataArray->Estimators[i] == "odum") { 
212                     treeCalculators.push_back(new Odum());
213                 }else if (pDataArray->Estimators[i] == "canberra") { 
214                     treeCalculators.push_back(new Canberra());
215                 }else if (pDataArray->Estimators[i] == "structeuclidean") { 
216                     treeCalculators.push_back(new StructEuclidean());
217                 }else if (pDataArray->Estimators[i] == "structchord") { 
218                     treeCalculators.push_back(new StructChord());
219                 }else if (pDataArray->Estimators[i] == "hellinger") { 
220                     treeCalculators.push_back(new Hellinger());
221                 }else if (pDataArray->Estimators[i] == "manhattan") { 
222                     treeCalculators.push_back(new Manhattan());
223                 }else if (pDataArray->Estimators[i] == "structpearson") { 
224                     treeCalculators.push_back(new StructPearson());
225                 }else if (pDataArray->Estimators[i] == "soergel") { 
226                     treeCalculators.push_back(new Soergel());
227                 }else if (pDataArray->Estimators[i] == "spearman") { 
228                     treeCalculators.push_back(new Spearman());
229                 }else if (pDataArray->Estimators[i] == "structkulczynski") { 
230                     treeCalculators.push_back(new StructKulczynski());
231                 }else if (pDataArray->Estimators[i] == "speciesprofile") { 
232                     treeCalculators.push_back(new SpeciesProfile());
233                 }else if (pDataArray->Estimators[i] == "hamming") { 
234                     treeCalculators.push_back(new Hamming());
235                 }else if (pDataArray->Estimators[i] == "structchi2") { 
236                     treeCalculators.push_back(new StructChi2());
237                 }else if (pDataArray->Estimators[i] == "gower") { 
238                     treeCalculators.push_back(new Gower());
239                 }else if (pDataArray->Estimators[i] == "memchi2") { 
240                     treeCalculators.push_back(new MemChi2());
241                 }else if (pDataArray->Estimators[i] == "memchord") { 
242                     treeCalculators.push_back(new MemChord());
243                 }else if (pDataArray->Estimators[i] == "memeuclidean") { 
244                     treeCalculators.push_back(new MemEuclidean());
245                 }else if (pDataArray->Estimators[i] == "mempearson") { 
246                     treeCalculators.push_back(new MemPearson());
247                 }
248             }
249         }
250         
251         pDataArray->calcDists.resize(treeCalculators.size());
252         
253                 vector<SharedRAbundVector*> subset;
254                 for (int k = pDataArray->start; k < pDataArray->end; k++) { // pass cdd each set of groups to compare
255                         
256             pDataArray->count++;
257             
258                         for (int l = 0; l < k; l++) {
259                                 
260                                 if (k != l) { //we dont need to similiarity of a groups to itself
261                                         subset.clear(); //clear out old pair of sharedrabunds
262                                         //add new pair of sharedrabunds
263                                         subset.push_back(pDataArray->thisLookup[k]); subset.push_back(pDataArray->thisLookup[l]); 
264                                         
265                                         for(int i=0;i<treeCalculators.size();i++) {
266                                                 
267                                                 //if this calc needs all groups to calculate the pair load all groups
268                                                 if (treeCalculators[i]->getNeedsAll()) { 
269                                                         //load subset with rest of lookup for those calcs that need everyone to calc for a pair
270                                                         for (int w = 0; w < pDataArray->thisLookup.size(); w++) {
271                                                                 if ((w != k) && (w != l)) { subset.push_back(pDataArray->thisLookup[w]); }
272                                                         }
273                                                 }
274                                                 
275                                                 vector<double> tempdata = treeCalculators[i]->getValues(subset); //saves the calculator outputs
276                                                 
277                                                 if (pDataArray->m->control_pressed) { return 1; }
278                                                 
279                                                 seqDist temp(l, k, -(tempdata[0]-1.0));
280                                                 pDataArray->calcDists[i].push_back(temp);
281                                         }
282                                 }
283                         }
284                 }
285         
286         for(int i=0;i<treeCalculators.size();i++){  delete treeCalculators[i]; }
287                 
288                 return 0;
289                 
290         }
291         catch(exception& e) {
292                 pDataArray->m->errorOut(e, "TreeGroupsCommand", "MyTreeSharedThreadFunction");
293                 exit(1);
294         }
295
296 #endif
297
298
299         
300 #endif
301
302