]> git.donarmstrong.com Git - mothur.git/blob - treegroupscommand.h
d3c1b3e47615fb6d3b342c8d41758aa7b9d9178a
[mothur.git] / treegroupscommand.h
1 #ifndef TREEGROUPCOMMAND_H
2 #define TREEGROUPCOMMAND_H
3
4 /*
5  *  treegroupscommand.h
6  *  Mothur
7  *
8  *  Created by Sarah Westcott on 4/8/09.
9  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10  *
11  */
12  
13 #include "command.hpp"
14 #include "inputdata.h"
15 #include "groupmap.h"
16 #include "validcalculator.h"
17 #include "tree.h"
18 #include "treemap.h"
19 #include "readmatrix.hpp"
20 #include "readcolumn.h"
21 #include "readphylip.h"
22 #include "sparsematrix.hpp"
23 #include "sharedsobscollectsummary.h"
24 #include "sharedchao1.h"
25 #include "sharedace.h"
26 #include "sharednseqs.h"
27 #include "sharedjabund.h"
28 #include "sharedsorabund.h"
29 #include "sharedjclass.h"
30 #include "sharedsorclass.h"
31 #include "sharedjest.h"
32 #include "sharedsorest.h"
33 #include "sharedthetayc.h"
34 #include "sharedthetan.h"
35 #include "sharedkstest.h"
36 #include "whittaker.h"
37 #include "sharedochiai.h"
38 #include "sharedanderbergs.h"
39 #include "sharedkulczynski.h"
40 #include "sharedkulczynskicody.h"
41 #include "sharedlennon.h"
42 #include "sharedmorisitahorn.h"
43 #include "sharedbraycurtis.h"
44 #include "sharedjackknife.h"
45 #include "whittaker.h"
46 #include "odum.h"
47 #include "canberra.h"
48 #include "structeuclidean.h"
49 #include "structchord.h"
50 #include "hellinger.h"
51 #include "manhattan.h"
52 #include "structpearson.h"
53 #include "soergel.h"
54 #include "spearman.h"
55 #include "structkulczynski.h"
56 #include "structchi2.h"
57 #include "speciesprofile.h"
58 #include "hamming.h"
59 #include "gower.h"
60 #include "memchi2.h"
61 #include "memchord.h"
62 #include "memeuclidean.h"
63 #include "mempearson.h"
64
65
66
/* This command creates a tree file for each similarity calculator at distance level, using various calculators to find the similarity between groups.
        The user can select the lines or labels they wish to use as well as the groups they would like included.
        They can also use as many or as few calculators as they wish. */
70         
71
72 typedef list<PCell>::iterator MatData;
73
74 class TreeGroupCommand : public Command {
75         
76 public:
77         TreeGroupCommand(string);       
78         TreeGroupCommand();
79         ~TreeGroupCommand();
80         
81         vector<string> setParameters();
82         string getCommandName()                 { return "tree.shared";                         }
83         string getCommandCategory()             { return "OTU-Based Approaches";        }
84         string getHelpString(); 
85         string getCitation() { return "http://www.mothur.org/wiki/Tree.shared"; }
86         string getDescription()         { return "generate a tree file that describes the dissimilarity among groups"; }
87
88         
89         int execute(); 
90         void help() { m->mothurOut(getHelpString()); }  
91         
92 private:
93     
94     struct linePair {
95                 int start;
96                 int end;
97         };
98         vector<linePair> lines;
99     
100         Tree* createTree(vector< vector<double> >&);
101         void printSims(ostream&, vector< vector<double> >&);
102         int makeSimsShared();
103         vector< vector<double> > makeSimsDist();
104     int writeTree(string, Tree*);
105     int driver(vector<SharedRAbundVector*>, int, int, vector< vector<seqDist> >&);
106         
107         ReadMatrix* readMatrix;
108         SparseMatrix* matrix;
109         NameAssignment* nameMap;
110         ListVector* list;
111         TreeMap* tmap;
112         Tree* t;
113     InputData* input;
114         vector<Calculator*> treeCalculators;
115         vector<SharedRAbundVector*> lookup;
116         string lastLabel;
117         string format, groupNames, filename, sharedfile, inputfile;
118         int numGroups, subsampleSize, iters, processors;
119         ofstream out;
120         float precision, cutoff;
121
122         bool abort, allLines, subsample;
123         set<string> labels; //holds labels to be used
124         string phylipfile, columnfile, namefile, calc, groups, label, outputDir;
125         vector<string>  Estimators, Groups, outputNames; //holds estimators to be used
126         
127         //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
128         int process(vector<SharedRAbundVector*>);
129         
130         
131
132 };
133
134 /**************************************************************************************************/
135 //custom data structure for threads to use.
136 // This is passed by void pointer so it can be any data type
137 // that can be passed using a single void pointer (LPVOID).
138 struct treeSharedData {
139     vector<SharedRAbundVector*> thisLookup;
140     vector< vector<seqDist> > calcDists;
141     vector<string>  Estimators;
142         unsigned long long start;
143         unsigned long long end;
144         MothurOut* m;
145         
146         treeSharedData(){}
147         treeSharedData(MothurOut* mout, unsigned long long st, unsigned long long en, vector<string> est, vector<SharedRAbundVector*> lu) {
148                 m = mout;
149                 start = st;
150                 end = en;
151         Estimators = est;
152         thisLookup = lu;
153         }
154 };
155 /**************************************************************************************************/
156 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
157 #else
158 static DWORD WINAPI MyTreeSharedThreadFunction(LPVOID lpParam){ 
159         treeSharedData* pDataArray;
160         pDataArray = (treeSharedData*)lpParam;
161         
162         try {
163         
164         vector<Calculator*> treeCalculators;
165         ValidCalculators validCalculator;
166         for (int i=0; i<pDataArray->Estimators.size(); i++) {
167             if (validCalculator.isValidCalculator("matrix", pDataArray->Estimators[i]) == true) { 
168                 if (pDataArray->Estimators[i] == "sharedsobs") { 
169                     treeCalculators.push_back(new SharedSobsCS());
170                 }else if (pDataArray->Estimators[i] == "sharedchao") { 
171                     treeCalculators.push_back(new SharedChao1());
172                 }else if (pDataArray->Estimators[i] == "sharedace") { 
173                     treeCalculators.push_back(new SharedAce());
174                 }else if (pDataArray->Estimators[i] == "jabund") {      
175                     treeCalculators.push_back(new JAbund());
176                 }else if (pDataArray->Estimators[i] == "sorabund") { 
177                     treeCalculators.push_back(new SorAbund());
178                 }else if (pDataArray->Estimators[i] == "jclass") { 
179                     treeCalculators.push_back(new Jclass());
180                 }else if (pDataArray->Estimators[i] == "sorclass") { 
181                     treeCalculators.push_back(new SorClass());
182                 }else if (pDataArray->Estimators[i] == "jest") { 
183                     treeCalculators.push_back(new Jest());
184                 }else if (pDataArray->Estimators[i] == "sorest") { 
185                     treeCalculators.push_back(new SorEst());
186                 }else if (pDataArray->Estimators[i] == "thetayc") { 
187                     treeCalculators.push_back(new ThetaYC());
188                 }else if (pDataArray->Estimators[i] == "thetan") { 
189                     treeCalculators.push_back(new ThetaN());
190                 }else if (pDataArray->Estimators[i] == "kstest") { 
191                     treeCalculators.push_back(new KSTest());
192                 }else if (pDataArray->Estimators[i] == "sharednseqs") { 
193                     treeCalculators.push_back(new SharedNSeqs());
194                 }else if (pDataArray->Estimators[i] == "ochiai") { 
195                     treeCalculators.push_back(new Ochiai());
196                 }else if (pDataArray->Estimators[i] == "anderberg") { 
197                     treeCalculators.push_back(new Anderberg());
198                 }else if (pDataArray->Estimators[i] == "kulczynski") { 
199                     treeCalculators.push_back(new Kulczynski());
200                 }else if (pDataArray->Estimators[i] == "kulczynskicody") { 
201                     treeCalculators.push_back(new KulczynskiCody());
202                 }else if (pDataArray->Estimators[i] == "lennon") { 
203                     treeCalculators.push_back(new Lennon());
204                 }else if (pDataArray->Estimators[i] == "morisitahorn") { 
205                     treeCalculators.push_back(new MorHorn());
206                 }else if (pDataArray->Estimators[i] == "braycurtis") { 
207                     treeCalculators.push_back(new BrayCurtis());
208                 }else if (pDataArray->Estimators[i] == "whittaker") { 
209                     treeCalculators.push_back(new Whittaker());
210                 }else if (pDataArray->Estimators[i] == "odum") { 
211                     treeCalculators.push_back(new Odum());
212                 }else if (pDataArray->Estimators[i] == "canberra") { 
213                     treeCalculators.push_back(new Canberra());
214                 }else if (pDataArray->Estimators[i] == "structeuclidean") { 
215                     treeCalculators.push_back(new StructEuclidean());
216                 }else if (pDataArray->Estimators[i] == "structchord") { 
217                     treeCalculators.push_back(new StructChord());
218                 }else if (pDataArray->Estimators[i] == "hellinger") { 
219                     treeCalculators.push_back(new Hellinger());
220                 }else if (pDataArray->Estimators[i] == "manhattan") { 
221                     treeCalculators.push_back(new Manhattan());
222                 }else if (pDataArray->Estimators[i] == "structpearson") { 
223                     treeCalculators.push_back(new StructPearson());
224                 }else if (pDataArray->Estimators[i] == "soergel") { 
225                     treeCalculators.push_back(new Soergel());
226                 }else if (pDataArray->Estimators[i] == "spearman") { 
227                     treeCalculators.push_back(new Spearman());
228                 }else if (pDataArray->Estimators[i] == "structkulczynski") { 
229                     treeCalculators.push_back(new StructKulczynski());
230                 }else if (pDataArray->Estimators[i] == "speciesprofile") { 
231                     treeCalculators.push_back(new SpeciesProfile());
232                 }else if (pDataArray->Estimators[i] == "hamming") { 
233                     treeCalculators.push_back(new Hamming());
234                 }else if (pDataArray->Estimators[i] == "structchi2") { 
235                     treeCalculators.push_back(new StructChi2());
236                 }else if (pDataArray->Estimators[i] == "gower") { 
237                     treeCalculators.push_back(new Gower());
238                 }else if (pDataArray->Estimators[i] == "memchi2") { 
239                     treeCalculators.push_back(new MemChi2());
240                 }else if (pDataArray->Estimators[i] == "memchord") { 
241                     treeCalculators.push_back(new MemChord());
242                 }else if (pDataArray->Estimators[i] == "memeuclidean") { 
243                     treeCalculators.push_back(new MemEuclidean());
244                 }else if (pDataArray->Estimators[i] == "mempearson") { 
245                     treeCalculators.push_back(new MemPearson());
246                 }
247             }
248         }
249         
250         pDataArray->calcDists.resize(treeCalculators.size());
251         
252                 vector<SharedRAbundVector*> subset;
253                 for (int k = pDataArray->start; k < pDataArray->end; k++) { // pass cdd each set of groups to compare
254                         
255                         for (int l = 0; l < k; l++) {
256                                 
257                                 if (k != l) { //we dont need to similiarity of a groups to itself
258                                         subset.clear(); //clear out old pair of sharedrabunds
259                                         //add new pair of sharedrabunds
260                                         subset.push_back(pDataArray->thisLookup[k]); subset.push_back(pDataArray->thisLookup[l]); 
261                                         
262                                         for(int i=0;i<treeCalculators.size();i++) {
263                                                 
264                                                 //if this calc needs all groups to calculate the pair load all groups
265                                                 if (treeCalculators[i]->getNeedsAll()) { 
266                                                         //load subset with rest of lookup for those calcs that need everyone to calc for a pair
267                                                         for (int w = 0; w < pDataArray->thisLookup.size(); w++) {
268                                                                 if ((w != k) && (w != l)) { subset.push_back(pDataArray->thisLookup[w]); }
269                                                         }
270                                                 }
271                                                 
272                                                 vector<double> tempdata = treeCalculators[i]->getValues(subset); //saves the calculator outputs
273                                                 
274                                                 if (pDataArray->m->control_pressed) { return 1; }
275                                                 
276                                                 seqDist temp(l, k, -(tempdata[0]-1.0));
277                                                 pDataArray->calcDists[i].push_back(temp);
278                                         }
279                                 }
280                         }
281                 }
282         
283         for(int i=0;i<treeCalculators.size();i++){  delete treeCalculators[i]; }
284                 
285                 return 0;
286                 
287         }
288         catch(exception& e) {
289                 pDataArray->m->errorOut(e, "TreeGroupsCommand", "MyTreeSharedThreadFunction");
290                 exit(1);
291         }
292
293 #endif
294
295
296         
297 #endif
298
299