]> git.donarmstrong.com Git - mothur.git/blob - treegroupscommand.h
added load.logfile command. changed summary.single output for subsample=t.
[mothur.git] / treegroupscommand.h
1 #ifndef TREEGROUPCOMMAND_H
2 #define TREEGROUPCOMMAND_H
3
4 /*
5  *  treegroupscommand.h
6  *  Mothur
7  *
8  *  Created by Sarah Westcott on 4/8/09.
9  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10  *
11  */
12  
13 #include "command.hpp"
14 #include "inputdata.h"
15 #include "groupmap.h"
16 #include "validcalculator.h"
17 #include "tree.h"
18 #include "treemap.h"
19 #include "readmatrix.hpp"
20 #include "readcolumn.h"
21 #include "readphylip.h"
22 #include "sparsematrix.hpp"
23 #include "sharedsobscollectsummary.h"
24 #include "sharedchao1.h"
25 #include "sharedace.h"
26 #include "sharednseqs.h"
27 #include "sharedjabund.h"
28 #include "sharedsorabund.h"
29 #include "sharedjclass.h"
30 #include "sharedsorclass.h"
31 #include "sharedjest.h"
32 #include "sharedsorest.h"
33 #include "sharedthetayc.h"
34 #include "sharedthetan.h"
35 #include "sharedkstest.h"
36 #include "whittaker.h"
37 #include "sharedochiai.h"
38 #include "sharedanderbergs.h"
39 #include "sharedkulczynski.h"
40 #include "sharedkulczynskicody.h"
41 #include "sharedlennon.h"
42 #include "sharedmorisitahorn.h"
43 #include "sharedbraycurtis.h"
44 #include "sharedjackknife.h"
45 #include "whittaker.h"
46 #include "odum.h"
47 #include "canberra.h"
48 #include "structeuclidean.h"
49 #include "structchord.h"
50 #include "hellinger.h"
51 #include "manhattan.h"
52 #include "structpearson.h"
53 #include "soergel.h"
54 #include "spearman.h"
55 #include "structkulczynski.h"
56 #include "structchi2.h"
57 #include "speciesprofile.h"
58 #include "hamming.h"
59 #include "gower.h"
60 #include "memchi2.h"
61 #include "memchord.h"
62 #include "memeuclidean.h"
63 #include "mempearson.h"
64
65
66
67 /* This command creates a tree file for each similarity calculator at distance level, using various calculators to find the similarity between groups. 
68         The user can select the lines or labels they wish to use as well as the groups they would like included.
69         They can also use as many or as few calculators as they wish. */
70         
71
// Shorthand for an iterator over a list of PCell entries (PCell is declared in an included header).
72 typedef list<PCell>::iterator MatData;
73
// Command object behind mothur's "tree.shared" command (see getCommandName()). Per getDescription()
// it generates a tree file that describes the dissimilarity among groups.
// Only the trivial accessors are defined inline here; every other member function is declared
// here and defined outside this header.
74 class TreeGroupCommand : public Command {
75         
76 public:
        // NOTE(review): the string argument is presumably the user's option string -- confirm against the definition.
77         TreeGroupCommand(string);       
78         TreeGroupCommand();
79         ~TreeGroupCommand();
80         
        // Fixed metadata describing the command: name, category, citation URL and one-line description.
81         vector<string> setParameters();
82         string getCommandName()                 { return "tree.shared";                         }
83         string getCommandCategory()             { return "OTU-Based Approaches";        }
84         string getOutputFileNameTag(string, string);
85         string getHelpString(); 
86         string getCitation() { return "http://www.mothur.org/wiki/Tree.shared"; }
87         string getDescription()         { return "generate a tree file that describes the dissimilarity among groups"; }
88
89         
90         int execute(); 
        // Prints the help text through the MothurOut pointer m (m is not declared in this class;
        // presumably inherited from Command -- confirm).
91         void help() { m->mothurOut(getHelpString()); }  
92         
93 private:
94     
        // A [start, end) style pair of integer bounds.
        // NOTE(review): presumably the slice of work handed to each process/thread -- confirm in the .cpp.
95     struct linePair {
96                 int start;
97                 int end;
98         };
99         vector<linePair> lines;
100     
        // Helpers that build, print and write trees from a square matrix of doubles.
101         Tree* createTree(vector< vector<double> >&);
102         void printSims(ostream&, vector< vector<double> >&);
103         int makeSimsShared();
104         vector< vector<double> > makeSimsDist();
105     int writeTree(string, Tree*);
        // NOTE(review): the two ints look like the start/end bounds of the group range and the last
        // argument like the per-calculator output, mirroring MyTreeSharedThreadFunction -- confirm.
106     int driver(vector<SharedRAbundVector*>, int, int, vector< vector<seqDist> >&);
107         
        // Raw pointers to helper objects; ownership and lifetime are not visible from this header.
108         ReadMatrix* readMatrix;
109         SparseMatrix* matrix;
110         NameAssignment* nameMap;
111         ListVector* list;
112         TreeMap* tmap;
113         Tree* t;
114     InputData* input;
115         vector<Calculator*> treeCalculators;
116         vector<SharedRAbundVector*> lookup;
117         string lastLabel;
118         string format, groupNames, filename, sharedfile, inputfile;
119         int numGroups, subsampleSize, iters, processors;
120         ofstream out;
121         float precision, cutoff;
122
123         bool abort, allLines, subsample;
124         set<string> labels; //holds labels to be used
125         string phylipfile, columnfile, namefile, calc, groups, label, outputDir;
126         vector<string>  Estimators, Groups, outputNames; //holds estimators to be used
127         
128         //if the user enters label "0.06" and there is no "0.06" in their file, use the next lowest label.
129         int process(vector<SharedRAbundVector*>);
130         
131         
132
133 };
134
135 /**************************************************************************************************/
136 //custom data structure for threads to use.
137 // This is passed by void pointer so it can be any data type
138 // that can be passed using a single void pointer (LPVOID).
139 struct treeSharedData {
140     vector<SharedRAbundVector*> thisLookup;
141     vector< vector<seqDist> > calcDists;
142     vector<string>  Estimators;
143         unsigned long long start;
144         unsigned long long end;
145         MothurOut* m;
146         
147         treeSharedData(){}
148         treeSharedData(MothurOut* mout, unsigned long long st, unsigned long long en, vector<string> est, vector<SharedRAbundVector*> lu) {
149                 m = mout;
150                 start = st;
151                 end = en;
152         Estimators = est;
153         thisLookup = lu;
154         }
155 };
156 /**************************************************************************************************/
157 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
158 #else
159 static DWORD WINAPI MyTreeSharedThreadFunction(LPVOID lpParam){ 
160         treeSharedData* pDataArray;
161         pDataArray = (treeSharedData*)lpParam;
162         
163         try {
164         
165         vector<Calculator*> treeCalculators;
166         ValidCalculators validCalculator;
167         for (int i=0; i<pDataArray->Estimators.size(); i++) {
168             if (validCalculator.isValidCalculator("matrix", pDataArray->Estimators[i]) == true) { 
169                 if (pDataArray->Estimators[i] == "sharedsobs") { 
170                     treeCalculators.push_back(new SharedSobsCS());
171                 }else if (pDataArray->Estimators[i] == "sharedchao") { 
172                     treeCalculators.push_back(new SharedChao1());
173                 }else if (pDataArray->Estimators[i] == "sharedace") { 
174                     treeCalculators.push_back(new SharedAce());
175                 }else if (pDataArray->Estimators[i] == "jabund") {      
176                     treeCalculators.push_back(new JAbund());
177                 }else if (pDataArray->Estimators[i] == "sorabund") { 
178                     treeCalculators.push_back(new SorAbund());
179                 }else if (pDataArray->Estimators[i] == "jclass") { 
180                     treeCalculators.push_back(new Jclass());
181                 }else if (pDataArray->Estimators[i] == "sorclass") { 
182                     treeCalculators.push_back(new SorClass());
183                 }else if (pDataArray->Estimators[i] == "jest") { 
184                     treeCalculators.push_back(new Jest());
185                 }else if (pDataArray->Estimators[i] == "sorest") { 
186                     treeCalculators.push_back(new SorEst());
187                 }else if (pDataArray->Estimators[i] == "thetayc") { 
188                     treeCalculators.push_back(new ThetaYC());
189                 }else if (pDataArray->Estimators[i] == "thetan") { 
190                     treeCalculators.push_back(new ThetaN());
191                 }else if (pDataArray->Estimators[i] == "kstest") { 
192                     treeCalculators.push_back(new KSTest());
193                 }else if (pDataArray->Estimators[i] == "sharednseqs") { 
194                     treeCalculators.push_back(new SharedNSeqs());
195                 }else if (pDataArray->Estimators[i] == "ochiai") { 
196                     treeCalculators.push_back(new Ochiai());
197                 }else if (pDataArray->Estimators[i] == "anderberg") { 
198                     treeCalculators.push_back(new Anderberg());
199                 }else if (pDataArray->Estimators[i] == "kulczynski") { 
200                     treeCalculators.push_back(new Kulczynski());
201                 }else if (pDataArray->Estimators[i] == "kulczynskicody") { 
202                     treeCalculators.push_back(new KulczynskiCody());
203                 }else if (pDataArray->Estimators[i] == "lennon") { 
204                     treeCalculators.push_back(new Lennon());
205                 }else if (pDataArray->Estimators[i] == "morisitahorn") { 
206                     treeCalculators.push_back(new MorHorn());
207                 }else if (pDataArray->Estimators[i] == "braycurtis") { 
208                     treeCalculators.push_back(new BrayCurtis());
209                 }else if (pDataArray->Estimators[i] == "whittaker") { 
210                     treeCalculators.push_back(new Whittaker());
211                 }else if (pDataArray->Estimators[i] == "odum") { 
212                     treeCalculators.push_back(new Odum());
213                 }else if (pDataArray->Estimators[i] == "canberra") { 
214                     treeCalculators.push_back(new Canberra());
215                 }else if (pDataArray->Estimators[i] == "structeuclidean") { 
216                     treeCalculators.push_back(new StructEuclidean());
217                 }else if (pDataArray->Estimators[i] == "structchord") { 
218                     treeCalculators.push_back(new StructChord());
219                 }else if (pDataArray->Estimators[i] == "hellinger") { 
220                     treeCalculators.push_back(new Hellinger());
221                 }else if (pDataArray->Estimators[i] == "manhattan") { 
222                     treeCalculators.push_back(new Manhattan());
223                 }else if (pDataArray->Estimators[i] == "structpearson") { 
224                     treeCalculators.push_back(new StructPearson());
225                 }else if (pDataArray->Estimators[i] == "soergel") { 
226                     treeCalculators.push_back(new Soergel());
227                 }else if (pDataArray->Estimators[i] == "spearman") { 
228                     treeCalculators.push_back(new Spearman());
229                 }else if (pDataArray->Estimators[i] == "structkulczynski") { 
230                     treeCalculators.push_back(new StructKulczynski());
231                 }else if (pDataArray->Estimators[i] == "speciesprofile") { 
232                     treeCalculators.push_back(new SpeciesProfile());
233                 }else if (pDataArray->Estimators[i] == "hamming") { 
234                     treeCalculators.push_back(new Hamming());
235                 }else if (pDataArray->Estimators[i] == "structchi2") { 
236                     treeCalculators.push_back(new StructChi2());
237                 }else if (pDataArray->Estimators[i] == "gower") { 
238                     treeCalculators.push_back(new Gower());
239                 }else if (pDataArray->Estimators[i] == "memchi2") { 
240                     treeCalculators.push_back(new MemChi2());
241                 }else if (pDataArray->Estimators[i] == "memchord") { 
242                     treeCalculators.push_back(new MemChord());
243                 }else if (pDataArray->Estimators[i] == "memeuclidean") { 
244                     treeCalculators.push_back(new MemEuclidean());
245                 }else if (pDataArray->Estimators[i] == "mempearson") { 
246                     treeCalculators.push_back(new MemPearson());
247                 }
248             }
249         }
250         
251         pDataArray->calcDists.resize(treeCalculators.size());
252         
253                 vector<SharedRAbundVector*> subset;
254                 for (int k = pDataArray->start; k < pDataArray->end; k++) { // pass cdd each set of groups to compare
255                         
256                         for (int l = 0; l < k; l++) {
257                                 
258                                 if (k != l) { //we dont need to similiarity of a groups to itself
259                                         subset.clear(); //clear out old pair of sharedrabunds
260                                         //add new pair of sharedrabunds
261                                         subset.push_back(pDataArray->thisLookup[k]); subset.push_back(pDataArray->thisLookup[l]); 
262                                         
263                                         for(int i=0;i<treeCalculators.size();i++) {
264                                                 
265                                                 //if this calc needs all groups to calculate the pair load all groups
266                                                 if (treeCalculators[i]->getNeedsAll()) { 
267                                                         //load subset with rest of lookup for those calcs that need everyone to calc for a pair
268                                                         for (int w = 0; w < pDataArray->thisLookup.size(); w++) {
269                                                                 if ((w != k) && (w != l)) { subset.push_back(pDataArray->thisLookup[w]); }
270                                                         }
271                                                 }
272                                                 
273                                                 vector<double> tempdata = treeCalculators[i]->getValues(subset); //saves the calculator outputs
274                                                 
275                                                 if (pDataArray->m->control_pressed) { return 1; }
276                                                 
277                                                 seqDist temp(l, k, -(tempdata[0]-1.0));
278                                                 pDataArray->calcDists[i].push_back(temp);
279                                         }
280                                 }
281                         }
282                 }
283         
284         for(int i=0;i<treeCalculators.size();i++){  delete treeCalculators[i]; }
285                 
286                 return 0;
287                 
288         }
289         catch(exception& e) {
290                 pDataArray->m->errorOut(e, "TreeGroupsCommand", "MyTreeSharedThreadFunction");
291                 exit(1);
292         }
293
294 #endif
295
296
297         
298 #endif
299
300