]> git.donarmstrong.com Git - mothur.git/blob - matrixoutputcommand.h
added Jensen-Shannon calc. working on get.communitytype command. fixed bug in get...
[mothur.git] / matrixoutputcommand.h
1 #ifndef MATRIXOUTPUTCOMMAND_H
2 #define MATRIXOUTPUTCOMMAND_H
3
4 /*
5  *  matrixoutputcommand.h
6  *  Mothur
7  *
8  *  Created by Sarah Westcott on 5/20/09.
9  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10  *
11  */ 
12 #include "command.hpp"
13 #include "inputdata.h"
14 #include "groupmap.h"
15 #include "validcalculator.h"
16 #include "sharedsobscollectsummary.h"
17 #include "sharedchao1.h"
18 #include "sharedace.h"
19 #include "sharednseqs.h"
20 #include "sharedjabund.h"
21 #include "sharedsorabund.h"
22 #include "sharedjclass.h"
23 #include "sharedsorclass.h"
24 #include "sharedjest.h"
25 #include "sharedsorest.h"
26 #include "sharedthetayc.h"
27 #include "sharedthetan.h"
28 #include "sharedkstest.h"
29 #include "whittaker.h"
30 #include "sharedochiai.h"
31 #include "sharedanderbergs.h"
32 #include "sharedkulczynski.h"
33 #include "sharedkulczynskicody.h"
34 #include "sharedlennon.h"
35 #include "sharedmorisitahorn.h"
36 #include "sharedbraycurtis.h"
37 #include "sharedjackknife.h"
38 #include "whittaker.h"
39 #include "odum.h"
40 #include "canberra.h"
41 #include "structeuclidean.h"
42 #include "structchord.h"
43 #include "hellinger.h"
44 #include "manhattan.h"
45 #include "structpearson.h"
46 #include "soergel.h"
47 #include "spearman.h"
48 #include "structkulczynski.h"
49 #include "structchi2.h"
50 #include "speciesprofile.h"
51 #include "hamming.h"
52 #include "gower.h"
53 #include "memchi2.h"
54 #include "memchord.h"
55 #include "memeuclidean.h"
56 #include "mempearson.h"
57 #include "sharedjsd.h"
58
59
60 // aka. dist.shared()
61
/* This command creates a distance matrix file for each similarity calculator at each distance level, using various calculators to find the similarity between groups.
        The user can select the labels they wish to use as well as the groups they would like included.
        They can also use as many or as few calculators as they wish. */
65         
66
67 class MatrixOutputCommand : public Command {
68         
69 public:
70         MatrixOutputCommand(string);
71         MatrixOutputCommand();  
72         ~MatrixOutputCommand();
73         
74         vector<string> setParameters();
75         string getCommandName()                 { return "dist.shared";                         }
76         string getCommandCategory()             { return "OTU-Based Approaches";        }
77         
78         string getHelpString(); 
79     string getOutputPattern(string);    
80         string getCitation() { return "http://www.mothur.org/wiki/Dist.shared"; }
81         string getDescription()         { return "generate a distance matrix that describes the dissimilarity among multiple groups"; }
82
83         
84         int execute(); 
85         void help() { m->mothurOut(getHelpString()); }  
86         
87 private:
88         struct linePair {
89                 int start;
90                 int end;
91         };
92         vector<linePair> lines;
93         
94         void printSims(ostream&, vector< vector<double> >&);
95         int process(vector<SharedRAbundVector*>);
96         
97         vector<Calculator*> matrixCalculators;
98         //vector< vector<float> > simMatrix;
99         InputData* input;
100         vector<SharedRAbundVector*> lookup;
101         string exportFileName, output, sharedfile;
102         int numGroups, processors, iters, subsampleSize;
103         ofstream out;
104
105         bool abort, allLines, subsample;
106         set<string> labels; //holds labels to be used
107         string outputFile, calc, groups, label, outputDir, mode;
108         vector<string>  Estimators, Groups, outputNames; //holds estimators to be used
109         int process(vector<SharedRAbundVector*>, string, string);
110         int driver(vector<SharedRAbundVector*>, int, int, vector< vector<seqDist> >&);
111
112 };
113         
114 /**************************************************************************************************/
115 //custom data structure for threads to use.
116 // This is passed by void pointer so it can be any data type
117 // that can be passed using a single void pointer (LPVOID).
118 struct distSharedData {
119     vector<SharedRAbundVector*> thisLookup;
120     vector< vector<seqDist> > calcDists;
121     vector<string>  Estimators;
122         unsigned long long start;
123         unsigned long long end;
124         MothurOut* m;
125     int count;
126         
127         distSharedData(){}
128         distSharedData(MothurOut* mout, unsigned long long st, unsigned long long en, vector<string> est, vector<SharedRAbundVector*> lu) {
129                 m = mout;
130                 start = st;
131                 end = en;
132         Estimators = est;
133         thisLookup = lu;
134         count = 0;
135         }
136 };
137 /**************************************************************************************************/
138 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
139 #else
140 static DWORD WINAPI MyDistSharedThreadFunction(LPVOID lpParam){ 
141         distSharedData* pDataArray;
142         pDataArray = (distSharedData*)lpParam;
143         
144         try {
145         
146         vector<Calculator*> matrixCalculators;
147         ValidCalculators validCalculator;
148         for (int i=0; i<pDataArray->Estimators.size(); i++) {
149             if (validCalculator.isValidCalculator("matrix", pDataArray->Estimators[i]) == true) { 
150                 if (pDataArray->Estimators[i] == "sharedsobs") { 
151                     matrixCalculators.push_back(new SharedSobsCS());
152                 }else if (pDataArray->Estimators[i] == "sharedchao") { 
153                     matrixCalculators.push_back(new SharedChao1());
154                 }else if (pDataArray->Estimators[i] == "sharedace") { 
155                     matrixCalculators.push_back(new SharedAce());
156                 }else if (pDataArray->Estimators[i] == "jabund") {      
157                     matrixCalculators.push_back(new JAbund());
158                 }else if (pDataArray->Estimators[i] == "sorabund") { 
159                     matrixCalculators.push_back(new SorAbund());
160                 }else if (pDataArray->Estimators[i] == "jclass") { 
161                     matrixCalculators.push_back(new Jclass());
162                 }else if (pDataArray->Estimators[i] == "sorclass") { 
163                     matrixCalculators.push_back(new SorClass());
164                 }else if (pDataArray->Estimators[i] == "jest") { 
165                     matrixCalculators.push_back(new Jest());
166                 }else if (pDataArray->Estimators[i] == "sorest") { 
167                     matrixCalculators.push_back(new SorEst());
168                 }else if (pDataArray->Estimators[i] == "thetayc") { 
169                     matrixCalculators.push_back(new ThetaYC());
170                 }else if (pDataArray->Estimators[i] == "thetan") { 
171                     matrixCalculators.push_back(new ThetaN());
172                 }else if (pDataArray->Estimators[i] == "kstest") { 
173                     matrixCalculators.push_back(new KSTest());
174                 }else if (pDataArray->Estimators[i] == "sharednseqs") { 
175                     matrixCalculators.push_back(new SharedNSeqs());
176                 }else if (pDataArray->Estimators[i] == "ochiai") { 
177                     matrixCalculators.push_back(new Ochiai());
178                 }else if (pDataArray->Estimators[i] == "anderberg") { 
179                     matrixCalculators.push_back(new Anderberg());
180                 }else if (pDataArray->Estimators[i] == "kulczynski") { 
181                     matrixCalculators.push_back(new Kulczynski());
182                 }else if (pDataArray->Estimators[i] == "kulczynskicody") { 
183                     matrixCalculators.push_back(new KulczynskiCody());
184                 }else if (pDataArray->Estimators[i] == "lennon") { 
185                     matrixCalculators.push_back(new Lennon());
186                 }else if (pDataArray->Estimators[i] == "morisitahorn") { 
187                     matrixCalculators.push_back(new MorHorn());
188                 }else if (pDataArray->Estimators[i] == "braycurtis") { 
189                     matrixCalculators.push_back(new BrayCurtis());
190                 }else if (pDataArray->Estimators[i] == "whittaker") { 
191                     matrixCalculators.push_back(new Whittaker());
192                 }else if (pDataArray->Estimators[i] == "odum") { 
193                     matrixCalculators.push_back(new Odum());
194                 }else if (pDataArray->Estimators[i] == "canberra") { 
195                     matrixCalculators.push_back(new Canberra());
196                 }else if (pDataArray->Estimators[i] == "structeuclidean") { 
197                     matrixCalculators.push_back(new StructEuclidean());
198                 }else if (pDataArray->Estimators[i] == "structchord") { 
199                     matrixCalculators.push_back(new StructChord());
200                 }else if (pDataArray->Estimators[i] == "hellinger") { 
201                     matrixCalculators.push_back(new Hellinger());
202                 }else if (pDataArray->Estimators[i] == "manhattan") { 
203                     matrixCalculators.push_back(new Manhattan());
204                 }else if (pDataArray->Estimators[i] == "structpearson") { 
205                     matrixCalculators.push_back(new StructPearson());
206                 }else if (pDataArray->Estimators[i] == "soergel") { 
207                     matrixCalculators.push_back(new Soergel());
208                 }else if (pDataArray->Estimators[i] == "spearman") { 
209                     matrixCalculators.push_back(new Spearman());
210                 }else if (pDataArray->Estimators[i] == "structkulczynski") { 
211                     matrixCalculators.push_back(new StructKulczynski());
212                 }else if (pDataArray->Estimators[i] == "speciesprofile") { 
213                     matrixCalculators.push_back(new SpeciesProfile());
214                 }else if (pDataArray->Estimators[i] == "hamming") { 
215                     matrixCalculators.push_back(new Hamming());
216                 }else if (pDataArray->Estimators[i] == "structchi2") { 
217                     matrixCalculators.push_back(new StructChi2());
218                 }else if (pDataArray->Estimators[i] == "gower") { 
219                     matrixCalculators.push_back(new Gower());
220                 }else if (pDataArray->Estimators[i] == "memchi2") { 
221                     matrixCalculators.push_back(new MemChi2());
222                 }else if (pDataArray->Estimators[i] == "memchord") { 
223                     matrixCalculators.push_back(new MemChord());
224                 }else if (pDataArray->Estimators[i] == "memeuclidean") { 
225                     matrixCalculators.push_back(new MemEuclidean());
226                 }else if (pDataArray->Estimators[i] == "mempearson") { 
227                     matrixCalculators.push_back(new MemPearson());
228                 }else if (pDataArray->Estimators[i] == "jsd") {
229                     matrixCalculators.push_back(new JSD());
230                 }
231
232             }
233         }
234         
235         pDataArray->calcDists.resize(matrixCalculators.size());
236                         
237                 vector<SharedRAbundVector*> subset;
238                 for (int k = pDataArray->start; k < pDataArray->end; k++) { // pass cdd each set of groups to compare
239                         pDataArray->count++;
240                         for (int l = 0; l < k; l++) {
241                                 
242                                 if (k != l) { //we dont need to similiarity of a groups to itself
243                                         subset.clear(); //clear out old pair of sharedrabunds
244                                         //add new pair of sharedrabunds
245                                         subset.push_back(pDataArray->thisLookup[k]); subset.push_back(pDataArray->thisLookup[l]); 
246                                         
247                                         for(int i=0;i<matrixCalculators.size();i++) {
248                                                 
249                                                 //if this calc needs all groups to calculate the pair load all groups
250                                                 if (matrixCalculators[i]->getNeedsAll()) { 
251                                                         //load subset with rest of lookup for those calcs that need everyone to calc for a pair
252                                                         for (int w = 0; w < pDataArray->thisLookup.size(); w++) {
253                                                                 if ((w != k) && (w != l)) { subset.push_back(pDataArray->thisLookup[w]); }
254                                                         }
255                                                 }
256                                                 
257                                                 vector<double> tempdata = matrixCalculators[i]->getValues(subset); //saves the calculator outputs
258                                                 
259                                                 if (pDataArray->m->control_pressed) { return 1; }
260                                                 
261                                                 seqDist temp(l, k, tempdata[0]);
262                                                 pDataArray->calcDists[i].push_back(temp);
263                                         }
264                                 }
265                         }
266                 }
267         
268         for(int i=0;i<matrixCalculators.size();i++){  delete matrixCalculators[i]; }
269                 
270                 return 0;
271                 
272         }
273         catch(exception& e) {
274                 pDataArray->m->errorOut(e, "MatrixOutputCommand", "MyDistSharedThreadFunction");
275                 exit(1);
276         }
277
278 #endif
279         
280 #endif
281