]> git.donarmstrong.com Git - mothur.git/blob - matrixoutputcommand.h
update .gitignore
[mothur.git] / matrixoutputcommand.h
1 #ifndef MATRIXOUTPUTCOMMAND_H
2 #define MATRIXOUTPUTCOMMAND_H
3
4 /*
5  *  matrixoutputcommand.h
6  *  Mothur
7  *
8  *  Created by Sarah Westcott on 5/20/09.
9  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10  *
11  */ 
12 #include "command.hpp"
13 #include "inputdata.h"
14 #include "groupmap.h"
15 #include "validcalculator.h"
16 #include "sharedsobscollectsummary.h"
17 #include "sharedchao1.h"
18 #include "sharedace.h"
19 #include "sharednseqs.h"
20 #include "sharedjabund.h"
21 #include "sharedsorabund.h"
22 #include "sharedjclass.h"
23 #include "sharedsorclass.h"
24 #include "sharedjest.h"
25 #include "sharedsorest.h"
26 #include "sharedthetayc.h"
27 #include "sharedthetan.h"
28 #include "sharedkstest.h"
29 #include "whittaker.h"
30 #include "sharedochiai.h"
31 #include "sharedanderbergs.h"
32 #include "sharedkulczynski.h"
33 #include "sharedkulczynskicody.h"
34 #include "sharedlennon.h"
35 #include "sharedmorisitahorn.h"
36 #include "sharedbraycurtis.h"
37 #include "sharedjackknife.h"
38 #include "whittaker.h"
39 #include "odum.h"
40 #include "canberra.h"
41 #include "structeuclidean.h"
42 #include "structchord.h"
43 #include "hellinger.h"
44 #include "manhattan.h"
45 #include "structpearson.h"
46 #include "soergel.h"
47 #include "spearman.h"
48 #include "structkulczynski.h"
49 #include "structchi2.h"
50 #include "speciesprofile.h"
51 #include "hamming.h"
52 #include "gower.h"
53 #include "memchi2.h"
54 #include "memchord.h"
55 #include "memeuclidean.h"
56 #include "mempearson.h"
57 #include "sharedjsd.h"
58 #include "sharedrjsd.h"
59
60
61 // aka. dist.shared()
62
/* This command creates a distance matrix for each similarity calculator at each distance level, using various calculators to find the similarity between groups.
        The user can select the labels they wish to use as well as the groups they would like included.
        They can also use as many or as few calculators as they wish. */
66         
67
68 class MatrixOutputCommand : public Command {
69         
70 public:
71         MatrixOutputCommand(string);
72         MatrixOutputCommand();  
73         ~MatrixOutputCommand();
74         
75         vector<string> setParameters();
76         string getCommandName()                 { return "dist.shared";                         }
77         string getCommandCategory()             { return "OTU-Based Approaches";        }
78         
79         string getHelpString(); 
80     string getOutputPattern(string);    
81         string getCitation() { return "http://www.mothur.org/wiki/Dist.shared"; }
82         string getDescription()         { return "generate a distance matrix that describes the dissimilarity among multiple groups"; }
83
84         
85         int execute(); 
86         void help() { m->mothurOut(getHelpString()); }  
87         
88 private:
89         struct linePair {
90                 int start;
91                 int end;
92         };
93         vector<linePair> lines;
94         
95         void printSims(ostream&, vector< vector<double> >&);
96         int process(vector<SharedRAbundVector*>);
97         
98         vector<Calculator*> matrixCalculators;
99         //vector< vector<float> > simMatrix;
100         InputData* input;
101         vector<SharedRAbundVector*> lookup;
102         string exportFileName, output, sharedfile;
103         int numGroups, processors, iters, subsampleSize;
104         ofstream out;
105
106         bool abort, allLines, subsample;
107         set<string> labels; //holds labels to be used
108         string outputFile, calc, groups, label, outputDir, mode;
109         vector<string>  Estimators, Groups, outputNames; //holds estimators to be used
110         int process(vector<SharedRAbundVector*>, string, string);
111         int driver(vector<SharedRAbundVector*>, int, int, vector< vector<seqDist> >&);
112
113 };
114         
115 /**************************************************************************************************/
116 //custom data structure for threads to use.
117 // This is passed by void pointer so it can be any data type
118 // that can be passed using a single void pointer (LPVOID).
119 struct distSharedData {
120     vector<SharedRAbundVector*> thisLookup;
121     vector< vector<seqDist> > calcDists;
122     vector<string>  Estimators;
123         unsigned long long start;
124         unsigned long long end;
125         MothurOut* m;
126     int count;
127         
128         distSharedData(){}
129         distSharedData(MothurOut* mout, unsigned long long st, unsigned long long en, vector<string> est, vector<SharedRAbundVector*> lu) {
130                 m = mout;
131                 start = st;
132                 end = en;
133         Estimators = est;
134         thisLookup = lu;
135         count = 0;
136         }
137 };
138 /**************************************************************************************************/
139 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
140 #else
141 static DWORD WINAPI MyDistSharedThreadFunction(LPVOID lpParam){ 
142         distSharedData* pDataArray;
143         pDataArray = (distSharedData*)lpParam;
144         
145         try {
146         
147         vector<Calculator*> matrixCalculators;
148         ValidCalculators validCalculator;
149         for (int i=0; i<pDataArray->Estimators.size(); i++) {
150             if (validCalculator.isValidCalculator("matrix", pDataArray->Estimators[i]) == true) { 
151                 if (pDataArray->Estimators[i] == "sharedsobs") { 
152                     matrixCalculators.push_back(new SharedSobsCS());
153                 }else if (pDataArray->Estimators[i] == "sharedchao") { 
154                     matrixCalculators.push_back(new SharedChao1());
155                 }else if (pDataArray->Estimators[i] == "sharedace") { 
156                     matrixCalculators.push_back(new SharedAce());
157                 }else if (pDataArray->Estimators[i] == "jabund") {      
158                     matrixCalculators.push_back(new JAbund());
159                 }else if (pDataArray->Estimators[i] == "sorabund") { 
160                     matrixCalculators.push_back(new SorAbund());
161                 }else if (pDataArray->Estimators[i] == "jclass") { 
162                     matrixCalculators.push_back(new Jclass());
163                 }else if (pDataArray->Estimators[i] == "sorclass") { 
164                     matrixCalculators.push_back(new SorClass());
165                 }else if (pDataArray->Estimators[i] == "jest") { 
166                     matrixCalculators.push_back(new Jest());
167                 }else if (pDataArray->Estimators[i] == "sorest") { 
168                     matrixCalculators.push_back(new SorEst());
169                 }else if (pDataArray->Estimators[i] == "thetayc") { 
170                     matrixCalculators.push_back(new ThetaYC());
171                 }else if (pDataArray->Estimators[i] == "thetan") { 
172                     matrixCalculators.push_back(new ThetaN());
173                 }else if (pDataArray->Estimators[i] == "kstest") { 
174                     matrixCalculators.push_back(new KSTest());
175                 }else if (pDataArray->Estimators[i] == "sharednseqs") { 
176                     matrixCalculators.push_back(new SharedNSeqs());
177                 }else if (pDataArray->Estimators[i] == "ochiai") { 
178                     matrixCalculators.push_back(new Ochiai());
179                 }else if (pDataArray->Estimators[i] == "anderberg") { 
180                     matrixCalculators.push_back(new Anderberg());
181                 }else if (pDataArray->Estimators[i] == "kulczynski") { 
182                     matrixCalculators.push_back(new Kulczynski());
183                 }else if (pDataArray->Estimators[i] == "kulczynskicody") { 
184                     matrixCalculators.push_back(new KulczynskiCody());
185                 }else if (pDataArray->Estimators[i] == "lennon") { 
186                     matrixCalculators.push_back(new Lennon());
187                 }else if (pDataArray->Estimators[i] == "morisitahorn") { 
188                     matrixCalculators.push_back(new MorHorn());
189                 }else if (pDataArray->Estimators[i] == "braycurtis") { 
190                     matrixCalculators.push_back(new BrayCurtis());
191                 }else if (pDataArray->Estimators[i] == "whittaker") { 
192                     matrixCalculators.push_back(new Whittaker());
193                 }else if (pDataArray->Estimators[i] == "odum") { 
194                     matrixCalculators.push_back(new Odum());
195                 }else if (pDataArray->Estimators[i] == "canberra") { 
196                     matrixCalculators.push_back(new Canberra());
197                 }else if (pDataArray->Estimators[i] == "structeuclidean") { 
198                     matrixCalculators.push_back(new StructEuclidean());
199                 }else if (pDataArray->Estimators[i] == "structchord") { 
200                     matrixCalculators.push_back(new StructChord());
201                 }else if (pDataArray->Estimators[i] == "hellinger") { 
202                     matrixCalculators.push_back(new Hellinger());
203                 }else if (pDataArray->Estimators[i] == "manhattan") { 
204                     matrixCalculators.push_back(new Manhattan());
205                 }else if (pDataArray->Estimators[i] == "structpearson") { 
206                     matrixCalculators.push_back(new StructPearson());
207                 }else if (pDataArray->Estimators[i] == "soergel") { 
208                     matrixCalculators.push_back(new Soergel());
209                 }else if (pDataArray->Estimators[i] == "spearman") { 
210                     matrixCalculators.push_back(new Spearman());
211                 }else if (pDataArray->Estimators[i] == "structkulczynski") { 
212                     matrixCalculators.push_back(new StructKulczynski());
213                 }else if (pDataArray->Estimators[i] == "speciesprofile") { 
214                     matrixCalculators.push_back(new SpeciesProfile());
215                 }else if (pDataArray->Estimators[i] == "hamming") { 
216                     matrixCalculators.push_back(new Hamming());
217                 }else if (pDataArray->Estimators[i] == "structchi2") { 
218                     matrixCalculators.push_back(new StructChi2());
219                 }else if (pDataArray->Estimators[i] == "gower") { 
220                     matrixCalculators.push_back(new Gower());
221                 }else if (pDataArray->Estimators[i] == "memchi2") { 
222                     matrixCalculators.push_back(new MemChi2());
223                 }else if (pDataArray->Estimators[i] == "memchord") { 
224                     matrixCalculators.push_back(new MemChord());
225                 }else if (pDataArray->Estimators[i] == "memeuclidean") { 
226                     matrixCalculators.push_back(new MemEuclidean());
227                 }else if (pDataArray->Estimators[i] == "mempearson") { 
228                     matrixCalculators.push_back(new MemPearson());
229                 }else if (pDataArray->Estimators[i] == "jsd") {
230                     matrixCalculators.push_back(new JSD());
231                 }else if (pDataArray->Estimators[i] == "rjsd") {
232                     matrixCalculators.push_back(new RJSD());
233                 }
234
235             }
236         }
237         
238         pDataArray->calcDists.resize(matrixCalculators.size());
239                         
240                 vector<SharedRAbundVector*> subset;
241                 for (int k = pDataArray->start; k < pDataArray->end; k++) { // pass cdd each set of groups to compare
242                         pDataArray->count++;
243                         for (int l = 0; l < k; l++) {
244                                 
245                                 if (k != l) { //we dont need to similiarity of a groups to itself
246                                         subset.clear(); //clear out old pair of sharedrabunds
247                                         //add new pair of sharedrabunds
248                                         subset.push_back(pDataArray->thisLookup[k]); subset.push_back(pDataArray->thisLookup[l]); 
249                                         
250                                         for(int i=0;i<matrixCalculators.size();i++) {
251                                                 
252                                                 //if this calc needs all groups to calculate the pair load all groups
253                                                 if (matrixCalculators[i]->getNeedsAll()) { 
254                                                         //load subset with rest of lookup for those calcs that need everyone to calc for a pair
255                                                         for (int w = 0; w < pDataArray->thisLookup.size(); w++) {
256                                                                 if ((w != k) && (w != l)) { subset.push_back(pDataArray->thisLookup[w]); }
257                                                         }
258                                                 }
259                                                 
260                                                 vector<double> tempdata = matrixCalculators[i]->getValues(subset); //saves the calculator outputs
261                                                 
262                                                 if (pDataArray->m->control_pressed) { return 1; }
263                                                 
264                                                 seqDist temp(l, k, tempdata[0]);
265                                                 pDataArray->calcDists[i].push_back(temp);
266                                         }
267                                 }
268                         }
269                 }
270         
271         for(int i=0;i<matrixCalculators.size();i++){  delete matrixCalculators[i]; }
272                 
273                 return 0;
274                 
275         }
276         catch(exception& e) {
277                 pDataArray->m->errorOut(e, "MatrixOutputCommand", "MyDistSharedThreadFunction");
278                 exit(1);
279         }
280
281 #endif
282         
283 #endif
284