1 #ifndef MATRIXOUTPUTCOMMAND_H
2 #define MATRIXOUTPUTCOMMAND_H
5 * matrixoutputcommand.h
8 * Created by Sarah Westcott on 5/20/09.
9 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
12 #include "command.hpp"
13 #include "inputdata.h"
15 #include "validcalculator.h"
16 #include "sharedsobscollectsummary.h"
17 #include "sharedchao1.h"
18 #include "sharedace.h"
19 #include "sharednseqs.h"
20 #include "sharedjabund.h"
21 #include "sharedsorabund.h"
22 #include "sharedjclass.h"
23 #include "sharedsorclass.h"
24 #include "sharedjest.h"
25 #include "sharedsorest.h"
26 #include "sharedthetayc.h"
27 #include "sharedthetan.h"
28 #include "sharedkstest.h"
29 #include "whittaker.h"
30 #include "sharedochiai.h"
31 #include "sharedanderbergs.h"
32 #include "sharedkulczynski.h"
33 #include "sharedkulczynskicody.h"
34 #include "sharedlennon.h"
35 #include "sharedmorisitahorn.h"
36 #include "sharedbraycurtis.h"
37 #include "sharedjackknife.h"
38 #include "whittaker.h"
41 #include "structeuclidean.h"
42 #include "structchord.h"
43 #include "hellinger.h"
44 #include "manhattan.h"
45 #include "structpearson.h"
48 #include "structkulczynski.h"
49 #include "structchi2.h"
50 #include "speciesprofile.h"
55 #include "memeuclidean.h"
56 #include "mempearson.h"
61 /* This command creates a distance matrix file for each similarity calculator at each distance level, using various calculators to find the similarity between groups.
62 The user can select the labels they wish to use as well as the groups they would like included.
63 They can also use as many or as few calculators as they wish. */
// MatrixOutputCommand: Command subclass exposed under the command name "dist.shared".
// Per getDescription() it generates a distance matrix that describes the dissimilarity
// among multiple groups. Only declarations and trivial inline accessors live here; the
// remaining member functions are defined outside this header.
66 class MatrixOutputCommand : public Command {
// Constructors (one taking a single string, one taking nothing) and the destructor.
// NOTE(review): the string argument is presumably the user's option string - confirm in the .cpp.
69 MatrixOutputCommand(string);
70 MatrixOutputCommand();
71 ~MatrixOutputCommand();
// Returns a vector of strings; presumably the parameter names this command accepts - confirm in the .cpp.
73 vector<string> setParameters();
// Fixed-string metadata accessors: command name and category.
74 string getCommandName() { return "dist.shared"; }
75 string getCommandCategory() { return "OTU-Based Approaches"; }
// Takes two strings and returns a string; the meaning of the arguments is not visible in this header.
76 string getOutputFileNameTag(string, string);
// Help text for the command; defined elsewhere.
77 string getHelpString();
// Fixed-string metadata accessors: citation URL and one-line description.
78 string getCitation() { return "http://www.mothur.org/wiki/Dist.shared"; }
79 string getDescription() { return "generate a distance matrix that describes the dissimilarity among multiple groups"; }
// Writes getHelpString() through m->mothurOut(). `m` is not declared among the members
// visible here - presumably inherited from Command.
83 void help() { m->mothurOut(getHelpString()); }
// NOTE(review): linePair is declared elsewhere; presumably one work range per processor - confirm.
90 vector<linePair> lines;
// Takes an output stream and a matrix of doubles by reference; presumably prints the matrix - confirm.
92 void printSims(ostream&, vector< vector<double> >&);
// One of two process() overloads; this one takes only the SharedRAbundVector pointers.
93 int process(vector<SharedRAbundVector*>);
// Calculator objects held by raw pointer; where they are created and deleted is not visible in this header.
95 vector<Calculator*> matrixCalculators;
// Disabled member kept from an earlier version.
96 //vector< vector<float> > simMatrix;
// SharedRAbundVector pointers held by the command; ownership is not visible in this header.
98 vector<SharedRAbundVector*> lookup;
// File-name and output strings; their exact roles are set in the .cpp.
99 string exportFileName, output, sharedfile;
// Counters and settings. NOTE(review): iters and subsampleSize presumably pair with the
// `subsample` flag below - confirm.
100 int numGroups, processors, iters, subsampleSize;
// State flags; their semantics are defined in the .cpp.
103 bool abort, allLines, subsample;
104 set<string> labels; //holds labels to be used
// String settings; presumably the raw option values supplied by the user - confirm.
105 string outputFile, calc, groups, label, outputDir, mode;
106 vector<string> Estimators, Groups, outputNames; //holds estimators to be used
// Second process() overload, taking two additional strings.
107 int process(vector<SharedRAbundVector*>, string, string);
// Fills the vector< vector<seqDist> > passed by reference.
// NOTE(review): the two ints presumably bound a row range like start/end in distSharedData - confirm.
108 int driver(vector<SharedRAbundVector*>, int, int, vector< vector<seqDist> >&);
112 /**************************************************************************************************/
113 //custom data structure for threads to use.
114 // This is passed by void pointer so it can be any data type
115 // that can be passed using a single void pointer (LPVOID).
// Argument/result bundle for MyDistSharedThreadFunction, which receives it as an LPVOID
// and casts it back to distSharedData*.
116 struct distSharedData {
// Vectors to compare pairwise: the thread function pairs thisLookup[k] with thisLookup[l] for l < k.
117 vector<SharedRAbundVector*> thisLookup;
// Output: one list per calculator. The thread function resizes this to the number of
// calculators it built and appends seqDist(l, k, value) to calcDists[i] for calculator i.
118 vector< vector<seqDist> > calcDists;
// Calculator names (e.g. "sharedsobs", "jabund") from which the thread function builds its calculators.
119 vector<string> Estimators;
// Half-open range [start, end) of k indices into thisLookup handled by this thread.
120 unsigned long long start;
121 unsigned long long end;
// Takes a MothurOut pointer, the index range, the estimator names and the lookup vectors
// (both vectors are passed by value). The constructor body is not visible in this view;
// presumably it stores each argument in the matching member - confirm. The thread function
// also reads a member named `m` (pDataArray->m) whose declaration is not visible here.
125 distSharedData(MothurOut* mout, unsigned long long st, unsigned long long en, vector<string> est, vector<SharedRAbundVector*> lu) {
133 /**************************************************************************************************/
134 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
136 static DWORD WINAPI MyDistSharedThreadFunction(LPVOID lpParam){
137 distSharedData* pDataArray;
138 pDataArray = (distSharedData*)lpParam;
142 vector<Calculator*> matrixCalculators;
143 ValidCalculators validCalculator;
144 for (int i=0; i<pDataArray->Estimators.size(); i++) {
145 if (validCalculator.isValidCalculator("matrix", pDataArray->Estimators[i]) == true) {
146 if (pDataArray->Estimators[i] == "sharedsobs") {
147 matrixCalculators.push_back(new SharedSobsCS());
148 }else if (pDataArray->Estimators[i] == "sharedchao") {
149 matrixCalculators.push_back(new SharedChao1());
150 }else if (pDataArray->Estimators[i] == "sharedace") {
151 matrixCalculators.push_back(new SharedAce());
152 }else if (pDataArray->Estimators[i] == "jabund") {
153 matrixCalculators.push_back(new JAbund());
154 }else if (pDataArray->Estimators[i] == "sorabund") {
155 matrixCalculators.push_back(new SorAbund());
156 }else if (pDataArray->Estimators[i] == "jclass") {
157 matrixCalculators.push_back(new Jclass());
158 }else if (pDataArray->Estimators[i] == "sorclass") {
159 matrixCalculators.push_back(new SorClass());
160 }else if (pDataArray->Estimators[i] == "jest") {
161 matrixCalculators.push_back(new Jest());
162 }else if (pDataArray->Estimators[i] == "sorest") {
163 matrixCalculators.push_back(new SorEst());
164 }else if (pDataArray->Estimators[i] == "thetayc") {
165 matrixCalculators.push_back(new ThetaYC());
166 }else if (pDataArray->Estimators[i] == "thetan") {
167 matrixCalculators.push_back(new ThetaN());
168 }else if (pDataArray->Estimators[i] == "kstest") {
169 matrixCalculators.push_back(new KSTest());
170 }else if (pDataArray->Estimators[i] == "sharednseqs") {
171 matrixCalculators.push_back(new SharedNSeqs());
172 }else if (pDataArray->Estimators[i] == "ochiai") {
173 matrixCalculators.push_back(new Ochiai());
174 }else if (pDataArray->Estimators[i] == "anderberg") {
175 matrixCalculators.push_back(new Anderberg());
176 }else if (pDataArray->Estimators[i] == "kulczynski") {
177 matrixCalculators.push_back(new Kulczynski());
178 }else if (pDataArray->Estimators[i] == "kulczynskicody") {
179 matrixCalculators.push_back(new KulczynskiCody());
180 }else if (pDataArray->Estimators[i] == "lennon") {
181 matrixCalculators.push_back(new Lennon());
182 }else if (pDataArray->Estimators[i] == "morisitahorn") {
183 matrixCalculators.push_back(new MorHorn());
184 }else if (pDataArray->Estimators[i] == "braycurtis") {
185 matrixCalculators.push_back(new BrayCurtis());
186 }else if (pDataArray->Estimators[i] == "whittaker") {
187 matrixCalculators.push_back(new Whittaker());
188 }else if (pDataArray->Estimators[i] == "odum") {
189 matrixCalculators.push_back(new Odum());
190 }else if (pDataArray->Estimators[i] == "canberra") {
191 matrixCalculators.push_back(new Canberra());
192 }else if (pDataArray->Estimators[i] == "structeuclidean") {
193 matrixCalculators.push_back(new StructEuclidean());
194 }else if (pDataArray->Estimators[i] == "structchord") {
195 matrixCalculators.push_back(new StructChord());
196 }else if (pDataArray->Estimators[i] == "hellinger") {
197 matrixCalculators.push_back(new Hellinger());
198 }else if (pDataArray->Estimators[i] == "manhattan") {
199 matrixCalculators.push_back(new Manhattan());
200 }else if (pDataArray->Estimators[i] == "structpearson") {
201 matrixCalculators.push_back(new StructPearson());
202 }else if (pDataArray->Estimators[i] == "soergel") {
203 matrixCalculators.push_back(new Soergel());
204 }else if (pDataArray->Estimators[i] == "spearman") {
205 matrixCalculators.push_back(new Spearman());
206 }else if (pDataArray->Estimators[i] == "structkulczynski") {
207 matrixCalculators.push_back(new StructKulczynski());
208 }else if (pDataArray->Estimators[i] == "speciesprofile") {
209 matrixCalculators.push_back(new SpeciesProfile());
210 }else if (pDataArray->Estimators[i] == "hamming") {
211 matrixCalculators.push_back(new Hamming());
212 }else if (pDataArray->Estimators[i] == "structchi2") {
213 matrixCalculators.push_back(new StructChi2());
214 }else if (pDataArray->Estimators[i] == "gower") {
215 matrixCalculators.push_back(new Gower());
216 }else if (pDataArray->Estimators[i] == "memchi2") {
217 matrixCalculators.push_back(new MemChi2());
218 }else if (pDataArray->Estimators[i] == "memchord") {
219 matrixCalculators.push_back(new MemChord());
220 }else if (pDataArray->Estimators[i] == "memeuclidean") {
221 matrixCalculators.push_back(new MemEuclidean());
222 }else if (pDataArray->Estimators[i] == "mempearson") {
223 matrixCalculators.push_back(new MemPearson());
228 pDataArray->calcDists.resize(matrixCalculators.size());
230 vector<SharedRAbundVector*> subset;
231 for (int k = pDataArray->start; k < pDataArray->end; k++) { // pass cdd each set of groups to compare
233 for (int l = 0; l < k; l++) {
235 if (k != l) { //we dont need to similiarity of a groups to itself
236 subset.clear(); //clear out old pair of sharedrabunds
237 //add new pair of sharedrabunds
238 subset.push_back(pDataArray->thisLookup[k]); subset.push_back(pDataArray->thisLookup[l]);
240 for(int i=0;i<matrixCalculators.size();i++) {
242 //if this calc needs all groups to calculate the pair load all groups
243 if (matrixCalculators[i]->getNeedsAll()) {
244 //load subset with rest of lookup for those calcs that need everyone to calc for a pair
245 for (int w = 0; w < pDataArray->thisLookup.size(); w++) {
246 if ((w != k) && (w != l)) { subset.push_back(pDataArray->thisLookup[w]); }
250 vector<double> tempdata = matrixCalculators[i]->getValues(subset); //saves the calculator outputs
252 if (pDataArray->m->control_pressed) { return 1; }
254 seqDist temp(l, k, tempdata[0]);
255 pDataArray->calcDists[i].push_back(temp);
261 for(int i=0;i<matrixCalculators.size();i++){ delete matrixCalculators[i]; }
266 catch(exception& e) {
267 pDataArray->m->errorOut(e, "MatrixOutputCommand", "MyDistSharedThreadFunction");