1 #ifndef MATRIXOUTPUTCOMMAND_H
2 #define MATRIXOUTPUTCOMMAND_H
5 * matrixoutputcommand.h
8 * Created by Sarah Westcott on 5/20/09.
9 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
12 #include "command.hpp"
13 #include "inputdata.h"
15 #include "validcalculator.h"
16 #include "sharedsobscollectsummary.h"
17 #include "sharedchao1.h"
18 #include "sharedace.h"
19 #include "sharednseqs.h"
20 #include "sharedjabund.h"
21 #include "sharedsorabund.h"
22 #include "sharedjclass.h"
23 #include "sharedsorclass.h"
24 #include "sharedjest.h"
25 #include "sharedsorest.h"
26 #include "sharedthetayc.h"
27 #include "sharedthetan.h"
28 #include "sharedkstest.h"
29 #include "whittaker.h"
30 #include "sharedochiai.h"
31 #include "sharedanderbergs.h"
32 #include "sharedkulczynski.h"
33 #include "sharedkulczynskicody.h"
34 #include "sharedlennon.h"
35 #include "sharedmorisitahorn.h"
36 #include "sharedbraycurtis.h"
37 #include "sharedjackknife.h"
38 #include "whittaker.h"
41 #include "structeuclidean.h"
42 #include "structchord.h"
43 #include "hellinger.h"
44 #include "manhattan.h"
45 #include "structpearson.h"
48 #include "structkulczynski.h"
49 #include "structchi2.h"
50 #include "speciesprofile.h"
55 #include "memeuclidean.h"
56 #include "mempearson.h"
57 #include "sharedjsd.h"
58 #include "sharedrjsd.h"
63 /* This command creates a distance matrix for each similarity calculator at distance level, using various calculators to find the similarity between groups.
64 The user can select the labels they wish to use as well as the groups they would like included.
65 They can also use as many or as few calculators as they wish. */
// Command object for "dist.shared": per its own description string it generates a
// distance matrix that describes the dissimilarity among multiple groups.
// NOTE(review): this listing omits some original lines (e.g. access specifiers and the
// closing brace), so member visibility cannot be read from here.
68 class MatrixOutputCommand : public Command {
// Constructor taking a single string — presumably the user's option string; confirm in the .cpp.
71 MatrixOutputCommand(string);
72 MatrixOutputCommand();
73 ~MatrixOutputCommand();
// Metadata accessors describing this command (name, category, help text, output pattern, citation, description).
75 vector<string> setParameters();
76 string getCommandName() { return "dist.shared"; }
77 string getCommandCategory() { return "OTU-Based Approaches"; }
79 string getHelpString();
80 string getOutputPattern(string);
81 string getCitation() { return "http://www.mothur.org/wiki/Dist.shared"; }
82 string getDescription() { return "generate a distance matrix that describes the dissimilarity among multiple groups"; }
// Writes the help string through the MothurOut handle `m` (declared outside this listing).
86 void help() { m->mothurOut(getHelpString()); }
// NOTE(review): presumably the per-process row ranges used to split the work — confirm against the .cpp.
93 vector<linePair> lines;
// Takes an output stream and a matrix of doubles — presumably prints the matrix to the stream; confirm in the .cpp.
95 void printSims(ostream&, vector< vector<double> >&);
96 int process(vector<SharedRAbundVector*>);
// Calculator instances owned as raw pointers.
98 vector<Calculator*> matrixCalculators;
99 //vector< vector<float> > simMatrix;
101 vector<SharedRAbundVector*> lookup;
102 string exportFileName, output, sharedfile;
103 int numGroups, processors, iters, subsampleSize;
106 bool abort, allLines, subsample;
107 set<string> labels; //holds labels to be used
108 string outputFile, calc, groups, label, outputDir, mode;
109 vector<string> Estimators, Groups, outputNames; //holds estimators to be used
// Overload of process() taking two extra strings; their meaning is not visible in this header.
110 int process(vector<SharedRAbundVector*>, string, string);
// NOTE(review): the two ints look like a start/end index range and the last argument like the
// per-calculator distance output, mirroring distSharedData in this file — confirm in the .cpp.
111 int driver(vector<SharedRAbundVector*>, int, int, vector< vector<seqDist> >&);
115 /**************************************************************************************************/
116 //custom data structure for threads to use.
117 // This is passed by void pointer so it can be any data type
118 // that can be passed using a single void pointer (LPVOID).
// Work packet handed to MyDistSharedThreadFunction through its void-pointer parameter.
// Field notes below describe how that function uses each member.
119 struct distSharedData {
// Group abundance vectors; the thread function indexes this by k and l to build each pair,
// and appends the remaining entries when a calculator reports getNeedsAll().
120 vector<SharedRAbundVector*> thisLookup;
// Output: resized to one inner vector per constructed calculator; each inner vector receives
// one seqDist(l, k, value) per compared pair.
121 vector< vector<seqDist> > calcDists;
// Calculator names; each name accepted by ValidCalculators for "matrix" is mapped to a Calculator instance.
122 vector<string> Estimators;
// Half-open range [start, end) of outer-loop indices k handled by this thread.
123 unsigned long long start;
124 unsigned long long end;
// Constructor taking a MothurOut pointer, the index range, the estimator names and the lookup vectors.
// NOTE(review): the constructor body is not visible in this listing — presumably it copies each
// argument into the matching member; confirm against the full file.
129 distSharedData(MothurOut* mout, unsigned long long st, unsigned long long en, vector<string> est, vector<SharedRAbundVector*> lu) {
138 /**************************************************************************************************/
139 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
141 static DWORD WINAPI MyDistSharedThreadFunction(LPVOID lpParam){
142 distSharedData* pDataArray;
143 pDataArray = (distSharedData*)lpParam;
147 vector<Calculator*> matrixCalculators;
148 ValidCalculators validCalculator;
149 for (int i=0; i<pDataArray->Estimators.size(); i++) {
150 if (validCalculator.isValidCalculator("matrix", pDataArray->Estimators[i]) == true) {
151 if (pDataArray->Estimators[i] == "sharedsobs") {
152 matrixCalculators.push_back(new SharedSobsCS());
153 }else if (pDataArray->Estimators[i] == "sharedchao") {
154 matrixCalculators.push_back(new SharedChao1());
155 }else if (pDataArray->Estimators[i] == "sharedace") {
156 matrixCalculators.push_back(new SharedAce());
157 }else if (pDataArray->Estimators[i] == "jabund") {
158 matrixCalculators.push_back(new JAbund());
159 }else if (pDataArray->Estimators[i] == "sorabund") {
160 matrixCalculators.push_back(new SorAbund());
161 }else if (pDataArray->Estimators[i] == "jclass") {
162 matrixCalculators.push_back(new Jclass());
163 }else if (pDataArray->Estimators[i] == "sorclass") {
164 matrixCalculators.push_back(new SorClass());
165 }else if (pDataArray->Estimators[i] == "jest") {
166 matrixCalculators.push_back(new Jest());
167 }else if (pDataArray->Estimators[i] == "sorest") {
168 matrixCalculators.push_back(new SorEst());
169 }else if (pDataArray->Estimators[i] == "thetayc") {
170 matrixCalculators.push_back(new ThetaYC());
171 }else if (pDataArray->Estimators[i] == "thetan") {
172 matrixCalculators.push_back(new ThetaN());
173 }else if (pDataArray->Estimators[i] == "kstest") {
174 matrixCalculators.push_back(new KSTest());
175 }else if (pDataArray->Estimators[i] == "sharednseqs") {
176 matrixCalculators.push_back(new SharedNSeqs());
177 }else if (pDataArray->Estimators[i] == "ochiai") {
178 matrixCalculators.push_back(new Ochiai());
179 }else if (pDataArray->Estimators[i] == "anderberg") {
180 matrixCalculators.push_back(new Anderberg());
181 }else if (pDataArray->Estimators[i] == "kulczynski") {
182 matrixCalculators.push_back(new Kulczynski());
183 }else if (pDataArray->Estimators[i] == "kulczynskicody") {
184 matrixCalculators.push_back(new KulczynskiCody());
185 }else if (pDataArray->Estimators[i] == "lennon") {
186 matrixCalculators.push_back(new Lennon());
187 }else if (pDataArray->Estimators[i] == "morisitahorn") {
188 matrixCalculators.push_back(new MorHorn());
189 }else if (pDataArray->Estimators[i] == "braycurtis") {
190 matrixCalculators.push_back(new BrayCurtis());
191 }else if (pDataArray->Estimators[i] == "whittaker") {
192 matrixCalculators.push_back(new Whittaker());
193 }else if (pDataArray->Estimators[i] == "odum") {
194 matrixCalculators.push_back(new Odum());
195 }else if (pDataArray->Estimators[i] == "canberra") {
196 matrixCalculators.push_back(new Canberra());
197 }else if (pDataArray->Estimators[i] == "structeuclidean") {
198 matrixCalculators.push_back(new StructEuclidean());
199 }else if (pDataArray->Estimators[i] == "structchord") {
200 matrixCalculators.push_back(new StructChord());
201 }else if (pDataArray->Estimators[i] == "hellinger") {
202 matrixCalculators.push_back(new Hellinger());
203 }else if (pDataArray->Estimators[i] == "manhattan") {
204 matrixCalculators.push_back(new Manhattan());
205 }else if (pDataArray->Estimators[i] == "structpearson") {
206 matrixCalculators.push_back(new StructPearson());
207 }else if (pDataArray->Estimators[i] == "soergel") {
208 matrixCalculators.push_back(new Soergel());
209 }else if (pDataArray->Estimators[i] == "spearman") {
210 matrixCalculators.push_back(new Spearman());
211 }else if (pDataArray->Estimators[i] == "structkulczynski") {
212 matrixCalculators.push_back(new StructKulczynski());
213 }else if (pDataArray->Estimators[i] == "speciesprofile") {
214 matrixCalculators.push_back(new SpeciesProfile());
215 }else if (pDataArray->Estimators[i] == "hamming") {
216 matrixCalculators.push_back(new Hamming());
217 }else if (pDataArray->Estimators[i] == "structchi2") {
218 matrixCalculators.push_back(new StructChi2());
219 }else if (pDataArray->Estimators[i] == "gower") {
220 matrixCalculators.push_back(new Gower());
221 }else if (pDataArray->Estimators[i] == "memchi2") {
222 matrixCalculators.push_back(new MemChi2());
223 }else if (pDataArray->Estimators[i] == "memchord") {
224 matrixCalculators.push_back(new MemChord());
225 }else if (pDataArray->Estimators[i] == "memeuclidean") {
226 matrixCalculators.push_back(new MemEuclidean());
227 }else if (pDataArray->Estimators[i] == "mempearson") {
228 matrixCalculators.push_back(new MemPearson());
229 }else if (pDataArray->Estimators[i] == "jsd") {
230 matrixCalculators.push_back(new JSD());
231 }else if (pDataArray->Estimators[i] == "rjsd") {
232 matrixCalculators.push_back(new RJSD());
238 pDataArray->calcDists.resize(matrixCalculators.size());
240 vector<SharedRAbundVector*> subset;
241 for (int k = pDataArray->start; k < pDataArray->end; k++) { // pass cdd each set of groups to compare
243 for (int l = 0; l < k; l++) {
245 if (k != l) { //we dont need to similiarity of a groups to itself
246 subset.clear(); //clear out old pair of sharedrabunds
247 //add new pair of sharedrabunds
248 subset.push_back(pDataArray->thisLookup[k]); subset.push_back(pDataArray->thisLookup[l]);
250 for(int i=0;i<matrixCalculators.size();i++) {
252 //if this calc needs all groups to calculate the pair load all groups
253 if (matrixCalculators[i]->getNeedsAll()) {
254 //load subset with rest of lookup for those calcs that need everyone to calc for a pair
255 for (int w = 0; w < pDataArray->thisLookup.size(); w++) {
256 if ((w != k) && (w != l)) { subset.push_back(pDataArray->thisLookup[w]); }
260 vector<double> tempdata = matrixCalculators[i]->getValues(subset); //saves the calculator outputs
262 if (pDataArray->m->control_pressed) { return 1; }
264 seqDist temp(l, k, tempdata[0]);
265 pDataArray->calcDists[i].push_back(temp);
271 for(int i=0;i<matrixCalculators.size();i++){ delete matrixCalculators[i]; }
276 catch(exception& e) {
277 pDataArray->m->errorOut(e, "MatrixOutputCommand", "MyDistSharedThreadFunction");