]> git.donarmstrong.com Git - mothur.git/blob - matrixoutputcommand.h
added load.logfile command. changed summary.single output for subsample=t.
[mothur.git] / matrixoutputcommand.h
1 #ifndef MATRIXOUTPUTCOMMAND_H
2 #define MATRIXOUTPUTCOMMAND_H
3
4 /*
5  *  matrixoutputcommand.h
6  *  Mothur
7  *
8  *  Created by Sarah Westcott on 5/20/09.
9  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10  *
11  */ 
12 #include "command.hpp"
13 #include "inputdata.h"
14 #include "groupmap.h"
15 #include "validcalculator.h"
16 #include "sharedsobscollectsummary.h"
17 #include "sharedchao1.h"
18 #include "sharedace.h"
19 #include "sharednseqs.h"
20 #include "sharedjabund.h"
21 #include "sharedsorabund.h"
22 #include "sharedjclass.h"
23 #include "sharedsorclass.h"
24 #include "sharedjest.h"
25 #include "sharedsorest.h"
26 #include "sharedthetayc.h"
27 #include "sharedthetan.h"
28 #include "sharedkstest.h"
29 #include "whittaker.h"
30 #include "sharedochiai.h"
31 #include "sharedanderbergs.h"
32 #include "sharedkulczynski.h"
33 #include "sharedkulczynskicody.h"
34 #include "sharedlennon.h"
35 #include "sharedmorisitahorn.h"
36 #include "sharedbraycurtis.h"
37 #include "sharedjackknife.h"
38 #include "whittaker.h"
39 #include "odum.h"
40 #include "canberra.h"
41 #include "structeuclidean.h"
42 #include "structchord.h"
43 #include "hellinger.h"
44 #include "manhattan.h"
45 #include "structpearson.h"
46 #include "soergel.h"
47 #include "spearman.h"
48 #include "structkulczynski.h"
49 #include "structchi2.h"
50 #include "speciesprofile.h"
51 #include "hamming.h"
52 #include "gower.h"
53 #include "memchi2.h"
54 #include "memchord.h"
55 #include "memeuclidean.h"
56 #include "mempearson.h"
57
58
59 // aka. dist.shared()
60
/* This command creates a distance matrix file for each similarity calculator at each distance level, using various calculators to find the similarity between groups.
	The user can select the labels they wish to use as well as the groups they would like included.
	They can also use as many or as few calculators as they wish. */
64         
65
66 class MatrixOutputCommand : public Command {
67         
68 public:
69         MatrixOutputCommand(string);
70         MatrixOutputCommand();  
71         ~MatrixOutputCommand();
72         
73         vector<string> setParameters();
74         string getCommandName()                 { return "dist.shared";                         }
75         string getCommandCategory()             { return "OTU-Based Approaches";        }
76         string getOutputFileNameTag(string, string);
77         string getHelpString(); 
78         string getCitation() { return "http://www.mothur.org/wiki/Dist.shared"; }
79         string getDescription()         { return "generate a distance matrix that describes the dissimilarity among multiple groups"; }
80
81         
82         int execute(); 
83         void help() { m->mothurOut(getHelpString()); }  
84         
85 private:
86         struct linePair {
87                 int start;
88                 int end;
89         };
90         vector<linePair> lines;
91         
92         void printSims(ostream&, vector< vector<double> >&);
93         int process(vector<SharedRAbundVector*>);
94         
95         vector<Calculator*> matrixCalculators;
96         //vector< vector<float> > simMatrix;
97         InputData* input;
98         vector<SharedRAbundVector*> lookup;
99         string exportFileName, output, sharedfile;
100         int numGroups, processors, iters, subsampleSize;
101         ofstream out;
102
103         bool abort, allLines, subsample;
104         set<string> labels; //holds labels to be used
105         string outputFile, calc, groups, label, outputDir, mode;
106         vector<string>  Estimators, Groups, outputNames; //holds estimators to be used
107         int process(vector<SharedRAbundVector*>, string, string);
108         int driver(vector<SharedRAbundVector*>, int, int, vector< vector<seqDist> >&);
109
110 };
111         
112 /**************************************************************************************************/
113 //custom data structure for threads to use.
114 // This is passed by void pointer so it can be any data type
115 // that can be passed using a single void pointer (LPVOID).
116 struct distSharedData {
117     vector<SharedRAbundVector*> thisLookup;
118     vector< vector<seqDist> > calcDists;
119     vector<string>  Estimators;
120         unsigned long long start;
121         unsigned long long end;
122         MothurOut* m;
123         
124         distSharedData(){}
125         distSharedData(MothurOut* mout, unsigned long long st, unsigned long long en, vector<string> est, vector<SharedRAbundVector*> lu) {
126                 m = mout;
127                 start = st;
128                 end = en;
129         Estimators = est;
130         thisLookup = lu;
131         }
132 };
133 /**************************************************************************************************/
134 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
135 #else
136 static DWORD WINAPI MyDistSharedThreadFunction(LPVOID lpParam){ 
137         distSharedData* pDataArray;
138         pDataArray = (distSharedData*)lpParam;
139         
140         try {
141         
142         vector<Calculator*> matrixCalculators;
143         ValidCalculators validCalculator;
144         for (int i=0; i<pDataArray->Estimators.size(); i++) {
145             if (validCalculator.isValidCalculator("matrix", pDataArray->Estimators[i]) == true) { 
146                 if (pDataArray->Estimators[i] == "sharedsobs") { 
147                     matrixCalculators.push_back(new SharedSobsCS());
148                 }else if (pDataArray->Estimators[i] == "sharedchao") { 
149                     matrixCalculators.push_back(new SharedChao1());
150                 }else if (pDataArray->Estimators[i] == "sharedace") { 
151                     matrixCalculators.push_back(new SharedAce());
152                 }else if (pDataArray->Estimators[i] == "jabund") {      
153                     matrixCalculators.push_back(new JAbund());
154                 }else if (pDataArray->Estimators[i] == "sorabund") { 
155                     matrixCalculators.push_back(new SorAbund());
156                 }else if (pDataArray->Estimators[i] == "jclass") { 
157                     matrixCalculators.push_back(new Jclass());
158                 }else if (pDataArray->Estimators[i] == "sorclass") { 
159                     matrixCalculators.push_back(new SorClass());
160                 }else if (pDataArray->Estimators[i] == "jest") { 
161                     matrixCalculators.push_back(new Jest());
162                 }else if (pDataArray->Estimators[i] == "sorest") { 
163                     matrixCalculators.push_back(new SorEst());
164                 }else if (pDataArray->Estimators[i] == "thetayc") { 
165                     matrixCalculators.push_back(new ThetaYC());
166                 }else if (pDataArray->Estimators[i] == "thetan") { 
167                     matrixCalculators.push_back(new ThetaN());
168                 }else if (pDataArray->Estimators[i] == "kstest") { 
169                     matrixCalculators.push_back(new KSTest());
170                 }else if (pDataArray->Estimators[i] == "sharednseqs") { 
171                     matrixCalculators.push_back(new SharedNSeqs());
172                 }else if (pDataArray->Estimators[i] == "ochiai") { 
173                     matrixCalculators.push_back(new Ochiai());
174                 }else if (pDataArray->Estimators[i] == "anderberg") { 
175                     matrixCalculators.push_back(new Anderberg());
176                 }else if (pDataArray->Estimators[i] == "kulczynski") { 
177                     matrixCalculators.push_back(new Kulczynski());
178                 }else if (pDataArray->Estimators[i] == "kulczynskicody") { 
179                     matrixCalculators.push_back(new KulczynskiCody());
180                 }else if (pDataArray->Estimators[i] == "lennon") { 
181                     matrixCalculators.push_back(new Lennon());
182                 }else if (pDataArray->Estimators[i] == "morisitahorn") { 
183                     matrixCalculators.push_back(new MorHorn());
184                 }else if (pDataArray->Estimators[i] == "braycurtis") { 
185                     matrixCalculators.push_back(new BrayCurtis());
186                 }else if (pDataArray->Estimators[i] == "whittaker") { 
187                     matrixCalculators.push_back(new Whittaker());
188                 }else if (pDataArray->Estimators[i] == "odum") { 
189                     matrixCalculators.push_back(new Odum());
190                 }else if (pDataArray->Estimators[i] == "canberra") { 
191                     matrixCalculators.push_back(new Canberra());
192                 }else if (pDataArray->Estimators[i] == "structeuclidean") { 
193                     matrixCalculators.push_back(new StructEuclidean());
194                 }else if (pDataArray->Estimators[i] == "structchord") { 
195                     matrixCalculators.push_back(new StructChord());
196                 }else if (pDataArray->Estimators[i] == "hellinger") { 
197                     matrixCalculators.push_back(new Hellinger());
198                 }else if (pDataArray->Estimators[i] == "manhattan") { 
199                     matrixCalculators.push_back(new Manhattan());
200                 }else if (pDataArray->Estimators[i] == "structpearson") { 
201                     matrixCalculators.push_back(new StructPearson());
202                 }else if (pDataArray->Estimators[i] == "soergel") { 
203                     matrixCalculators.push_back(new Soergel());
204                 }else if (pDataArray->Estimators[i] == "spearman") { 
205                     matrixCalculators.push_back(new Spearman());
206                 }else if (pDataArray->Estimators[i] == "structkulczynski") { 
207                     matrixCalculators.push_back(new StructKulczynski());
208                 }else if (pDataArray->Estimators[i] == "speciesprofile") { 
209                     matrixCalculators.push_back(new SpeciesProfile());
210                 }else if (pDataArray->Estimators[i] == "hamming") { 
211                     matrixCalculators.push_back(new Hamming());
212                 }else if (pDataArray->Estimators[i] == "structchi2") { 
213                     matrixCalculators.push_back(new StructChi2());
214                 }else if (pDataArray->Estimators[i] == "gower") { 
215                     matrixCalculators.push_back(new Gower());
216                 }else if (pDataArray->Estimators[i] == "memchi2") { 
217                     matrixCalculators.push_back(new MemChi2());
218                 }else if (pDataArray->Estimators[i] == "memchord") { 
219                     matrixCalculators.push_back(new MemChord());
220                 }else if (pDataArray->Estimators[i] == "memeuclidean") { 
221                     matrixCalculators.push_back(new MemEuclidean());
222                 }else if (pDataArray->Estimators[i] == "mempearson") { 
223                     matrixCalculators.push_back(new MemPearson());
224                 }
225             }
226         }
227         
228         pDataArray->calcDists.resize(matrixCalculators.size());
229                         
230                 vector<SharedRAbundVector*> subset;
231                 for (int k = pDataArray->start; k < pDataArray->end; k++) { // pass cdd each set of groups to compare
232                         
233                         for (int l = 0; l < k; l++) {
234                                 
235                                 if (k != l) { //we dont need to similiarity of a groups to itself
236                                         subset.clear(); //clear out old pair of sharedrabunds
237                                         //add new pair of sharedrabunds
238                                         subset.push_back(pDataArray->thisLookup[k]); subset.push_back(pDataArray->thisLookup[l]); 
239                                         
240                                         for(int i=0;i<matrixCalculators.size();i++) {
241                                                 
242                                                 //if this calc needs all groups to calculate the pair load all groups
243                                                 if (matrixCalculators[i]->getNeedsAll()) { 
244                                                         //load subset with rest of lookup for those calcs that need everyone to calc for a pair
245                                                         for (int w = 0; w < pDataArray->thisLookup.size(); w++) {
246                                                                 if ((w != k) && (w != l)) { subset.push_back(pDataArray->thisLookup[w]); }
247                                                         }
248                                                 }
249                                                 
250                                                 vector<double> tempdata = matrixCalculators[i]->getValues(subset); //saves the calculator outputs
251                                                 
252                                                 if (pDataArray->m->control_pressed) { return 1; }
253                                                 
254                                                 seqDist temp(l, k, tempdata[0]);
255                                                 pDataArray->calcDists[i].push_back(temp);
256                                         }
257                                 }
258                         }
259                 }
260         
261         for(int i=0;i<matrixCalculators.size();i++){  delete matrixCalculators[i]; }
262                 
263                 return 0;
264                 
265         }
266         catch(exception& e) {
267                 pDataArray->m->errorOut(e, "MatrixOutputCommand", "MyDistSharedThreadFunction");
268                 exit(1);
269         }
270
271 #endif
272         
273 #endif
274