]> git.donarmstrong.com Git - mothur.git/blob - shhhercommand.h
working on windows paralellization, added trimOligos class to be used by trim.flows...
[mothur.git] / shhhercommand.h
1 #ifndef SHHHER_H
2 #define SHHHER_H
3
4 /*
5  *  shhher.h
6  *  Mothur
7  *
8  *  Created by Pat Schloss on 12/27/10.
9  *  Copyright 2010 Schloss Lab. All rights reserved.
10  *
11  */
12
13 #include "mothur.h"
14 #include "command.hpp"
15
16 //**********************************************************************************************************************
17
18 #define NUMBINS 1000
19 #define HOMOPS 10
20 #define MIN_COUNT 0.1
21 #define MIN_WEIGHT 0.1
22 #define MIN_TAU 0.0001
23 #define MIN_ITER 10
24 //**********************************************************************************************************************
25
26 class ShhherCommand : public Command {
27         
28 public:
29         ShhherCommand(string);
30         ShhherCommand();
31         ~ShhherCommand() {}
32         
33         vector<string> setParameters();
34         string getCommandName()                 { return "shhh.seqs";   }
35         string getCommandCategory()             { return "Hidden";              }
36         string getHelpString(); 
37         string getCitation() { return "no citation"; }
38         string getDescription()         { return "shhh.seqs"; }
39
40         
41         int execute(); 
42         void help() { m->mothurOut(getHelpString()); }          
43 private:
44         
45         int abort;
46         
47         string outputDir, flowFileName, flowFilesFileName, lookupFileName, compositeFASTAFileName, compositeNamesFileName;
48
49         int processors, maxIters;
50         float cutoff, sigma, minDelta;
51         string flowOrder;
52         
53         vector<int> nSeqsBreaks;
54         vector<int> nOTUsBreaks;
55         vector<double> singleLookUp;
56         vector<double> jointLookUp;
57         
58         vector<string> seqNameVector;
59         vector<int> lengths;
60         vector<short> flowDataIntI;
61         vector<double> flowDataPrI;
62         map<string, int> nameMap;
63         vector<int> otuData;
64         vector<int> cumNumSeqs;
65         vector<int> nSeqsPerOTU;
66         vector<vector<int> > aaP;       //tMaster->aanP:        each row is a different otu / each col contains the sequence indices
67         vector<vector<int> > aaI;       //tMaster->aanI:        that are in each otu - can't differentiate between aaP and aaI 
68         vector<int> seqNumber;          //tMaster->anP:         the sequence id number sorted by OTU
69         vector<int> seqIndex;           //tMaster->anI;         the index that corresponds to seqNumber
70         vector<double> dist;            //adDist - distance of sequences to centroids
71         vector<short> change;           //did the centroid sequence change? 0 = no; 1 = yes
72         vector<int> centroids;          //the representative flowgram for each cluster m
73         vector<double> weight;
74         vector<double> singleTau;       //tMaster->adTau:       1-D Tau vector (1xnumSeqs)
75         vector<short> uniqueFlowgrams;
76         vector<int> uniqueCount;
77         vector<int> mapSeqToUnique;
78         vector<int> mapUniqueToSeq;
79         vector<int> uniqueLengths;
80
81         vector<string> outputNames;
82
83         int numSeqs, numUniques, numOTUs, numFlowCells;
84         
85         void getSingleLookUp();
86         void getJointLookUp();
87         void getFlowData();
88         void getUniques();
89         double getProbIntensity(int);
90         float calcPairwiseDist(int, int);
91         void flowDistParentFork(string, int, int);
92         
93         string createDistFile(int);
94         string createNamesFile();
95         string cluster(string, string);
96         
97         void getOTUData(string);
98         void initPyroCluster();
99         void fill();
100         void calcCentroids();
101         void calcCentroidsDriver(int, int);
102         double getDistToCentroid(int, int, int);
103         double getNewWeights();
104         double getLikelihood();
105         void checkCentroids();
106         void calcNewDistances();
107         void calcNewDistancesParent(int, int);
108         void calcNewDistancesChild(int, int, vector<int>&, vector<int>&, vector<double>&);
109
110
111         void setOTUs();
112         void writeQualities(vector<int>);
113         void writeSequences(vector<int>);
114         void writeNames(vector<int>);
115         void writeGroups();
116         void writeClusters(vector<int>);
117
118         
119 #ifdef USE_MPI
120         string flowDistMPI(int, int);
121         void calcNewDistancesChildMPI(int, int, vector<int>&);
122
123         int pid, ncpus; 
124 #endif
125         
126 };
127
128 /**************************************************************************************************/
129 //custom data structure for threads to use.
130 // This is passed by void pointer so it can be any data type
131 // that can be passed using a single void pointer (LPVOID).
132 typedef struct flowDistParentForkData {
133         string distFileName; 
134         vector<int> mapUniqueToSeq;
135         vector<int> mapSeqToUnique;
136         vector<int> lengths;
137         vector<short> flowDataIntI;
138         vector<double> flowDataPrI;
139         vector<double> jointLookUp;
140         MothurOut* m;
141         int threadID, startSeq, stopSeq, numFlowCells;
142         float cutoff;
143         
144         flowDistParentForkData(){}
145         flowDistParentForkData(string d, vector<int> mapU, vector<int> mapS, vector<int> l, vector<short> flowD, vector<double> flowDa, vector<double> j, MothurOut* mout, int st, int sp, int n, float cut, int tid) {
146                 distFileName = d;
147                 mapUniqueToSeq = mapU;
148                 mapSeqToUnique = mapS;
149                 lengths = l;
150                 flowDataIntI = flowD;
151                 flowDataPrI = flowDa;
152                 jointLookUp = j;
153                 m = mout;
154                 startSeq = st;
155                 stopSeq = sp;
156                 numFlowCells = n;
157                 cutoff= cut;
158                 threadID = tid;
159         }
160 };
161
162 /**************************************************************************************************/
163 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
164 #else
165 static DWORD WINAPI MyflowDistParentForkThreadFunction(LPVOID lpParam){ 
166         flowDistParentForkData* pDataArray;
167         pDataArray = (flowDistParentForkData*)lpParam;
168         
169         try {
170                 ostringstream outStream;
171                 outStream.setf(ios::fixed, ios::floatfield);
172                 outStream.setf(ios::dec, ios::basefield);
173                 outStream.setf(ios::showpoint);
174                 outStream.precision(6);
175                 
176                 int begTime = time(NULL);
177                 double begClock = clock();
178                 string tempOut = "start and end = " + toString(pDataArray->startSeq) +'\t' + toString(pDataArray->stopSeq) + "-";
179                 cout << tempOut << endl;
180         
181                 for(int i=pDataArray->startSeq;i<pDataArray->stopSeq;i++){
182                         
183                         if (pDataArray->m->control_pressed) { break; }
184                         cout << "thread i = " << i << endl;
185                         for(int j=0;j<i;j++){
186                                 
187                                 cout << "thread j = " << j << endl;
188                                 float flowDistance = 0.0;
189                                 ////////////////// calcPairwiseDist ///////////////////
190                                 //needed because this is a static global function that can't see the classes internal functions
191                                 
192                                 int minLength = pDataArray->lengths[pDataArray->mapSeqToUnique[pDataArray->mapUniqueToSeq[i]]];
193                                 if(pDataArray->lengths[pDataArray->mapUniqueToSeq[j]] < minLength){     minLength = pDataArray->lengths[pDataArray->mapSeqToUnique[pDataArray->mapUniqueToSeq[j]]];     }
194                                 
195                                 int ANumFlowCells = pDataArray->mapUniqueToSeq[i] * pDataArray->numFlowCells;
196                                 int BNumFlowCells = pDataArray->mapUniqueToSeq[j] * pDataArray->numFlowCells;
197                                 
198                                 for(int k=0;k<minLength;k++){
199                                         
200                                         if (pDataArray->m->control_pressed) { break; }
201                                         
202                                         int flowAIntI = pDataArray->flowDataIntI[ANumFlowCells + k];
203                                         float flowAPrI = pDataArray->flowDataPrI[ANumFlowCells + k];
204                                         
205                                         int flowBIntI = pDataArray->flowDataIntI[BNumFlowCells + k];
206                                         float flowBPrI = pDataArray->flowDataPrI[BNumFlowCells + k];
207                                         flowDistance += pDataArray->jointLookUp[flowAIntI * NUMBINS + flowBIntI] - flowAPrI - flowBPrI;
208                                 }
209                                 
210                                 flowDistance /= (float) minLength;
211                                 //cout << flowDistance << endl;
212                                 ////////////////// end of calcPairwiseDist ///////////////////
213                                                                 
214                                 if(flowDistance < 1e-6){
215                                         outStream << pDataArray->mapUniqueToSeq[i] << '\t' << pDataArray->mapUniqueToSeq[j] << '\t' << 0.000000 << endl;
216                                 }
217                                 else if(flowDistance <= pDataArray->cutoff){
218                                         outStream << pDataArray->mapUniqueToSeq[i] << '\t' << pDataArray->mapUniqueToSeq[j] << '\t' << flowDistance << endl;
219                                 }
220                         }
221                         if(i % 100 == 0){
222                                 pDataArray->m->mothurOut(toString(i) + "\t" + toString(time(NULL) - begTime));
223                                 pDataArray->m->mothurOut("\t" + toString((clock()-begClock)/CLOCKS_PER_SEC));
224                                 pDataArray->m->mothurOutEndLine();
225                         }
226                 }
227                 
228                 ofstream distFile(pDataArray->distFileName.c_str());
229                 distFile << outStream.str();            
230                 distFile.close();
231                 
232                 if (pDataArray->m->control_pressed) {}
233                 else {
234                         pDataArray->m->mothurOut(toString(pDataArray->stopSeq-1) + "\t" + toString(time(NULL) - begTime));
235                         pDataArray->m->mothurOut("\t" + toString((clock()-begClock)/CLOCKS_PER_SEC));
236                         pDataArray->m->mothurOutEndLine();
237                 }               
238                 
239         }
240         catch(exception& e) {
241                 pDataArray->m->errorOut(e, "ShhherCommand", "MyflowDistParentForkThreadFunction");
242                 exit(1);
243         }
244
245 #endif
246
247
248 #endif
249