8 * Created by Pat Schloss on 12/27/10.
9 * Copyright 2010 Schloss Lab. All rights reserved.
14 #include "command.hpp"
16 //**********************************************************************************************************************
21 #define MIN_WEIGHT 0.1
22 #define MIN_TAU 0.0001
24 //**********************************************************************************************************************
26 class ShhherCommand : public Command {
29 ShhherCommand(string);
33 vector<string> setParameters();
34 string getCommandName() { return "shhh.flows"; }
35 string getCommandCategory() { return "Sequence Processing"; }
36 string getHelpString();
37 string getCitation() { return "http://www.mothur.org/wiki/Shhh.flows"; }
38 string getDescription() { return "shhh.flows"; }
42 void help() { m->mothurOut(getHelpString()); }
47 string outputDir, flowFileName, flowFilesFileName, lookupFileName, compositeFASTAFileName, compositeNamesFileName;
49 int processors, maxIters;
50 float cutoff, sigma, minDelta;
53 vector<int> nSeqsBreaks;
54 vector<int> nOTUsBreaks;
55 vector<double> singleLookUp;
56 vector<double> jointLookUp;
58 vector<string> seqNameVector;
60 vector<short> flowDataIntI;
61 vector<double> flowDataPrI;
62 map<string, int> nameMap;
64 vector<int> cumNumSeqs;
65 vector<int> nSeqsPerOTU;
66 vector<vector<int> > aaP; //tMaster->aanP: each row is a different otu / each col contains the sequence indices
67 vector<vector<int> > aaI; //tMaster->aanI: that are in each otu - can't differentiate between aaP and aaI
68 vector<int> seqNumber; //tMaster->anP: the sequence id number sorted by OTU
69 vector<int> seqIndex; //tMaster->anI; the index that corresponds to seqNumber
70 vector<double> dist; //adDist - distance of sequences to centroids
71 vector<short> change; //did the centroid sequence change? 0 = no; 1 = yes
72 vector<int> centroids; //the representative flowgram for each cluster m
73 vector<double> weight;
74 vector<double> singleTau; //tMaster->adTau: 1-D Tau vector (1xnumSeqs)
75 vector<short> uniqueFlowgrams;
76 vector<int> uniqueCount;
77 vector<int> mapSeqToUnique;
78 vector<int> mapUniqueToSeq;
79 vector<int> uniqueLengths;
81 vector<string> outputNames;
83 int numSeqs, numUniques, numOTUs, numFlowCells;
85 void getSingleLookUp();
86 void getJointLookUp();
89 double getProbIntensity(int);
90 float calcPairwiseDist(int, int);
91 void flowDistParentFork(string, int, int);
93 string createDistFile(int);
94 string createNamesFile();
95 string cluster(string, string);
97 void getOTUData(string);
98 void initPyroCluster();
100 void calcCentroids();
101 void calcCentroidsDriver(int, int);
102 double getDistToCentroid(int, int, int);
103 double getNewWeights();
104 double getLikelihood();
105 void checkCentroids();
106 void calcNewDistances();
107 void calcNewDistancesParent(int, int);
108 void calcNewDistancesChild(int, int, vector<int>&, vector<int>&, vector<double>&);
112 void writeQualities(vector<int>);
113 void writeSequences(vector<int>);
114 void writeNames(vector<int>);
116 void writeClusters(vector<int>);
120 string flowDistMPI(int, int);
121 void calcNewDistancesChildMPI(int, int, vector<int>&);
128 /**************************************************************************************************/
129 //custom data structure for threads to use.
130 // This is passed by void pointer so it can be any data type
131 // that can be passed using a single void pointer (LPVOID).
132 struct flowDistParentForkData {
134 vector<int> mapUniqueToSeq;
135 vector<int> mapSeqToUnique;
137 vector<short> flowDataIntI;
138 vector<double> flowDataPrI;
139 vector<double> jointLookUp;
141 int threadID, startSeq, stopSeq, numFlowCells;
144 flowDistParentForkData(){}
145 flowDistParentForkData(string d, vector<int> mapU, vector<int> mapS, vector<int> l, vector<short> flowD, vector<double> flowDa, vector<double> j, MothurOut* mout, int st, int sp, int n, float cut, int tid) {
147 mapUniqueToSeq = mapU;
148 mapSeqToUnique = mapS;
150 flowDataIntI = flowD;
151 flowDataPrI = flowDa;
162 /**************************************************************************************************/
163 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
165 static DWORD WINAPI MyflowDistParentForkThreadFunction(LPVOID lpParam){
166 flowDistParentForkData* pDataArray;
167 pDataArray = (flowDistParentForkData*)lpParam;
170 ostringstream outStream;
171 outStream.setf(ios::fixed, ios::floatfield);
172 outStream.setf(ios::dec, ios::basefield);
173 outStream.setf(ios::showpoint);
174 outStream.precision(6);
176 int begTime = time(NULL);
177 double begClock = clock();
178 string tempOut = "start and end = " + toString(pDataArray->startSeq) +'\t' + toString(pDataArray->stopSeq) + "-";
179 cout << tempOut << endl;
181 for(int i=pDataArray->startSeq;i<pDataArray->stopSeq;i++){
183 if (pDataArray->m->control_pressed) { break; }
184 cout << "thread i = " << i << endl;
185 for(int j=0;j<i;j++){
187 cout << "thread j = " << j << endl;
188 float flowDistance = 0.0;
189 ////////////////// calcPairwiseDist ///////////////////
190 //needed because this is a static global function that can't see the classes internal functions
192 int minLength = pDataArray->lengths[pDataArray->mapSeqToUnique[pDataArray->mapUniqueToSeq[i]]];
193 if(pDataArray->lengths[pDataArray->mapUniqueToSeq[j]] < minLength){ minLength = pDataArray->lengths[pDataArray->mapSeqToUnique[pDataArray->mapUniqueToSeq[j]]]; }
195 int ANumFlowCells = pDataArray->mapUniqueToSeq[i] * pDataArray->numFlowCells;
196 int BNumFlowCells = pDataArray->mapUniqueToSeq[j] * pDataArray->numFlowCells;
198 for(int k=0;k<minLength;k++){
200 if (pDataArray->m->control_pressed) { break; }
202 int flowAIntI = pDataArray->flowDataIntI[ANumFlowCells + k];
203 float flowAPrI = pDataArray->flowDataPrI[ANumFlowCells + k];
205 int flowBIntI = pDataArray->flowDataIntI[BNumFlowCells + k];
206 float flowBPrI = pDataArray->flowDataPrI[BNumFlowCells + k];
207 flowDistance += pDataArray->jointLookUp[flowAIntI * NUMBINS + flowBIntI] - flowAPrI - flowBPrI;
210 flowDistance /= (float) minLength;
211 //cout << flowDistance << endl;
212 ////////////////// end of calcPairwiseDist ///////////////////
214 if(flowDistance < 1e-6){
215 outStream << pDataArray->mapUniqueToSeq[i] << '\t' << pDataArray->mapUniqueToSeq[j] << '\t' << 0.000000 << endl;
217 else if(flowDistance <= pDataArray->cutoff){
218 outStream << pDataArray->mapUniqueToSeq[i] << '\t' << pDataArray->mapUniqueToSeq[j] << '\t' << flowDistance << endl;
222 pDataArray->m->mothurOut(toString(i) + "\t" + toString(time(NULL) - begTime));
223 pDataArray->m->mothurOut("\t" + toString((clock()-begClock)/CLOCKS_PER_SEC));
224 pDataArray->m->mothurOutEndLine();
228 ofstream distFile(pDataArray->distFileName.c_str());
229 distFile << outStream.str();
232 if (pDataArray->m->control_pressed) {}
234 pDataArray->m->mothurOut(toString(pDataArray->stopSeq-1) + "\t" + toString(time(NULL) - begTime));
235 pDataArray->m->mothurOut("\t" + toString((clock()-begClock)/CLOCKS_PER_SEC));
236 pDataArray->m->mothurOutEndLine();
240 catch(exception& e) {
241 pDataArray->m->errorOut(e, "ShhherCommand", "MyflowDistParentForkThreadFunction");