8 * Created by Pat Schloss on 12/27/10.
9 * Copyright 2010 Schloss Lab. All rights reserved.
14 #include "command.hpp"
15 #include "globaldata.hpp"
17 class ShhherCommand : public Command {
20 ShhherCommand(string); // constructor taking one string by value; NOTE(review): presumably the user's option string — confirm in the .cpp
23 vector<string> getRequiredParameters(); // returns a list of strings; NOTE(review): presumably the parameter names that must be supplied — confirm
24 vector<string> getValidParameters(); // returns a list of strings; NOTE(review): presumably every parameter name this command accepts — confirm
25 vector<string> getRequiredFiles(); // returns a list of strings; NOTE(review): presumably the input files that must already exist — confirm
26 map<string, vector<string> > getOutputFiles() { return outputTypes; }
31 GlobalData* globaldata; // raw pointer; NOTE(review): ownership and lifetime are not visible in this header — confirm in the .cpp
34 map<string, vector<string> > outputTypes; // table handed back (by value) by getOutputFiles()
36 string outputDir, flowFileName, flowFilesFileName, lookupFileName, compositeFASTAFileName; // NOTE(review): presumably the output directory and the file paths the command works with — confirm
38 int processors, maxIters; // NOTE(review): presumably worker count and iteration cap — confirm
39 float cutoff, sigma, minDelta; // NOTE(review): presumably numeric tuning options; their meaning is set in the .cpp — confirm
41 vector<int> nSeqsBreaks;
42 vector<int> nOTUsBreaks;
43 vector<double> flowDataPrI;
44 vector<short> flowDataIntI;
46 vector<string> seqNameVector;
47 vector<double> singleLookUp; // NOTE(review): presumably filled by getSingleLookUp() — confirm
48 vector<double> jointLookUp; // NOTE(review): presumably filled by getJointLookUp() — confirm
49 map<string, int> nameMap;
51 vector<int> cumNumSeqs;
52 vector<int> nSeqsPerOTU;
53 vector<vector<int> > aaP; //tMaster->aanP: each row is a different otu / each col contains the sequence indices
54 vector<int> seqNumber; //tMaster->anP: the sequence id number sorted by OTU
55 vector<vector<int> > aaI; //tMaster->aanI: that are in each otu - can't differentiate between aaP and aaI
56 vector<int> seqIndex; //tMaster->anI: the index that corresponds to seqNumber
57 vector<double> dist; //adDist - distance of sequences to centroids
58 vector<short> change; //did the centroid sequence change? 0 = no; 1 = yes
59 vector<int> centroids; //the representative flowgram for each cluster m
60 vector<double> weight;
61 vector<double> singleTau; //tMaster->adTau: 1-D Tau vector (1xnumSeqs)
62 vector<short> uniqueFlowgrams; // NOTE(review): presumably the de-duplicated flowgram data — confirm layout in the .cpp
63 vector<int> uniqueCount;
64 vector<int> uniqueLengths;
65 vector<int> mapSeqToUnique; // NOTE(review): presumably sequence index -> unique-flowgram index — confirm
66 vector<int> mapUniqueToSeq; // NOTE(review): presumably unique-flowgram index -> a sequence index — confirm
68 int numSeqs, numUniques, numOTUs, numFlowCells; // NOTE(review): presumably the sizes that the vectors above are dimensioned by — confirm
70 void getSingleLookUp(); // NOTE(review): presumably fills singleLookUp — confirm in the .cpp
71 void getJointLookUp(); // NOTE(review): presumably fills jointLookUp — confirm in the .cpp
74 double getProbIntensity(int);
75 float calcPairwiseDist(int, int); // two ints in, one float out; NOTE(review): presumably two sequence indices and their distance — confirm
76 void flowDistParentFork(string, int, int);
78 string createDistFile(int); // returns a string; NOTE(review): presumably the name of the file it wrote — confirm
79 string createNamesFile(); // returns a string; NOTE(review): presumably the name of the file it wrote — confirm
80 string cluster(string, string); // two strings in, one string out; NOTE(review): presumably file names — confirm
82 void getOTUData(string);
83 void initPyroCluster();
86 void calcCentroidsDriver(int, int); // NOTE(review): the two ints are presumably a start/stop range — confirm
87 double getDistToCentroid(int, int, int);
88 double getNewWeights();
89 double getLikelihood();
90 void checkCentroids();
91 void calcNewDistances();
92 void calcNewDistancesParent(int, int); // NOTE(review): the two ints are presumably a start/stop range — confirm
93 void calcNewDistancesChild(int, int, vector<int>&, vector<int>&, vector<double>&); // the three vectors are non-const references, so the callee is able to modify the caller's objects
97 void writeQualities(vector<int>); // the vector is taken by value
98 void writeSequences(vector<int>); // the vector is taken by value
99 void writeNames(vector<int>); // the vector is taken by value
101 void writeClusters(vector<int>); // the vector is taken by value
105 string flowDistMPI(int, int); // NOTE(review): presumably the MPI-build counterpart of flowDistParentFork; any build guard lies outside the visible lines — confirm
106 void calcNewDistancesChildMPI(int, int, vector<int>&); // NOTE(review): presumably the MPI-build counterpart of calcNewDistancesChild — confirm