8 * Created by Pat Schloss on 12/27/10.
9 * Copyright 2010 Schloss Lab. All rights reserved.
14 #include "command.hpp"
15 #include "globaldata.hpp"
17 class ShhherCommand : public Command {
// Constructor taking a single string by value. The parameter is unnamed in this
// declaration. NOTE(review): presumably the command's option string — confirm
// against the out-of-view definition.
20 ShhherCommand(string);
// The three getters below each return a vector<string> by value; their bodies
// are not defined in this header view.
// NOTE(review): presumably these describe the parameters/files the command
// accepts or requires — verify against the implementation.
23 vector<string> getRequiredParameters();
24 vector<string> getValidParameters();
25 vector<string> getRequiredFiles();
// Returns the outputTypes member. The return type is a map by value, so each
// call hands the caller its own copy rather than a reference to the member.
// NOTE(review): presumably keyed by output type with file names as values —
// confirm against the code that fills outputTypes.
26 map<string, vector<string> > getOutputFiles() { return outputTypes; }
// NOTE(review): raw pointer; ownership and lifetime are not visible in this
// header — confirm before deleting or caching it.
31 GlobalData* globaldata;
// The map that getOutputFiles() returns a copy of.
34 map<string, vector<string> > outputTypes;
// Directory and file names held as strings.
// NOTE(review): presumably populated from the command options — confirm.
36 string outputDir, flowFileName, flowFilesFileName, lookupFileName;
// Numeric settings; their defaults and valid ranges are not visible here.
37 int processors, maxIters;
38 float cutoff, sigma, minDelta;
// NOTE(review): names suggest boundaries used to split sequences / OTUs into
// ranges of work — confirm against the code that fills them.
40 vector<int> nSeqsBreaks;
41 vector<int> nOTUsBreaks;
// Flow data kept in two element types (double and short).
// NOTE(review): the indexing scheme of these flat vectors is not visible here.
42 vector<double> flowDataPrI;
43 vector<short> flowDataIntI;
45 vector<string> seqNameVector;
// NOTE(review): presumably filled by getSingleLookUp() / getJointLookUp() — confirm.
46 vector<double> singleLookUp;
47 vector<double> jointLookUp;
48 map<string, int> nameMap;
// Clustering state. The "tMaster->..." tags in the comments below name the
// corresponding fields of an external reference structure that is not part of
// this file.
50 vector<int> cumNumSeqs;
51 vector<int> nSeqsPerOTU;
52 vector<vector<int> > aaP; // tMaster->aanP: each row is a different OTU; each column contains the sequence indices
53 vector<int> seqNumber; // tMaster->anP: the sequence id number sorted by OTU
54 vector<vector<int> > aaI; // tMaster->aanI: sequence indices that are in each OTU; original author's note: can't differentiate between aaP and aaI
55 vector<int> seqIndex; // tMaster->anI: the index that corresponds to seqNumber
56 vector<double> dist; // adDist: distance of sequences to centroids
57 vector<short> change; // did the centroid sequence change? 0 = no; 1 = yes
58 vector<int> centroids; // the representative flowgram for each cluster m
59 vector<double> weight;
60 vector<double> singleTau; // tMaster->adTau: 1-D Tau vector (1 x numSeqs)
// NOTE(review): names suggest bookkeeping for de-duplicated flowgrams and the
// mapping between sequences and their unique representative — confirm.
61 vector<short> uniqueFlowgrams;
62 vector<int> uniqueCount;
63 vector<int> uniqueLengths;
64 vector<int> mapSeqToUnique;
65 vector<int> mapUniqueToSeq;
// Integer counters; no in-class initializers are given for them on this line.
67 int numSeqs, numUniques, numOTUs, numFlowCells;
// Helper declarations. All parameters are unnamed here and every body is
// defined outside this view, so the notes below are limited to what the
// signatures show; anything further is marked for review.

// NOTE(review): presumably populate singleLookUp / jointLookUp — confirm.
69 void getSingleLookUp();
70 void getJointLookUp();
73 double getProbIntensity(int);
// Takes two ints and returns a float.
// NOTE(review): presumably the two arguments are sequence indices — confirm.
74 float calcPairwiseDist(int, int);
75 void flowDistParentFork(string, int, int);
// The next three each return a string.
// NOTE(review): presumably the name of a file that was written — confirm.
77 string createDistFile(int);
78 string createNamesFile();
79 string cluster(string, string);
81 void getOTUData(string);
82 void initPyroCluster();
85 void calcCentroidsDriver(int, int);
86 double getDistToCentroid(int, int, int);
87 double getNewWeights();
88 double getLikelihood();
89 void checkCentroids();
90 void calcNewDistances();
91 void calcNewDistancesParent(int, int);
// The three vector arguments are non-const references, so the callee is able to
// modify the caller's vectors.
92 void calcNewDistancesChild(int, int, vector<int>&, vector<int>&, vector<double>&);
// The write* helpers take their vector<int> argument by value (copied per call).
// NOTE(review): switching to const& would need the matching change in the
// out-of-view definitions.
96 void writeQualities(vector<int>);
97 void writeSequences(vector<int>);
98 void writeNames(vector<int>);
100 void writeClusters(vector<int>);
// NOTE(review): the MPI suffix suggests these are used only in MPI builds; any
// preprocessor guard around them is not visible in this view — confirm.
104 string flowDistMPI(int, int);
105 void calcNewDistancesChildMPI(int, int, vector<int>&);