8 * Created by Pat Schloss on 12/27/10.
9 * Copyright 2010 Schloss Lab. All rights reserved.
14 #include "command.hpp"
15 #include "globaldata.hpp"
17 class ShhherCommand : public Command {
// Constructor taking a single string argument (presumably the user's option string, as with other
// Command subclasses - TODO confirm against the definition in the .cpp).
20 ShhherCommand(string);
// Introspection hooks, each returning a list of names by value. NOTE(review): these look like
// overrides of the Command base-class interface - confirm in command.hpp.
23 vector<string> getRequiredParameters();
24 vector<string> getValidParameters();
25 vector<string> getRequiredFiles();
// Returns outputTypes by value, i.e. the caller receives a copy of the map.
26 map<string, vector<string> > getOutputFiles() { return outputTypes; }
// Raw pointer to a GlobalData object (type presumably declared in globaldata.hpp); who owns or
// frees it is not visible from this header.
31 GlobalData* globaldata;
// Map handed back (as a copy) by getOutputFiles(): string key -> list of strings.
// Presumably output type -> output file names; verify where it is filled in the .cpp.
34 map<string, vector<string> > outputTypes;
// Output directory and the names of the input files the command works on.
// NOTE(review): presumably set from the command's parameters - confirm in the constructor.
36 string outputDir, flowFileName, flowFilesFileName, lookupFileName, compositeFASTAFileName;
// Numeric settings. By name: processors = degree of parallelism, maxIters = iteration cap,
// cutoff/sigma/minDelta = algorithm tuning values. Defaults and units are not visible here.
38 int processors, maxIters;
39 float cutoff, sigma, minDelta;
// NOTE(review): these four vectors are undocumented in the header. From the names, nSeqsBreaks and
// nOTUsBreaks look like boundaries used to split sequences / OTUs into chunks of work, and
// singleLookUp / jointLookUp look like tables built by getSingleLookUp() / getJointLookUp().
// Confirm against the .cpp before relying on this.
41 vector<int> nSeqsBreaks;
42 vector<int> nOTUsBreaks;
43 vector<double> singleLookUp;
44 vector<double> jointLookUp;
// NOTE(review): members below that have no trailing comment are undocumented in this header; their
// exact contents and layout are not established here - check the .cpp before relying on the names.
46 vector<string> seqNameVector;
48 vector<short> flowDataIntI;
49 vector<double> flowDataPrI;
50 map<string, int> nameMap;
52 vector<int> cumNumSeqs;
53 vector<int> nSeqsPerOTU;
54 vector<vector<int> > aaP; //tMaster->aanP: each row is a different OTU; each column holds the index of a sequence in that OTU
55 vector<vector<int> > aaI; //tMaster->aanI: described the same way as aaP - the original note says aaP and aaI can't be differentiated
56 vector<int> seqNumber; //tMaster->anP: the sequence id number sorted by OTU
57 vector<int> seqIndex; //tMaster->anI: the index that corresponds to seqNumber
58 vector<double> dist; //adDist - distance of sequences to centroids
59 vector<short> change; //did the centroid sequence change? 0 = no; 1 = yes
60 vector<int> centroids; //the representative flowgram for each cluster m
61 vector<double> weight;
62 vector<double> singleTau; //tMaster->adTau: 1-D Tau vector (1xnumSeqs)
// Bookkeeping for unique flowgrams. By name: the flowgram data, a count per unique, the two
// index maps between sequences and uniques, and a length per unique - TODO confirm in the .cpp.
63 vector<short> uniqueFlowgrams;
64 vector<int> uniqueCount;
65 vector<int> mapSeqToUnique;
66 vector<int> mapUniqueToSeq;
67 vector<int> uniqueLengths;
// Counts, by name: sequences, unique flowgrams, OTUs and flow cells. Presumably the dimensions of
// the vectors above (the singleTau comment refers to numSeqs) - confirm.
69 int numSeqs, numUniques, numOTUs, numFlowCells;
// NOTE(review): only declarations are visible in this header. The groupings and descriptions below
// are inferred from names and signatures and must be confirmed against the definitions in the .cpp.

// Lookup-table setup; no arguments and no return value (presumably fill singleLookUp / jointLookUp).
71 void getSingleLookUp();
72 void getJointLookUp();
// Takes one int and returns a double; by name, a probability for an intensity value.
75 double getProbIntensity(int);
// Takes two ints and returns a float; by name, a distance between two items (presumably flowgram indices).
76 float calcPairwiseDist(int, int);
// By name, the parent-process side of a forked pairwise-distance computation; the string is
// presumably an output file name and the ints a range - TODO confirm.
77 void flowDistParentFork(string, int, int);
// File-producing steps: each returns a string (presumably the name of the file it wrote).
79 string createDistFile(int);
80 string createNamesFile();
81 string cluster(string, string);
// Cluster-state setup: getOTUData takes a string (presumably a file name); initPyroCluster takes nothing.
83 void getOTUData(string);
84 void initPyroCluster();
// Iterative update steps, by name: centroid calculation, distance to centroid, weights,
// likelihood, centroid checks and distance recalculation.
87 void calcCentroidsDriver(int, int);
88 double getDistToCentroid(int, int, int);
89 double getNewWeights();
90 double getLikelihood();
91 void checkCentroids();
92 void calcNewDistances();
93 void calcNewDistancesParent(int, int);
// The three vectors are taken by non-const reference, so the callee can modify them
// (presumably used as outputs - confirm).
94 void calcNewDistancesChild(int, int, vector<int>&, vector<int>&, vector<double>&);
// Output writers; each takes a vector<int> by value.
98 void writeQualities(vector<int>);
99 void writeSequences(vector<int>);
100 void writeNames(vector<int>);
102 void writeClusters(vector<int>);
// MPI-named variants of the distance routines. NOTE(review): presumably guarded by an MPI
// preprocessor conditional that is not visible in this view - confirm in the full header.
106 string flowDistMPI(int, int);
107 void calcNewDistancesChildMPI(int, int, vector<int>&);