8 * Created by Pat Schloss on 12/27/10.
9 * Copyright 2010 Schloss Lab. All rights reserved.
14 #include "command.hpp"
// Command subclass exposed under the name "shhh.seqs" (see getCommandName()).
// NOTE(review): the "tMaster->..." tags in the data-member comments suggest this
// class was ported from another implementation -- confirm provenance.
17 class ShhherCommand : public Command {
// Constructs the command from a single string argument.
// NOTE(review): presumably the user-supplied option string -- confirm in the .cpp.
20 ShhherCommand(string);
// Returns a list of strings (defined out of line).
// NOTE(review): presumably the names of the parameters this command accepts -- confirm.
24 vector<string> setParameters();
// Name of this command: always the literal "shhh.seqs".
25 string getCommandName() { return "shhh.seqs"; }
// Category label of this command: always the literal "Hidden".
26 string getCommandCategory() { return "Hidden"; }
// Returns the help text for this command (defined out of line); help() prints it.
27 string getHelpString();
// Citation text for this command: always the placeholder "no citation".
28 string getCitation() { return "no citation"; }
// Prints the help text by passing getHelpString() to m->mothurOut().
// `m` is not declared in the visible part of this class;
// presumably it is inherited from Command -- TODO confirm.
31 void help() { m->mothurOut(getHelpString()); }
// String settings.
// NOTE(review): judging by the names these are an output directory and
// input/output file paths -- confirm where they are assigned.
36 string outputDir, flowFileName, flowFilesFileName, lookupFileName, compositeFASTAFileName, compositeNamesFileName;
// Integer settings.
// NOTE(review): presumably the worker count and an iteration cap -- confirm.
38 int processors, maxIters;
// Floating-point settings; their meaning and defaults are not visible in this header.
39 float cutoff, sigma, minDelta;
// NOTE(review): the names suggest partition boundaries over sequences and OTUs
// (e.g. for splitting work between workers) -- confirm against the .cpp.
42 vector<int> nSeqsBreaks;
43 vector<int> nOTUsBreaks;
// Lookup tables of doubles.
// NOTE(review): presumably filled by getSingleLookUp() / getJointLookUp() -- confirm.
44 vector<double> singleLookUp;
45 vector<double> jointLookUp;
// Sequence names.
// NOTE(review): presumably indexed by sequence number -- confirm.
47 vector<string> seqNameVector;
// Flow data held as short values and as double values; the layout is not visible here.
// NOTE(review): possibly flattened numSeqs x numFlowCells arrays -- confirm.
49 vector<short> flowDataIntI;
50 vector<double> flowDataPrI;
// Maps a string key to an int.
// NOTE(review): presumably sequence name -> index -- confirm.
51 map<string, int> nameMap;
// Clustering working state. The "tMaster->..." tags in the trailing comments
// name fields of what appears to be the implementation this was ported from.
// NOTE(review): members without a trailing comment are described only by
// their names; their exact contents must be confirmed in the .cpp.
// NOTE(review): presumably cumulative and per-OTU sequence counts -- confirm.
53 vector<int> cumNumSeqs;
54 vector<int> nSeqsPerOTU;
55 vector<vector<int> > aaP; //tMaster->aanP: each row is a different otu / each col contains the sequence indices
56 vector<vector<int> > aaI; //tMaster->aanI: that are in each otu - can't differentiate between aaP and aaI
57 vector<int> seqNumber; //tMaster->anP: the sequence id number sorted by OTU
58 vector<int> seqIndex; //tMaster->anI; the index that corresponds to seqNumber
59 vector<double> dist; //adDist - distance of sequences to centroids
60 vector<short> change; //did the centroid sequence change? 0 = no; 1 = yes
61 vector<int> centroids; //the representative flowgram for each cluster m
// NOTE(review): presumably one weight per OTU -- confirm.
62 vector<double> weight;
63 vector<double> singleTau; //tMaster->adTau: 1-D Tau vector (1xnumSeqs)
// Bookkeeping for unique flowgrams.
// NOTE(review): the names suggest the unique flowgram data, how many sequences
// share each one, the two index mappings between sequences and uniques, and a
// length per unique -- confirm.
64 vector<short> uniqueFlowgrams;
65 vector<int> uniqueCount;
66 vector<int> mapSeqToUnique;
67 vector<int> mapUniqueToSeq;
68 vector<int> uniqueLengths;
// NOTE(review): presumably the names of the files this command has written -- confirm.
70 vector<string> outputNames;
// Integer counters.
// NOTE(review): by their names, the numbers of sequences, unique flowgrams,
// OTUs and flow cells -- confirm where they are set.
72 int numSeqs, numUniques, numOTUs, numFlowCells;
// Helper declarations; all are defined out of line, so the notes below are
// based on names and signatures only and should be confirmed in the .cpp.
// NOTE(review): presumably populate singleLookUp and jointLookUp.
74 void getSingleLookUp();
75 void getJointLookUp();
// Takes one int and returns a double.
78 double getProbIntensity(int);
// Takes two ints and returns a float.
// NOTE(review): presumably the indices of the two flowgrams being compared.
79 float calcPairwiseDist(int, int);
// Takes a string and two ints.
// NOTE(review): presumably a file name and a [start, stop) range -- confirm.
80 void flowDistParentFork(string, int, int);
// Each of the next three returns a string.
// NOTE(review): presumably the name of the file produced -- confirm.
82 string createDistFile(int);
83 string createNamesFile();
84 string cluster(string, string);
// Takes a string.
// NOTE(review): presumably the name of a file holding OTU assignments -- confirm.
86 void getOTUData(string);
87 void initPyroCluster();
// Declarations for updating centroids, weights and distances; all are defined
// out of line, so parameter meanings are not visible in this header.
// NOTE(review): the int pairs presumably bound a range of OTUs or sequences
// handled by one worker -- confirm.
90 void calcCentroidsDriver(int, int);
91 double getDistToCentroid(int, int, int);
// Both return a double; what the value represents is defined in the .cpp.
92 double getNewWeights();
93 double getLikelihood();
94 void checkCentroids();
95 void calcNewDistances();
96 void calcNewDistancesParent(int, int);
// The three vectors are taken by non-const reference.
// NOTE(review): presumably output buffers filled by the callee -- confirm.
97 void calcNewDistancesChild(int, int, vector<int>&, vector<int>&, vector<double>&);
// Output writers; each takes a vector<int> by value and returns nothing.
// NOTE(review): the meaning of the vector argument and the files written are
// defined in the .cpp -- confirm there.
101 void writeQualities(vector<int>);
102 void writeSequences(vector<int>);
103 void writeNames(vector<int>);
105 void writeClusters(vector<int>);
// NOTE(review): the names suggest MPI counterparts of flowDistParentFork() and
// calcNewDistancesChild(); any conditional-compilation guard around them is
// not visible in this view -- confirm.
109 string flowDistMPI(int, int);
110 void calcNewDistancesChildMPI(int, int, vector<int>&);