};
buildConfigurationList = 1DEB919308733D9F0010E9CD /* Build configuration list for PBXProject "Mothur" */;
compatibilityVersion = "Xcode 3.0";
- developmentRegion = English;
hasScannedForEncodings = 1;
knownRegions = (
English,
//sets smallCol and smallRow, returns distance
double ClusterClassic::getSmallCell() {
try {
-
+
smallDist = aboveCutoff;
smallRow = 1;
smallCol = 0;
rabund->set(smallRow, rabund->get(smallRow)+rabund->get(smallCol));
rabund->set(smallCol, 0);
+ for (int i = smallCol+1; i < rabund->size(); i++) {
+ rabund->set((i-1), rabund->get(i));
+ }
+ rabund->resize((rabund->size()-1));
rabund->setLabel(toString(smallDist));
// cout << '\t' << rabund->get(smallRow) << '\t' << rabund->get(smallCol) << endl;
list->set(smallRow, list->get(smallRow)+','+list->get(smallCol));
list->set(smallCol, "");
+ for (int i = smallCol+1; i < list->size(); i++) {
+ list->set((i-1), list->get(i));
+ }
+ list->resize((list->size()-1));
list->setLabel(toString(smallDist));
// cout << '\t' << list->get(smallRow) << '\t' << list->get(smallCol) << endl;
/***********************************************************************/
void ClusterClassic::update(double& cutOFF){
try {
-//cout << "before update " << endl;
//print();
getSmallCell();
int r, c;
r = smallRow; c = smallCol;
- //because we only store lt, we need to make sure we grab the right location
- //if (smallRow < smallCol) { c = smallRow; r = smallCol; } //smallRow is really our column value
- //else { r = smallRow; c = smallCol; } //smallRow is the row value
-
- //reset rows smallest distance
- //rowSmallDists[r].dist = aboveCutoff; rowSmallDists[r].row = 0; rowSmallDists[r].col = 0;
- //rowSmallDists[c].dist = aboveCutoff; rowSmallDists[c].row = 0; rowSmallDists[c].col = 0;
-
- //if your rows smallest distance is from smallRow or smallCol, reset
- //for(int i=0;i<nseqs;i++){
- //if ((rowSmallDists[i].row == r) || (rowSmallDists[i].row == c) || (rowSmallDists[i].col == r) || (rowSmallDists[i].col == c)) {
- // rowSmallDists[i].dist = aboveCutoff; rowSmallDists[i].row = 0; rowSmallDists[i].col = 0;
- //}
- //}
-
+
for(int i=0;i<nseqs;i++){
if(i != r && i != c){
double distRow, distCol, newDist;
if (i > r) { distRow = dMatrix[i][r]; }
else { distRow = dMatrix[r][i]; }
-
+
if (i > c) { distCol = dMatrix[i][c]; dMatrix[i][c] = aboveCutoff; } //like removeCell
else { distCol = dMatrix[c][i]; dMatrix[c][i] = aboveCutoff; }
-
+
if(method == "furthest"){
newDist = max(distRow, distCol);
}
else if (method == "nearest"){
newDist = min(distRow, distCol);
}
-
+ //cout << "newDist = " << newDist << endl;
if (i > r) { dMatrix[i][r] = newDist; }
else { dMatrix[r][i] = newDist; }
- //if (newDist < rowSmallDists[i].dist) { rowSmallDists[i].dist = newDist; rowSmallDists[i].col = r; rowSmallDists[i].row = i; }
}
- //cout << "rowsmall = " << i << '\t' << rowSmallDists[i].dist << endl;
}
clusterBins();
clusterNames();
-
- //find new small for 2 rows we just merged
- //colDist temp(0,0,100.0);
- //rowSmallDists[r] = temp;
-
- //for (int i = 0; i < dMatrix[r].size(); i++) {
- // if (dMatrix[r][i] < rowSmallDists[r].dist) { rowSmallDists[r].dist = dMatrix[r][i]; rowSmallDists[r].col = r; rowSmallDists[r].row = i; }
- //}
- //for (int i = dMatrix[r].size()+1; i < dMatrix.size(); i++) {
- // if (dMatrix[i][dMatrix[r].size()] < rowSmallDists[r].dist) { rowSmallDists[r].dist = dMatrix[i][dMatrix[r].size()]; rowSmallDists[r].col = r; rowSmallDists[r].row = i; }
- //}
- //rowSmallDists[c] = temp;
- //for (int i = 0; i < dMatrix[c].size(); i++) {
- // if (dMatrix[c][i] < rowSmallDists[c].dist) { rowSmallDists[c].dist = dMatrix[c][i]; rowSmallDists[c].col = c; rowSmallDists[c].row = i; }
- //}
- //for (int i = dMatrix[c].size()+1; i < dMatrix.size(); i++) {
- // if (dMatrix[i][dMatrix[c].size()] < rowSmallDists[c].dist) { rowSmallDists[c].dist = dMatrix[i][dMatrix[c].size()]; rowSmallDists[c].col = c; rowSmallDists[c].row = i; }
- //}
+ //resize each row
+ for(int i=0;i<nseqs;i++){
+ for(int j=c+1;j<dMatrix[i].size();j++){
+ dMatrix[i][j-1]=dMatrix[i][j];
+ }
+ }
+
+ //resize each col
+ for(int i=c+1;i<nseqs;i++){
+ for(int j=0;j < dMatrix[i-1].size();j++){
+ dMatrix[i-1][j]=dMatrix[i][j];
+ }
+ }
- //cout << "after update " << endl;
- //print();
+ nseqs--;
+ dMatrix.pop_back();
+
}
catch(exception& e) {
m->errorOut(e, "ClusterClassic", "update");
int estart = time(NULL);
- while ((cluster->getSmallDist() < cutoff) && (cluster->getNSeqs() > 0)){
+ while ((cluster->getSmallDist() < cutoff) && (cluster->getNSeqs() > 1)){
if (m->control_pressed) { delete cluster; delete list; delete rabund; sabundFile.close();rabundFile.close();listFile.close(); for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } outputTypes.clear(); return 0; }
cluster->update(cutoff);
#include "deuniqueseqscommand.h"
#include "pairwiseseqscommand.h"
#include "clusterdoturcommand.h"
+#include "subsamplecommand.h"
/*******************************************************/
commands["fastq.info"] = "fastq.info";
commands["deunique.seqs"] = "deunique.seqs";
commands["cluster.classic"] = "cluster.classic";
+ commands["sub.sample"] = "sub.sample";
commands["pairwise.seqs"] = "MPIEnabled";
commands["pipeline.pds"] = "MPIEnabled";
commands["classify.seqs"] = "MPIEnabled";
else if(commandName == "deunique.seqs") { command = new DeUniqueSeqsCommand(optionString); }
else if(commandName == "pairwise.seqs") { command = new PairwiseSeqsCommand(optionString); }
else if(commandName == "cluster.classic") { command = new ClusterDoturCommand(optionString); }
+ else if(commandName == "sub.sample") { command = new SubSampleCommand(optionString); }
else { command = new NoCommand(optionString); }
return command;
else if(commandName == "deunique.seqs") { pipecommand = new DeUniqueSeqsCommand(optionString); }
else if(commandName == "pairwise.seqs") { pipecommand = new PairwiseSeqsCommand(optionString); }
else if(commandName == "cluster.classic") { pipecommand = new ClusterDoturCommand(optionString); }
+ else if(commandName == "sub.sample") { pipecommand = new SubSampleCommand(optionString); }
else { pipecommand = new NoCommand(optionString); }
return pipecommand;
else if(commandName == "deunique.seqs") { shellcommand = new DeUniqueSeqsCommand(); }
else if(commandName == "pairwise.seqs") { shellcommand = new PairwiseSeqsCommand(); }
else if(commandName == "cluster.classic") { shellcommand = new ClusterDoturCommand(); }
+ else if(commandName == "sub.sample") { shellcommand = new SubSampleCommand(); }
else { shellcommand = new NoCommand(); }
return shellcommand;
virtual void resize(int) = 0;
virtual int size() = 0;
virtual void print(ostream&) = 0;
+ virtual void clear() = 0;
void setLabel(string l) { label = l; }
string getLabel() { return label; }
//**********************************************************************************************************************
vector<string> GetSeqsCommand::getValidParameters(){
try {
- string Array[] = {"fasta","name", "group", "alignreport", "accnos", "dups", "list","taxonomy","outputdir","inputdir"};
+ string Array[] = {"fasta","name", "group", "qfile","alignreport", "accnos", "dups", "list","taxonomy","outputdir","inputdir"};
vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
return myArray;
}
outputTypes["group"] = tempOutNames;
outputTypes["alignreport"] = tempOutNames;
outputTypes["list"] = tempOutNames;
+ outputTypes["qfile"] = tempOutNames;
}
catch(exception& e) {
m->errorOut(e, "GetSeqsCommand", "GetSeqsCommand");
else {
//valid paramters for this command
- string Array[] = {"fasta","name", "group", "alignreport", "accnos", "dups", "list","taxonomy","outputdir","inputdir"};
+ string Array[] = {"fasta","name", "group", "alignreport", "qfile", "accnos", "dups", "list","taxonomy","outputdir","inputdir"};
vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
OptionParser parser(option);
outputTypes["group"] = tempOutNames;
outputTypes["alignreport"] = tempOutNames;
outputTypes["list"] = tempOutNames;
+ outputTypes["qfile"] = tempOutNames;
//if the user changes the output directory command factory will send this info to us in the output parameter
outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
//if the user has not given a path then, add inputdir. else leave path alone.
if (path == "") { parameters["taxonomy"] = inputDir + it->second; }
}
+
+ it = parameters.find("qfile");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["qfile"] = inputDir + it->second; }
+ }
}
if (taxfile == "not open") { abort = true; }
else if (taxfile == "not found") { taxfile = ""; }
+ qualfile = validParameter.validFile(parameters, "qfile", true);
+ if (qualfile == "not open") { abort = true; }
+ else if (qualfile == "not found") { qualfile = ""; }
+
string usedDups = "true";
string temp = validParameter.validFile(parameters, "dups", false); if (temp == "not found") { temp = "false"; usedDups = ""; }
dups = m->isTrue(temp);
- if ((fastafile == "") && (namefile == "") && (groupfile == "") && (alignfile == "") && (listfile == "") && (taxfile == "")) { m->mothurOut("You must provide one of the following: fasta, name, group, alignreport, taxonomy or listfile."); m->mothurOutEndLine(); abort = true; }
+ if ((fastafile == "") && (namefile == "") && (groupfile == "") && (alignfile == "") && (listfile == "") && (taxfile == "") && (qualfile == "")) { m->mothurOut("You must provide one of the following: fasta, name, group, alignreport, taxonomy, quality or listfile."); m->mothurOutEndLine(); abort = true; }
if ((usedDups != "") && (namefile == "")) { m->mothurOut("You may only use dups with the name option."); m->mothurOutEndLine(); abort = true; }
void GetSeqsCommand::help(){
try {
- m->mothurOut("The get.seqs command reads an .accnos file and any of the following file types: fasta, name, group, list, taxonomy or alignreport file.\n");
+ m->mothurOut("The get.seqs command reads an .accnos file and any of the following file types: fasta, name, group, list, taxonomy, quality or alignreport file.\n");
m->mothurOut("It outputs a file containing only the sequences in the .accnos file.\n");
- m->mothurOut("The get.seqs command parameters are accnos, fasta, name, group, list, taxonomy, alignreport and dups. You must provide accnos and at least one of the other parameters.\n");
+ m->mothurOut("The get.seqs command parameters are accnos, fasta, name, group, list, taxonomy, qfile, alignreport and dups. You must provide accnos and at least one of the other parameters.\n");
m->mothurOut("The dups parameter allows you to add the entire line from a name file if you add any name from the line. default=false. \n");
m->mothurOut("The get.seqs command should be in the following format: get.seqs(accnos=yourAccnos, fasta=yourFasta).\n");
m->mothurOut("Example get.seqs(accnos=amazon.accnos, fasta=amazon.fasta).\n");
if (alignfile != "") { readAlign(); }
if (listfile != "") { readList(); }
if (taxfile != "") { readTax(); }
+ if (qualfile != "") { readQual(); }
if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
}
}
//**********************************************************************************************************************
+int GetSeqsCommand::readQual(){
+ try {
+ string thisOutputDir = outputDir;
+ if (outputDir == "") { thisOutputDir += m->hasPath(qualfile); }
+ string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(qualfile)) + "pick" + m->getExtension(qualfile);
+ ofstream out;
+ m->openOutputFile(outputFileName, out);
+
+
+ ifstream in;
+ m->openInputFile(qualfile, in);
+ string name;
+
+ bool wroteSomething = false;
+
+
+ while(!in.eof()){
+ string saveName = "";
+ string name = "";
+ string scores = "";
+
+ in >> name;
+
+ if (name.length() != 0) {
+ saveName = name.substr(1);
+ while (!in.eof()) {
+ char c = in.get();
+ if (c == 10 || c == 13){ break; }
+ else { name += c; }
+ }
+ m->gobble(in);
+ }
+
+ while(in){
+ char letter= in.get();
+ if(letter == '>'){ in.putback(letter); break; }
+ else{ scores += letter; }
+ }
+
+ m->gobble(in);
+
+ if (names.count(saveName) != 0) {
+ wroteSomething = true;
+
+ out << name << endl << scores;
+ }
+
+ m->gobble(in);
+ }
+ in.close();
+ out.close();
+
+
+ if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine(); }
+ outputNames.push_back(outputFileName); outputTypes["qfile"].push_back(outputFileName);
+
+ return 0;
+
+ }
+ catch(exception& e) {
+ m->errorOut(e, "GetSeqsCommand", "readQual");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
int GetSeqsCommand::readList(){
try {
string thisOutputDir = outputDir;
private:
set<string> names;
vector<string> outputNames;
- string accnosfile, fastafile, namefile, groupfile, alignfile, listfile, taxfile, outputDir;
+ string accnosfile, fastafile, namefile, groupfile, alignfile, listfile, taxfile, qualfile, outputDir;
bool abort, dups;
map<string, vector<string> > outputTypes;
int readAccnos();
int readList();
int readTax();
+ int readQual();
};
*Ts,double *Tinitial,double *counter1);
void meanvar(double *pmatrix,int *g,int *nr,int *nc,double *storage);
void start(double *Imatrix,int *g,int *nr,int *nc,double *testing,
- double storage[][9]);
+ double**);
int metastat_main (char*, int, int, double, int, double**, int);
}
// Initialize the matrices
- size = row*col;
-//printf("size = %d\n.", size);
- double matrix[row][col];
- double pmatrix[size],pmatrix2[size],permuted[size];
- double storage[row][9];
-//printf("here\n.", size);
- for (i=0;i<row;i++){
- for (j =0;j<9;j++){
- storage[i][j]=0;
- }
- }
-
+ size = row*col;
+
+ double ** matrix, * pmatrix, * permuted, ** storage;
+ matrix = malloc(row*sizeof(double *));
+ storage = malloc(row*sizeof(double *));
+ for (i = 0; i<row; i++){
+ matrix[i] = malloc(col*sizeof(double));
+ }
+ for (i = 0; i<row;i++){
+ storage[i] = malloc(9*sizeof(double));
+ }
+
+ pmatrix = (double *) malloc(size*sizeof(double *));
+ permuted = (double *) malloc(size*sizeof(double *));
+
+
+
for(i=0; i<row; i++){
for(j=0; j<col;j++){
matrix[i][j]=data[i][j];
permutes = &B;
ptt_size = B*row;
-
+
//changing ptt_size to row
- double permuted_ttests[row], pvalues[row], tinitial[row];
+ double * permuted_ttests, * pvalues, * tinitial;
+ permuted_ttests = (double *) malloc(size*sizeof(double *));
+ pvalues = (double *) malloc(size*sizeof(double *));
+ tinitial = (double *) malloc(size*sizeof(double *));
for(i=0;i<row;i++){
permuted_ttests[i]=0;}
tinitial[i]=0; }
// Find the initial values for the matrix.
+
start(pmatrix,gvalue,nr,nc,tinitial,storage);
// Start the calculations.
if ( (col==2) || ((g-1)<8) || ((col-g+1) < 8) ){
- double fish[row], fish2[row];
+ double * fish, *fish2;
+ fish = (double *) malloc(size*sizeof(double *));
+ fish2 = (double *) malloc(size*sizeof(double *));
+
for(i=0;i<row;i++){
fish[i]=0;
fish2[i]=0;}
// f21 f22
int *nr, *nc, *ldtabl, *work;
- int nrow=2, ncol=2, ldtable=2, workspace=100000;
+ int nrow=2, ncol=2, ldtable=2;
+ int workspace=(row*sizeof(double *)+size*sizeof(double *));
double *expect, *prc, *emin,*prt,*pre;
double e=0, prc1=0, emin1=0, prt1=0, pre1=0;
emin=&emin1;
prt=&prt1;
pre=&pre1;
-
+
fexact(nr,nc,data, ldtabl,expect,prc,emin,prt,pre,work);
if (*pre>.999999999){
}
}
else{
-
+printf("here before testp\n");
testp(permuted_ttests, permutes, permuted,pmatrix, nc, nr, gvalue,tinitial,pvalues);
// Checks to make sure the matrix isn't sparse.
- double sparse[row], sparse2[row];
+ double * sparse, * sparse2;
+ sparse = (double *) malloc(size*sizeof(double *));
+ sparse2 = (double *) malloc(size*sizeof(double *));
+
for(i=0;i<row;i++){
sparse[i]=0;
sparse2[i]=0;}
emin=&emin1;
prt=&prt1;
pre=&pre1;
-
+printf("here before fexact2\n");
fexact(nr,nc,data, ldtabl,expect,prc,emin,prt,pre,work);
if (*pre>.999999999){
}
// Calculates the mean of counts (not normalized)
- double temp[row][2];
-
+ double ** temp;
+ temp = malloc(row*sizeof(double *));
+ for (i = 0; i<row; i++){
+ temp[i] = malloc(col*sizeof(double));
+ }
+printf("here\n");
for(j=0;j<row;j++){
for(i=0;i<2;i++){
temp[j][i]=0;
}
void start(double *Imatrix,int *g,int *nr,int *nc,double *initial,
- double storage[][9]){
+ double** storage){
int i, a = *nr;
a*=4;
-
+
double store[a], tool[a], C1[*nr][3], C2[*nr][3];
double nrows,ncols,gvalue, xbardiff=0, denom=0;
-
+
nrows = (double) *nr;
ncols = (double) *nc;
gvalue= (double) *g;
-
+
meanvar(Imatrix,g,nr,nc,store);
-
+
for(i=0;i<=a-1;i++){
tool[i]=store[i];
}
- for (i=0; i<*nr;i++){
+
+ for (i=0; i<*nr;i++){
C1[i][0]=tool[i]; //mean group 1
storage[i][0]=C1[i][0];
C1[i][1]=tool[i+*nr+*nr]; // var group 1
storage[i][1]=C1[i][1];
C1[i][2]=C1[i][1]/(gvalue-1);
storage[i][2]=sqrt(C1[i][2]);
-
+ printf("here2\n");
C2[i][0]=tool[i+*nr]; // mean group 2
- storage[i][4]=C2[i][0];
+ storage[i][4]=C2[i][0];
C2[i][1]=tool[i+*nr+*nr+*nr]; // var group 2
- storage[i][5]=C2[i][1];
- C2[i][2]=C2[i][1]/(ncols-gvalue+1);
+ storage[i][5]=C2[i][1];
+ C2[i][2]=C2[i][1]/(ncols-gvalue+1);
storage[i][6]=sqrt(C2[i][2]);
+
}
+
for (i=0; i<*nr; i++){
xbardiff = C1[i][0]-C2[i][0];
denom = sqrt(C1[i][2]+C2[i][2]);
initial[i]=fabs(xbardiff/denom);
- }
+ }
}
if(needToUpdate == 1){ updateStats(); }
return maxRank;
}
+/***********************************************************************/
+void OrderVector::clear(){
+ numBins = 0;
+ maxRank = 0;
+ numSeqs = 0;
+ data.clear();
+}
/***********************************************************************/
void push_back(int);
void resize(int);
int size();
+ void clear();
void print(string, ostream&);
vector<int>::iterator begin();
vector<int>::iterator end();
else {
seqName = seqName.substr(1);
}
-
+
//m->getline(qFile, line);
//istringstream qualStream(line);
//seqLength = qScores.size();
+ /*while(!in.eof()){
+ string saveName = "";
+ string name = "";
+ string scores = "";
+
+ in >> name;
+ //cout << name << endl;
+ if (name.length() != 0) {
+ saveName = name.substr(1);
+ while (!in.eof()) {
+ char c = in.get();
+ if (c == 10 || c == 13){ break; }
+ else { name += c; }
+ }
+ m->gobble(in);
+ }
+
+ while(in){
+ char letter= in.get();
+ if(letter == '>'){ in.putback(letter); break; }
+ else{ scores += letter; }
+ }
+
+ //istringstream iss (scores,istringstream::in);
+
+ //int count = 0; int tempScore;
+ //while (iss) { iss >> tempScore; count++; }
+ //cout << saveName << '\t' << count << endl;
+
+ m->gobble(in);
+ }*/
+
+
+
for(int i=0;i<seqLength;i++){
qFile >> score;
qScores.push_back(score);
}
}
+/**************************************************************************************************/
/**************************************************************************************************/
return data[index];
}
+/***********************************************************************/
+void RAbundVector::clear(){
+ numBins = 0;
+ maxRank = 0;
+ numSeqs = 0;
+ data.clear();
+
+}
/***********************************************************************/
void RAbundVector::push_back(int binSize){
int sum();
int sum(int);
int numNZ();
+ void clear();
vector<int>::reverse_iterator rbegin();
vector<int>::reverse_iterator rend();
//**********************************************************************************************************************
vector<string> RemoveSeqsCommand::getValidParameters(){
try {
- string Array[] = {"fasta","name", "group", "alignreport", "accnos", "list","taxonomy","outputdir","inputdir", "dups" };
+ string Array[] = {"fasta","name", "group", "alignreport", "accnos", "qfile","list","taxonomy","outputdir","inputdir", "dups" };
vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
return myArray;
}
outputTypes["group"] = tempOutNames;
outputTypes["alignreport"] = tempOutNames;
outputTypes["list"] = tempOutNames;
+ outputTypes["qfile"] = tempOutNames;
}
catch(exception& e) {
m->errorOut(e, "RemoveSeqsCommand", "RemoveSeqsCommand");
else {
//valid paramters for this command
- string Array[] = {"fasta","name", "group", "alignreport", "accnos", "list","taxonomy","outputdir","inputdir", "dups" };
+ string Array[] = {"fasta","name", "group", "alignreport", "accnos", "qfile", "list","taxonomy","outputdir","inputdir", "dups" };
vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
OptionParser parser(option);
outputTypes["group"] = tempOutNames;
outputTypes["alignreport"] = tempOutNames;
outputTypes["list"] = tempOutNames;
+ outputTypes["qfile"] = tempOutNames;
//if the user changes the output directory command factory will send this info to us in the output parameter
outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
//if the user has not given a path then, add inputdir. else leave path alone.
if (path == "") { parameters["taxonomy"] = inputDir + it->second; }
}
+
+ it = parameters.find("qfile");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["qfile"] = inputDir + it->second; }
+ }
}
taxfile = validParameter.validFile(parameters, "taxonomy", true);
if (taxfile == "not open") { abort = true; }
else if (taxfile == "not found") { taxfile = ""; }
+
+ qualfile = validParameter.validFile(parameters, "qfile", true);
+ if (qualfile == "not open") { abort = true; }
+ else if (qualfile == "not found") { qualfile = ""; }
string usedDups = "true";
}
dups = m->isTrue(temp);
- if ((fastafile == "") && (namefile == "") && (groupfile == "") && (alignfile == "") && (listfile == "") && (taxfile == "")) { m->mothurOut("You must provide at least one of the following: fasta, name, group, taxonomy, alignreport or list."); m->mothurOutEndLine(); abort = true; }
+ if ((fastafile == "") && (namefile == "") && (groupfile == "") && (alignfile == "") && (listfile == "") && (taxfile == "") && (qualfile == "")) { m->mothurOut("You must provide at least one of the following: fasta, name, group, taxonomy, quality, alignreport or list."); m->mothurOutEndLine(); abort = true; }
if ((usedDups != "") && (namefile == "")) { m->mothurOut("You may only use dups with the name option."); m->mothurOutEndLine(); abort = true; }
}
void RemoveSeqsCommand::help(){
try {
- m->mothurOut("The remove.seqs command reads an .accnos file and at least one of the following file types: fasta, name, group, list, taxonomy or alignreport file.\n");
+ m->mothurOut("The remove.seqs command reads an .accnos file and at least one of the following file types: fasta, name, group, list, taxonomy, quality or alignreport file.\n");
m->mothurOut("It outputs a file containing the sequences NOT in the .accnos file.\n");
- m->mothurOut("The remove.seqs command parameters are accnos, fasta, name, group, list, taxonomy, alignreport and dups. You must provide accnos and at least one of the file parameters.\n");
+ m->mothurOut("The remove.seqs command parameters are accnos, fasta, name, group, list, taxonomy, qfile, alignreport and dups. You must provide accnos and at least one of the file parameters.\n");
m->mothurOut("The dups parameter allows you to remove the entire line from a name file if you remove any name from the line. default=true. \n");
m->mothurOut("The remove.seqs command should be in the following format: remove.seqs(accnos=yourAccnos, fasta=yourFasta).\n");
m->mothurOut("Example remove.seqs(accnos=amazon.accnos, fasta=amazon.fasta).\n");
if (alignfile != "") { readAlign(); }
if (listfile != "") { readList(); }
if (taxfile != "") { readTax(); }
+ if (qualfile != "") { readQual(); }
if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
out.close();
if (wroteSomething == false) { m->mothurOut("Your file contains only sequences from the .accnos file."); m->mothurOutEndLine(); }
- outputTypes["fasta"].push_back(outputFileName);
+ outputTypes["fasta"].push_back(outputFileName); outputNames.push_back(outputFileName);
return 0;
}
}
//**********************************************************************************************************************
+int RemoveSeqsCommand::readQual(){
+ try {
+ string thisOutputDir = outputDir;
+ if (outputDir == "") { thisOutputDir += m->hasPath(qualfile); }
+ string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(qualfile)) + "pick" + m->getExtension(qualfile);
+ ofstream out;
+ m->openOutputFile(outputFileName, out);
+
+
+ ifstream in;
+ m->openInputFile(qualfile, in);
+ string name;
+
+ bool wroteSomething = false;
+
+
+ while(!in.eof()){
+ string saveName = "";
+ string name = "";
+ string scores = "";
+
+ in >> name;
+
+ if (name.length() != 0) {
+ saveName = name.substr(1);
+ while (!in.eof()) {
+ char c = in.get();
+ if (c == 10 || c == 13){ break; }
+ else { name += c; }
+ }
+ m->gobble(in);
+ }
+
+ while(in){
+ char letter= in.get();
+ if(letter == '>'){ in.putback(letter); break; }
+ else{ scores += letter; }
+ }
+
+ m->gobble(in);
+
+ if (names.count(saveName) == 0) {
+ wroteSomething = true;
+
+ out << name << endl << scores;
+ }
+
+ m->gobble(in);
+ }
+ in.close();
+ out.close();
+
+
+ if (wroteSomething == false) { m->mothurOut("Your file contains only sequences from the .accnos file."); m->mothurOutEndLine(); }
+ outputNames.push_back(outputFileName); outputTypes["qfile"].push_back(outputFileName);
+
+ return 0;
+
+ }
+ catch(exception& e) {
+ m->errorOut(e, "RemoveSeqsCommand", "readQual");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
int RemoveSeqsCommand::readList(){
try {
string thisOutputDir = outputDir;
out.close();
if (wroteSomething == false) { m->mothurOut("Your file contains only sequences from the .accnos file."); m->mothurOutEndLine(); }
- outputTypes["list"].push_back(outputFileName);
+ outputTypes["list"].push_back(outputFileName); outputNames.push_back(outputFileName);
return 0;
out.close();
if (wroteSomething == false) { m->mothurOut("Your file contains only sequences from the .accnos file."); m->mothurOutEndLine(); }
- outputTypes["name"].push_back(outputFileName);
+ outputTypes["name"].push_back(outputFileName); outputNames.push_back(outputFileName);
return 0;
}
out.close();
if (wroteSomething == false) { m->mothurOut("Your file contains only sequences from the .accnos file."); m->mothurOutEndLine(); }
- outputTypes["group"].push_back(outputFileName);
+ outputTypes["group"].push_back(outputFileName); outputNames.push_back(outputFileName);
return 0;
}
out.close();
if (wroteSomething == false) { m->mothurOut("Your file contains only sequences from the .accnos file."); m->mothurOutEndLine(); }
- outputTypes["taxonomy"].push_back(outputFileName);
+ outputTypes["taxonomy"].push_back(outputFileName); outputNames.push_back(outputFileName);
return 0;
}
out.close();
if (wroteSomething == false) { m->mothurOut("Your file contains only sequences from the .accnos file."); m->mothurOutEndLine(); }
- outputTypes["alignreport"].push_back(outputFileName);
+ outputTypes["alignreport"].push_back(outputFileName); outputNames.push_back(outputFileName);
return 0;
private:
set<string> names;
- string accnosfile, fastafile, namefile, groupfile, alignfile, listfile, taxfile, outputDir;
+ string accnosfile, fastafile, namefile, groupfile, alignfile, listfile, taxfile, qualfile, outputDir;
bool abort, dups;
vector<string> outputNames;
map<string, vector<string> > outputTypes;
void readAccnos();
int readList();
int readTax();
+ int readQual();
};
}
output << endl;
}
-
+/***********************************************************************/
+void SAbundVector::clear(){
+ numBins = 0;
+ maxRank = 0;
+ numSeqs = 0;
+ data.clear();
+}
/***********************************************************************/
void SAbundVector::print(ostream& output){
try {
int sum();
void resize(int);
int size();
+ void clear();
void print(ostream&);
void print(string, ostream&);
}
}
+/***********************************************************************/
+void SharedOrderVector::clear(){
+ numBins = 0;
+ maxRank = 0;
+ numSeqs = 0;
+ data.clear();
+}
/***********************************************************************/
void SharedOrderVector::resize(int){
bool operator()(const individual& i1, const individual& i2) {
return (i1.abundance > i2.abundance);
}
+ individual() { group = ""; bin = 0; abundance = 0; }
};
struct individualFloat {
bool operator()(const individual& i1, const individual& i2) {
return (i1.abundance > i2.abundance);
}
+ individualFloat() { group = ""; bin = 0; abundance = 0.0; }
};
vector<individual>::iterator end();
void push_back(int, int, string); //OTU, abundance, group MUST CALL UPDATE STATS AFTER PUSHBACK!!!
void updateStats();
+ void clear();
int getNumBins();
int getNumSeqs();
numSeqs += (newBinSize - oldBinSize);
}
catch(exception& e) {
- m->errorOut(e, "SharedRAbundVector", "set");
+ m->errorOut(e, "SharedRAbundFloatVector", "set");
exit(1);
}
}
+/***********************************************************************/
+void SharedRAbundFloatVector::clear(){
+ numBins = 0;
+ maxRank = 0;
+ numSeqs = 0;
+ data.clear();
+ for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; lookup[i] = NULL; }
+ lookup.clear();
+}
/***********************************************************************/
float SharedRAbundFloatVector::getAbundance(int index){
return data[index].abundance;
void push_back(float, string); //abundance, groupname
void pop_back();
void resize(int);
+ void clear();
int size();
void print(ostream&);
}
/***********************************************************************/
+void SharedRAbundVector::clear(){
+ numBins = 0;
+ maxRank = 0;
+ numSeqs = 0;
+ data.clear();
+ for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; lookup[i] = NULL; }
+ lookup.clear();
+}
+/***********************************************************************/
+
void SharedRAbundVector::push_back(int binSize, string groupName){
try {
individual newGuy;
OrderVector SharedRAbundVector::getOrderVector(map<string,int>* nameMap = NULL) {
try {
OrderVector ov;
-
- for(int i=0;i<data.size();i++){
+ for(int i=0;i<numBins;i++){
for(int j=0;j<data[i].abundance;j++){
ov.push_back(i);
}
}
random_shuffle(ov.begin(), ov.end());
-
+
ov.setLabel(label);
+
return ov;
}
catch(exception& e) {
void pop_back();
void resize(int);
int size();
+ void clear();
vector<individual>::reverse_iterator rbegin();
vector<individual>::reverse_iterator rend();
exit(1);
}
}
+/***********************************************************************/
+
+void SharedSAbundVector::clear(){
+ numBins = 0;
+ maxRank = 0;
+ numSeqs = 0;
+ data.clear();
+}
+
/***********************************************************************/
OrderVector SharedSAbundVector::getOrderVector(map<string,int>* hold = NULL){
try {
void pop_back();
void resize(int);
int size();
+ void clear();
void print(ostream&);
//**********************************************************************************************************************
vector<string> SubSampleCommand::getValidParameters(){
try {
- string Array[] = {"groups","label","outputdir","inputdir"};
+ string Array[] = {"fasta", "group", "list","shared","rabund", "name","sabund","size","groups","label","outputdir","inputdir"};
vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
return myArray;
}
outputTypes["list"] = tempOutNames;
outputTypes["rabund"] = tempOutNames;
outputTypes["sabund"] = tempOutNames;
+ outputTypes["fasta"] = tempOutNames;
+ outputTypes["name"] = tempOutNames;
+ outputTypes["group"] = tempOutNames;
}
catch(exception& e) {
m->errorOut(e, "SubSampleCommand", "GetRelAbundCommand");
//**********************************************************************************************************************
vector<string> SubSampleCommand::getRequiredParameters(){
try {
- vector<string> myArray;
+ string Array[] = {"fasta","list","shared","rabund", "sabund","or"};
+ vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
return myArray;
}
catch(exception& e) {
//**********************************************************************************************************************
vector<string> SubSampleCommand::getRequiredFiles(){
try {
- string Array[] = {"shared","list","rabund","sabund","or"};
- vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
+ vector<string> myArray;
return myArray;
}
catch(exception& e) {
else {
//valid paramters for this command
- string AlignArray[] = {"groups","label","outputdir","inputdir"};
- vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
+ string Array[] = {"fasta", "group", "list","shared","rabund", "sabund","name","size","groups","label","outputdir","inputdir"};
+ vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
OptionParser parser(option);
map<string,string> parameters = parser.getParameters();
ValidParameters validParameter;
//check to make sure all parameters are valid for command
- for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) {
+ map<string,string>::iterator it;
+ for (it = parameters.begin(); it != parameters.end(); it++) {
if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
}
outputTypes["list"] = tempOutNames;
outputTypes["rabund"] = tempOutNames;
outputTypes["sabund"] = tempOutNames;
+ outputTypes["fasta"] = tempOutNames;
+ outputTypes["name"] = tempOutNames;
+ outputTypes["group"] = tempOutNames;
//if the user changes the output directory command factory will send this info to us in the output parameter
- outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){
- outputDir = "";
- outputDir += m->hasPath(globaldata->inputFileName); //if user entered a file with a path then preserve it
+ outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
+
+ //if the user changes the input directory command factory will send this info to us in the output parameter
+ string inputDir = validParameter.validFile(parameters, "inputdir", false);
+ if (inputDir == "not found"){ inputDir = ""; }
+ else {
+ string path;
+ it = parameters.find("list");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["list"] = inputDir + it->second; }
+ }
+
+ it = parameters.find("fasta");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["fasta"] = inputDir + it->second; }
+ }
+
+ it = parameters.find("shared");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["shared"] = inputDir + it->second; }
+ }
+
+ it = parameters.find("group");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["group"] = inputDir + it->second; }
+ }
+
+ it = parameters.find("sabund");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["sabund"] = inputDir + it->second; }
+ }
+
+ it = parameters.find("rabund");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["rabund"] = inputDir + it->second; }
+ }
+
+ it = parameters.find("name");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["name"] = inputDir + it->second; }
+ }
}
- //make sure the user has already run the read.otu command
- if ((globaldata->getSharedFile() == "") && (globaldata->getListFile() == "") && (globaldata->getRabundFile() == "") && (globaldata->getSabundFile() == "")) { m->mothurOut("You must read a list, sabund, rabund or shared file before you can use the sub.sample command."); m->mothurOutEndLine(); abort = true; }
-
+ //check for required parameters
+ listfile = validParameter.validFile(parameters, "list", true);
+ if (listfile == "not open") { listfile = ""; abort = true; }
+ else if (listfile == "not found") { listfile = ""; }
+
+ sabundfile = validParameter.validFile(parameters, "sabund", true);
+ if (sabundfile == "not open") { sabundfile = ""; abort = true; }
+ else if (sabundfile == "not found") { sabundfile = ""; }
+
+ rabundfile = validParameter.validFile(parameters, "rabund", true);
+ if (rabundfile == "not open") { rabundfile = ""; abort = true; }
+ else if (rabundfile == "not found") { rabundfile = ""; }
+
+ fastafile = validParameter.validFile(parameters, "fasta", true);
+ if (fastafile == "not open") { fastafile = ""; abort = true; }
+ else if (fastafile == "not found") { fastafile = ""; }
+
+ sharedfile = validParameter.validFile(parameters, "shared", true);
+ if (sharedfile == "not open") { sharedfile = ""; abort = true; }
+ else if (sharedfile == "not found") { sharedfile = ""; }
+
+ namefile = validParameter.validFile(parameters, "name", true);
+ if (namefile == "not open") { namefile = ""; abort = true; }
+ else if (namefile == "not found") { namefile = ""; }
+
+ groupfile = validParameter.validFile(parameters, "group", true);
+ if (groupfile == "not open") { groupfile = ""; abort = true; }
+ else if (groupfile == "not found") { groupfile = ""; }
+
+
//check for optional parameter and set defaults
// ...at some point should added some additional type checking...
label = validParameter.validFile(parameters, "label", false);
else { allLines = 1; }
}
- //if the user has not specified any labels use the ones from read.otu
- if (label == "") {
- allLines = globaldata->allLines;
- labels = globaldata->labels;
- }
-
groups = validParameter.validFile(parameters, "groups", false);
if (groups == "not found") { groups = ""; pickedGroups = false; }
else {
globaldata->Groups = Groups;
}
+ string temp = validParameter.validFile(parameters, "size", false); if (temp == "not found"){ temp = "0"; }
+ convert(temp, size);
+
+ if ((namefile != "") && (fastafile == "")) { m->mothurOut("You may only use a namefile with a fastafile."); m->mothurOutEndLine(); abort = true; }
+
+ if ((fastafile == "") && (listfile == "") && (sabundfile == "") && (rabundfile == "") && (sharedfile == "")) {
+ m->mothurOut("You must provide a fasta, list, sabund, rabund or shared file as an input file."); m->mothurOutEndLine(); abort = true; }
+
+ if (pickedGroups && ((groupfile == "") && (sharedfile == ""))) {
+ m->mothurOut("You cannot pick groups without a valid group file or shared file."); m->mothurOutEndLine(); abort = true; }
+
+ if ((groupfile != "") && ((fastafile == "") && (listfile == ""))) {
+ m->mothurOut("Group file only valid with listfile or fastafile."); m->mothurOutEndLine(); abort = true; }
+
}
}
void SubSampleCommand::help(){
try {
- m->mothurOut("The get.relabund command can only be executed after a successful read.otu command of a list and group or shared file.\n");
- m->mothurOut("The get.relabund command parameters are groups, scale and label. No parameters are required.\n");
+ m->mothurOut("The sub.sample command is designed to be used as a way to normalize your data, or create a smaller set from your original set.\n");
+ m->mothurOut("The sub.sample command parameters are fasta, name, list, group, rabund, sabund, shared, groups, size and label. You must provide a fasta, list, sabund, rabund or shared file as an input file.\n");
m->mothurOut("The groups parameter allows you to specify which of the groups in your groupfile you would like included. The group names are separated by dashes.\n");
m->mothurOut("The label parameter allows you to select what distance levels you would like, and are also separated by dashes.\n");
- m->mothurOut("The scale parameter allows you to select what scale you would like to use. Choices are totalgroup, totalotu, averagegroup, averageotu, default is totalgroup.\n");
- m->mothurOut("The get.relabund command should be in the following format: get.relabund(groups=yourGroups, label=yourLabels).\n");
- m->mothurOut("Example get.relabund(groups=A-B-C, scale=averagegroup).\n");
+ m->mothurOut("The size parameter allows you indicate the size of your subsample.\n");
+ m->mothurOut("The size parameter is not set: with shared file size=number of seqs in smallest sample, with all other files, 10% of number of seqs.\n");
+ m->mothurOut("The sub.sample command should be in the following format: sub.sample(list=yourListFile, group=yourGroupFile, groups=yourGroups, label=yourLabels).\n");
+ m->mothurOut("Example sub.sample(list=abrecovery.fn.list, group=abrecovery.groups, groups=B-C, size=20).\n");
m->mothurOut("The default value for groups is all the groups in your groupfile, and all labels in your inputfile will be used.\n");
- m->mothurOut("The get.relabund command outputs a .relabund file.\n");
+ m->mothurOut("The sub.sample command outputs a .subsample file.\n");
m->mothurOut("Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n\n");
}
//**********************************************************************************************************************
-SubSampleCommand::~SubSampleCommand(){
-}
+SubSampleCommand::~SubSampleCommand(){}
//**********************************************************************************************************************
if (abort == true) { return 0; }
- string outputFileName = outputDir + m->getRootName(m->getSimpleName(globaldata->inputFileName)) + "subsample" + m->getExtension(globaldata->inputFileName);
- ofstream out;
- m->openOutputFile(outputFileName, out);
- out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
-
- string format = globaldata->getFormat();
-
- read = new ReadOTUFile(globaldata->inputFileName);
- read->read(&*globaldata);
- input = globaldata->ginput;
-
- if (format == "sharedfile") {
- lookup = input->getSharedRAbundVectors();
- outputTypes["shared"].push_back(outputFileName);
- getSubSampleShared(lookup, out);
- }else if (format == "list") {
- list = globaldata->glist;
- outputTypes["list"].push_back(outputFileName);
- //getSubSamplesList();
- }else if (format == "rabund") {
- rabund = globaldata->rabund;
- outputTypes["rabund"].push_back(outputFileName);
- //getSubSamplesRabund();
-
- }else if (format == "sabund") {
- sabund = globaldata->sabund;
- outputTypes["sabund"].push_back(outputFileName);
- //getSubSamplesSabund();
- }
-
- out.close();
-
- //reset groups parameter
- delete input; globaldata->ginput = NULL;
- delete read;
-
- if (m->control_pressed) { outputTypes.clear(); remove(outputFileName.c_str()); return 0;}
-
+ if (sharedfile != "") { getSubSampleShared(); }
+ //if (listfile != "") { getSubSampleList(); }
+ //if (rabund != "") { getSubSampleRabund(); }
+ //if (sabundfile != "") { getSubSampleSabund(); }
+ //if (fastafile != "") { getSubSampleFasta(); }
+
m->mothurOutEndLine();
m->mothurOut("Output File Names: "); m->mothurOutEndLine();
- m->mothurOut(outputFileName); m->mothurOutEndLine(); outputNames.push_back(outputFileName);
+ for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
m->mothurOutEndLine();
return 0;
}
}
//**********************************************************************************************************************
-int SubSampleCommand::getSubSampleShared(vector<SharedRAbundVector*>& thislookup, ofstream& filename) {
+int SubSampleCommand::getSubSampleShared() {
try {
+
+ string thisOutputDir = outputDir;
+ if (outputDir == "") { thisOutputDir += m->hasPath(sharedfile); }
+ string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(sharedfile)) + "subsample" + m->getExtension(sharedfile);
+
+ ofstream out;
+ m->openOutputFile(outputFileName, out);
+ outputTypes["shared"].push_back(outputFileName); outputNames.push_back(outputFileName);
+
+ InputData* input = new InputData(sharedfile, "sharedfile");
+ vector<SharedRAbundVector*> lookup = input->getSharedRAbundVectors();
+ string lastLabel = lookup[0]->getLabel();
//if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
set<string> processedLabels;
set<string> userLabels = labels;
- string lastLabel = lookup[0]->getLabel();
//as long as you are not at the end of the file or done wih the lines you want
while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
- if (m->control_pressed) { return 0; }
+ if (m->control_pressed) { out.close(); return 0; }
if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){
m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
- //process lookup
-
-
+ processShared(lookup, out);
processedLabels.insert(lookup[0]->getLabel());
userLabels.erase(lookup[0]->getLabel());
lookup = input->getSharedRAbundVectors(lastLabel);
m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
- //process lookup
+ processShared(lookup, out);
+
processedLabels.insert(lookup[0]->getLabel());
userLabels.erase(lookup[0]->getLabel());
}
- if (m->control_pressed) { return 0; }
+ if (m->control_pressed) { out.close(); return 0; }
//output error messages about any remaining user labels
set<string>::iterator it;
m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
- //process lookup
-
-
+ processShared(lookup, out);
for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
}
- //reset groups parameter
- globaldata->Groups.clear();
+ out.close();
return 0;
exit(1);
}
}
-
-
+//**********************************************************************************************************************
+int SubSampleCommand::processShared(vector<SharedRAbundVector*>& thislookup, ofstream& out) {
+ try {
+
+ if (pickedGroups) { eliminateZeroOTUS(thislookup); }
+
+ if (size == 0) { //user has not set size, set size = smallest samples size
+ size = thislookup[0]->getNumSeqs();
+ for (int i = 1; i < thislookup.size(); i++) {
+ int thisSize = thislookup[i]->getNumSeqs();
+
+ if (thisSize < size) { size = thisSize; }
+ }
+ }
+
+ int numBins = thislookup[0]->getNumBins();
+ for (int i = 0; i < thislookup.size(); i++) {
+ int thisSize = thislookup[i]->getNumSeqs();
+
+ if (thisSize != size) {
+
+ string thisgroup = thislookup[i]->getGroup();
+
+ OrderVector* order = new OrderVector();
+ for(int p=0;p<numBins;p++){
+ for(int j=0;j<thislookup[i]->getAbundance(p);j++){
+ order->push_back(p);
+ }
+ }
+ random_shuffle(order->begin(), order->end());
+
+ SharedRAbundVector* temp = new SharedRAbundVector(numBins);
+ temp->setLabel(thislookup[i]->getLabel());
+ temp->setGroup(thislookup[i]->getGroup());
+
+ delete thislookup[i];
+ thislookup[i] = temp;
+
+
+ for (int j = 0; j < size; j++) {
+ //get random number to sample from order between 0 and thisSize-1.
+ int myrand = (int)((float)(rand()) / (RAND_MAX / (thisSize-1) + 1));
+
+ int bin = order->get(myrand);
+
+ int abund = thislookup[i]->getAbundance(bin);
+ thislookup[i]->set(bin, (abund+1), thisgroup);
+ }
+ delete order;
+ }
+ }
+
+ //subsampling may have created some otus with no sequences in them
+ eliminateZeroOTUS(thislookup);
+
+ for (int i = 0; i < thislookup.size(); i++) {
+ out << thislookup[i]->getLabel() << '\t' << thislookup[i]->getGroup() << '\t';
+ thislookup[i]->print(out);
+ }
+
+ return 0;
+
+ }
+ catch(exception& e) {
+ m->errorOut(e, "SubSampleCommand", "eliminateZeroOTUS");
+ exit(1);
+ }
+}
//**********************************************************************************************************************
int SubSampleCommand::eliminateZeroOTUS(vector<SharedRAbundVector*>& thislookup) {
try {
//for each bin
for (int i = 0; i < thislookup[0]->getNumBins(); i++) {
if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } return 0; }
-
+
//look at each sharedRabund and make sure they are not all zero
bool allZero = true;
for (int j = 0; j < thislookup.size(); j++) {
}
}
}
-
+
for (int j = 0; j < thislookup.size(); j++) { delete thislookup[j]; }
-
+ thislookup.clear();
+
thislookup = newLookup;
return 0;
-
+
}
catch(exception& e) {
m->errorOut(e, "SubSampleCommand", "eliminateZeroOTUS");
//**********************************************************************************************************************
+
*/
#include "command.hpp"
-#include "inputdata.h"
-#include "readotu.h"
+#include "globaldata.hpp"
#include "sharedrabundvector.h"
+#include "listvector.hpp"
+#include "rabundvector.hpp"
+#include "inputdata.h"
-class GlobalData;
class SubSampleCommand : public Command {
private:
GlobalData* globaldata;
- ReadOTUFile* read;
- InputData* input;
- vector<SharedRAbundVector*> lookup;
- ListVector* list;
- RAbundVector* rabund;
- SAbundVector* sabund;
- bool abort, allLines, pickedGroups;
+ bool abort, pickedGroups, allLines;
+ string listfile, groupfile, sharedfile, rabundfile, sabundfile, fastafile, namefile;
set<string> labels; //holds labels to be used
string groups, label, outputDir;
vector<string> Groups, outputNames;
map<string, vector<string> > outputTypes;
+ int size;
int eliminateZeroOTUS(vector<SharedRAbundVector*>&);
- int getSubSampleShared(vector<SharedRAbundVector*>&, ofstream&);
+ int getSubSampleShared();
+ int processShared(vector<SharedRAbundVector*>&, ofstream&);
+
};
#endif