From 5a4ac4f954c4b4445bcee272f1f8220ddcc9c1e4 Mon Sep 17 00:00:00 2001 From: Sarah Westcott Date: Mon, 30 Jul 2012 15:25:39 -0400 Subject: [PATCH] fixed subsample name file name issue. added count parameter to cluster command. added read to readColumn and readMatrix that uses a count table. added functions to countTable class to work with clusters reads. --- cluster.cpp | 2 +- clustercommand.cpp | 119 +++++++++++++++++------- clustercommand.h | 5 +- counttable.cpp | 36 +++++++- counttable.h | 4 + createdatabasecommand.cpp | 2 +- getcurrentcommand.cpp | 2 +- mgclustercommand.cpp | 4 +- mothurout.cpp | 7 +- nameassignment.cpp | 5 +- nameassignment.hpp | 2 +- optionparser.cpp | 2 +- readcolumn.cpp | 120 +++++++++++++++++++++++- readcolumn.h | 1 + readmatrix.hpp | 2 + readphylip.cpp | 187 ++++++++++++++++++++++++++++++++++++++ readphylip.h | 1 + screenseqscommand.cpp | 5 +- setcurrentcommand.cpp | 18 +++- setcurrentcommand.h | 2 +- sparsedistancematrix.cpp | 2 +- subsamplecommand.cpp | 4 +- summarycommand.cpp | 2 +- 23 files changed, 478 insertions(+), 56 deletions(-) diff --git a/cluster.cpp b/cluster.cpp index 2a3e751..18133a2 100644 --- a/cluster.cpp +++ b/cluster.cpp @@ -64,7 +64,7 @@ void Cluster::update(double& cutOFF){ nRowCells = dMatrix->seqVec[smallRow].size(); vector foundCol(nColCells, 0); - //cout << "small cell: " << smallRow << '\t' << smallCol << endl; + //cout << dMatrix->getNNodes() << " small cell: " << smallRow << '\t' << smallCol << endl; int search; bool changed; diff --git a/clustercommand.cpp b/clustercommand.cpp index 56ce8bb..19eaf85 100644 --- a/clustercommand.cpp +++ b/clustercommand.cpp @@ -13,12 +13,14 @@ #include "readmatrix.hpp" #include "clusterdoturcommand.h" + //********************************************************************************************************************** vector ClusterCommand::setParameters(){ try { CommandParameter pphylip("phylip", "InputTypes", "", "", "PhylipColumn", "PhylipColumn", "none",false,false); parameters.push_back(pphylip); - CommandParameter pname("name", "InputTypes", "", "", "none", "none", "ColumnName",false,false); parameters.push_back(pname); - CommandParameter pcolumn("column", "InputTypes", "", "", "PhylipColumn", "PhylipColumn", "ColumnName",false,false); parameters.push_back(pcolumn); + CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "ColumnName",false,false); parameters.push_back(pname); + CommandParameter pcount("count", "InputTypes", "", "", "NameCount", "none", "none",false,false); parameters.push_back(pcount); + CommandParameter pcolumn("column", "InputTypes", "", "", "PhylipColumn", "PhylipColumn", "ColumnName",false,false); parameters.push_back(pcolumn); CommandParameter pcutoff("cutoff", "Number", "", "10", "", "", "",false,false); parameters.push_back(pcutoff); CommandParameter pprecision("precision", "Number", "", "100", "", "", "",false,false); parameters.push_back(pprecision); CommandParameter pmethod("method", "Multiple", "furthest-nearest-average-weighted", "average", "", "", "",false,false); parameters.push_back(pmethod); @@ -42,7 +44,7 @@ vector ClusterCommand::setParameters(){ string ClusterCommand::getHelpString(){ try { string helpString = ""; - helpString += "The cluster command parameter options are phylip, column, name, method, cuttoff, hard, precision, sim, showabund and timing. Phylip or column and name are required, unless you have a valid current file.\n"; + helpString += "The cluster command parameter options are phylip, column, name, count, method, cuttoff, hard, precision, sim, showabund and timing. Phylip or column and name are required, unless you have a valid current file.\n"; helpString += "The cluster command should be in the following format: \n"; helpString += "cluster(method=yourMethod, cutoff=yourCutoff, precision=yourPrecision) \n"; helpString += "The acceptable cluster methods are furthest, nearest, average and weighted. If no method is provided then average is assumed.\n"; @@ -169,6 +171,11 @@ ClusterCommand::ClusterCommand(string option) { if (namefile == "not open") { abort = true; } else if (namefile == "not found") { namefile = ""; } else { m->setNameFile(namefile); } + + countfile = validParameter.validFile(parameters, "count", true); + if (countfile == "not open") { abort = true; countfile = ""; } + else if (countfile == "not found") { countfile = ""; } + else { m->setCountTableFile(countfile); } if ((phylipfile == "") && (columnfile == "")) { //is there are current file available for either of these? @@ -187,16 +194,22 @@ ClusterCommand::ClusterCommand(string option) { else if ((phylipfile != "") && (columnfile != "")) { m->mothurOut("When executing a cluster command you must enter ONLY ONE of the following: phylip or column."); m->mothurOutEndLine(); abort = true; } if (columnfile != "") { - if (namefile == "") { + if ((namefile == "") && (countfile == "")){ namefile = m->getNameFile(); if (namefile != "") { m->mothurOut("Using " + namefile + " as input file for the name parameter."); m->mothurOutEndLine(); } else { - m->mothurOut("You need to provide a namefile if you are going to use the column format."); m->mothurOutEndLine(); - abort = true; + countfile = m->getCountTableFile(); + if (countfile != "") { m->mothurOut("Using " + countfile + " as input file for the count parameter."); m->mothurOutEndLine(); } + else { + m->mothurOut("You need to provide a namefile or countfile if you are going to use the column format."); m->mothurOutEndLine(); + abort = true; + } } } } + if ((countfile != "") && (namefile != "")) { m->mothurOut("When executing a cluster command you must enter ONLY ONE of the following: count or name."); m->mothurOutEndLine(); abort = true; } + //check for optional parameter and set defaults // ...at some point should added some additional type checking... //get user cutoff and precision or use defaults @@ -253,6 +266,7 @@ int ClusterCommand::execute(){ //run unique.seqs for deconvolute results string inputString = "phylip=" + distfile; if (namefile != "") { inputString += ", name=" + namefile; } + else if (countfile != "") { inputString += ", count=" + countfile; } inputString += ", precision=" + toString(precision); inputString += ", method=" + method; if (hard) { inputString += ", hard=T"; } @@ -281,22 +295,30 @@ int ClusterCommand::execute(){ read->setCutoff(cutoff); NameAssignment* nameMap = NULL; + CountTable* ct = NULL; if(namefile != ""){ nameMap = new NameAssignment(namefile); nameMap->readMap(); - } + read->read(nameMap); + }else if (countfile != "") { + ct = new CountTable(); + ct->readTable(countfile); + read->read(ct); + } - read->read(nameMap); list = read->getListVector(); matrix = read->getDMatrix(); - rabund = new RAbundVector(list->getRAbundVector()); + + if(countfile != "") { + rabund = new RAbundVector(); + createRabund(ct, list, rabund); //creates an rabund that includes the counts for the unique list + delete ct; + }else { rabund = new RAbundVector(list->getRAbundVector()); } delete read; if (m->control_pressed) { //clean up - delete list; delete matrix; delete rabund; - sabundFile.close();rabundFile.close();listFile.close(); - for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); - return 0; + delete list; delete matrix; delete rabund; if(countfile == ""){rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } + listFile.close(); m->mothurRemove((fileroot+ tag + ".list")); outputTypes.clear(); return 0; } //create cluster @@ -311,15 +333,19 @@ int ClusterCommand::execute(){ string sabundFileName = fileroot+ tag + "." + getOutputFileNameTag("sabund"); string rabundFileName = fileroot+ tag + "." + getOutputFileNameTag("rabund"); - string listFileName = fileroot+ tag + "." + getOutputFileNameTag("list"); + string listFileName = fileroot+ tag + "."; + if (countfile != "") { listFileName += "unique_"; } + listFileName += getOutputFileNameTag("list"); - m->openOutputFile(sabundFileName, sabundFile); - m->openOutputFile(rabundFileName, rabundFile); + if (countfile == "") { + m->openOutputFile(sabundFileName, sabundFile); + m->openOutputFile(rabundFileName, rabundFile); + outputNames.push_back(sabundFileName); outputTypes["sabund"].push_back(sabundFileName); + outputNames.push_back(rabundFileName); outputTypes["rabund"].push_back(rabundFileName); + + } m->openOutputFile(listFileName, listFile); - - outputNames.push_back(sabundFileName); outputTypes["sabund"].push_back(sabundFileName); - outputNames.push_back(rabundFileName); outputTypes["rabund"].push_back(rabundFileName); - outputNames.push_back(listFileName); outputTypes["list"].push_back(listFileName); + outputNames.push_back(listFileName); outputTypes["list"].push_back(listFileName); time_t estart = time(NULL); @@ -337,9 +363,8 @@ int ClusterCommand::execute(){ if (m->control_pressed) { //clean up delete list; delete matrix; delete rabund; delete cluster; - sabundFile.close();rabundFile.close();listFile.close(); - for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); - return 0; + if(countfile == "") {rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } + listFile.close(); m->mothurRemove((fileroot+ tag + ".list")); outputTypes.clear(); return 0; } if (print_start && m->isTrue(timing)) { @@ -393,9 +418,10 @@ int ClusterCommand::execute(){ delete list; delete rabund; delete cluster; - - sabundFile.close(); - rabundFile.close(); + if (countfile == "") { + sabundFile.close(); + rabundFile.close(); + } listFile.close(); if (saveCutoff != cutoff) { @@ -454,14 +480,17 @@ void ClusterCommand::printData(string label){ print_start = true; loops = 0; start = time(NULL); - - oldRAbund.setLabel(label); - if (m->isTrue(showabund)) { - oldRAbund.getSAbundVector().print(cout); - } - oldRAbund.print(rabundFile); - oldRAbund.getSAbundVector().print(sabundFile); - + + if (countfile == "") { + oldRAbund.print(rabundFile); + oldRAbund.getSAbundVector().print(sabundFile); + } + + oldRAbund.setLabel(label); + if (m->isTrue(showabund)) { + oldRAbund.getSAbundVector().print(cout); + } + oldList.setLabel(label); oldList.print(listFile); } @@ -473,3 +502,25 @@ void ClusterCommand::printData(string label){ } //********************************************************************************************************************** + +int ClusterCommand::createRabund(CountTable*& ct, ListVector*& list, RAbundVector*& rabund){ + try { + rabund->setLabel(list->getLabel()); + for(int i = 0; i < list->getNumBins(); i++) { + if (m->control_pressed) { break; } + vector binNames; + string bin = list->get(i); + m->splitAtComma(bin, binNames); + int total = 0; + for (int j = 0; j < binNames.size(); j++) { total += ct->getNumSeqs(binNames[j]); } + rabund->push_back(total); + } + return 0; + } + catch(exception& e) { + m->errorOut(e, "ClusterCommand", "createRabund"); + exit(1); + } + +} +//********************************************************************************************************************** diff --git a/clustercommand.h b/clustercommand.h index e3dd214..f70bb32 100644 --- a/clustercommand.h +++ b/clustercommand.h @@ -15,6 +15,7 @@ #include "listvector.hpp" #include "cluster.hpp" #include "sparsedistancematrix.h" +#include "counttable.h" /* The cluster() command: The cluster command outputs a .list , .rabund and .sabund files. @@ -52,7 +53,7 @@ private: bool abort, hard, sim; - string method, fileroot, tag, outputDir, phylipfile, columnfile, namefile, format, distfile; + string method, fileroot, tag, outputDir, phylipfile, columnfile, namefile, format, distfile, countfile; double cutoff; string showabund, timing; int precision, length; @@ -64,6 +65,8 @@ private: void printData(string label); vector outputNames; + + int createRabund(CountTable*&, ListVector*&, RAbundVector*&); }; #endif diff --git a/counttable.cpp b/counttable.cpp index f632731..a664228 100644 --- a/counttable.cpp +++ b/counttable.cpp @@ -158,7 +158,41 @@ int CountTable::getNumSeqs(string seqName) { } } /************************************************************/ -//returns names of seqs +//returns unique index for sequence like get in NameAssignment +int CountTable::get(string seqName) { + try { + + map::iterator it = indexNameMap.find(seqName); + if (it == indexNameMap.end()) { + m->mothurOut("[ERROR]: " + seqName + " is not in your count table. Please correct.\n"); m->control_pressed = true; + }else { return it->second; } + + return -1; + } + catch(exception& e) { + m->errorOut(e, "CountTable", "get"); + exit(1); + } +} +/************************************************************/ +//create ListVector from uniques +ListVector CountTable::getListVector() { + try { + ListVector list(indexNameMap.size()); + for (map::iterator it = indexNameMap.begin(); it != indexNameMap.end(); it++) { + if (m->control_pressed) { break; } + list.set(it->second, it->first); + } + return list; + } + catch(exception& e) { + m->errorOut(e, "CountTable", "getListVector"); + exit(1); + } +} + +/************************************************************/ +//returns the names of all unique sequences in file vector CountTable::getNamesOfSeqs() { try { vector names; diff --git a/counttable.h b/counttable.h index ffc0816..8baff30 100644 --- a/counttable.h +++ b/counttable.h @@ -37,6 +37,7 @@ #include "mothurout.h" +#include "listvector.hpp" class CountTable { @@ -60,6 +61,9 @@ class CountTable { int getGroupIndex(string); //returns index in getGroupCounts vector of specific group vector getNamesOfSeqs(); int mergeCounts(string, string); //combines counts for 2 seqs, saving under the first name passed in. + int get(string); //returns unique sequence index for reading distance matrices like NameAssignment + ListVector getListVector(); + int size() { return indexNameMap.size(); } private: string filename; diff --git a/createdatabasecommand.cpp b/createdatabasecommand.cpp index 5919f0c..4331c29 100644 --- a/createdatabasecommand.cpp +++ b/createdatabasecommand.cpp @@ -396,7 +396,7 @@ int CreateDatabaseCommand::execute(){ //sanity check if (totalAbund != classifyOtuSizes[index]) { - m->mothurOut("[ERROR: OTU " + m->currentBinLabels[h] + " contains " + toString(totalAbund) + " sequence, but the rep and taxonomy files indicated this OTU should have " + toString(classifyOtuSizes[index]) + ". Make sure you are using files for the same distance.\n"); m->control_pressed = true; break; + m->mothurOut("[WARNING]: OTU " + m->currentBinLabels[h] + " contains " + toString(totalAbund) + " sequence, but the rep and taxonomy files indicated this OTU should have " + toString(classifyOtuSizes[index]) + ". Make sure you are using files for the same distance.\n"); //m->control_pressed = true; break; } //output repSeq diff --git a/getcurrentcommand.cpp b/getcurrentcommand.cpp index c3b5f0a..86046b6 100644 --- a/getcurrentcommand.cpp +++ b/getcurrentcommand.cpp @@ -140,7 +140,7 @@ int GetCurrentCommand::execute(){ m->setFlowFile(""); }else if (types[i] == "biom") { m->setBiomFile(""); - }else if (types[i] == "counttable") { + }else if (types[i] == "count") { m->setCountTableFile(""); }else if (types[i] == "processors") { m->setProcessors("1"); diff --git a/mgclustercommand.cpp b/mgclustercommand.cpp index dd98b37..4774504 100644 --- a/mgclustercommand.cpp +++ b/mgclustercommand.cpp @@ -268,7 +268,9 @@ int MGClusterCommand::execute(){ string sabundFileName = fileroot+ tag + "." + getOutputFileNameTag("sabund"); string rabundFileName = fileroot+ tag + "." + getOutputFileNameTag("rabund"); - string listFileName = fileroot+ tag + "." + getOutputFileNameTag("list"); + string listFileName = fileroot+ tag + "."; + if (countfile != "") { listFileName += "unique_"; } + listFileName += getOutputFileNameTag("list"); if (countfile == "") { m->openOutputFile(sabundFileName, sabundFile); diff --git a/mothurout.cpp b/mothurout.cpp index dfcf25b..9704464 100644 --- a/mothurout.cpp +++ b/mothurout.cpp @@ -43,7 +43,7 @@ set MothurOut::getCurrentTypes() { types.insert("tree"); types.insert("flow"); types.insert("biom"); - types.insert("counttable"); + types.insert("count"); types.insert("processors"); return types; @@ -79,7 +79,7 @@ void MothurOut::printCurrentFiles() { if (treefile != "") { mothurOut("tree=" + treefile); mothurOutEndLine(); } if (flowfile != "") { mothurOut("flow=" + flowfile); mothurOutEndLine(); } if (biomfile != "") { mothurOut("biom=" + biomfile); mothurOutEndLine(); } - if (counttablefile != "") { mothurOut("counttable=" + counttablefile); mothurOutEndLine(); } + if (counttablefile != "") { mothurOut("count=" + counttablefile); mothurOutEndLine(); } if (processors != "1") { mothurOut("processors=" + processors); mothurOutEndLine(); } } @@ -1052,6 +1052,9 @@ int MothurOut::openInputFile(string fileName, ifstream& fileHandle){ int MothurOut::renameFile(string oldName, string newName){ try { + + if (oldName == newName) { return 0; } + ifstream inTest; int exist = openInputFile(newName, inTest, ""); inTest.close(); diff --git a/nameassignment.cpp b/nameassignment.cpp index 126c4e0..1e42111 100644 --- a/nameassignment.cpp +++ b/nameassignment.cpp @@ -9,14 +9,15 @@ NameAssignment::NameAssignment(string nameMapFile){ m->openInputFile(nameMapFile, fileHandle); } - +//********************************************************************************************************************** +NameAssignment::NameAssignment(){ m = MothurOut::getInstance(); } //********************************************************************************************************************** void NameAssignment::readMap(){ try{ string firstCol, secondCol, skip; // int index = 0; - + map::iterator itData; int rowIndex = 0; diff --git a/nameassignment.hpp b/nameassignment.hpp index 758a080..87f9777 100644 --- a/nameassignment.hpp +++ b/nameassignment.hpp @@ -7,7 +7,7 @@ class NameAssignment : public map { public: NameAssignment(string); - NameAssignment(){}; + NameAssignment(); void readMap(); ListVector getListVector(); int get(string); diff --git a/optionparser.cpp b/optionparser.cpp index e3850e5..a6d5c0a 100644 --- a/optionparser.cpp +++ b/optionparser.cpp @@ -124,7 +124,7 @@ map OptionParser::getParameters() { it->second = m->getTaxonomyFile(); }else if (it->first == "biom") { it->second = m->getBiomFile(); - }else if (it->first == "counttable") { + }else if (it->first == "count") { it->second = m->getCountTableFile(); }else { m->mothurOut("[ERROR]: mothur does not save a current file for " + it->first); m->mothurOutEndLine(); diff --git a/readcolumn.cpp b/readcolumn.cpp index 665dcab..83d11c9 100644 --- a/readcolumn.cpp +++ b/readcolumn.cpp @@ -54,7 +54,7 @@ int ReadColumnMatrix::read(NameAssignment* nameMap){ map::iterator itA = nameMap->find(firstName); map::iterator itB = nameMap->find(secondName); - + if(itA == nameMap->end()){ m->mothurOut("AAError: Sequence '" + firstName + "' was not found in the names file, please correct\n"); exit(1); } if(itB == nameMap->end()){ m->mothurOut("ABError: Sequence '" + secondName + "' was not found in the names file, please correct\n"); exit(1); } @@ -144,6 +144,124 @@ int ReadColumnMatrix::read(NameAssignment* nameMap){ exit(1); } } +/***********************************************************************/ + +int ReadColumnMatrix::read(CountTable* countTable){ + try { + + string firstName, secondName; + float distance; + int nseqs = countTable->size(); + + DMatrix->resize(nseqs); + list = new ListVector(countTable->getListVector()); + + Progress* reading = new Progress("Reading matrix: ", nseqs * nseqs); + + int lt = 1; + int refRow = 0; //we'll keep track of one cell - Cell(refRow,refCol) - and see if it's transpose + int refCol = 0; //shows up later - Cell(refCol,refRow). If it does, then its a square matrix + + //need to see if this is a square or a triangular matrix... + + while(fileHandle && lt == 1){ //let's assume it's a triangular matrix... + + + fileHandle >> firstName >> secondName >> distance; // get the row and column names and distance + + if (m->control_pressed) { fileHandle.close(); delete reading; return 0; } + + int itA = countTable->get(firstName); + int itB = countTable->get(secondName); + + if (m->control_pressed) { exit(1); } + + if (distance == -1) { distance = 1000000; } + else if (sim) { distance = 1.0 - distance; } //user has entered a sim matrix that we need to convert. + + if(distance < cutoff && itA != itB){ + if(itA > itB){ + PDistCell value(itA, distance); + + + if(refRow == refCol){ // in other words, if we haven't loaded refRow and refCol... + refRow = itA; + refCol = itB; + DMatrix->addCell(itB, value); + } + else if(refRow == itA && refCol == itB){ + lt = 0; + } + else{ + DMatrix->addCell(itB, value); + } + } + else if(itA < itB){ + PDistCell value(itB, distance); + + if(refRow == refCol){ // in other words, if we haven't loaded refRow and refCol... + refRow = itA; + refCol = itB; + DMatrix->addCell(itA, value); + } + else if(refRow == itB && refCol == itA){ + lt = 0; + } + else{ + DMatrix->addCell(itA, value); + } + } + reading->update(itA * nseqs); + } + m->gobble(fileHandle); + } + + if(lt == 0){ // oops, it was square + + fileHandle.close(); //let's start over + DMatrix->clear(); //let's start over + + m->openInputFile(distFile, fileHandle); //let's start over + + while(fileHandle){ + fileHandle >> firstName >> secondName >> distance; + + if (m->control_pressed) { fileHandle.close(); delete reading; return 0; } + + int itA = countTable->get(firstName); + int itB = countTable->get(secondName); + + + if (m->control_pressed) { exit(1); } + + if (distance == -1) { distance = 1000000; } + else if (sim) { distance = 1.0 - distance; } //user has entered a sim matrix that we need to convert. + + if(distance < cutoff && itA > itB){ + PDistCell value(itA, distance); + DMatrix->addCell(itB, value); + reading->update(itA * nseqs); + } + + m->gobble(fileHandle); + } + } + + if (m->control_pressed) { fileHandle.close(); delete reading; return 0; } + + reading->finish(); + fileHandle.close(); + + list->setLabel("0"); + + return 1; + + } + catch(exception& e) { + m->errorOut(e, "ReadColumnMatrix", "read"); + exit(1); + } +} /***********************************************************************/ ReadColumnMatrix::~ReadColumnMatrix(){} diff --git a/readcolumn.h b/readcolumn.h index 415bef2..ff95aff 100644 --- a/readcolumn.h +++ b/readcolumn.h @@ -20,6 +20,7 @@ public: ReadColumnMatrix(string, bool); ~ReadColumnMatrix(); int read(NameAssignment*); + int read(CountTable*); private: ifstream fileHandle; string distFile; diff --git a/readmatrix.hpp b/readmatrix.hpp index 5ef8afa..90d5b43 100644 --- a/readmatrix.hpp +++ b/readmatrix.hpp @@ -13,6 +13,7 @@ #include "mothur.h" #include "listvector.hpp" #include "nameassignment.hpp" +#include "counttable.h" #include "sparsedistancematrix.h" class SparseMatrix; @@ -23,6 +24,7 @@ public: ReadMatrix(){ DMatrix = new SparseDistanceMatrix(); m = MothurOut::getInstance(); } virtual ~ReadMatrix() {} virtual int read(NameAssignment*){ return 1; } + virtual int read(CountTable*){ return 1; } void setCutoff(float c) { cutoff = c; } SparseDistanceMatrix* getDMatrix() { return DMatrix; } diff --git a/readphylip.cpp b/readphylip.cpp index d256f92..6de23e8 100644 --- a/readphylip.cpp +++ b/readphylip.cpp @@ -200,7 +200,194 @@ int ReadPhylipMatrix::read(NameAssignment* nameMap){ exit(1); } } +/***********************************************************************/ + +int ReadPhylipMatrix::read(CountTable* countTable){ + try { + + float distance; + int square, nseqs; + string name; + vector matrixNames; + + string numTest; + fileHandle >> numTest >> name; + + if (!m->isContainingOnlyDigits(numTest)) { m->mothurOut("[ERROR]: expected a number and got " + numTest + ", quitting."); m->mothurOutEndLine(); exit(1); } + else { convert(numTest, nseqs); } + + matrixNames.push_back(name); + + if(countTable == NULL){ + list = new ListVector(nseqs); + list->set(0, name); + } + else{ list = new ListVector(countTable->getListVector()); } + + if (m->control_pressed) { return 0; } + + char d; + while((d=fileHandle.get()) != EOF){ + + if(isalnum(d)){ + square = 1; + fileHandle.putback(d); + for(int i=0;i> distance; + } + break; + } + if(d == '\n'){ + square = 0; + break; + } + } + + Progress* reading; + DMatrix->resize(nseqs); + + if(square == 0){ + + reading = new Progress("Reading matrix: ", nseqs * (nseqs - 1) / 2); + + int index = 0; + + for(int i=1;icontrol_pressed) { fileHandle.close(); delete reading; return 0; } + + fileHandle >> name; + matrixNames.push_back(name); + + + //there's A LOT of repeated code throughout this method... + if(countTable == NULL){ + list->set(i, name); + + for(int j=0;jcontrol_pressed) { delete reading; fileHandle.close(); return 0; } + + fileHandle >> distance; + + if (distance == -1) { distance = 1000000; } + else if (sim) { distance = 1.0 - distance; } //user has entered a sim matrix that we need to convert. + + if(distance < cutoff){ + PDistCell value(i, distance); + DMatrix->addCell(j, value); + } + index++; + reading->update(index); + } + + } + else{ + for(int j=0;j> distance; + + if (m->control_pressed) { delete reading; fileHandle.close(); return 0; } + + if (distance == -1) { distance = 1000000; } + else if (sim) { distance = 1.0 - distance; } //user has entered a sim matrix that we need to convert. + + if(distance < cutoff){ + int iIndex = countTable->get(matrixNames[i]); + int jIndex = countTable->get(matrixNames[j]); + + if (m->control_pressed) { delete reading; fileHandle.close(); return 0; } + if (iIndex < jIndex) { + PDistCell value(jIndex, distance); + DMatrix->addCell(iIndex, value); + }else { + PDistCell value(iIndex, distance); + DMatrix->addCell(jIndex, value); + } + } + index++; + reading->update(index); + } + } + } + } + else{ + + reading = new Progress("Reading matrix: ", nseqs * nseqs); + + int index = nseqs; + + for(int i=1;i> name; + matrixNames.push_back(name); + + + + if(countTable == NULL){ + list->set(i, name); + for(int j=0;j> distance; + + if (m->control_pressed) { fileHandle.close(); delete reading; return 0; } + + if (distance == -1) { distance = 1000000; } + else if (sim) { distance = 1.0 - distance; } //user has entered a sim matrix that we need to convert. + + if(distance < cutoff && j < i){ + PDistCell value(i, distance); + DMatrix->addCell(j, value); + } + index++; + reading->update(index); + } + + } + else{ + for(int j=0;j> distance; + + if (m->control_pressed) { fileHandle.close(); delete reading; return 0; } + + if (distance == -1) { distance = 1000000; } + else if (sim) { distance = 1.0 - distance; } //user has entered a sim matrix that we need to convert. + + if(distance < cutoff && j < i){ + int iIndex = countTable->get(matrixNames[i]); + int jIndex = countTable->get(matrixNames[j]); + + if (m->control_pressed) { delete reading; fileHandle.close(); return 0; } + if (iIndex < jIndex) { + PDistCell value(jIndex, distance); + DMatrix->addCell(iIndex, value); + }else { + PDistCell value(iIndex, distance); + DMatrix->addCell(jIndex, value); + + } + } + index++; + reading->update(index); + } + } + } + } + + if (m->control_pressed) { fileHandle.close(); delete reading; return 0; } + + reading->finish(); + delete reading; + + list->setLabel("0"); + fileHandle.close(); + + + return 1; + + } + catch(exception& e) { + m->errorOut(e, "ReadPhylipMatrix", "read"); + exit(1); + } +} /***********************************************************************/ ReadPhylipMatrix::~ReadPhylipMatrix(){} /***********************************************************************/ diff --git a/readphylip.h b/readphylip.h index 244cc47..c772032 100644 --- a/readphylip.h +++ b/readphylip.h @@ -20,6 +20,7 @@ public: ReadPhylipMatrix(string, bool); ~ReadPhylipMatrix(); int read(NameAssignment*); + int read(CountTable*); private: ifstream fileHandle; string distFile; diff --git a/screenseqscommand.cpp b/screenseqscommand.cpp index 7f385d3..6a9a613 100644 --- a/screenseqscommand.cpp +++ b/screenseqscommand.cpp @@ -48,8 +48,8 @@ string ScreenSeqsCommand::getHelpString(){ helpString += "The screen.seqs command parameters are fasta, start, end, maxambig, maxhomop, minlength, maxlength, name, group, qfile, alignreport, taxonomy, optimize, criteria and processors.\n"; helpString += "The fasta parameter is required.\n"; helpString += "The alignreport and taxonomy parameters allow you to remove bad seqs from taxonomy and alignreport files.\n"; - helpString += "The start parameter .... The default is -1.\n"; - helpString += "The end parameter .... The default is -1.\n"; + helpString += "The start parameter is used to set a position the \"good\" sequences must start by. The default is -1.\n"; + helpString += "The end parameter is used to set a position the \"good\" sequences must end after. The default is -1.\n"; helpString += "The maxambig parameter allows you to set the maximum number of ambigious bases allowed. The default is -1.\n"; helpString += "The maxhomop parameter allows you to set a maximum homopolymer length. \n"; helpString += "The minlength parameter allows you to set and minimum sequence length. \n"; @@ -1236,7 +1236,6 @@ int ScreenSeqsCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& int length = MPIPos[start+i+1] - MPIPos[start+i]; char* buf4 = new char[length]; - memcpy(buf4, outputString.c_str(), length); MPI_File_read_at(inMPI, MPIPos[start+i], buf4, length, MPI_CHAR, &status); diff --git a/setcurrentcommand.cpp b/setcurrentcommand.cpp index 9673540..27d7b92 100644 --- a/setcurrentcommand.cpp +++ b/setcurrentcommand.cpp @@ -32,6 +32,7 @@ vector SetCurrentCommand::setParameters(){ CommandParameter ptree("tree", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(ptree); CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pshared); CommandParameter pordergroup("ordergroup", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pordergroup); + CommandParameter pcount("count", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pcount); CommandParameter prelabund("relabund", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(prelabund); CommandParameter psff("sff", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(psff); CommandParameter poligos("oligos", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(poligos); @@ -53,7 +54,7 @@ string SetCurrentCommand::getHelpString(){ try { string helpString = ""; helpString += "The set.current command allows you to set the current files saved by mothur.\n"; - helpString += "The set.current command parameters are: clear, phylip, column, list, rabund, sabund, name, group, design, order, tree, shared, ordergroup, relabund, fasta, qfile, sff, oligos, accnos, biom and taxonomy.\n"; + helpString += "The set.current command parameters are: clear, phylip, column, list, rabund, sabund, name, group, design, order, tree, shared, ordergroup, relabund, fasta, qfile, sff, oligos, accnos, biom, count and taxonomy.\n"; helpString += "The clear paramter is used to indicate which file types you would like to clear values for, multiple types can be separated by dashes.\n"; helpString += "The set.current command should be in the following format: \n"; helpString += "set.current(fasta=yourFastaFile) or set.current(fasta=amazon.fasta, clear=name-accnos)\n"; @@ -210,6 +211,14 @@ SetCurrentCommand::SetCurrentCommand(string option) { if (path == "") { parameters["ordergroup"] = inputDir + it->second; } } + it = parameters.find("count"); + //user has given a template file + if(it != parameters.end()){ + path = m->hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["count"] = inputDir + it->second; } + } + it = parameters.find("relabund"); //user has given a template file if(it != parameters.end()){ @@ -318,6 +327,11 @@ SetCurrentCommand::SetCurrentCommand(string option) { if (groupfile == "not open") { m->mothurOut("Ignoring: " + parameters["group"]); m->mothurOutEndLine(); groupfile = ""; } else if (groupfile == "not found") { groupfile = ""; } if (groupfile != "") { m->setGroupFile(groupfile); } + + countfile = validParameter.validFile(parameters, "count", true); + if (countfile == "not open") { m->mothurOut("Ignoring: " + parameters["count"]); m->mothurOutEndLine(); countfile = ""; } + else if (countfile == "not found") { countfile = ""; } + if (countfile != "") { m->setCountTableFile(countfile); } designfile = validParameter.validFile(parameters, "design", true); if (designfile == "not open") { m->mothurOut("Ignoring: " + parameters["design"]); m->mothurOutEndLine(); designfile = ""; } @@ -460,6 +474,8 @@ int SetCurrentCommand::execute(){ m->setFlowFile(""); }else if (types[i] == "biom") { m->setBiomFile(""); + }else if (types[i] == "count") { + m->setCountTableFile(""); }else if (types[i] == "processors") { m->setProcessors("1"); }else if (types[i] == "all") { diff --git a/setcurrentcommand.h b/setcurrentcommand.h index becc4d4..e741399 100644 --- a/setcurrentcommand.h +++ b/setcurrentcommand.h @@ -39,7 +39,7 @@ private: string clearTypes; vector types; - string accnosfile, phylipfile, columnfile, listfile, rabundfile, sabundfile, namefile, groupfile, designfile, taxonomyfile, biomfile; + string accnosfile, phylipfile, columnfile, listfile, rabundfile, sabundfile, namefile, groupfile, designfile, taxonomyfile, biomfile, countfile; string orderfile, treefile, sharedfile, ordergroupfile, relabundfile, fastafile, qualfile, sfffile, oligosfile, processors, flowfile; diff --git a/sparsedistancematrix.cpp b/sparsedistancematrix.cpp index 8035e13..7d50523 100644 --- a/sparsedistancematrix.cpp +++ b/sparsedistancematrix.cpp @@ -118,7 +118,7 @@ ull SparseDistanceMatrix::getSmallestCell(ull& row){ } } - random_shuffle(mins.begin(), mins.end()); //randomize the order of the iterators in the mins vector + //random_shuffle(mins.begin(), mins.end()); //randomize the order of the iterators in the mins vector row = mins[0].row; ull col = mins[0].col; diff --git a/subsamplecommand.cpp b/subsamplecommand.cpp index 3544101..f9cb1e6 100644 --- a/subsamplecommand.cpp +++ b/subsamplecommand.cpp @@ -569,8 +569,8 @@ int SubSampleCommand::getSubSampleFasta() { delete uniqueCommand; m->mothurCalling = false; - m->renameFile(filenames["name"][0], outputNameFileName); - m->renameFile(filenames["fasta"][0], outputFileName); + m->renameFile(filenames["name"][0], outputNameFileName); + m->renameFile(filenames["fasta"][0], outputFileName); outputTypes["name"].push_back(outputNameFileName); outputNames.push_back(outputNameFileName); diff --git a/summarycommand.cpp b/summarycommand.cpp index 9a6aac8..7a167a6 100644 --- a/summarycommand.cpp +++ b/summarycommand.cpp @@ -777,7 +777,7 @@ vector SummaryCommand::createGroupSummaryFile(int numLines, int numCols, } temp.close(); - //m->mothurRemove(outputNames[i]); + m->mothurRemove(outputNames[i]); } -- 2.39.2