From: pschloss Date: Fri, 31 Dec 2010 16:35:11 +0000 (+0000) Subject: added shhh.seqs command X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=commitdiff_plain;h=bf83aa09a52403a2faf1a1f82489d1e0c8c32283 added shhh.seqs command --- diff --git a/Mothur.xcodeproj/project.pbxproj b/Mothur.xcodeproj/project.pbxproj index febae68..72be773 100644 --- a/Mothur.xcodeproj/project.pbxproj +++ b/Mothur.xcodeproj/project.pbxproj @@ -27,6 +27,10 @@ 7E85BD1D11EB5E9B00FD37C0 /* qualityscores.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = qualityscores.cpp; sourceTree = ""; }; 7E962A40121F76B1007464B5 /* invsimpson.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = invsimpson.h; sourceTree = ""; }; 7E962A41121F76B1007464B5 /* invsimpson.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = invsimpson.cpp; sourceTree = ""; }; + 7E99CA1312C8B0970041246B /* shhhercommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = shhhercommand.h; sourceTree = ""; }; + 7E99CA1412C8B0970041246B /* shhhercommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = shhhercommand.cpp; sourceTree = ""; }; + 7E99D77C12CBAD780041246B /* untitled.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = untitled.h; path = ../untitled.h; sourceTree = SOURCE_ROOT; }; + 7E99D77D12CBAD780041246B /* untitled.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = untitled.cpp; path = ../untitled.cpp; sourceTree = SOURCE_ROOT; }; 7EA299BA11E384940022D8D3 /* sensspeccommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = sensspeccommand.h; sourceTree = ""; }; 7EA299BB11E384940022D8D3 /* sensspeccommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = sensspeccommand.cpp; sourceTree = ""; }; 7EC61A0911BEA6AF00F668D9 /* weightedlinkage.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = weightedlinkage.cpp; sourceTree = ""; }; @@ -577,6 +581,8 @@ 08FB7794FE84155DC02AAC07 /* mothur */ = { isa = PBXGroup; children = ( + 7E99D77C12CBAD780041246B /* untitled.h */, + 7E99D77D12CBAD780041246B /* untitled.cpp */, A768D95D1248FEAA008AB1D0 /* mothur */, A7639F8D1175DF35008F5578 /* makefile */, A7DA1FF0113FECD400BF472F /* alignment.cpp */, @@ -675,6 +681,8 @@ A7CB593B11402EF90010EB83 /* calculators */ = { isa = PBXGroup; children = ( + 7E99CA1312C8B0970041246B /* shhhercommand.h */, + 7E99CA1412C8B0970041246B /* shhhercommand.cpp */, A7DA200B113FECD400BF472F /* calculator.cpp */, A7DA200C113FECD400BF472F /* calculator.h */, A7DA1FED113FECD400BF472F /* ace.h */, diff --git a/commandfactory.cpp b/commandfactory.cpp index 5d9679a..7d47491 100644 --- a/commandfactory.cpp +++ b/commandfactory.cpp @@ -105,6 +105,7 @@ #include "consensusseqscommand.h" #include "trimflowscommand.h" #include "corraxescommand.h" +#include "shhhercommand.h" /*******************************************************/ @@ -229,6 +230,7 @@ CommandFactory::CommandFactory(){ commands["screen.seqs"] = "MPIEnabled"; commands["summary.seqs"] = "MPIEnabled"; commands["cluster.split"] = "MPIEnabled"; + commands["shhh.seqs"] = "MPIEnabled"; commands["sens.spec"] = "sens.spec"; commands["seq.error"] = "seq.error"; commands["quit"] = "MPIEnabled"; @@ -313,6 +315,7 @@ Command* CommandFactory::getCommand(string commandName, string optionString){ else if(commandName == "reverse.seqs") { command = new ReverseSeqsCommand(optionString); } else if(commandName == "trim.seqs") { command = new TrimSeqsCommand(optionString); } else if(commandName == "trim.flows") { command = new TrimFlowsCommand(optionString); } + else if(commandName == "shhh.seqs") { command = new ShhherCommand(optionString); } else if(commandName == "chimera.seqs") { command = new ChimeraSeqsCommand(optionString); } else if(commandName == "list.seqs") { command = new ListSeqsCommand(optionString); } else if(commandName == "get.seqs") { command = new GetSeqsCommand(optionString); } @@ -437,6 +440,7 @@ Command* CommandFactory::getCommand(string commandName, string optionString, str else if(commandName == "reverse.seqs") { pipecommand = new ReverseSeqsCommand(optionString); } else if(commandName == "trim.seqs") { pipecommand = new TrimSeqsCommand(optionString); } else if(commandName == "trim.flows") { pipecommand = new TrimFlowsCommand(optionString); } + else if(commandName == "shhh.seqs") { pipecommand = new ShhherCommand(optionString); } else if(commandName == "chimera.seqs") { pipecommand = new ChimeraSeqsCommand(optionString); } else if(commandName == "list.seqs") { pipecommand = new ListSeqsCommand(optionString); } else if(commandName == "get.seqs") { pipecommand = new GetSeqsCommand(optionString); } @@ -548,6 +552,7 @@ Command* CommandFactory::getCommand(string commandName){ else if(commandName == "reverse.seqs") { shellcommand = new ReverseSeqsCommand(); } else if(commandName == "trim.seqs") { shellcommand = new TrimSeqsCommand(); } else if(commandName == "trim.flows") { shellcommand = new TrimFlowsCommand(); } + else if(commandName == "shhh.seqs") { shellcommand = new ShhherCommand(); } else if(commandName == "chimera.seqs") { shellcommand = new ChimeraSeqsCommand(); } else if(commandName == "list.seqs") { shellcommand = new ListSeqsCommand(); } else if(commandName == "get.seqs") { shellcommand = new GetSeqsCommand(); } diff --git a/distancecommand.cpp b/distancecommand.cpp index 4cb9841..3711a1d 100644 --- a/distancecommand.cpp +++ b/distancecommand.cpp @@ -314,7 +314,7 @@ int DistanceCommand::execute(){ //delete filename; if (pid == 0) { //you are the root process - + //do your part string outputMyPart; diff --git a/flowdata.cpp b/flowdata.cpp index f40fdd6..a172a03 100644 --- a/flowdata.cpp +++ b/flowdata.cpp @@ -15,43 +15,49 @@ FlowData::FlowData(){} //********************************************************************************************************************** -FlowData::FlowData(ifstream& flowFile, float signal, float noise, int maxHomoP){ +FlowData::~FlowData(){ /* do nothing */ } + +//********************************************************************************************************************** + +FlowData::FlowData(int numFlows, float signal, float noise, int maxHomoP) : + numFlows(numFlows), signalIntensity(signal), noiseIntensity(noise), maxHomoP(maxHomoP){ try { m = MothurOut::getInstance(); + flowData.assign(numFlows, 0); baseFlow = "TACG"; seqName = ""; - numFlows = 0; locationString = ""; - seqLength = 0; + } + catch(exception& e) { + m->errorOut(e, "FlowData", "FlowData"); + exit(1); + } + +} + +//********************************************************************************************************************** + +bool FlowData::getNext(ifstream& flowFile){ + + try { string lengthString; string flowString; - flowFile >> seqName >> locationString >> lengthString >> flowString; - - convert(lengthString.substr(7), seqLength); - convert(flowString.substr(9), numFlows); - - flowData.resize(numFlows); + flowFile >> seqName >> endFlow; + for(int i=0;i> flowData[i]; } - if (seqName == "") { - m->mothurOut("Error reading quality file, name blank at position, " + toString(flowFile.tellg())); - m->mothurOutEndLine(); - } - else{ - seqName = seqName.substr(1); - for(int i=0;i> flowData[i]; } - } - - findDeadSpot(signal, noise, maxHomoP); + updateEndFlow(); translateFlow(); m->gobble(flowFile); + if(flowFile){ return 1; } + else { return 0; } } catch(exception& e) { - m->errorOut(e, "FlowData", "FlowData"); + m->errorOut(e, "FlowData", "getNext"); exit(1); } @@ -59,19 +65,20 @@ FlowData::FlowData(ifstream& flowFile, float signal, float noise, int maxHomoP){ //********************************************************************************************************************** -void FlowData::findDeadSpot(float signalIntensity, float noiseIntensity, int maxHomoP){ +void FlowData::updateEndFlow(){ try{ int currLength = 0; float maxIntensity = (float) maxHomoP + 0.49; - deadSpot = 0; - while(currLength < seqLength + 4){ + int deadSpot = 0; + + while(deadSpot < endFlow){ int signal = 0; int noise = 0; for(int i=0;i<4;i++){ - float intensity = flowData[i + 4 * deadSpot]; + float intensity = flowData[i + deadSpot]; if(intensity > signalIntensity){ signal++; @@ -79,18 +86,16 @@ void FlowData::findDeadSpot(float signalIntensity, float noiseIntensity, int max noise++; } } - currLength += (int)(intensity+0.5); } if(noise > 0 || signal == 0){ break; } - deadSpot++; + deadSpot += 4; } - deadSpot *= 4; - seqLength = currLength; - + endFlow = deadSpot; + } catch(exception& e) { m->errorOut(e, "FlowData", "findDeadSpot"); @@ -104,7 +109,7 @@ void FlowData::translateFlow(){ try{ sequence = ""; - for(int i=0;i maxFlows){ deadSpot = maxFlows; } + maxFlows = mF; + if(endFlow > maxFlows){ endFlow = maxFlows; } } catch(exception& e) { @@ -148,8 +153,8 @@ bool FlowData::hasMinFlows(int minFlows){ try{ bool pastMin = 0; - - if(deadSpot >= minFlows){ pastMin = 1; } + if(endFlow >= minFlows){ pastMin = 1; } + return pastMin; } catch(exception& e) { @@ -173,25 +178,12 @@ Sequence FlowData::getSequence(){ //********************************************************************************************************************** -int FlowData::getSeqLength(){ - - try{ - return seqLength; - } - catch(exception& e) { - m->errorOut(e, "FlowData", "getSeqLength"); - exit(1); - } -} - -//********************************************************************************************************************** - void FlowData::printFlows(ofstream& outFlowFile){ try{ - // outFlowFile << '>' << seqName << locationString << " length=" << seqLength << " numflows=" << maxFlows << endl; - outFlowFile << seqName << ' ' << deadSpot << ' ' << setprecision(2); +// outFlowFile << '>' << seqName << locationString << " length=" << seqLength << " numflows=" << maxFlows << endl; + outFlowFile << seqName << ' ' << endFlow << ' ' << setprecision(2); - for(int i=0;i' << seqName << locationString << " length=" << seqLength << " numflows=" << maxFlows << endl; - - outFlowFile << seqName << '|' << scrapCode << ' ' << deadSpot << ' ' << setprecision(2); + outFlowFile << seqName << '|' << scrapCode << ' ' << endFlow << ' ' << setprecision(2); for(int i=0;i' << seqName << endl; - outFASTA << sequence << endl; - - } - catch(exception& e) { - m->errorOut(e, "FlowData", "printFlows"); - exit(1); - } -} - - //********************************************************************************************************************** diff --git a/flowdata.h b/flowdata.h index dafeab3..09426f9 100644 --- a/flowdata.h +++ b/flowdata.h @@ -18,24 +18,25 @@ class FlowData { public: FlowData(); - FlowData(ifstream&, float, float, int); - ~FlowData(){}; + FlowData(int, float, float, int); + ~FlowData(); + bool getNext(ifstream&); + void capFlows(int); bool hasMinFlows(int); Sequence getSequence(); - - int getSeqLength(); + void printFlows(ofstream&); void printFlows(ofstream&, string); - void printFASTA(ofstream&); private: MothurOut* m; - - void findDeadSpot(float, float, int); - void translateFlow(); + void updateEndFlow(); + void translateFlow(); + float signalIntensity, noiseIntensity; + int maxHomoP; string seqName, locationString, sequence, baseFlow; - int numFlows, seqLength, deadSpot; + int numFlows, maxFlows, endFlow; vector flowData; }; diff --git a/makefile b/makefile index aa20f8c..5eb8daa 100644 --- a/makefile +++ b/makefile @@ -58,7 +58,7 @@ ifeq ($(strip $(USEREADLINE)),yes) -lncurses endif -USEMPI ?= no +USEMPI ?= yes ifeq ($(strip $(USEMPI)),yes) CXX = mpic++ @@ -93,7 +93,7 @@ mothur : $(OBJECTS) strip mothur install : mothur - cp mothur ../Release/mothur +# cp mothur ../Release/mothur %.o : %.c %.h $(COMPILE.c) $(OUTPUT_OPTION) $< diff --git a/mothur b/mothur index 83751b3..9feafa6 100755 Binary files a/mothur and b/mothur differ diff --git a/sffinfocommand.cpp b/sffinfocommand.cpp index 1a9b73f..3562dcc 100644 --- a/sffinfocommand.cpp +++ b/sffinfocommand.cpp @@ -331,8 +331,9 @@ int SffInfoCommand::extractSffInfo(string input, string accnos){ if (header.version != "0001") { m->mothurOut("Version is not supported, only support version 0001."); m->mothurOutEndLine(); return count; } //print common header - if (sfftxt) { printCommonHeader(outSfftxt, header); } - + if (sfftxt) { printCommonHeader(outSfftxt, header); } + if (flow) { outFlow << header.numFlowsPerRead << endl; } + //read through the sff file while (!in.eof()) { @@ -779,8 +780,12 @@ int SffInfoCommand::printQualSeqData(ofstream& out, seqRead& read, Header& heade int SffInfoCommand::printFlowSeqData(ofstream& out, seqRead& read, Header& header) { try { if(header.clipQualRight > header.clipQualLeft){ - out << ">" << header.name << " xy=" << header.xy << " length=" << (header.clipQualRight-header.clipQualLeft) << " numflows=" << read.flowgram.size() << endl; - for (int i = 0; i < read.flowgram.size(); i++) { out << setprecision(2) << (read.flowgram[i]/(float)100) << ' '; } + + int rightIndex = 0; + for (int i = 0; i < header.clipQualRight; i++) { rightIndex += read.flowIndex[i]; } + + out << header.name << ' ' << rightIndex; + for (int i = 0; i < read.flowgram.size(); i++) { out << setprecision(2) << ' ' << (read.flowgram[i]/(float)100); } out << endl; } diff --git a/shhhercommand.cpp b/shhhercommand.cpp new file mode 100644 index 0000000..24301a2 --- /dev/null +++ b/shhhercommand.cpp @@ -0,0 +1,2122 @@ +/* + * shhher.cpp + * Mothur + * + * Created by Pat Schloss on 12/27/10. + * Copyright 2010 Schloss Lab. All rights reserved. + * + */ + +#include "shhhercommand.h" + +#include "readcolumn.h" +#include "readmatrix.hpp" +#include "rabundvector.hpp" +#include "sabundvector.hpp" +#include "listvector.hpp" +#include "cluster.hpp" +#include "sparsematrix.hpp" + +//********************************************************************************************************************** + +#define NUMBINS 1000 +#define HOMOPS 10 +#define MIN_COUNT 0.1 +#define MIN_WEIGHT 0.1 +#define MIN_TAU 0.0001 +#define MIN_ITER 10 + +//********************************************************************************************************************** + +vector ShhherCommand::getValidParameters(){ + try { + string Array[] = { + "file", "flow", "lookup", "cutoff", "sigma", "outputdir","inputdir", "processors" + }; + + vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); + return myArray; + } + catch(exception& e) { + m->errorOut(e, "ShhherCommand", "getValidParameters"); + exit(1); + } +} + +//********************************************************************************************************************** + +ShhherCommand::ShhherCommand(){ + try { + abort = true; + + //initialize outputTypes + vector tempOutNames; + outputTypes["pn.dist"] = tempOutNames; + + } + catch(exception& e) { + m->errorOut(e, "ShhherCommand", "ShhherCommand"); + exit(1); + } +} + +//********************************************************************************************************************** + +vector ShhherCommand::getRequiredParameters(){ + try { + string Array[] = {"flow"}; + vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); + return myArray; + } + catch(exception& e) { + m->errorOut(e, "ShhherCommand", "getRequiredParameters"); + exit(1); + } +} + +//********************************************************************************************************************** + +vector ShhherCommand::getRequiredFiles(){ + try { + vector myArray; + return myArray; + } + catch(exception& e) { + m->errorOut(e, "ShhherCommand", "getRequiredFiles"); + exit(1); + } +} + +//********************************************************************************************************************** + +ShhherCommand::ShhherCommand(string option) { + try { + +#ifdef USE_MPI + MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are + MPI_Comm_size(MPI_COMM_WORLD, &ncpus); + + if(pid == 0){ +#endif + + + abort = false; + + + //allow user to run help + if(option == "help") { help(); abort = true; } + + else { + + //valid paramters for this command + string AlignArray[] = { + "file", "flow", "lookup", "cutoff", "sigma", "outputdir","inputdir", "processors" + }; + + vector myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string))); + + OptionParser parser(option); + map parameters = parser.getParameters(); + + ValidParameters validParameter; + map::iterator it; + + //check to make sure all parameters are valid for command + for (it = parameters.begin(); it != parameters.end(); it++) { + if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; } + } + + //initialize outputTypes + vector tempOutNames; + outputTypes["pn.dist"] = tempOutNames; + // outputTypes["fasta"] = tempOutNames; + + //if the user changes the input directory command factory will send this info to us in the output parameter + string inputDir = validParameter.validFile(parameters, "inputdir", false); + if (inputDir == "not found"){ inputDir = ""; } + else { + string path; + it = parameters.find("flow"); + //user has given a template file + if(it != parameters.end()){ + path = m->hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["flow"] = inputDir + it->second; } + } + + it = parameters.find("lookup"); + //user has given a template file + if(it != parameters.end()){ + path = m->hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["lookup"] = inputDir + it->second; } + } + + it = parameters.find("file"); + //user has given a template file + if(it != parameters.end()){ + path = m->hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["file"] = inputDir + it->second; } + } + } + + + //check for required parameters + flowFileName = validParameter.validFile(parameters, "flow", true); + flowFilesFileName = validParameter.validFile(parameters, "file", true); + if (flowFileName == "not found" && flowFilesFileName == "not found") { + m->mothurOut("values for either flow or file must be provided for the shhh.seqs command."); + m->mothurOutEndLine(); + abort = true; + } + else if (flowFileName == "not open" || flowFilesFileName == "not open") { abort = true; } + + //if the user changes the output directory command factory will send this info to us in the output parameter + outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ + outputDir = ""; + outputDir += m->hasPath(flowFileName); //if user entered a file with a path then preserve it + } + + + //check for optional parameter and set defaults + // ...at some point should added some additional type checking... + string temp; + temp = validParameter.validFile(parameters, "lookup", true); + if (temp == "not found") { lookupFileName = "LookUp_E123.pat"; } + else if(temp == "not open") { abort = true; } + else { lookupFileName = temp; } + + temp = validParameter.validFile(parameters, "processors", false);if (temp == "not found"){ temp = "1"; } + convert(temp, processors); + + temp = validParameter.validFile(parameters, "cutoff", false); if (temp == "not found"){ temp = "0.01"; } + convert(temp, cutoff); + + temp = validParameter.validFile(parameters, "mindelta", false); if (temp == "not found"){ temp = "0.000001"; } + convert(temp, minDelta); + + temp = validParameter.validFile(parameters, "maxiter", false); if (temp == "not found"){ temp = "1000"; } + convert(temp, maxIters); + + temp = validParameter.validFile(parameters, "sigma", false);if (temp == "not found") { temp = "60"; } + convert(temp, sigma); + + globaldata = GlobalData::getInstance(); + } + +#ifdef USE_MPI + } +#endif + + } + catch(exception& e) { + m->errorOut(e, "ShhherCommand", "ShhherCommand"); + exit(1); + } +} + +//********************************************************************************************************************** + +ShhherCommand::~ShhherCommand(){} + +//********************************************************************************************************************** + +void ShhherCommand::help(){ + try { + m->mothurOut("The shhher command reads a file containing flowgrams and creates a file of corrected sequences.\n"); + } + catch(exception& e) { + m->errorOut(e, "ShhherCommand", "help"); + exit(1); + } +} + +//********************************************************************************************************************** +#ifdef USE_MPI +int ShhherCommand::execute(){ + try { + int tag = 1976; + MPI_Status status; + + double begClock = clock(); + unsigned long int begTime = time(NULL); + + cout.setf(ios::fixed, ios::floatfield); + cout.setf(ios::showpoint); + cout << setprecision(2); + + + if(pid == 0){ + + for(int i=1;i flowFileVector; + if(flowFilesFileName != "not found"){ + string fName; + + ifstream flowFilesFile; + m->openInputFile(flowFilesFileName, flowFilesFile); + while(flowFilesFile){ + flowFilesFile >> fName; + flowFileVector.push_back(fName); + m->gobble(flowFilesFile); + } + } + else{ + flowFileVector.push_back(flowFileName); + } + int numFiles = flowFileVector.size(); + + for(int i=1;i>>>>\tProcessing " << flowFileName << " (file " << i+1 << " of " << numFiles << ")\t<<<<<" << endl; + cout << "Reading flowgrams..." << endl; + getFlowData(); + cout << "Identifying unique flowgrams..." << endl; + getUniques(); + + cout << "Calculating distances between flowgrams..." << endl; + char fileName[1024]; + strcpy(fileName, flowFileName.c_str()); + + for(int i=1;iappendFiles((distFileName + ".temp." + toString(i)), distFileName); + remove((distFileName + ".temp." + toString(i)).c_str()); + } + + string namesFileName = createNamesFile(); + + cout << "\nClustering flowgrams..." << endl; + string listFileName = cluster(distFileName, namesFileName); + // string listFileName = "PriestPot_C7.pn.list"; + // string listFileName = "test.mock_rep3.v69.pn.list"; + + getOTUData(listFileName); + initPyroCluster(); + + for(int i=1;i minDelta) || iter < MIN_ITER || (maxDelta > minDelta && iter < maxIters)){ + + double cycClock = clock(); + unsigned long int cycTime = time(NULL); + fill(); + + int total = singleTau.size(); + for(int i=1;i tempCentroids(numOTUs, 0); + vector tempChange(numOTUs, 0); + + MPI_Recv(&tempCentroids[0], numOTUs, MPI_INT, i, tag, MPI_COMM_WORLD, &status); + MPI_Recv(&tempChange[0], numOTUs, MPI_SHORT, i, tag, MPI_COMM_WORLD, &status); + + for(int j=otuStart;j childSeqIndex(childTotal, 0); + vector childSingleTau(childTotal, 0); + vector childDist(numSeqs * numOTUs, 0); + vector otuIndex(childTotal, 0); + + MPI_Recv(&childSeqIndex[0], childTotal, MPI_INT, i, tag, MPI_COMM_WORLD, &status); + MPI_Recv(&childSingleTau[0], childTotal, MPI_DOUBLE, i, tag, MPI_COMM_WORLD, &status); + MPI_Recv(&childDist[0], numOTUs * numSeqs, MPI_DOUBLE, i, tag, MPI_COMM_WORLD, &status); + MPI_Recv(&otuIndex[0], childTotal, MPI_INT, i, tag, MPI_COMM_WORLD, &status); + + int oldTotal = total; + total += childTotal; + singleTau.resize(total, 0); + seqIndex.resize(total, 0); + seqNumber.resize(total, 0); + + int childIndex = 0; + + for(int j=oldTotal;j minDelta) || iter < MIN_ITER || (maxDelta > minDelta && iter < maxIters)){ + int live = 1; + for(int i=1;i otuCounts(numOTUs, 0); + for(int i=0;i otuIndex(total, 0); + calcNewDistancesChildMPI(startSeq, endSeq, otuIndex); + total = otuIndex.size(); + + MPI_Send(&total, 1, MPI_INT, 0, tag, MPI_COMM_WORLD); + MPI_Send(&seqIndex[0], total, MPI_INT, 0, tag, MPI_COMM_WORLD); + MPI_Send(&singleTau[0], total, MPI_DOUBLE, 0, tag, MPI_COMM_WORLD); + MPI_Send(&dist[0], numOTUs * numSeqs, MPI_DOUBLE, 0, tag, MPI_COMM_WORLD); + MPI_Send(&otuIndex[0], total, MPI_INT, 0, tag, MPI_COMM_WORLD); + + MPI_Recv(&live, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status); + } + } + } + + MPI_Barrier(MPI_COMM_WORLD); + return 0; + + } + catch(exception& e) { + m->errorOut(e, "ShhherCommand", "execute"); + exit(1); + } +} + +/**************************************************************************************************/ + +string ShhherCommand::flowDistMPI(int startSeq, int stopSeq){ + try{ + ostringstream outStream; + outStream.setf(ios::fixed, ios::floatfield); + outStream.setf(ios::dec, ios::basefield); + outStream.setf(ios::showpoint); + outStream.precision(6); + + int begTime = time(NULL); + double begClock = clock(); + + for(int i=startSeq;ierrorOut(e, "ShhherCommand", "flowDistParentFork"); + exit(1); + } +} + +#else +//********************************************************************************************************************** + +int ShhherCommand::execute(){ + try { + if (abort == true) { return 0; } + + cout.setf(ios::fixed, ios::floatfield); + cout.setf(ios::showpoint); + + getSingleLookUp(); + getJointLookUp(); + + + vector flowFileVector; + if(flowFilesFileName != "not found"){ + string fName; + + ifstream flowFilesFile; + m->openInputFile(flowFilesFileName, flowFilesFile); + while(flowFilesFile){ + flowFilesFile >> fName; + flowFileVector.push_back(fName); + m->gobble(flowFilesFile); + } + } + else{ + flowFileVector.push_back(flowFileName); + } + int numFiles = flowFileVector.size(); + + + for(int i=0;i>>>>\tProcessing " << flowFileName << " (file " << i+1 << " of " << numFiles << ")\t<<<<<" << endl; + cout << "Reading flowgrams..." << endl; + getFlowData(); + cout << "Identifying unique flowgrams..." << endl; + getUniques(); + + + cout << "Calculating distances between flowgrams..." << endl; + string distFileName = createDistFile(processors); + string namesFileName = createNamesFile(); + + cout << "\nClustering flowgrams..." << endl; + string listFileName = cluster(distFileName, namesFileName); + getOTUData(listFileName); + + initPyroCluster(); + + double maxDelta = 0; + int iter = 0; + + double begClock = clock(); + unsigned long int begTime = time(NULL); + + cout << "\nDenoising flowgrams..." << endl; + cout << "iter\tmaxDelta\tnLL\t\tcycletime" << endl; + + while((maxIters == 0 && maxDelta > minDelta) || iter < MIN_ITER || (maxDelta > minDelta && iter < maxIters)){ + + double cycClock = clock(); + unsigned long int cycTime = time(NULL); + fill(); + + calcCentroids(); + + maxDelta = getNewWeights(); + double nLL = getLikelihood(); + checkCentroids(); + + calcNewDistances(); + + iter++; + + cout << iter << '\t' << maxDelta << '\t' << setprecision(2) << nLL << '\t' << time(NULL) - cycTime << '\t' << setprecision(6) << (clock() - cycClock)/(double)CLOCKS_PER_SEC << endl; + } + + cout << "\nFinalizing..." << endl; + fill(); + setOTUs(); + + vector otuCounts(numOTUs, 0); + for(int i=0;ierrorOut(e, "ShhherCommand", "execute"); + exit(1); + } +} +#endif +/**************************************************************************************************/ + +void ShhherCommand::getFlowData(){ + try{ + ifstream flowFile; + m->openInputFile(flowFileName, flowFile); + + string seqName; + + int currentNumFlowCells; + + float intensity; + + flowFile >> numFlowCells; + int index = 0;//pcluster + while(!flowFile.eof()){ + flowFile >> seqName >> currentNumFlowCells; + lengths.push_back(currentNumFlowCells); + + seqNameVector.push_back(seqName); + nameMap[seqName] = index++;//pcluster + + for(int i=0;i> intensity; + if(intensity > 9.99) { intensity = 9.99; } + int intI = int(100 * intensity + 0.0001); + flowDataIntI.push_back(intI); + } + m->gobble(flowFile); + } + flowFile.close(); + + numSeqs = seqNameVector.size(); + + for(int i=0;ierrorOut(e, "ShhherCommand", "getFlowData"); + exit(1); + } +} + +/**************************************************************************************************/ + +void ShhherCommand::getSingleLookUp(){ + try{ + // these are the -log probabilities that a signal corresponds to a particular homopolymer length + singleLookUp.assign(HOMOPS * NUMBINS, 0); + + int index = 0; + ifstream lookUpFile; + m->openInputFile(lookupFileName, lookUpFile); + + for(int i=0;i> logFracFreq; + + for(int j=0;j> singleLookUp[index]; + index++; + } + } + lookUpFile.close(); + } + catch(exception& e) { + m->errorOut(e, "ShhherCommand", "getSingleLookUp"); + exit(1); + } +} + +/**************************************************************************************************/ + +void ShhherCommand::getJointLookUp(){ + try{ + + // the most likely joint probability (-log) that two intenities have the same polymer length + jointLookUp.resize(NUMBINS * NUMBINS, 0); + + for(int i=0;ierrorOut(e, "ShhherCommand", "getJointLookUp"); + exit(1); + } +} + +/**************************************************************************************************/ + +float ShhherCommand::getProbIntensity(int intIntensity){ + try{ + float minNegLogProb = 10000000000; + + for(int i=0;ierrorOut(e, "ShhherCommand", "getProbIntensity"); + exit(1); + } +} + +/**************************************************************************************************/ + +void ShhherCommand::getUniques(){ + try{ + + + numUniques = 0; + uniqueFlowgrams.assign(numFlowCells * numSeqs, -1); + uniqueCount.assign(numSeqs, 0); // anWeights + uniqueLengths.assign(numSeqs, 0); + mapSeqToUnique.assign(numSeqs, -1); + mapUniqueToSeq.assign(numSeqs, -1); + + vector uniqueFlowDataIntI(numFlowCells * numSeqs, -1); + + for(int i=0;i current(numFlowCells); + for(int j=0;jerrorOut(e, "ShhherCommand", "getUniques"); + exit(1); + } +} + +/**************************************************************************************************/ + +float ShhherCommand::calcPairwiseDist(int seqA, int seqB){ + try{ + int minLength = lengths[mapSeqToUnique[seqA]]; + if(lengths[seqB] < minLength){ minLength = lengths[mapSeqToUnique[seqB]]; } + + int ANumFlowCells = seqA * numFlowCells; + int BNumFlowCells = seqB * numFlowCells; + + float dist = 0; + + for(int i=0;ierrorOut(e, "ShhherCommand", "calcPairwiseDist"); + exit(1); + } +} + +/**************************************************************************************************/ + +void ShhherCommand::flowDistParentFork(string distFileName, int startSeq, int stopSeq){ + try{ + + ostringstream outStream; + outStream.setf(ios::fixed, ios::floatfield); + outStream.setf(ios::dec, ios::basefield); + outStream.setf(ios::showpoint); + outStream.precision(6); + + int begTime = time(NULL); + double begClock = clock(); + + for(int i=startSeq;imothurOut(toString(i) + "\t" + toString(time(NULL) - begTime)); + m->mothurOut("\t" + toString((clock()-begClock)/CLOCKS_PER_SEC)); + m->mothurOutEndLine(); + } + } + m->mothurOut(toString(stopSeq-1) + "\t" + toString(time(NULL) - begTime)); + m->mothurOut("\t" + toString((clock()-begClock)/CLOCKS_PER_SEC)); + m->mothurOutEndLine(); + + ofstream distFile((distFileName + "temp." + toString(pid)).c_str()); + distFile << outStream.str(); + distFile.close(); + } + catch(exception& e) { + m->errorOut(e, "ShhherCommand", "flowDistParentFork"); + exit(1); + } +} + +/**************************************************************************************************/ + +string ShhherCommand::createDistFile(int processors){ + try{ + string fDistFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".pn.dist"; + + unsigned long int begTime = time(NULL); + double begClock = clock(); + + vector start; + vector end; + +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) + if(processors == 1) { flowDistParentFork(fDistFileName, 0, numUniques); } + else{ //you have multiple processors + + if (numSeqs < processors){ processors = 1; } + + vector start(processors, 0); + vector end(processors, 0); + + for (int i = 0; i < processors; i++) { + start[i] = int(sqrt(float(i)/float(processors)) * numUniques); + end[i] = int(sqrt(float(i+1)/float(processors)) * numUniques); + } + + int process = 1; + vector processIDs; + + //loop through and create all the processes you want + while (process != processors) { + int pid = fork(); + + if (pid > 0) { + processIDs.push_back(pid); //create map from line number to pid so you can append files in correct order later + process++; + }else if (pid == 0){ + flowDistParentFork(fDistFileName + toString(getpid()) + ".temp", start[process], end[process]); + exit(0); + }else { + m->mothurOut("[ERROR]: unable to spawn the necessary processes. Error code: " + toString(pid)); m->mothurOutEndLine(); + perror(" : "); + for (int i=0;iappendFiles((fDistFileName + toString(processIDs[i]) + ".temp"), fDistFileName); + remove((fDistFileName + toString(processIDs[i]) + ".temp").c_str()); + } + + } + +#else + flowDistParentFork(fDistFileName, 0, numUniques); +#endif + + m->mothurOutEndLine(); + + cout << "Total time: " << (time(NULL) - begTime) << "\t" << (clock() - begClock)/CLOCKS_PER_SEC << endl;; + + return fDistFileName; + } + catch(exception& e) { + m->errorOut(e, "ShhherCommand", "createDistFile"); + exit(1); + } + +} + +/**************************************************************************************************/ + +string ShhherCommand::createNamesFile(){ + try{ + + vector duplicateNames(numUniques, ""); + for(int i=0;iopenOutputFile(nameFileName, nameFile); + + for(int i=0;ierrorOut(e, "ShhherCommand", "createNamesFile"); + exit(1); + } +} + +//********************************************************************************************************************** + +string ShhherCommand::cluster(string distFileName, string namesFileName){ + try { + + SparseMatrix* matrix; + ListVector* list; + RAbundVector* rabund; + + globaldata->setNameFile(namesFileName); + globaldata->setColumnFile(distFileName); + globaldata->setFormat("column"); + + ReadMatrix* read = new ReadColumnMatrix(distFileName); + read->setCutoff(cutoff); + + NameAssignment* clusterNameMap = new NameAssignment(namesFileName); + clusterNameMap->readMap(); + read->read(clusterNameMap); + + list = read->getListVector(); + matrix = read->getMatrix(); + + delete read; + delete clusterNameMap; + + rabund = new RAbundVector(list->getRAbundVector()); + + Cluster* cluster = new CompleteLinkage(rabund, list, matrix, cutoff, "furthest"); + string tag = cluster->getTag(); + + double clusterCutoff = cutoff; + while (matrix->getSmallDist() <= clusterCutoff && matrix->getNNodes() > 0){ + cluster->update(clusterCutoff); + float dist = matrix->getSmallDist(); + } + + list->setLabel(toString(cutoff)); + + string listFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".pn.list"; + ofstream listFile; + m->openOutputFile(listFileName, listFile); + list->print(listFile); + listFile.close(); + + delete matrix; delete cluster; delete rabund; delete list; + + return listFileName; + } + catch(exception& e) { + m->errorOut(e, "ShhherCommand", "cluster"); + exit(1); + } +} + +/**************************************************************************************************/ + +void ShhherCommand::getOTUData(string listFileName){ + try { + + ifstream listFile; + m->openInputFile(listFileName, listFile); + string label; + + listFile >> label >> numOTUs; + + otuData.assign(numSeqs, 0); + cumNumSeqs.assign(numOTUs, 0); + nSeqsPerOTU.assign(numOTUs, 0); + aaP.resize(numOTUs); + + string singleOTU = ""; + + for(int i=0;i> singleOTU; + + istringstream otuString(singleOTU); + + while(otuString){ + + string seqName = ""; + + for(int j=0;j::iterator nmIt = nameMap.find(seqName); + int index = nmIt->second; + + nameMap.erase(nmIt); + + otuData[index] = i; + nSeqsPerOTU[i]++; + aaP[i].push_back(index); + seqName = ""; + } + } + + map::iterator nmIt = nameMap.find(seqName); + + int index = nmIt->second; + nameMap.erase(nmIt); + + otuData[index] = i; + nSeqsPerOTU[i]++; + aaP[i].push_back(index); + + otuString.get(); + } + + sort(aaP[i].begin(), aaP[i].end()); + for(int j=0;jerrorOut(e, "ShhherCommand", "getOTUData"); + exit(1); + } +} + +/**************************************************************************************************/ + +void ShhherCommand::initPyroCluster(){ + try{ + dist.assign(numSeqs * numOTUs, 0); + change.assign(numOTUs, 1); + centroids.assign(numOTUs, -1); + weight.assign(numOTUs, 0); + singleTau.assign(numSeqs, 1.0); + + nSeqsBreaks.assign(processors+1, 0); + nOTUsBreaks.assign(processors+1, 0); + + nSeqsBreaks[0] = 0; + for(int i=0;ierrorOut(e, "ShhherCommand", "initPyroCluster"); + exit(1); + } +} + +/**************************************************************************************************/ + +void ShhherCommand::fill(){ + try { + int index = 0; + for(int i=0;ierrorOut(e, "ShhherCommand", "fill"); + exit(1); + } +} + +/**************************************************************************************************/ + +void ShhherCommand::calcCentroids(){ + try{ + +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) + + if(processors == 1) { + calcCentroidsDriver(0, numOTUs); + } + else{ //you have multiple processors + if (numOTUs < processors){ processors = 1; } + + int process = 1; + vector processIDs; + + //loop through and create all the processes you want + while (process != processors) { + int pid = vfork(); + + if (pid > 0) { + processIDs.push_back(pid); //create map from line number to pid so you can append files in correct order later + process++; + }else if (pid == 0){ + calcCentroidsDriver(nOTUsBreaks[process], nOTUsBreaks[process+1]); + exit(0); + }else { + m->mothurOut("[ERROR]: unable to spawn the necessary processes. Error code: " + toString(pid)); m->mothurOutEndLine(); + perror(" : "); + for (int i=0;ierrorOut(e, "ShhherCommand", "calcCentroidsDriver"); + exit(1); + } +} + +/**************************************************************************************************/ + +void ShhherCommand::calcCentroidsDriver(int start, int finish){ + + //this function gets the most likely homopolymer length at a flow position for a group of sequences + //within an otu + + try{ + + for(int i=start;i 0 && count > MIN_COUNT){ + vector adF(nSeqsPerOTU[i]); + vector anL(nSeqsPerOTU[i]); + + for(int j=0;jerrorOut(e, "ShhherCommand", "calcCentroidsDriver"); + exit(1); + } +} + +/**************************************************************************************************/ + +double ShhherCommand::getDistToCentroid(int cent, int flow, int length){ + try{ + + int flowAValue = cent * numFlowCells; + int flowBValue = flow * numFlowCells; + + double dist = 0; + + for(int i=0;ierrorOut(e, "ShhherCommand", "getDistToCentroid"); + exit(1); + } +} + +/**************************************************************************************************/ + +double ShhherCommand::getNewWeights(){ + try{ + + double maxChange = 0; + + for(int i=0;i maxChange){ maxChange = difference; } + } + return maxChange; + } + catch(exception& e) { + m->errorOut(e, "ShhherCommand", "getNewWeights"); + exit(1); + } +} + +/**************************************************************************************************/ + +double ShhherCommand::getLikelihood(){ + + try{ + + vector P(numSeqs, 0); + int effNumOTUs = 0; + + for(int i=0;i MIN_WEIGHT){ + effNumOTUs++; + } + } + + for(int i=0;ierrorOut(e, "ShhherCommand", "getNewWeights"); + exit(1); + } +} + +/**************************************************************************************************/ + +void ShhherCommand::checkCentroids(){ + try{ + vector unique(numOTUs, 1); + + for(int i=0;ierrorOut(e, "ShhherCommand", "checkCentroids"); + exit(1); + } +} + +/**************************************************************************************************/ + +void ShhherCommand::calcNewDistances(){ + try{ + +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) + + if(processors == 1) { + calcNewDistancesParent(0, numSeqs); + } + else{ //you have multiple processors + if (numSeqs < processors){ processors = 1; } + + vector > child_otuIndex(processors); + vector > child_seqIndex(processors); + vector > child_singleTau(processors); + vector totals(processors); + + int process = 1; + vector processIDs; + + //loop through and create all the processes you want + while (process != processors) { + int pid = vfork(); + + if (pid > 0) { + processIDs.push_back(pid); //create map from line number to pid so you can append files in correct order later + process++; + }else if (pid == 0){ + calcNewDistancesChild(nSeqsBreaks[process], nSeqsBreaks[process+1], child_otuIndex[process], child_seqIndex[process], child_singleTau[process]); + totals[process] = child_otuIndex[process].size(); + + exit(0); + }else { + m->mothurOut("[ERROR]: unable to spawn the necessary processes. Error code: " + toString(pid)); m->mothurOutEndLine(); + perror(" : "); + for (int i=0;ierrorOut(e, "ShhherCommand", "calcNewDistances"); + exit(1); + } +} + +/**************************************************************************************************/ +#ifdef USE_MPI +void ShhherCommand::calcNewDistancesChildMPI(int startSeq, int stopSeq, vector& otuIndex){ + + try{ + vector newTau(numOTUs,0); + vector norms(numSeqs, 0); + otuIndex.resize(0); + seqIndex.resize(0); + singleTau.resize(0); + + + + for(int i=startSeq;i MIN_WEIGHT && change[j] == 1){ + dist[indexOffset + j] = getDistToCentroid(centroids[j], i, lengths[i]); + } + if(weight[j] > MIN_WEIGHT && dist[indexOffset + j] < offset){ + offset = dist[indexOffset + j]; + } + } + + for(int j=0;j MIN_WEIGHT){ + newTau[j] = exp(sigma * (-dist[indexOffset + j] + offset)) * weight[j]; + norms[i] += newTau[j]; + } + else{ + newTau[j] = 0.0; + } + } + + for(int j=0;j MIN_TAU){ + otuIndex.push_back(j); + seqIndex.push_back(i); + singleTau.push_back(newTau[j]); + } + } + + } + } + catch(exception& e) { + m->errorOut(e, "ShhherCommand", "calcNewDistancesChildMPI"); + exit(1); + } +} +#endif +/**************************************************************************************************/ + +void ShhherCommand::calcNewDistancesChild(int startSeq, int stopSeq, vector& child_otuIndex, vector& child_seqIndex, vector& child_singleTau){ + + try{ + vector newTau(numOTUs,0); + vector norms(numSeqs, 0); + child_otuIndex.resize(0); + child_seqIndex.resize(0); + child_singleTau.resize(0); + + for(int i=startSeq;i MIN_WEIGHT && change[j] == 1){ + dist[indexOffset + j] = getDistToCentroid(centroids[j], i, lengths[i]); + } + + if(weight[j] > MIN_WEIGHT && dist[indexOffset + j] < offset){ + offset = dist[indexOffset + j]; + } + } + + for(int j=0;j MIN_WEIGHT){ + newTau[j] = exp(sigma * (-dist[indexOffset + j] + offset)) * weight[j]; + norms[i] += newTau[j]; + } + else{ + newTau[j] = 0.0; + } + } + + for(int j=0;j MIN_TAU){ + child_otuIndex.push_back(j); + child_seqIndex.push_back(i); + child_singleTau.push_back(newTau[j]); + } + } + } + } + catch(exception& e) { + m->errorOut(e, "ShhherCommand", "calcNewDistancesChild"); + exit(1); + } +} + +/**************************************************************************************************/ + +void ShhherCommand::calcNewDistancesParent(int startSeq, int stopSeq){ + + try{ + + int total = 0; + vector newTau(numOTUs,0); + vector norms(numSeqs, 0); + nSeqsPerOTU.assign(numOTUs, 0); + + for(int i=startSeq;i MIN_WEIGHT && change[j] == 1){ + dist[indexOffset + j] = getDistToCentroid(centroids[j], i, lengths[i]); + } + + if(weight[j] > MIN_WEIGHT && dist[indexOffset + j] < offset){ + offset = dist[indexOffset + j]; + } + } + + for(int j=0;j MIN_WEIGHT){ + newTau[j] = exp(sigma * (-dist[indexOffset + j] + offset)) * weight[j]; + norms[i] += newTau[j]; + } + else{ + newTau[j] = 0.0; + } + } + + for(int j=0;j MIN_TAU){ + + int oldTotal = total; + + total++; + + singleTau.resize(total, 0); + seqNumber.resize(total, 0); + seqIndex.resize(total, 0); + + singleTau[oldTotal] = newTau[j]; + + aaP[j][nSeqsPerOTU[j]] = oldTotal; + aaI[j][nSeqsPerOTU[j]] = i; + nSeqsPerOTU[j]++; + } + } + } + } + catch(exception& e) { + m->errorOut(e, "ShhherCommand", "calcNewDistancesParent"); + exit(1); + } +} + +/**************************************************************************************************/ + +void ShhherCommand::setOTUs(){ + + try { + vector bigTauMatrix(numOTUs * numSeqs, 0.0000); + + for(int i=0;i maxTau){ + maxTau = bigTauMatrix[i * numOTUs + j]; + maxOTU = j; + } + } + + otuData[i] = maxOTU; + } + + nSeqsPerOTU.assign(numOTUs, 0); + + for(int i=0;ierrorOut(e, "ShhherCommand", "calcNewDistances"); + exit(1); + } +} + +/**************************************************************************************************/ + +void ShhherCommand::writeQualities(vector otuCounts){ + + try { + string qualityFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".pn.qual"; + + ofstream qualityFile; + m->openOutputFile(qualityFileName, qualityFile); + + qualityFile.setf(ios::fixed, ios::floatfield); + qualityFile.setf(ios::showpoint); + qualityFile << setprecision(6); + + vector > qualities(numOTUs); + vector pr(HOMOPS, 0); + + int index = 0; + + for(int i=0;i 0){ + qualities[i].assign(1024, -1); + + while(index < numFlowCells){ + double maxPrValue = 1e8; + short maxPrIndex = -1; + double count = 0.0000; + + pr.assign(HOMOPS, 0); + + for(int j=0;j MIN_COUNT){ + double U = 0.0000; + double norm = 0.0000; + + for(int s=0;s0.00){ + temp = log10(U); + } + else{ + temp = -10.1; + } + temp = floor(-10 * temp); + value = (int)floor(temp); + if(value > 100){ value = 100; } + + qualities[i][base] = (int)value; + base++; + } + } + + index++; + } + } + + + if(otuCounts[i] > 0){ + qualityFile << '>' << seqNameVector[mapUniqueToSeq[i]] << endl; + + int j=4; //need to get past the first four bases + while(qualities[i][j] != -1){ + qualityFile << qualities[i][j] << ' '; + j++; + } + qualityFile << endl; + } + } + qualityFile.close(); + + } + catch(exception& e) { + m->errorOut(e, "ShhherCommand", "writeQualities"); + exit(1); + } +} + +/**************************************************************************************************/ + +void ShhherCommand::writeSequences(vector otuCounts){ + try { + + string bases = "TACG"; + + string fastaFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".pn.fasta"; + ofstream fastaFile; + m->openOutputFile(fastaFileName, fastaFile); + + vector names(numOTUs, ""); + + for(int i=0;i 0){ + fastaFile << '>' << seqNameVector[aaI[i][0]] << endl; + + for(int j=8;jerrorOut(e, "ShhherCommand", "writeSequences"); + exit(1); + } +} + +/**************************************************************************************************/ + +void ShhherCommand::writeNames(vector otuCounts){ + try { + string nameFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".pn.final.names"; + ofstream nameFile; + m->openOutputFile(nameFileName, nameFile); + + for(int i=0;i 0){ + nameFile << seqNameVector[aaI[i][0]] << '\t' << seqNameVector[aaI[i][0]]; + + for(int j=1;jerrorOut(e, "ShhherCommand", "writeNames"); + exit(1); + } +} + +/**************************************************************************************************/ + +void ShhherCommand::writeGroups(){ + try { + string fileRoot = flowFileName.substr(0,flowFileName.find_last_of('.')); + string groupFileName = fileRoot + ".pn.groups"; + ofstream groupFile; + m->openOutputFile(groupFileName, groupFile); + + for(int i=0;ierrorOut(e, "ShhherCommand", "writeGroups"); + exit(1); + } +} + +/**************************************************************************************************/ + +void ShhherCommand::writeClusters(vector otuCounts){ + try { + string otuCountsFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".pn.counts"; + ofstream otuCountsFile; + m->openOutputFile(otuCountsFileName, otuCountsFile); + + string bases = "TACG"; + + for(int i=0;i 0){ + int index = centroids[i]; + + otuCountsFile << "ideal\t"; + for(int j=8;jerrorOut(e, "ShhherCommand", "writeClusters"); + exit(1); + } +} + +//********************************************************************************************************************** diff --git a/shhhercommand.h b/shhhercommand.h new file mode 100644 index 0000000..b26341e --- /dev/null +++ b/shhhercommand.h @@ -0,0 +1,113 @@ +#ifndef SHHHER_H +#define SHHHER_H + +/* + * shhher.h + * Mothur + * + * Created by Pat Schloss on 12/27/10. + * Copyright 2010 Schloss Lab. All rights reserved. + * + */ + +#include "mothur.h" +#include "command.hpp" +#include "globaldata.hpp" + +class ShhherCommand : public Command { + +public: + ShhherCommand(string); + ShhherCommand(); + ~ShhherCommand(); + vector getRequiredParameters(); + vector getValidParameters(); + vector getRequiredFiles(); + map > getOutputFiles() { return outputTypes; } + int execute(); + void help(); + +private: + GlobalData* globaldata; + + int abort; + map > outputTypes; + + string outputDir, flowFileName, flowFilesFileName, lookupFileName; + int processors, maxIters; + float cutoff, sigma, minDelta; + + vector nSeqsBreaks; + vector nOTUsBreaks; + vector flowDataPrI; + vector flowDataIntI; + vector lengths; + vector seqNameVector; + vector singleLookUp; + vector jointLookUp; + map nameMap; + vector otuData; + vector cumNumSeqs; + vector nSeqsPerOTU; + vector > aaP; //tMaster->aanP: each row is a different otu / each col contains the sequence indices + vector seqNumber; //tMaster->anP: the sequence id number sorted by OTU + vector > aaI; //tMaster->aanI: that are in each otu - can't differentiate between aaP and aaI + vector seqIndex; //tMaster->anI; the index that corresponds to seqNumber + vector dist; //adDist - distance of sequences to centroids + vector change; //did the centroid sequence change? 0 = no; 1 = yes + vector centroids; //the representative flowgram for each cluster m + vector weight; + vector singleTau; //tMaster->adTau: 1-D Tau vector (1xnumSeqs) + vector uniqueFlowgrams; + vector uniqueCount; + vector uniqueLengths; + vector mapSeqToUnique; + vector mapUniqueToSeq; + + int numSeqs, numUniques, numOTUs, numFlowCells; + + void getSingleLookUp(); + void getJointLookUp(); + void getFlowData(); + void getUniques(); + float getProbIntensity(int); + float calcPairwiseDist(int, int); + void flowDistParentFork(string, int, int); + + string createDistFile(int); + string createNamesFile(); + string cluster(string, string); + + void getOTUData(string); + void initPyroCluster(); + void fill(); + void calcCentroids(); + void calcCentroidsDriver(int, int); + double getDistToCentroid(int, int, int); + double getNewWeights(); + double getLikelihood(); + void checkCentroids(); + void calcNewDistances(); + void calcNewDistancesParent(int, int); + void calcNewDistancesChild(int, int, vector&, vector&, vector&); + + + void setOTUs(); + void writeQualities(vector); + void writeSequences(vector); + void writeNames(vector); + void writeGroups(); + void writeClusters(vector); + + +#ifdef USE_MPI + string flowDistMPI(int, int); + void calcNewDistancesChildMPI(int, int, vector&); + + int pid, ncpus; +#endif + +}; + + +#endif \ No newline at end of file diff --git a/trimflowscommand.cpp b/trimflowscommand.cpp index a2c3a88..3b21d31 100644 --- a/trimflowscommand.cpp +++ b/trimflowscommand.cpp @@ -17,11 +17,11 @@ vector TrimFlowsCommand::getValidParameters(){ string Array[] = {"flow", "totalflows", "minflows", "fasta", "minlength", "maxlength", "maxhomop", "signal", "noise" "oligos", "pdiffs", "bdiffs", "tdiffs", - "allfiles", + "allfiles", "processors", + // "group", -// "processors", "outputdir","inputdir" }; @@ -112,10 +112,9 @@ TrimFlowsCommand::TrimFlowsCommand(string option) { string AlignArray[] = {"flow", "totalflows", "minflows", "fasta", "minlength", "maxlength", "maxhomop", "signal", "noise", "oligos", "pdiffs", "bdiffs", "tdiffs", - "allfiles", + "allfiles", "processors", // "group", - // "processors", "outputdir","inputdir" }; @@ -136,8 +135,7 @@ TrimFlowsCommand::TrimFlowsCommand(string option) { //initialize outputTypes vector tempOutNames; outputTypes["flow"] = tempOutNames; - // outputTypes["fasta"] = tempOutNames; - // outputTypes["group"] = tempOutNames; + outputTypes["fasta"] = tempOutNames; //if the user changes the input directory command factory will send this info to us in the output parameter string inputDir = validParameter.validFile(parameters, "inputdir", false); @@ -152,13 +150,13 @@ TrimFlowsCommand::TrimFlowsCommand(string option) { if (path == "") { parameters["flow"] = inputDir + it->second; } } -// it = parameters.find("oligos"); -// //user has given a template file -// if(it != parameters.end()){ -// path = m->hasPath(it->second); -// //if the user has not given a path then, add inputdir. else leave path alone. -// if (path == "") { parameters["oligos"] = inputDir + it->second; } -// } + it = parameters.find("oligos"); + //user has given a template file + if(it != parameters.end()){ + path = m->hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["oligos"] = inputDir + it->second; } + } // it = parameters.find("group"); @@ -223,21 +221,21 @@ TrimFlowsCommand::TrimFlowsCommand(string option) { temp = validParameter.validFile(parameters, "pdiffs", false); if (temp == "not found"){ temp = "0"; } convert(temp, pdiffs); - temp = validParameter.validFile(parameters, "tdiffs", false); if (temp == "not found"){ int tempTotal = pdiffs + bdiffs; temp = toString(tempTotal); } + temp = validParameter.validFile(parameters, "tdiffs", false); + if (temp == "not found"){ int tempTotal = pdiffs + bdiffs; temp = toString(tempTotal); } convert(temp, tdiffs); if(tdiffs == 0){ tdiffs = bdiffs + pdiffs; } temp = validParameter.validFile(parameters, "allfiles", false); if (temp == "not found") { temp = "T"; } allFiles = m->isTrue(temp); -// temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found") { temp = "1"; } -// convert(temp, processors); + temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found") { temp = "1"; } + convert(temp, processors); - if(allFiles && oligoFileName == ""){ - m->mothurOut("You selected allfiles, but didn't enter an oligos file. Ignoring the allfiles request."); m->mothurOutEndLine(); - allFiles = 0; - } + if(oligoFileName == ""){ allFiles = 0; } + numFPrimers = 0; + numRPrimers = 0; } } @@ -265,9 +263,68 @@ int TrimFlowsCommand::execute(){ outputNames.push_back(fastaFileName); outputTypes["fasta"].push_back(fastaFileName); } - driverCreateTrim(flowFileName, trimFlowFileName, scrapFlowFileName, fastaFileName); + vector flowFilePos = getFlowFileBreaks(); + for (int i = 0; i < (flowFilePos.size()-1); i++) { + lines.push_back(new linePair(flowFilePos[i], flowFilePos[(i+1)])); + } + + vector > barcodePrimerComboFileNames; + if(oligoFileName != ""){ + getOligos(barcodePrimerComboFileNames); + } + +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) + if(processors == 1){ + driverCreateTrim(flowFileName, trimFlowFileName, scrapFlowFileName, fastaFileName, barcodePrimerComboFileNames, lines[0]); + }else{ + createProcessesCreateTrim(flowFileName, trimFlowFileName, scrapFlowFileName, fastaFileName, barcodePrimerComboFileNames); + } +#else + driverCreateTrim(flowFileName, trimFlowFileName, scrapFlowFileName, fastaFileName, barcodePrimerComboFileNames, lines[0]); +#endif + + if (m->control_pressed) { return 0; } + + if(allFiles){ + + ofstream output; + string flowFilesFileName = outputDir + m->getRootName(m->getSimpleName(flowFileName)) + "flow.files"; + m->openOutputFile(flowFilesFileName, output); + + for(int i=0;imothurOut("deleting: " + barcodePrimerComboFileNames[i][j] + '\n'); + remove(barcodePrimerComboFileNames[i][j].c_str()); + } + else{ +// m->mothurOut("saving: " + barcodePrimerComboFileNames[i][j] + '\n'); + output << barcodePrimerComboFileNames[i][j] << endl; + } + } + } + output.close(); + } + + m->mothurOutEndLine(); + m->mothurOut("Output File Names: "); m->mothurOutEndLine(); + for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } + m->mothurOutEndLine(); - return 0; + return 0; } catch(exception& e) { m->errorOut(e, "TrimSeqsCommand", "execute"); @@ -277,7 +334,7 @@ int TrimFlowsCommand::execute(){ //*************************************************************************************************************** -int TrimFlowsCommand::driverCreateTrim(string flowFileName, string trimFlowFileName, string scrapFlowFileName, string fastaFileName){ +int TrimFlowsCommand::driverCreateTrim(string flowFileName, string trimFlowFileName, string scrapFlowFileName, string fastaFileName, vector > barcodePrimerComboFileNames, linePair* line){ try { @@ -288,41 +345,51 @@ int TrimFlowsCommand::driverCreateTrim(string flowFileName, string trimFlowFileN ofstream scrapFlowFile; m->openOutputFile(scrapFlowFileName, scrapFlowFile); scrapFlowFile.setf(ios::fixed, ios::floatfield); scrapFlowFile.setf(ios::showpoint); - - ifstream flowFile; - m->openInputFile(flowFileName, flowFile); + + if(line->start == 4){ + scrapFlowFile << totalFlows << endl; + trimFlowFile << totalFlows << endl; + for(int i=0;iopenOutputFile(barcodePrimerComboFileNames[i][j], temp); + temp << totalFlows << endl; + temp.close(); + } + } + } + + FlowData flowData(numFlows, signal, noise, maxHomoP); ofstream fastaFile; if(fasta){ m->openOutputFile(fastaFileName, fastaFile); } - vector > barcodePrimerCombos; - if(oligoFileName != ""){ getOligos(barcodePrimerCombos); } - -// inFASTA.seekg(line->start); + ifstream flowFile; + m->openInputFile(flowFileName, flowFile); -// bool done = false; + flowFile.seekg(line->start); + int count = 0; + bool moreSeqs = 1; - while (flowFile) { + while(moreSeqs) { int success = 1; int currentSeqDiffs = 0; - string trashCode = ""; - - FlowData currFlow(flowFile, signal, noise, maxHomoP); - m->gobble(flowFile); - currFlow.capFlows(totalFlows); - Sequence currSeq = currFlow.getSequence(); + flowData.getNext(flowFile); + flowData.capFlows(totalFlows); - if(!currFlow.hasMinFlows(minFlows)){ //screen to see if sequence is of a minimum number of flows + Sequence currSeq = flowData.getSequence(); + if(!flowData.hasMinFlows(minFlows)){ //screen to see if sequence is of a minimum number of flows success = 0; trashCode += 'l'; } - if(minLength > 0 || maxLength > 0){ //screen to see if sequence is above and below a specific number of bases - int seqLength = currFlow.getSeqLength(); + if(minLength > 0 || maxLength > 0){ //screen to see if sequence is above and below a specific number of bases + int seqLength = currSeq.getNumBases(); if(seqLength < minLength || seqLength > maxLength){ success = 0; trashCode += 'l'; @@ -330,6 +397,7 @@ int TrimFlowsCommand::driverCreateTrim(string flowFileName, string trimFlowFileN } int primerIndex, barcodeIndex; + if(barcodes.size() != 0){ success = stripBarcode(currSeq, barcodeIndex); if(success > bdiffs) { trashCode += 'b'; } @@ -351,73 +419,48 @@ int TrimFlowsCommand::driverCreateTrim(string flowFileName, string trimFlowFileN if(trashCode.length() == 0){ - currFlow.printFlows(trimFlowFile); + flowData.printFlows(trimFlowFile); if(fasta){ - currFlow.printFASTA(fastaFile); + currSeq.printSequence(fastaFile); } if(barcodes.size() != 0 || primers.size() != 0){ - string fileName = outputDir + m->getRootName(m->getSimpleName(flowFileName)) + barcodePrimerCombos[barcodeIndex][primerIndex] + ".flow"; +// string fileName = outputDir + m->getRootName(m->getSimpleName(flowFileName)) + barcodePrimerCombos[barcodeIndex][primerIndex] + ".flow"; ofstream output; - m->openOutputFileAppend(fileName, output); - currFlow.printFlows(output); + m->openOutputFileAppend(barcodePrimerComboFileNames[barcodeIndex][primerIndex], output); + output.setf(ios::fixed, ios::floatfield); trimFlowFile.setf(ios::showpoint); + + flowData.printFlows(output); output.close(); } } else{ - currFlow.printFlows(scrapFlowFile, trashCode); + flowData.printFlows(scrapFlowFile, trashCode); } -// if(barcodes.size() != 0){ -// string thisGroup = groupVector[groupBar]; -// int indexToFastaFile = groupBar; -// if (primers.size() != 0){ -// //does this primer have a group -// if (groupVector[groupPrime] != "") { -// thisGroup += "." + groupVector[groupPrime]; -// indexToFastaFile = combos[thisGroup]; -// } -// } -// outGroups << currSeq.getName() << '\t' << thisGroup << endl; -// if(allFiles){ -// ofstream outTemp; -// m->openOutputFileAppend(fastaNames[indexToFastaFile], outTemp); -// //currSeq.printSequence(*fastaFileNames[indexToFastaFile]); -// currSeq.printSequence(outTemp); -// outTemp.close(); -// -// if(qFileName != ""){ -// //currQual.printQScores(*qualFileNames[indexToFastaFile]); -// ofstream outTemp2; -// m->openOutputFileAppend(qualNames[indexToFastaFile], outTemp2); -// currQual.printQScores(outTemp2); -// outTemp2.close(); -// } -// } -// } -// } -// else{ -// currSeq.setName(currSeq.getName() + '|' + trashCode); -// currSeq.setUnaligned(origSeq); -// currSeq.setAligned(origSeq); -// currSeq.printSequence(scrapFASTA); -// } - count++; //report progress - if((count) % 1000 == 0){ m->mothurOut(toString(count)); m->mothurOutEndLine(); } + if((count) % 10000 == 0){ m->mothurOut(toString(count)); m->mothurOutEndLine(); } + +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) + unsigned long int pos = flowFile.tellg(); + + if ((pos == -1) || (pos >= line->end)) { break; } +#else + if (flowFile.eof()) { break; } +#endif } //report progress - if((count) % 1000 != 0){ m->mothurOut(toString(count)); m->mothurOutEndLine(); } - + if((count) % 10000 != 0){ m->mothurOut(toString(count)); m->mothurOutEndLine(); } trimFlowFile.close(); scrapFlowFile.close(); flowFile.close(); + if(fasta){ fastaFile.close(); } return count; } @@ -526,7 +569,7 @@ void TrimFlowsCommand::getOligos(vector >& outFlowFileNames){ outputNames.push_back(fileName); outputTypes["flow"].push_back(fileName); - outFlowFileNames[itBar->second][itPrimer->second] = comboGroupName; + outFlowFileNames[itBar->second][itPrimer->second] = fileName; ofstream temp; m->openOutputFile(fileName, temp); @@ -571,11 +614,9 @@ int TrimFlowsCommand::stripBarcode(Sequence& seq, int& group){ } //if you found the barcode or if you don't want to allow for diffs - // cout << success; if ((bdiffs == 0) || (success == 0)) { return success; } else { //try aligning and see if you can find it - // cout << endl; int maxLength = 0; @@ -687,11 +728,9 @@ int TrimFlowsCommand::stripForward(Sequence& seq, int& group){ } //if you found the barcode or if you don't want to allow for diffs - // cout << success; if ((pdiffs == 0) || (success == 0)) { return success; } else { //try aligning and see if you can find it - // cout << endl; int maxLength = 0; @@ -739,8 +778,6 @@ int TrimFlowsCommand::stripForward(Sequence& seq, int& group){ int newStart=0; int numDiff = countDiffs(oligo, temp); - // cout << oligo << '\t' << temp << '\t' << numDiff << endl; - if(numDiff < minDiff){ minDiff = numDiff; minCount = 1; @@ -887,17 +924,184 @@ int TrimFlowsCommand::countDiffs(string oligo, string seq){ } -//*************************************************************************************************************** - - - +/**************************************************************************************************/ +vector TrimFlowsCommand::getFlowFileBreaks() { + try{ + + vector filePos; + filePos.push_back(0); + + FILE * pFile; + unsigned long int size; + + //get num bytes in file + pFile = fopen (flowFileName.c_str(),"rb"); + if (pFile==NULL) perror ("Error opening file"); + else{ + fseek (pFile, 0, SEEK_END); + size=ftell (pFile); + fclose (pFile); + } + + //estimate file breaks + unsigned long int chunkSize = 0; + chunkSize = size / processors; + //file too small to divide by processors + if (chunkSize == 0) { processors = 1; filePos.push_back(size); return filePos; } + + //for each process seekg to closest file break and search for next '>' char. make that the filebreak + for (int i = 0; i < processors; i++) { + unsigned long int spot = (i+1) * chunkSize; + + ifstream in; + m->openInputFile(flowFileName, in); + in.seekg(spot); + + unsigned long int newSpot = spot; + string dummy = m->getline(in); + + + //there was not another sequence before the end of the file + unsigned long int sanityPos = in.tellg(); + +// if (sanityPos == -1) { break; } +// else { filePos.push_back(newSpot); } + if (sanityPos == -1) { break; } + else { filePos.push_back(sanityPos); } + + in.close(); + } + + //save end pos + filePos.push_back(size); + + //sanity check filePos + for (int i = 0; i < (filePos.size()-1); i++) { + if (filePos[(i+1)] <= filePos[i]) { filePos.erase(filePos.begin()+(i+1)); i--; } + } + ifstream in; + m->openInputFile(flowFileName, in); + in >> numFlows; + m->gobble(in); + unsigned long int spot = in.tellg(); + filePos[0] = spot; + in.close(); + + processors = (filePos.size() - 1); + + return filePos; + } + catch(exception& e) { + m->errorOut(e, "TrimSeqsCommand", "getFlowFileBreaks"); + exit(1); + } +} +/**************************************************************************************************/ +int TrimFlowsCommand::createProcessesCreateTrim(string flowFileName, string trimFlowFileName, string scrapFlowFileName, string fastaFileName, vector > barcodePrimerComboFileNames){ + try { +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) + int process = 1; + int exitCommand = 1; + processIDS.clear(); + + //loop through and create all the processes you want + while (process != processors) { + int pid = fork(); + + if (pid > 0) { + processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later + process++; + }else if (pid == 0){ + + vector > tempBarcodePrimerComboFileNames = barcodePrimerComboFileNames; + for(int i=0;iopenOutputFile(tempBarcodePrimerComboFileNames[i][j], temp); + temp.close(); + + } + } + + driverCreateTrim(flowFileName, + (trimFlowFileName + toString(getpid()) + ".temp"), + (scrapFlowFileName + toString(getpid()) + ".temp"), + (fastaFileName + toString(getpid()) + ".temp"), + tempBarcodePrimerComboFileNames, lines[process]); + + exit(0); + }else { + m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); + for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); } + exit(0); + } + } + + //parent do my part + ofstream temp; + m->openOutputFile(trimFlowFileName, temp); + temp.close(); + m->openOutputFile(scrapFlowFileName, temp); + temp.close(); + + if(fasta){ + m->openOutputFile(fastaFileName, temp); + temp.close(); + } + + driverCreateTrim(flowFileName, trimFlowFileName, scrapFlowFileName, fastaFileName, barcodePrimerComboFileNames, lines[0]); + //force parent to wait until all the processes are done + for (int i=0;imothurOutEndLine(); + for(int i=0;imothurOut("Appending files from process " + toString(processIDS[i])); m->mothurOutEndLine(); + + m->appendFiles((trimFlowFileName + toString(processIDS[i]) + ".temp"), trimFlowFileName); + remove((trimFlowFileName + toString(processIDS[i]) + ".temp").c_str()); +// m->mothurOut("\tDone with trim.flow file"); m->mothurOutEndLine(); + + m->appendFiles((scrapFlowFileName + toString(processIDS[i]) + ".temp"), scrapFlowFileName); + remove((scrapFlowFileName + toString(processIDS[i]) + ".temp").c_str()); +// m->mothurOut("\tDone with scrap.flow file"); m->mothurOutEndLine(); + + if(fasta){ + m->appendFiles((fastaFileName + toString(processIDS[i]) + ".temp"), fastaFileName); + remove((fastaFileName + toString(processIDS[i]) + ".temp").c_str()); +// m->mothurOut("\tDone with flow.fasta file"); m->mothurOutEndLine(); + } + if(allFiles){ + for (int j = 0; j < barcodePrimerComboFileNames.size(); j++) { + for (int k = 0; k < barcodePrimerComboFileNames[0].size(); k++) { + m->appendFiles((barcodePrimerComboFileNames[j][k] + toString(processIDS[i]) + ".temp"), barcodePrimerComboFileNames[j][k]); + remove((barcodePrimerComboFileNames[j][k] + toString(processIDS[i]) + ".temp").c_str()); + } + } + } + } + + return exitCommand; +#endif + } + catch(exception& e) { + m->errorOut(e, "TrimFlowsCommand", "createProcessesCreateTrim"); + exit(1); + } +} +//*************************************************************************************************************** diff --git a/trimflowscommand.h b/trimflowscommand.h index 215b1a2..15c698c 100644 --- a/trimflowscommand.h +++ b/trimflowscommand.h @@ -31,8 +31,6 @@ public: private: bool abort; -// GroupMap* groupMap; - struct linePair { unsigned long int start; unsigned long int end; @@ -41,11 +39,16 @@ private: int comboStarts; vector processIDS; //processid vector lines; - vector qLines; + + vector getFlowFileBreaks(); + int createProcessesCreateTrim(string, string, string, string, vector >); + int driverCreateTrim(string, string, string, string, vector >, linePair*); + + map > outputTypes; vector outputNames; set filesToRemove; - + void getOligos(vector >&); //a rewrite of what is in trimseqscommand.h @@ -54,12 +57,12 @@ private: bool stripReverse(Sequence&); //largely redundant with trimseqscommand.h bool compareDNASeq(string, string); //largely redundant with trimseqscommand.h int countDiffs(string, string); //largely redundant with trimseqscommand.h - bool allFiles; -// int processors; + int processors; int numFPrimers, numRPrimers; int totalFlows, minFlows, minLength, maxLength, maxHomoP, tdiffs, bdiffs, pdiffs; + int numFlows; float signal, noise; bool fasta; @@ -77,12 +80,6 @@ private: map combos; //needed here? map groupToIndex; //needed here? - - int driverCreateTrim(string, string, string, string); - -// int createProcessesCreateTrim(string, string, string, string, string, string, string, vector, vector){}; - int setLines(string, string, vector&, vector&){}; - }; diff --git a/trimseqscommand.cpp b/trimseqscommand.cpp index b40b7e5..2000204 100644 --- a/trimseqscommand.cpp +++ b/trimseqscommand.cpp @@ -883,6 +883,7 @@ int TrimSeqsCommand::setLines(string filename, string qfilename, vector& outFASTAVec, vector& outQualVec){