From 5d176c0f8049654ec3ae5a869c9ee3cecb991dc6 Mon Sep 17 00:00:00 2001 From: westcott Date: Thu, 4 Mar 2010 14:50:33 +0000 Subject: [PATCH] changes to filter.seqs --- Mothur.xcodeproj/project.pbxproj | 3 + filterseqscommand.cpp | 218 ++++++++++++++++++++++++------- filterseqscommand.h | 18 ++- mothur.h | 2 - 4 files changed, 190 insertions(+), 51 deletions(-) diff --git a/Mothur.xcodeproj/project.pbxproj b/Mothur.xcodeproj/project.pbxproj index f5a6700..89ddb28 100644 --- a/Mothur.xcodeproj/project.pbxproj +++ b/Mothur.xcodeproj/project.pbxproj @@ -1431,6 +1431,8 @@ GCC_WARN_UNUSED_PARAMETER = NO; GCC_WARN_UNUSED_VALUE = YES; GCC_WARN_UNUSED_VARIABLE = YES; + LD_MAP_FILE_PATH = "$(TARGET_TEMP_DIR)/$(PRODUCT_NAME)-LinkMap-$(CURRENT_VARIANT)-$(CURRENT_ARCH).txt"; + LINK_WITH_STANDARD_LIBRARIES = YES; OTHER_CPLUSPLUSFLAGS = ( "-ansi", "-pedantic", @@ -1438,6 +1440,7 @@ "-lreadline", "-DUSE_READLINE", ); + OTHER_LDFLAGS = ""; PREBINDING = NO; SDKROOT = "$(DEVELOPER_SDK_DIR)/MacOSX10.5.sdk"; SYMROOT = /Users/johnwestcott/Desktop; diff --git a/filterseqscommand.cpp b/filterseqscommand.cpp index 2fd1721..0e33ab2 100644 --- a/filterseqscommand.cpp +++ b/filterseqscommand.cpp @@ -22,7 +22,7 @@ FilterSeqsCommand::FilterSeqsCommand(string option) { else { //valid paramters for this command - string Array[] = {"fasta", "trump", "soft", "hard", "vertical", "outputdir","inputdir"}; + string Array[] = {"fasta", "trump", "soft", "hard", "vertical", "outputdir","inputdir", "processors"}; vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); OptionParser parser(option); @@ -108,6 +108,9 @@ FilterSeqsCommand::FilterSeqsCommand(string option) { temp = validParameter.validFile(parameters, "soft", false); if (temp == "not found") { soft = 0; } else { soft = (float)atoi(temp.c_str()) / 100.0; } + temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = "1"; } + convert(temp, processors); + hard = validParameter.validFile(parameters, "hard", true); if (hard == "not found") { hard = ""; } else if (hard == "not open") { abort = true; } @@ -115,13 +118,6 @@ FilterSeqsCommand::FilterSeqsCommand(string option) { numSeqs = 0; - if (abort == false) { - - if (soft != 0) { F.setSoft(soft); } - if (trump != '*') { F.setTrump(trump); } - - } - } } @@ -170,44 +166,10 @@ int FilterSeqsCommand::execute() { alignmentLength = testSeq.getAlignLength(); inFASTA.close(); - F.setLength(alignmentLength); - - if(soft != 0 || isTrue(vertical)){ - F.initialize(); - } - - if(hard.compare("") != 0) { F.doHard(hard); } - else { F.setFilter(string(alignmentLength, '1')); } - - if(trump != '*' || isTrue(vertical) || soft != 0){ - for (int i = 0; i < fastafileNames.size(); i++) { - ifstream in; - openInputFile(fastafileNames[i], in); - - while(!in.eof()){ //read through and create the filter... - - if (m->control_pressed) { in.close(); return 0; } - - Sequence seq(in); - if (seq.getName() != "") { - if(trump != '*'){ F.doTrump(seq); } - if(isTrue(vertical) || soft != 0){ F.getFreqs(seq); } - numSeqs++; - cout.flush(); - } - } - in.close(); - } - - } - F.setNumSeqs(numSeqs); + ////////////create filter///////////////// + filter = createFilter(); - if(isTrue(vertical) == 1) { F.doVertical(); } - if(soft != 0) { F.doSoft(); } - - filter = F.getFilter(); - ofstream outFilter; string filterFile = outputDir + filterFileName + ".filter"; @@ -216,6 +178,9 @@ int FilterSeqsCommand::execute() { outFilter.close(); outputNames.push_back(filterFile); + + ////////////run filter///////////////// + numSeqs = 0; for (int i = 0; i < fastafileNames.size(); i++) { ifstream in; @@ -277,5 +242,170 @@ int FilterSeqsCommand::execute() { exit(1); } } +/**************************************************************************************/ +string FilterSeqsCommand::createFilter() { + try { + string filterString = ""; + + Filters F; + + if (soft != 0) { F.setSoft(soft); } + if (trump != '*') { F.setTrump(trump); } + + F.setLength(alignmentLength); + + if(soft != 0 || isTrue(vertical)){ + F.initialize(); + } + + if(hard.compare("") != 0) { F.doHard(hard); } + else { F.setFilter(string(alignmentLength, '1')); } + + numSeqs = 0; + + if(trump != '*' || isTrue(vertical) || soft != 0){ + for (int s = 0; s < fastafileNames.size(); s++) { + + for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear(); + + #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) + if(processors == 1){ + ifstream inFASTA; + openInputFile(fastafileNames[s], inFASTA); + int numFastaSeqs=count(istreambuf_iterator(inFASTA),istreambuf_iterator(), '>'); + inFASTA.close(); + + numSeqs += numFastaSeqs; + + lines.push_back(new linePair(0, numFastaSeqs)); + + driverCreateFilter(F, fastafileNames[s], lines[0]); + }else{ + vector positions; + + ifstream inFASTA; + openInputFile(fastafileNames[s], inFASTA); + + string input; + while(!inFASTA.eof()){ + input = getline(inFASTA); + if (input.length() != 0) { + if(input[0] == '>'){ long int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1); } + } + } + inFASTA.close(); + + int numFastaSeqs = positions.size(); + + numSeqs += numFastaSeqs; + + int numSeqsPerProcessor = numFastaSeqs / processors; + + for (int i = 0; i < processors; i++) { + long int startPos = positions[ i * numSeqsPerProcessor ]; + if(i == processors - 1){ + numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor; + } + lines.push_back(new linePair(startPos, numSeqsPerProcessor)); + } + + createProcessesCreateFilter(F, fastafileNames[s]); + } + #else + ifstream inFASTA; + openInputFile(fastafileNames[s], inFASTA); + int numFastaSeqs=count(istreambuf_iterator(inFASTA),istreambuf_iterator(), '>'); + inFASTA.close(); + + numSeqs += numFastaSeqs; + + lines.push_back(new linePair(0, numFastaSeqs)); + + driverCreateFilter(F, lines[0], fastafileNames[s]); + #endif + + + } + } + + F.setNumSeqs(numSeqs); + + if(isTrue(vertical) == 1) { F.doVertical(); } + if(soft != 0) { F.doSoft(); } + + filterString = F.getFilter(); + + return filterString; + } + catch(exception& e) { + m->errorOut(e, "FilterSeqsCommand", "createFilter"); + exit(1); + } +} +/**************************************************************************************/ +int FilterSeqsCommand::driverCreateFilter(Filters& F, string filename, linePair* line) { + try { + + ifstream in; + openInputFile(filename, in); + + in.seekg(line->start); + + for(int i=0;inumSeqs;i++){ + + if (m->control_pressed) { in.close(); return 1; } + + Sequence seq(in); + if (seq.getName() != "") { + if(trump != '*'){ F.doTrump(seq); } + if(isTrue(vertical) || soft != 0){ F.getFreqs(seq); } + cout.flush(); + } + } + + in.close(); + + return 0; + } + catch(exception& e) { + m->errorOut(e, "FilterSeqsCommand", "driverCreateFilter"); + exit(1); + } +} +/**************************************************************************************************/ +int FilterSeqsCommand::createProcessesCreateFilter(Filters& F, string filename) { + try { +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) + int process = 0; + int exitCommand = 1; + vector processIDS; + + //loop through and create all the processes you want + while (process != processors) { + int pid = vfork(); + + if (pid > 0) { + processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later + process++; + }else if (pid == 0){ + driverCreateFilter(F, filename, lines[process]); + exit(0); + }else { m->mothurOut("unable to spawn the necessary processes."); m->mothurOutEndLine(); exit(0); } + } + + //force parent to wait until all the processes are done + for (int i=0;ierrorOut(e, "FilterSeqsCommand", "createProcessesCreateFilter"); + exit(1); + } +} /**************************************************************************************/ diff --git a/filterseqscommand.h b/filterseqscommand.h index e8af5ec..232b0ac 100644 --- a/filterseqscommand.h +++ b/filterseqscommand.h @@ -12,6 +12,7 @@ #include "command.hpp" #include "filters.h" +#include "mpi.h" class Sequence; class FilterSeqsCommand : public Command { @@ -23,19 +24,26 @@ public: void help(); private: + struct linePair { + int start; + int numSeqs; + linePair(long int i, int j) : start(i), numSeqs(j) {} + }; + vector lines; + + string vertical, filter, fasta, hard, outputDir, filterFileName; vector fastafileNames; - int alignmentLength; + int alignmentLength, processors; char trump; bool abort; float soft; int numSeqs; - Filters F; - - vector a, t, g, c, gap; - + string createFilter(); + int createProcessesCreateFilter(Filters&, string); + int driverCreateFilter(Filters&, string, linePair*); }; #endif diff --git a/mothur.h b/mothur.h index 26fd775..16a9844 100644 --- a/mothur.h +++ b/mothur.h @@ -58,8 +58,6 @@ #include #endif - //#include - //#include #else #include //allows unbuffered screen capture from stdin #include //get cwd -- 2.39.2