From 64eee3a595ae53817f52807d7393b22e74e31f56 Mon Sep 17 00:00:00 2001 From: pschloss Date: Sat, 6 Jun 2009 23:55:02 +0000 Subject: [PATCH] added trim.seqs command --- Mothur.xcodeproj/project.pbxproj | 6 + commandfactory.cpp | 2 + fastamap.cpp | 95 +++---- globaldata.cpp | 418 ++++++++++++++++--------------- globaldata.hpp | 9 +- reversecommand.cpp | 8 +- screenseqscommand.cpp | 8 +- seqsummarycommand.cpp | 8 +- sequence.cpp | 11 +- trimseqscommand.cpp | 248 ++++++++++++++++++ trimseqscommand.h | 44 ++++ validcommands.cpp | 1 + validparameter.cpp | 3 + 13 files changed, 580 insertions(+), 281 deletions(-) create mode 100644 trimseqscommand.cpp create mode 100644 trimseqscommand.h diff --git a/Mothur.xcodeproj/project.pbxproj b/Mothur.xcodeproj/project.pbxproj index bed134c..6ea76d7 100644 --- a/Mothur.xcodeproj/project.pbxproj +++ b/Mothur.xcodeproj/project.pbxproj @@ -143,6 +143,7 @@ 37E5F3E30F29FD4200F8D827 /* treenode.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 37E5F3E20F29FD4200F8D827 /* treenode.cpp */; }; 37E5F4920F2A3DA800F8D827 /* readtreecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 37E5F4910F2A3DA800F8D827 /* readtreecommand.cpp */; }; 7E09C5140FDA79C5002ECAE5 /* reversecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 7E09C5130FDA79C5002ECAE5 /* reversecommand.cpp */; }; + 7E09C5360FDA7F65002ECAE5 /* trimseqscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 7E09C5350FDA7F65002ECAE5 /* trimseqscommand.cpp */; }; 7E412F490F8D21B600381DD0 /* slibshuff.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 7E412F480F8D21B600381DD0 /* slibshuff.cpp */; }; 7E412FEA0F8D3E2C00381DD0 /* libshuff.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 7E412FE90F8D3E2C00381DD0 /* libshuff.cpp */; }; 7E4130F80F8E58FA00381DD0 /* dlibshuff.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 7E4130F60F8E58FA00381DD0 /* dlibshuff.cpp */; }; @@ -464,6 +465,8 @@ 37E5F4910F2A3DA800F8D827 /* readtreecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = readtreecommand.cpp; sourceTree = SOURCE_ROOT; }; 7E09C5120FDA79C5002ECAE5 /* reversecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = reversecommand.h; sourceTree = ""; }; 7E09C5130FDA79C5002ECAE5 /* reversecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = reversecommand.cpp; sourceTree = ""; }; + 7E09C5340FDA7F65002ECAE5 /* trimseqscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = trimseqscommand.h; sourceTree = ""; }; + 7E09C5350FDA7F65002ECAE5 /* trimseqscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = trimseqscommand.cpp; sourceTree = ""; }; 7E412F420F8D213C00381DD0 /* libshuff.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = libshuff.h; sourceTree = SOURCE_ROOT; }; 7E412F470F8D21B600381DD0 /* slibshuff.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = slibshuff.h; sourceTree = SOURCE_ROOT; }; 7E412F480F8D21B600381DD0 /* slibshuff.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = slibshuff.cpp; sourceTree = SOURCE_ROOT; }; @@ -735,6 +738,8 @@ 37D928A90F2133E5001D4494 /* commands */ = { isa = PBXGroup; children = ( + 7E09C5340FDA7F65002ECAE5 /* trimseqscommand.h */, + 7E09C5350FDA7F65002ECAE5 /* trimseqscommand.cpp */, 7E09C5120FDA79C5002ECAE5 /* reversecommand.h */, 7E09C5130FDA79C5002ECAE5 /* reversecommand.cpp */, 37D927CD0F21331F001D4494 /* command.hpp */, @@ -1097,6 +1102,7 @@ 3799A9510FD6A58C00E33EDE /* seqsummarycommand.cpp in Sources */, 371B30B40FD7EE67000414CA /* screenseqscommand.cpp in Sources */, 7E09C5140FDA79C5002ECAE5 /* reversecommand.cpp in Sources */, + 7E09C5360FDA7F65002ECAE5 /* trimseqscommand.cpp in Sources */, ); runOnlyForDeploymentPostprocessing = 0; }; diff --git a/commandfactory.cpp b/commandfactory.cpp index a559c81..8056227 100644 --- a/commandfactory.cpp +++ b/commandfactory.cpp @@ -48,6 +48,7 @@ #include "seqsummarycommand.h" #include "screenseqscommand.h" #include "reversecommand.h" +#include "trimseqscommand.h" /***********************************************************/ @@ -106,6 +107,7 @@ Command* CommandFactory::getCommand(string commandName){ else if(commandName == "summary.seqs") { command = new SeqSummaryCommand(); } else if(commandName == "screen.seqs") { command = new ScreenSeqsCommand(); } else if(commandName == "reverse.seqs") { command = new ReverseSeqsCommand(); } + else if(commandName == "trim.seqs") { command = new TrimSeqsCommand(); } else { command = new NoCommand(); } return command; diff --git a/fastamap.cpp b/fastamap.cpp index 1cc9abf..a0652d9 100644 --- a/fastamap.cpp +++ b/fastamap.cpp @@ -8,27 +8,33 @@ */ #include "fastamap.h" +#include "sequence.hpp" /*******************************************************************************/ + void FastaMap::readFastaFile(ifstream& in) { try { string name, sequence, line; sequence = ""; - int c; +// int c; string temp; //read through file - while ((c = in.get()) != EOF) { - name = ""; sequence = ""; - //is this a name - if (c == '>') { - name = readName(in); - sequence = readSequence(in); - }else { cout << "Error fasta in your file. Please correct." << endl; } +// while ((c = in.get()) != EOF) { +// name = ""; sequence = ""; +// //is this a name +// if (c == '>') { +// name = readName(in); +// sequence = readSequence(in); +// }else { cout << "Error fasta in your file. Please correct." << endl; } //store info in map //input sequence info into map + while(!in.eof()){ + Sequence currSeq(in); + name = currSeq.getName(); + sequence = currSeq.getUnaligned(); seqmap[name] = sequence; it = data.find(sequence); if (it == data.end()) { //it's unique. @@ -53,77 +59,27 @@ void FastaMap::readFastaFile(ifstream& in) { exit(1); } } -/*******************************************************************************/ -string FastaMap::readName(ifstream& in) { - try{ - string name = ""; - int c; - string temp; - - while ((c = in.get()) != EOF) { - //if c is not a line return - if (c != 10) { - name += c; - }else { break; } - } - - return name; - } - catch(exception& e) { - cout << "Standard Error: " << e.what() << " has occurred in the FastaMap class Function readName. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; - exit(1); - } - catch(...) { - cout << "An unknown error has occurred in the FastaMap class function readName. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; - exit(1); - } -} /*******************************************************************************/ -string FastaMap::readSequence(ifstream& in) { - try{ - string sequence = ""; - string line; - int pos, c; - - while (!in.eof()) { - //save position in file in case next line is a new name. - pos = in.tellg(); - line = ""; - in >> line; - //if you are at a new name - if (line[0] == '>') { - //put file pointer back since you are now at a new name - in.seekg(pos, ios::beg); - c = in.get(); //because you put it back to a newline char - break; - }else { sequence += line; } - } - - return sequence; - } - catch(exception& e) { - cout << "Standard Error: " << e.what() << " has occurred in the FastaMap class Function readSequence. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; - exit(1); - } - catch(...) { - cout << "An unknown error has occurred in the FastaMap class function readSequence. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; - exit(1); - } -} -/*******************************************************************************/ + string FastaMap::getGroupName(string seq) { //pass a sequence name get its group return data[seq].groupname; } + /*******************************************************************************/ + string FastaMap::getNames(string seq) { //pass a sequence get the string of names in the group separated by ','s. return data[seq].names; } + /*******************************************************************************/ + int FastaMap::getGroupNumber(string seq) { //pass a sequence get the number of identical sequences. return data[seq].groupnumber; } + /*******************************************************************************/ + string FastaMap::getSequence(string name) { it2 = seqmap.find(name); if (it2 == seqmap.end()) { //it's not found @@ -132,7 +88,9 @@ string FastaMap::getSequence(string name) { return it2->second; } } + /*******************************************************************************/ + void FastaMap::push_back(string name, string seq) { it = data.find(seq); if (it == data.end()) { //it's unique. @@ -146,11 +104,15 @@ void FastaMap::push_back(string name, string seq) { seqmap[name] = seq; } + /*******************************************************************************/ + int FastaMap::sizeUnique(){ //returns datas size which is the number of unique sequences return data.size(); } + /*******************************************************************************/ + void FastaMap::printNamesFile(ostream& out){ //prints data try { // two column file created with groupname and them list of identical sequence names @@ -167,7 +129,9 @@ void FastaMap::printNamesFile(ostream& out){ //prints data exit(1); } } + /*******************************************************************************/ + void FastaMap::printCondensedFasta(ostream& out){ //prints data try { //creates a fasta file @@ -185,5 +149,6 @@ void FastaMap::printCondensedFasta(ostream& out){ //prints data exit(1); } } + /*******************************************************************************/ diff --git a/globaldata.cpp b/globaldata.cpp index 6138e2c..bf28cc6 100644 --- a/globaldata.cpp +++ b/globaldata.cpp @@ -6,7 +6,7 @@ /******************************************************/ GlobalData* GlobalData::getInstance() { - if( _uniqueInstance == 0 ) { + if( _uniqueInstance == 0) { _uniqueInstance = new GlobalData(); } return _uniqueInstance; @@ -23,7 +23,7 @@ void GlobalData::parseGlobalData(string commandString, string optionText){ reset(); //clears out data from previous read - if ((commandName == "read.dist") || (commandName == "read.otu") || (commandName == "read.tree")) { + if((commandName == "read.dist") || (commandName == "read.otu") || (commandName == "read.tree")) { clear(); gGroupmap = NULL; gListVector = NULL; @@ -35,17 +35,17 @@ void GlobalData::parseGlobalData(string commandString, string optionText){ } //saves help request - if (commandName =="help") { + if(commandName =="help") { helpRequest = optionText; } - if (commandName == "libshuff") { + if(commandName == "libshuff") { iters = "10000"; cutoff = "1.0"; } //set default value for cutoff - if (commandName == "dist.seqs") { cutoff = "1.0"; } + if(commandName == "dist.seqs") { cutoff = "1.0"; } string key, value; //reads in parameters and values @@ -54,72 +54,77 @@ void GlobalData::parseGlobalData(string commandString, string optionText){ splitAtComma(value, optionText); splitAtEquals(key, value); - if (key == "phylip" ) { phylipfile = value; inputFileName = value; fileroot = value; format = "phylip"; } - if (key == "column" ) { columnfile = value; inputFileName = value; fileroot = value; format = "column"; } - if (key == "list" ) { listfile = value; inputFileName = value; fileroot = value; format = "list"; } - if (key == "rabund" ) { rabundfile = value; inputFileName = value; fileroot = value; format = "rabund"; } - if (key == "sabund" ) { sabundfile = value; inputFileName = value; fileroot = value; format = "sabund"; } - if (key == "fasta" ) { fastafile = value; inputFileName = value; fileroot = value; format = "fasta"; } - if (key == "tree" ) { treefile = value; inputFileName = value; fileroot = value; format = "tree"; } - if (key == "shared" ) { sharedfile = value; inputFileName = value; fileroot = value; format = "sharedfile"; } - if (key == "name" ) { namefile = value; } - if (key == "order" ) { orderfile = value; } - if (key == "group" ) { groupfile = value; } - if (key == "cutoff" ) { cutoff = value; } - if (key == "precision" ) { precision = value; } - if (key == "iters" ) { iters = value; } - if (key == "jumble" ) { jumble = value; } - if (key == "freq" ) { freq = value; } - if (key == "method" ) { method = value; } - if (key == "fileroot" ) { fileroot = value; } - if (key == "abund" ) { abund = value; } - if (key == "random" ) { randomtree = value; } - if (key == "calc") { calc = value; } - if (key == "step") { step = value; } - if (key == "form") { form = value; } - if (key == "sorted") { sorted = value; } - if (key == "vertical") { vertical = value; } - if (key == "trump") { trump = value; } - if (key == "hard") { hard = value; } - if (key == "soft") { soft = value; } - if (key == "scale") { scale = value; } - if (key == "countends" ) { countends = value; } - if (key == "processors" ) { processors = value; } - if (key == "size" ) { size = value; } - if (key == "candidate") { candidatefile = value; } - if (key == "search") { search = value; } - if (key == "ksize") { ksize = value; } - if (key == "align") { align = value; } - if (key == "match") { match = value; } - if (key == "mismatch") { mismatch = value; } - if (key == "gapopen") { gapopen = value; } - if (key == "gapextend" ) { gapextend = value; } - if (key == "start" ) { startPos = value; } - if (key == "end" ) { endPos = value; } - if (key == "maxambig" ) { maxAmbig = value; } - if (key == "maxhomop" ) { maxHomoPolymer = value; } - if (key == "minlength" ) { minLength = value; } - if (key == "maxlength" ) { maxLength = value; } - - if (key == "line") {//stores lines to be used in a vector + if(key == "phylip") { phylipfile = value; inputFileName = value; fileroot = value; format = "phylip"; } + if(key == "column") { columnfile = value; inputFileName = value; fileroot = value; format = "column"; } + if(key == "list") { listfile = value; inputFileName = value; fileroot = value; format = "list"; } + if(key == "rabund") { rabundfile = value; inputFileName = value; fileroot = value; format = "rabund"; } + if(key == "sabund") { sabundfile = value; inputFileName = value; fileroot = value; format = "sabund"; } + if(key == "fasta") { fastafile = value; inputFileName = value; fileroot = value; format = "fasta"; } + if(key == "tree") { treefile = value; inputFileName = value; fileroot = value; format = "tree"; } + if(key == "shared") { sharedfile = value; inputFileName = value; fileroot = value; format = "sharedfile"; } + if(key == "name") { namefile = value; } + if(key == "order") { orderfile = value; } + if(key == "group") { groupfile = value; } + if(key == "cutoff") { cutoff = value; } + if(key == "precision") { precision = value; } + if(key == "iters") { iters = value; } + if(key == "jumble") { jumble = value; } + if(key == "freq") { freq = value; } + if(key == "method") { method = value; } + if(key == "fileroot") { fileroot = value; } + if(key == "abund") { abund = value; } + if(key == "random") { randomtree = value; } + if(key == "calc") { calc = value; } + if(key == "step") { step = value; } + if(key == "form") { form = value; } + if(key == "sorted") { sorted = value; } + if(key == "vertical") { vertical = value; } + if(key == "trump") { trump = value; } + if(key == "hard") { hard = value; } + if(key == "soft") { soft = value; } + if(key == "scale") { scale = value; } + if(key == "countends") { countends = value; } + if(key == "processors") { processors = value; } + if(key == "size") { size = value; } + if(key == "candidate") { candidatefile = value; } + if(key == "search") { search = value; } + if(key == "ksize") { ksize = value; } + if(key == "align") { align = value; } + if(key == "match") { match = value; } + if(key == "mismatch") { mismatch = value; } + if(key == "gapopen") { gapopen = value; } + if(key == "gapextend") { gapextend = value; } + if(key == "start") { startPos = value; } + if(key == "end") { endPos = value; } + if(key == "maxambig") { maxAmbig = value; } + if(key == "maxhomop") { maxHomoPolymer = value; } + if(key == "minlength") { minLength = value; } + if(key == "maxlength") { maxLength = value; } + if(key == "flip" ) { flip = value; } + if(key == "oligos" ) { oligoFile = value; } + if(key == "forward" ) { forMismatch = value; } + if(key == "reverse" ) { revMismatch = value; } + if(key == "barcode" ) { barMismatch = value; } + + if(key == "line") {//stores lines to be used in a vector lines.clear(); labels.clear(); line = value; label = ""; - if (line != "all") { splitAtDash(value, lines); allLines = 0; } + if(line != "all") { splitAtDash(value, lines); allLines = 0; } else { allLines = 1; } } - if (key == "label") {//stores lines to be used in a vector + if(key == "label") {//stores lines to be used in a vector labels.clear(); lines.clear(); label = value; line = ""; - if (label != "all") { splitAtDash(value, labels); allLines = 0; } + if(label != "all") { splitAtDash(value, labels); allLines = 0; } else { allLines = 1; } } - if (key == "groups") {//stores groups to be used in a vector + if(key == "groups") {//stores groups to be used in a vector Groups.clear(); groups = value; splitAtDash(value, Groups); @@ -127,76 +132,81 @@ void GlobalData::parseGlobalData(string commandString, string optionText){ } - //saves the last parameter + //saves the last parameter ==> this seems silly... value = optionText; splitAtEquals(key, value); - if (key == "phylip" ) { phylipfile = value; inputFileName = value; fileroot = value; format = "phylip"; } - if (key == "column" ) { columnfile = value; inputFileName = value; fileroot = value; format = "column"; } - if (key == "list" ) { listfile = value; inputFileName = value; fileroot = value; format = "list"; } - if (key == "rabund" ) { rabundfile = value; inputFileName = value; fileroot = value; format = "rabund"; } - if (key == "sabund" ) { sabundfile = value; inputFileName = value; fileroot = value; format = "sabund"; } - if (key == "fasta" ) { fastafile = value; inputFileName = value; fileroot = value; format = "fasta"; } - if (key == "tree" ) { treefile = value; inputFileName = value; fileroot = value; format = "tree"; } - if (key == "shared" ) { sharedfile = value; inputFileName = value; fileroot = value; format = "sharedfile"; } - if (key == "name" ) { namefile = value; } - if (key == "order" ) { orderfile = value; } - if (key == "group" ) { groupfile = value; } - if (key == "cutoff" ) { cutoff = value; } - if (key == "precision" ) { precision = value; } - if (key == "iters" ) { iters = value; } - if (key == "jumble" ) { jumble = value; } - if (key == "freq" ) { freq = value; } - if (key == "method" ) { method = value; } - if (key == "fileroot" ) { fileroot = value; } - if (key == "abund" ) { abund = value; } - if (key == "random" ) { randomtree = value; } - if (key == "calc") { calc = value; } - if (key == "step") { step = value; } - if (key == "form") { form = value; } - if (key == "sorted") { sorted = value; } - if (key == "vertical") { vertical = value; } - if (key == "trump") { trump = value; } - if (key == "hard") { hard = value; } - if (key == "soft") { soft = value; } - if (key == "scale") { scale = value; } - if (key == "countends" ) { countends = value; } - if (key == "processors" ) { processors = value; } - if (key == "size" ) { size = value; } - if (key == "candidate") { candidatefile = value; } - if (key == "search") { search = value; } - if (key == "ksize") { ksize = value; } - if (key == "align") { align = value; } - if (key == "match") { match = value; } - if (key == "mismatch") { mismatch = value; } - if (key == "gapopen") { gapopen = value; } - if (key == "gapextend" ) { gapextend = value; } - if (key == "start" ) { startPos = value; } - if (key == "end" ) { endPos = value; } - if (key == "maxambig" ) { maxAmbig = value; } - if (key == "maxhomop" ) { maxHomoPolymer = value; } - if (key == "minlength" ) { minLength = value; } - if (key == "maxlength" ) { maxLength = value; } - + if(key == "phylip") { phylipfile = value; inputFileName = value; fileroot = value; format = "phylip"; } + if(key == "column") { columnfile = value; inputFileName = value; fileroot = value; format = "column"; } + if(key == "list") { listfile = value; inputFileName = value; fileroot = value; format = "list"; } + if(key == "rabund") { rabundfile = value; inputFileName = value; fileroot = value; format = "rabund"; } + if(key == "sabund") { sabundfile = value; inputFileName = value; fileroot = value; format = "sabund"; } + if(key == "fasta") { fastafile = value; inputFileName = value; fileroot = value; format = "fasta"; } + if(key == "tree") { treefile = value; inputFileName = value; fileroot = value; format = "tree"; } + if(key == "shared") { sharedfile = value; inputFileName = value; fileroot = value; format = "sharedfile"; } + if(key == "name") { namefile = value; } + if(key == "order") { orderfile = value; } + if(key == "group") { groupfile = value; } + if(key == "cutoff") { cutoff = value; } + if(key == "precision") { precision = value; } + if(key == "iters") { iters = value; } + if(key == "jumble") { jumble = value; } + if(key == "freq") { freq = value; } + if(key == "method") { method = value; } + if(key == "fileroot") { fileroot = value; } + if(key == "abund") { abund = value; } + if(key == "random") { randomtree = value; } + if(key == "calc") { calc = value; } + if(key == "step") { step = value; } + if(key == "form") { form = value; } + if(key == "sorted") { sorted = value; } + if(key == "vertical") { vertical = value; } + if(key == "trump") { trump = value; } + if(key == "hard") { hard = value; } + if(key == "soft") { soft = value; } + if(key == "scale") { scale = value; } + if(key == "countends") { countends = value; } + if(key == "processors") { processors = value; } + if(key == "size") { size = value; } + if(key == "candidate") { candidatefile = value; } + if(key == "search") { search = value; } + if(key == "ksize") { ksize = value; } + if(key == "align") { align = value; } + if(key == "match") { match = value; } + if(key == "mismatch") { mismatch = value; } + if(key == "gapopen") { gapopen = value; } + if(key == "gapextend") { gapextend = value; } + if(key == "start") { startPos = value; } + if(key == "end") { endPos = value; } + if(key == "maxambig") { maxAmbig = value; } + if(key == "maxhomop") { maxHomoPolymer = value; } + if(key == "minlength") { minLength = value; } + if(key == "maxlength") { maxLength = value; } + if(key == "flip" ) { flip = value; } + if(key == "oligos" ) { oligoFile = value; } + if(key == "forward" ) { forMismatch = value; } + if(key == "reverse" ) { revMismatch = value; } + if(key == "barcode" ) { barMismatch = value; } + - if (key == "line") {//stores lines to be used in a vector + if(key == "line") {//stores lines to be used in a vector lines.clear(); labels.clear(); line = value; label = ""; - if (line != "all") { splitAtDash(value, lines); allLines = 0; } + if(line != "all") { splitAtDash(value, lines); allLines = 0; } else { allLines = 1; } } - if (key == "label") {//stores lines to be used in a vector + if(key == "label") {//stores lines to be used in a vector labels.clear(); lines.clear(); label = value; line = ""; - if (label != "all") { splitAtDash(value, labels); allLines = 0; } + if(label != "all") { splitAtDash(value, labels); allLines = 0; } else { allLines = 1; } } - if (key == "groups") {//stores groups to be used in a vector + if(key == "groups") {//stores groups to be used in a vector Groups.clear(); groups = value; splitAtDash(value, Groups); @@ -204,57 +214,57 @@ void GlobalData::parseGlobalData(string commandString, string optionText){ } //set format for shared - if ((listfile != "") && (groupfile != "")) { format = "shared"; } - if ((phylipfile != "") && (groupfile != "")) { format = "matrix"; } + if((listfile != "") && (groupfile != "")) { format = "shared"; } + if((phylipfile != "") && (groupfile != "")) { format = "matrix"; } //input defaults for calculators - if (commandName == "collect.single") { + if(commandName == "collect.single") { - if ((calc == "default") || (calc == "")) { calc = "sobs-chao-ace-jack-shannon-npshannon-simpson"; } + if((calc == "default") || (calc == "")) { calc = "sobs-chao-ace-jack-shannon-npshannon-simpson"; } Estimators.clear(); splitAtDash(calc, Estimators); } - if (commandName == "rarefaction.single") { - if ((calc == "default") || (calc == "")) { calc = "sobs"; } + if(commandName == "rarefaction.single") { + if((calc == "default") || (calc == "")) { calc = "sobs"; } Estimators.clear(); splitAtDash(calc, Estimators); } - if (commandName == "collect.shared") { - if ((calc == "default") || (calc == "")) { calc = "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan"; } + if(commandName == "collect.shared") { + if((calc == "default") || (calc == "")) { calc = "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan"; } Estimators.clear(); splitAtDash(calc, Estimators); } - if (commandName == "summary.single") { - if ((calc == "default") || (calc == "")) { calc = "sobs-chao-ace-jack-shannon-npshannon-simpson"; } + if(commandName == "summary.single") { + if((calc == "default") || (calc == "")) { calc = "sobs-chao-ace-jack-shannon-npshannon-simpson"; } Estimators.clear(); splitAtDash(calc, Estimators); } - if (commandName == "summary.shared") { - if ((calc == "default") || (calc == "")) { calc = "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan"; } + if(commandName == "summary.shared") { + if((calc == "default") || (calc == "")) { calc = "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan"; } Estimators.clear(); splitAtDash(calc, Estimators); } - if (commandName == "rarefaction.shared") { - if ((calc == "default") || (calc == "")) { calc = "sharedobserved"; } + if(commandName == "rarefaction.shared") { + if((calc == "default") || (calc == "")) { calc = "sharedobserved"; } Estimators.clear(); splitAtDash(calc, Estimators); } - if (commandName == "dist.seqs") { - if ((calc == "default") || (calc == "")) { calc = "onegap"; } - if (countends == "") { countends = "T"; } + if(commandName == "dist.seqs") { + if((calc == "default") || (calc == "")) { calc = "onegap"; } + if(countends == "") { countends = "T"; } Estimators.clear(); splitAtDash(calc, Estimators); } - if (commandName == "venn") { - if ((calc == "default") || (calc == "")) { - if (format == "list") { calc = "sobs"; } + if(commandName == "venn") { + if((calc == "default") || (calc == "")) { + if(format == "list") { calc = "sobs"; } else { calc = "sharedsobs"; } } Estimators.clear(); splitAtDash(calc, Estimators); } - if ((commandName == "tree.shared") || (commandName == "bootstrap.shared") || (commandName == "dist.shared")) { - if ((calc == "default") || (calc == "")) { + if((commandName == "tree.shared") || (commandName == "bootstrap.shared") || (commandName == "dist.shared")) { + if((calc == "default") || (calc == "")) { calc = "jclass-thetayc"; } Estimators.clear(); @@ -268,11 +278,11 @@ void GlobalData::parseGlobalData(string commandString, string optionText){ } - //if you have done a read.otu with a groupfile but don't want to use it anymore because you want to do single commands - if ((commandName == "collect.single") || (commandName == "rarefaction.single") || (commandName == "summary.single")) { - if (listfile != "") { format = "list"; } - else if (sabundfile != "") { format = "sabund"; } - else if (rabundfile != "") { format = "rabund"; } + //ifyou have done a read.otu with a groupfile but don't want to use it anymore because you want to do single commands + if((commandName == "collect.single") || (commandName == "rarefaction.single") || (commandName == "summary.single")) { + if(listfile != "") { format = "list"; } + else if(sabundfile != "") { format = "sabund"; } + else if(rabundfile != "") { format = "rabund"; } } } catch(exception& e) { @@ -288,53 +298,58 @@ void GlobalData::parseGlobalData(string commandString, string optionText){ /******************************************************/ // These functions give you the option parameters of the commands -string GlobalData::getPhylipFile() { return phylipfile; } -string GlobalData::getColumnFile() { return columnfile; } -string GlobalData::getListFile() { return listfile; } -string GlobalData::getRabundFile() { return rabundfile; } -string GlobalData::getSabundFile() { return sabundfile; } -string GlobalData::getNameFile() { return namefile; } -string GlobalData::getGroupFile() { return groupfile; } -string GlobalData::getOrderFile() { return orderfile; } -string GlobalData::getTreeFile() { return treefile; } -string GlobalData::getSharedFile() { return sharedfile; } -string GlobalData::getFastaFile() { return fastafile; } -string GlobalData::getCutOff() { return cutoff; } -string GlobalData::getFormat() { return format; } -string GlobalData::getPrecision() { return precision; } -string GlobalData::getMethod() { return method; } -string GlobalData::getFileRoot() { return fileroot; } -string GlobalData::getIters() { return iters; } -string GlobalData::getJumble() { return jumble; } -string GlobalData::getFreq() { return freq; } -string GlobalData::getAbund() { return abund; } -string GlobalData::getRandomTree() { return randomtree; } -string GlobalData::getGroups() { return groups; } -string GlobalData::getStep() { return step; } -string GlobalData::getForm() { return form; } -string GlobalData::getSorted() { return sorted; } -string GlobalData::getVertical() { return vertical; } -string GlobalData::getTrump() { return trump; } -string GlobalData::getSoft() { return soft; } -string GlobalData::getHard() { return hard; } -string GlobalData::getScale() { return scale; } -string GlobalData::getCountEnds() { return countends; } -string GlobalData::getProcessors() { return processors; } -string GlobalData::getSize() { return size; } -string GlobalData::getCandidateFile() { return candidatefile;} -string GlobalData::getSearch() { return search; } -string GlobalData::getKSize() { return ksize; } -string GlobalData::getAlign() { return align; } -string GlobalData::getMatch() { return match; } -string GlobalData::getMismatch() { return mismatch; } -string GlobalData::getGapopen() { return gapopen; } -string GlobalData::getGapextend() { return gapextend; } -string GlobalData::getStartPos() { return startPos; } -string GlobalData::getEndPos() { return endPos; } -string GlobalData::getMaxAmbig() { return maxAmbig; } +string GlobalData::getPhylipFile() { return phylipfile; } +string GlobalData::getColumnFile() { return columnfile; } +string GlobalData::getListFile() { return listfile; } +string GlobalData::getRabundFile() { return rabundfile; } +string GlobalData::getSabundFile() { return sabundfile; } +string GlobalData::getNameFile() { return namefile; } +string GlobalData::getGroupFile() { return groupfile; } +string GlobalData::getOrderFile() { return orderfile; } +string GlobalData::getTreeFile() { return treefile; } +string GlobalData::getSharedFile() { return sharedfile; } +string GlobalData::getFastaFile() { return fastafile; } +string GlobalData::getCutOff() { return cutoff; } +string GlobalData::getFormat() { return format; } +string GlobalData::getPrecision() { return precision; } +string GlobalData::getMethod() { return method; } +string GlobalData::getFileRoot() { return fileroot; } +string GlobalData::getIters() { return iters; } +string GlobalData::getJumble() { return jumble; } +string GlobalData::getFreq() { return freq; } +string GlobalData::getAbund() { return abund; } +string GlobalData::getRandomTree() { return randomtree; } +string GlobalData::getGroups() { return groups; } +string GlobalData::getStep() { return step; } +string GlobalData::getForm() { return form; } +string GlobalData::getSorted() { return sorted; } +string GlobalData::getVertical() { return vertical; } +string GlobalData::getTrump() { return trump; } +string GlobalData::getSoft() { return soft; } +string GlobalData::getHard() { return hard; } +string GlobalData::getScale() { return scale; } +string GlobalData::getCountEnds() { return countends; } +string GlobalData::getProcessors() { return processors; } +string GlobalData::getSize() { return size; } +string GlobalData::getCandidateFile() { return candidatefile; } +string GlobalData::getSearch() { return search; } +string GlobalData::getKSize() { return ksize; } +string GlobalData::getAlign() { return align; } +string GlobalData::getMatch() { return match; } +string GlobalData::getMismatch() { return mismatch; } +string GlobalData::getGapopen() { return gapopen; } +string GlobalData::getGapextend() { return gapextend; } +string GlobalData::getStartPos() { return startPos; } +string GlobalData::getEndPos() { return endPos; } +string GlobalData::getMaxAmbig() { return maxAmbig; } string GlobalData::getMaxHomoPolymer() { return maxHomoPolymer; } -string GlobalData::getMinLength() { return minLength; } -string GlobalData::getMaxLength() { return maxLength; } +string GlobalData::getMinLength() { return minLength; } +string GlobalData::getMaxLength() { return maxLength; } +string GlobalData::getFlip() { return flip; } +string GlobalData::getOligosFile() { return oligoFile; } +string GlobalData::getForwardMismatch() { return forMismatch; } +string GlobalData::getReverseMismatch() { return revMismatch; } +string GlobalData::getBarcodeMismatch() { return barMismatch; } void GlobalData::setListFile(string file) { listfile = file; inputFileName = file; } @@ -417,7 +432,11 @@ void GlobalData::clear() { maxHomoPolymer = "-1"; minLength = "-1"; maxLength = "-1"; - + flip = "0"; + forMismatch = "0"; + revMismatch = "0"; + barMismatch = "0"; + oligoFile = ""; } //*******************************************************/ @@ -459,6 +478,11 @@ void GlobalData::reset() { maxHomoPolymer = "-1"; minLength = "-1"; maxLength = "-1"; + flip = "0"; + forMismatch = "0"; + revMismatch = "0"; + barMismatch = "0"; + oligoFile = ""; } /*******************************************************/ @@ -484,8 +508,8 @@ void GlobalData::parseTreeFile() { int c, comment; comment = 0; - //if you are not a nexus file - if ((c = filehandle.peek()) != '#') { + //ifyou are not a nexus file + if((c = filehandle.peek()) != '#') { while((c = filehandle.peek()) != ';') { while ((c = filehandle.peek()) != ';') { // get past comments @@ -501,8 +525,8 @@ void GlobalData::parseTreeFile() { readTreeString(filehandle); } - //if you are a nexus file - }else if ((c = filehandle.peek()) == '#') { + //ifyou are a nexus file + }else if((c = filehandle.peek()) == '#') { string holder = ""; // get past comments @@ -515,12 +539,12 @@ void GlobalData::parseTreeFile() { } filehandle >> holder; - //if there is no translate then you must read tree string otherwise use translate to get names + //ifthere is no translate then you must read tree string otherwise use translate to get names if(holder == "tree" && comment != 1){ //pass over the "tree rep.6878900 = " - while (((c = filehandle.get()) != '(') && ((c = filehandle.peek()) != EOF) ) {;} + while (((c = filehandle.get()) != '(') && ((c = filehandle.peek()) != EOF)) {;} - if (c == EOF ) { break; } + if(c == EOF) { break; } filehandle.putback(c); //put back first ( of tree. readTreeString(filehandle); break; @@ -528,7 +552,7 @@ void GlobalData::parseTreeFile() { } //use nexus translation rather than parsing tree to save time - if ((holder == "translate") || (holder == "Translate")) { + if((holder == "translate") || (holder == "Translate")) { string number, name, h; h = ""; // so it enters the loop the first time @@ -541,7 +565,7 @@ void GlobalData::parseTreeFile() { name.erase(name.end()-1); //erase the comma Treenames.push_back(number); } - if (number == ";") { Treenames.pop_back(); } //in case ';' from translation is on next line instead of next to last name + if(number == ";") { Treenames.pop_back(); } //in case ';' from translation is on next line instead of next to last name } } @@ -564,8 +588,8 @@ void GlobalData::readTreeString(ifstream& filehandle) { string name; //k while((c = filehandle.peek()) != ';') { - //if you are a name - if ((c != '(') && (c != ')') && (c != ',') && (c != ':') && (c != '\n') && (c != '\t') && (c != 32)) { //32 is space + //ifyou are a name + if((c != '(') && (c != ')') && (c != ',') && (c != ':') && (c != '\n') && (c != '\t') && (c != 32)) { //32 is space name = ""; c = filehandle.get(); // k = c; @@ -584,7 +608,7 @@ void GlobalData::readTreeString(ifstream& filehandle) { //cout << " after putback" << k << endl; } - if (c == ':') { //read until you reach the end of the branch length + if(c == ':') { //read until you reach the end of the branch length while ((c != '(') && (c != ')') && (c != ',') && (c != ';') && (c != '\n') && (c != '\t') && (c != 32)) { c = filehandle.get(); // k = c; @@ -593,7 +617,7 @@ void GlobalData::readTreeString(ifstream& filehandle) { filehandle.putback(c); } c = filehandle.get(); - if (c == ';') { break; } + if(c == ';') { break; } // k = c; //cout << k << endl; diff --git a/globaldata.hpp b/globaldata.hpp index af5b383..2a57310 100644 --- a/globaldata.hpp +++ b/globaldata.hpp @@ -91,7 +91,12 @@ public: string getMaxHomoPolymer(); string getMinLength(); string getMaxLength(); - + string getFlip(); + string getForwardMismatch(); + string getReverseMismatch(); + string getBarcodeMismatch(); + string getOligosFile(); + void setListFile(string); void setGroupFile(string file); void setPhylipFile(string); @@ -121,7 +126,7 @@ public: private: - string phylipfile, columnfile, listfile, rabundfile, sabundfile, namefile, groupfile, orderfile, fastafile, treefile, sharedfile, line, label, randomtree, groups, cutoff, format, precision, method, fileroot, iters, jumble, freq, calc, abund, step, form, sorted, trump, soft, hard, scale, countends, processors, candidatefile, search, ksize, align, match, size, mismatch, gapopen, gapextend, minLength, maxLength, startPos, endPos, maxAmbig, maxHomoPolymer; + string phylipfile, columnfile, listfile, rabundfile, sabundfile, namefile, groupfile, orderfile, fastafile, treefile, sharedfile, line, label, randomtree, groups, cutoff, format, precision, method, fileroot, iters, jumble, freq, calc, abund, step, form, sorted, trump, soft, hard, scale, countends, processors, candidatefile, search, ksize, align, match, size, mismatch, gapopen, gapextend, minLength, maxLength, startPos, endPos, maxAmbig, maxHomoPolymer, flip, forMismatch, revMismatch, barMismatch, oligoFile; static GlobalData* _uniqueInstance; diff --git a/reversecommand.cpp b/reversecommand.cpp index 9be2915..15f63f6 100644 --- a/reversecommand.cpp +++ b/reversecommand.cpp @@ -19,11 +19,11 @@ ReverseSeqsCommand::ReverseSeqsCommand(){ if(globaldata->getFastaFile() == "") { cout << "you need to at least enter a fasta file name" << endl; } } catch(exception& e) { - cout << "Standard Error: " << e.what() << " has occurred in the SeqCoordCommand class Function SeqCoordCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + cout << "Standard Error: " << e.what() << " has occurred in the ReverseSeqsCommand class Function ReverseSeqsCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; exit(1); } catch(...) { - cout << "An unknown error has occurred in the SeqCoordCommand class function SeqCoordCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + cout << "An unknown error has occurred in the ReverseSeqsCommand class function ReverseSeqsCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; exit(1); } } @@ -57,11 +57,11 @@ int ReverseSeqsCommand::execute(){ } catch(exception& e) { - cout << "Standard Error: " << e.what() << " has occurred in the FilterSeqsCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + cout << "Standard Error: " << e.what() << " has occurred in the ReverseSeqsCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; exit(1); } catch(...) { - cout << "An unknown error has occurred in the FilterSeqsCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + cout << "An unknown error has occurred in the ReverseSeqsCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; exit(1); } } diff --git a/screenseqscommand.cpp b/screenseqscommand.cpp index 3ad8479..ed71559 100644 --- a/screenseqscommand.cpp +++ b/screenseqscommand.cpp @@ -18,11 +18,11 @@ ScreenSeqsCommand::ScreenSeqsCommand(){ if(globaldata->getFastaFile() == "") { cout << "you must provide a fasta formatted file" << endl; } } catch(exception& e) { - cout << "Standard Error: " << e.what() << " has occurred in the SeqCoordCommand class Function SeqCoordCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + cout << "Standard Error: " << e.what() << " has occurred in the ScreenSeqsCommand class Function ScreenSeqsCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; exit(1); } catch(...) { - cout << "An unknown error has occurred in the SeqCoordCommand class function SeqCoordCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + cout << "An unknown error has occurred in the ScreenSeqsCommand class function ScreenSeqsCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; exit(1); } } @@ -83,11 +83,11 @@ int ScreenSeqsCommand::execute(){ return 0; } catch(exception& e) { - cout << "Standard Error: " << e.what() << " has occurred in the FilterSeqsCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + cout << "Standard Error: " << e.what() << " has occurred in the ScreenSeqsCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; exit(1); } catch(...) { - cout << "An unknown error has occurred in the FilterSeqsCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + cout << "An unknown error has occurred in the ScreenSeqsCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; exit(1); } diff --git a/seqsummarycommand.cpp b/seqsummarycommand.cpp index 2f24da5..3787cef 100644 --- a/seqsummarycommand.cpp +++ b/seqsummarycommand.cpp @@ -18,11 +18,11 @@ SeqSummaryCommand::SeqSummaryCommand(){ if(globaldata->getFastaFile() == "") { cout << "you need to at least enter a fasta file name" << endl; } } catch(exception& e) { - cout << "Standard Error: " << e.what() << " has occurred in the SeqCoordCommand class Function SeqCoordCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + cout << "Standard Error: " << e.what() << " has occurred in the SeqSummaryCommand class Function SeqSummaryCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; exit(1); } catch(...) { - cout << "An unknown error has occurred in the SeqCoordCommand class function SeqCoordCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + cout << "An unknown error has occurred in the SeqSummaryCommand class function SeqSummaryCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; exit(1); } } @@ -97,11 +97,11 @@ int SeqSummaryCommand::execute(){ return 0; } catch(exception& e) { - cout << "Standard Error: " << e.what() << " has occurred in the FilterSeqsCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + cout << "Standard Error: " << e.what() << " has occurred in the SeqSummaryCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; exit(1); } catch(...) { - cout << "An unknown error has occurred in the FilterSeqsCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + cout << "An unknown error has occurred in the SeqSummaryCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; exit(1); } diff --git a/sequence.cpp b/sequence.cpp index e997cc2..6a15eb5 100644 --- a/sequence.cpp +++ b/sequence.cpp @@ -30,12 +30,14 @@ Sequence::Sequence(string newName, string sequence) { //******************************************************************************************************************** Sequence::Sequence(ifstream& fastaFile){ + initialize(); + fastaFile >> name; + name = name.substr(1); + char c; + + while ((c = fastaFile.get()) != EOF) { if (c == 10){ break; } } // get rest of line if there's any crap there - string accession; // provided a file handle to a fasta-formatted sequence file, read in the next - fastaFile >> accession; // accession number and sequence we find... - setName(accession); - char letter; string sequence; @@ -50,7 +52,6 @@ Sequence::Sequence(ifstream& fastaFile){ if(letter == 'U'){letter = 'T';} sequence += letter; } - } if(sequence.find_first_of('-') != string::npos){ // if there are any gaps in the sequence, assume that it is diff --git a/trimseqscommand.cpp b/trimseqscommand.cpp new file mode 100644 index 0000000..40b3b4f --- /dev/null +++ b/trimseqscommand.cpp @@ -0,0 +1,248 @@ +/* + * trimseqscommand.cpp + * Mothur + * + * Created by Pat Schloss on 6/6/09. + * Copyright 2009 Patrick D. Schloss. All rights reserved. + * + */ + +#include "sequence.hpp" +#include "trimseqscommand.h" + +//*************************************************************************************************************** + +TrimSeqsCommand::TrimSeqsCommand(){ + try { + + oligos = 0; + forwardPrimerMismatch = 0; + reversePrimerMismatch = 0; + barcodeMismatch = 0; + + totalBarcodeCount = 0; + matchBarcodeCount = 0; + + globaldata = GlobalData::getInstance(); + if(globaldata->getFastaFile() == ""){ + cout << "you need to at least enter a fasta file name" << endl; + } + + if(isTrue(globaldata->getFlip())) { flip = 1; } + + if(globaldata->getOligosFile() != ""){ + oligos = 1; + forwardPrimerMismatch = atoi(globaldata->getForwardMismatch().c_str()); + reversePrimerMismatch = atoi(globaldata->getReverseMismatch().c_str()); + barcodeMismatch = atoi(globaldata->getBarcodeMismatch().c_str()); + } + + if(!flip && !oligos) { cout << "what was the point?" << endl; } + + + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the TrimSeqsCommand class Function TrimSeqsCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the TrimSeqsCommand class function TrimSeqsCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } +} + +//*************************************************************************************************************** + +TrimSeqsCommand::~TrimSeqsCommand(){ /* do nothing */ } + +//*************************************************************************************************************** + +int TrimSeqsCommand::execute(){ + try{ + getOligos(); + + ifstream inFASTA; + openInputFile(globaldata->getFastaFile(), inFASTA); + + ofstream outFASTA; + string trimSeqFile = getRootName(globaldata->getFastaFile()) + "trim.fasta"; + openOutputFile(trimSeqFile, outFASTA); + + ofstream outGroups; + string groupFile = getRootName(globaldata->getFastaFile()) + "groups"; + openOutputFile(groupFile, outGroups); + + ofstream scrapFASTA; + string scrapSeqFile = getRootName(globaldata->getFastaFile()) + "scrap.fasta"; + openOutputFile(scrapSeqFile, scrapFASTA); + + bool success; + + while(!inFASTA.eof()){ + Sequence currSeq(inFASTA); + string group; + string trashCode = ""; + + if(barcodes.size() != 0){ + success = stripBarcode(currSeq, group); + if(!success){ trashCode += 'b'; } + } + if(numFPrimers != 0){ + success = stripForward(currSeq); + if(!success){ trashCode += 'f'; } + } + if(numRPrimers != 0){ + success = stripReverse(currSeq); + if(!success){ trashCode += 'r'; } + } + if(flip){ currSeq.reverseComplement(); } // should go last + + if(trashCode.length() == 0){ + currSeq.printSequence(outFASTA); + outGroups << currSeq.getName() << '\t' << group << endl; + } + else{ + currSeq.setName(currSeq.getName() + '|' + trashCode); + currSeq.printSequence(scrapFASTA); + } + + gobble(inFASTA); + } + inFASTA.close(); + outFASTA.close(); + scrapFASTA.close(); + outGroups.close(); + + return 0; + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the TrimSeqsCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the TrimSeqsCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } +} + +//*************************************************************************************************************** + +void TrimSeqsCommand::getOligos(){ + + ifstream inOligos; + openInputFile(globaldata->getOligosFile(), inOligos); + + string type, oligo, group; + + while(!inOligos.eof()){ + inOligos >> type; + + if(type == "forward"){ + inOligos >> oligo; + forPrimer.push_back(oligo); + } + else if(type == "reverse"){ + inOligos >> oligo; + revPrimer.push_back(oligo); + } + else if(type == "barcode"){ + inOligos >> oligo >> group; + barcodes[oligo]=group; + } + else if(type[0] == '#'){ + char c; + while ((c = inOligos.get()) != EOF) { if (c == 10){ break; } } // get rest of line + } + + gobble(inOligos); + } + + numFPrimers = forPrimer.size(); + numRPrimers = revPrimer.size(); +} + +//*************************************************************************************************************** + +bool TrimSeqsCommand::stripBarcode(Sequence& seq, string& group){ + + string rawSequence = seq.getUnaligned(); + bool success = 0; //guilty until proven innocent + + for(map::iterator it=barcodes.begin();it!=barcodes.end();it++){ + string oligo = it->first; + + if(rawSequence.length() < oligo.length()){ //let's just assume that the barcodes are the same length + success = 0; + break; + } + + if (rawSequence.compare(0,oligo.length(),oligo) == 0){ + group = it->second; + seq.setUnaligned(rawSequence.substr(oligo.length())); + matchBarcodeCount++; + success = 1; + break; + } + } + totalBarcodeCount++; + return success; + +} + +//*************************************************************************************************************** + +bool TrimSeqsCommand::stripForward(Sequence& seq){ + + string rawSequence = seq.getUnaligned(); + bool success = 0; //guilty until proven innocent + + for(int i=0;i forPrimer, revPrimer; + map barcodes; + +}; + +#endif diff --git a/validcommands.cpp b/validcommands.cpp index a12bacc..936e706 100644 --- a/validcommands.cpp +++ b/validcommands.cpp @@ -49,6 +49,7 @@ ValidCommands::ValidCommands() { commands["summary.seqs"] = "summary.seqs"; commands["screen.seqs"] = "screen.seqs"; commands["reverse.seqs"] = "reverse.seqs"; + commands["trim.seqs"] = "trim.seqs"; commands["quit"] = "quit"; diff --git a/validparameter.cpp b/validparameter.cpp index 05cdb1a..bd16984 100644 --- a/validparameter.cpp +++ b/validparameter.cpp @@ -285,6 +285,9 @@ void ValidParameters::initCommandParameters() { string reverseseqsArray[] = {"fasta"}; commandParameters["reverse.seqs"] = addParameters(reverseseqsArray, sizeof(reverseseqsArray)/sizeof(string)); + string trimseqsArray[] = {"fasta", "flip", "oligos", "forward", "reverse", "barcode"}; + commandParameters["trim.seqs"] = addParameters(trimseqsArray, sizeof(trimseqsArray)/sizeof(string)); + string vennArray[] = {"groups","line","label","calc"}; commandParameters["venn"] = addParameters(vennArray, sizeof(vennArray)/sizeof(string)); -- 2.39.2