X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=filterseqscommand.cpp;h=68450fb15a7b140803aab2d54d8c5bc84e7ce688;hb=7e354c9abb09ea3cf5b500a16cc7f6dd79ccb6f5;hp=aff959ab9359ef0d199ac785d8c43a942ea59255;hpb=c196b6b4768ccb84955d773ff0f22e4994d1ba7b;p=mothur.git diff --git a/filterseqscommand.cpp b/filterseqscommand.cpp index aff959a..68450fb 100644 --- a/filterseqscommand.cpp +++ b/filterseqscommand.cpp @@ -3,158 +3,190 @@ * Mothur * * Created by Thomas Ryabin on 5/4/09. - * Copyright 2009 __MyCompanyName__. All rights reserved. + * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. * */ #include "filterseqscommand.h" -#include -#include +#include "sequence.hpp" /**************************************************************************************/ -void FilterSeqsCommand::doTrump() { - //trump = globaldata->getTrump(); -// -// for(int i = 0; i < db->size(); i++) { -// Sequence cur = db->get(i); -// string curAligned = cur.getAligned(); -// -// for(int j = 0; j < curAligned.length-1; j++) { -// string curChar = curAligned.substr(j, j+1); -// -// if(curChar.compare(trump) == 0) -// columnsToRemove[j] = true; -// } -// } + +FilterSeqsCommand::FilterSeqsCommand(string option){ + try { + abort = false; + + //allow user to run help + if(option == "help") { help(); abort = true; } + + else { + //valid paramters for this command + string Array[] = {"fasta", "trump", "soft", "hard", "vertical"}; + vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); + + OptionParser parser(option); + map parameters = parser.getParameters(); + + ValidParameters validParameter; + + //check to make sure all parameters are valid for command + for (map::iterator it = parameters.begin(); it != parameters.end(); it++) { + if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; } + } + + //check for required parameters + fastafile = validParameter.validFile(parameters, "fasta", true); + if (fastafile == "not found") { mothurOut("fasta is a required parameter for the filter.seqs command."); mothurOutEndLine(); abort = true; } + else if (fastafile == "not open") { abort = true; } + + //check for optional parameter and set defaults + // ...at some point should added some additional type checking... + + string temp; + temp = validParameter.validFile(parameters, "trump", false); if (temp == "not found") { temp = "*"; } + trump = temp[0]; + + temp = validParameter.validFile(parameters, "soft", false); if (temp == "not found") { soft = 0; } + else { soft = (float)atoi(temp.c_str()) / 100.0; } + + hard = validParameter.validFile(parameters, "hard", true); if (hard == "not found") { hard = ""; } + else if (hard == "not open") { abort = true; } + + vertical = validParameter.validFile(parameters, "vertical", false); if (vertical == "not found") { vertical = "T"; } + + numSeqs = 0; + + if (abort == false) { + + if (soft != 0) { F.setSoft(soft); } + if (trump != '*') { F.setTrump(trump); } + + } + + } + + } + catch(exception& e) { + errorOut(e, "FilterSeqsCommand", "FilterSeqsCommand"); + exit(1); + } } -/**************************************************************************************/ -void FilterSeqsCommand::doSoft() { - //soft = atoi(globaldata->getSoft().c_str()); -// vector > columnSymbolSums; -// vector > columnSymbols; -// for(int i = 0; i < db->get(0).getLength(); i++) { -// vector symbols; -// vector sums; -// columnSymbols[i] = symbols; -// columnSymbolSums[i] = sums; -// } -// -// for(int i = 0; i < db->size(); i++) { -// Sequence cur = db->get(i); -// string curAligned = cur.getAligned(); -// -// for(int j = 0; j < curAligned.length-1; j++) { -// string curChar = curAligned.substr(j, j+1); -// vector curColumnSymbols = columnSymbols[j]; -// -// bool newSymbol = true; -// -// for(int k = 0; j < curColumnSymbols.size(); j++) -// if(curChar.compare(curColumnSymbols[k]) == 0) { -// newSymbol = false; -// columnSymbolSums[j][k]++; -// } -// -// if(newSymbol) { -// columnSymbols.push_back(curChar); -// columnSymbolSums[j].push_back(1); -// } -// } -// } -// -// for(int i = 0; i < columnSymbolSums.size(); i++) { -// int totalSum = 0; -// int max = 0; -// vector curColumn = columnSymbolSums[i]; -// -// for(int j = 0; j < curColumn.size(); j++) { -// int curSum = curColumn[j]; -// if(curSum > max) -// max = curSum; -// totalSum += curSum; -// } -// -// if((double)max/(double)totalSum * 100 < soft) -// columnsToRemove[i] = true; -// } +//********************************************************************************************************************** + +void FilterSeqsCommand::help(){ + try { + mothurOut("The filter.seqs command reads a file containing sequences and creates a .filter and .filter.fasta file.\n"); + mothurOut("The filter.seqs command parameters are fasta, trump, soft, hard and vertical. \n"); + mothurOut("The fasta parameter is required.\n"); + mothurOut("The trump parameter .... The default is ...\n"); + mothurOut("The soft parameter .... The default is ....\n"); + mothurOut("The hard parameter .... The default is ....\n"); + mothurOut("The vertical parameter .... The default is T.\n"); + mothurOut("The filter.seqs command should be in the following format: \n"); + mothurOut("filter.seqs(fasta=yourFastaFile, trump=yourTrump, soft=yourSoft, hard=yourHard, vertical=yourVertical) \n"); + mothurOut("Example filter.seqs(fasta=abrecovery.fasta, trump=..., soft=..., hard=..., vertical=T).\n"); + mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n\n"); + + } + catch(exception& e) { + errorOut(e, "FilterSeqsCommand", "help"); + exit(1); + } } -void FilterSeqsCommand::doFilter() {} + /**************************************************************************************/ + int FilterSeqsCommand::execute() { try { - globaldata = GlobalData::getInstance(); - filename = globaldata->inputFileName; + + if (abort == true) { return 0; } - if(globaldata->getFastaFile().compare("") != 0) { - readFasta = new ReadFasta(filename); - readFasta->read(); - db = readFasta->getDB(); - } + ifstream inFASTA; + openInputFile(fastafile, inFASTA); - else if(globaldata->getNexusFile().compare("") != 0) { - readNexus = new ReadNexus(filename); - readNexus->read(); - db = readNexus->getDB(); - } + Sequence testSeq(inFASTA); + alignmentLength = testSeq.getAlignLength(); + inFASTA.seekg(0); + + F.setLength(alignmentLength); - else if(globaldata->getClustalFile().compare("") != 0) { - readClustal = new ReadClustal(filename); - readClustal->read(); - db = readClustal->getDB(); + if(soft != 0 || isTrue(vertical)){ + F.initialize(); } + + if(hard.compare("") != 0) { F.doHard(hard); } + else { F.setFilter(string(alignmentLength, '1')); } - else if(globaldata->getPhylipFile().compare("") != 0) { - readPhylip = new ReadPhylip(filename); - readPhylip->read(); - db = readPhylip->getDB(); - } - - for(int i = 0; i < db->get(0).getLength(); i++) - columnsToRemove[i] = false; - - // Trump - if(globaldata->getTrump().compare("") != 0) { + if(trump != '*' || isTrue(vertical) || soft != 0){ + while(!inFASTA.eof()){ //read through and create the filter... + Sequence seq(inFASTA); + if(trump != '*'){ F.doTrump(seq); } + if(isTrue(vertical) || soft != 0){ F.getFreqs(seq); } + numSeqs++; + cout.flush(); + } - } + inFASTA.close(); + F.setNumSeqs(numSeqs); - // Soft - if(globaldata->getSoft().compare("") != 0) {} + + if(isTrue(vertical) == 1) { F.doVertical(); } + if(soft != 0) { F.doSoft(); } + + filter = F.getFilter(); + ofstream outFilter; + string filterFile = getRootName(fastafile) + "filter"; + openOutputFile(filterFile, outFilter); + outFilter << filter << endl; + outFilter.close(); - + ifstream inFasta2; + openInputFile(fastafile, inFasta2); + string filteredFasta = getRootName(fastafile) + "filter.fasta"; + ofstream outFASTA; + openOutputFile(filteredFasta, outFASTA); + + numSeqs = 0; + while(!inFasta2.eof()){ + Sequence seq(inFasta2); + string align = seq.getAligned(); + string filterSeq = ""; + + for(int j=0;j' << seq.getName() << endl << filterSeq << endl; + numSeqs++; + gobble(inFasta2); + } + outFASTA.close(); + inFasta2.close(); - // Filter - //if(globaldata->getFilter().compare("") != 0) { -// -// filter = globaldata->getFilter(); -// ifstream filehandle; -// openInputFile(filter, filehandle); -// -// char c; -// int count = 0; -// while(!filehandle.eof()) { -// c = filehandle.get(); -// if(c == '0') -// columnsToRemove[count] = true; -// count++; -// } -// } + int filteredLength = 0; + for(int i=0;i