From 7e354c9abb09ea3cf5b500a16cc7f6dd79ccb6f5 Mon Sep 17 00:00:00 2001 From: westcott Date: Thu, 2 Jul 2009 15:11:04 +0000 Subject: [PATCH] made changes to concensus to find a better tree. also fixed minor bug in venn with 4 groups --- Mothur.xcodeproj/project.pbxproj | 8 ++ chimeraseqscommand.cpp | 230 +++++++++++++++++++++++++++++++ chimeraseqscommand.h | 57 ++++++++ consensuscommand.cpp | 165 +++++++++++++++++----- consensuscommand.h | 6 +- filters.h | 102 ++++++++++++++ filterseqscommand.cpp | 95 +++---------- filterseqscommand.h | 8 +- venn.cpp | 63 ++++++--- 9 files changed, 600 insertions(+), 134 deletions(-) create mode 100644 chimeraseqscommand.cpp create mode 100644 chimeraseqscommand.h create mode 100644 filters.h diff --git a/Mothur.xcodeproj/project.pbxproj b/Mothur.xcodeproj/project.pbxproj index 79629fc..b8a1057 100644 --- a/Mothur.xcodeproj/project.pbxproj +++ b/Mothur.xcodeproj/project.pbxproj @@ -70,6 +70,7 @@ 3792948A0F2E258500B9034A /* parsimony.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 379294890F2E258500B9034A /* parsimony.cpp */; }; 3799A9500FD6A58C00E33EDE /* distancedb.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 3799A94C0FD6A58C00E33EDE /* distancedb.cpp */; }; 3799A9510FD6A58C00E33EDE /* seqsummarycommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 3799A94E0FD6A58C00E33EDE /* seqsummarycommand.cpp */; }; + 379D3D510FF90E090068C1C0 /* chimeraseqscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 379D3D500FF90E090068C1C0 /* chimeraseqscommand.cpp */; }; 37AD4CE40F28AEA300AA2D49 /* sharedlistvector.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 37AD4CE30F28AEA300AA2D49 /* sharedlistvector.cpp */; }; 37AD4DBB0F28E2FE00AA2D49 /* tree.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 37AD4DBA0F28E2FE00AA2D49 /* tree.cpp */; }; 37AD4DCA0F28F3DD00AA2D49 /* readtree.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 37AD4DC90F28F3DD00AA2D49 /* readtree.cpp */; }; @@ -304,6 +305,9 @@ 3799A94D0FD6A58C00E33EDE /* distancedb.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = distancedb.hpp; sourceTree = SOURCE_ROOT; }; 3799A94E0FD6A58C00E33EDE /* seqsummarycommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = seqsummarycommand.cpp; sourceTree = SOURCE_ROOT; }; 3799A94F0FD6A58C00E33EDE /* seqsummarycommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = seqsummarycommand.h; sourceTree = SOURCE_ROOT; }; + 379D3D4F0FF90E090068C1C0 /* chimeraseqscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = chimeraseqscommand.h; sourceTree = ""; }; + 379D3D500FF90E090068C1C0 /* chimeraseqscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = chimeraseqscommand.cpp; sourceTree = ""; }; + 379D3D6B0FF917F40068C1C0 /* filters.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = filters.h; sourceTree = ""; }; 37AD4CE20F28AEA300AA2D49 /* sharedlistvector.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = sharedlistvector.h; sourceTree = SOURCE_ROOT; }; 37AD4CE30F28AEA300AA2D49 /* sharedlistvector.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = sharedlistvector.cpp; sourceTree = SOURCE_ROOT; }; 37AD4DB90F28E2FE00AA2D49 /* tree.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = tree.h; sourceTree = SOURCE_ROOT; }; @@ -650,6 +654,7 @@ 37C753F40FB34C0300DBD02E /* eachgapignore.h */, 373C69960FC1E63600137ACD /* efron.cpp */, 373C69970FC1E63600137ACD /* efron.h */, + 379D3D6B0FF917F40068C1C0 /* filters.h */, EB9303F70F53517300E8EF26 /* geom.h */, EB9303F80F53517300E8EF26 /* geom.cpp */, 378C1AF10FB0644D004D63F5 /* goodscoverage.h */, @@ -755,6 +760,8 @@ 37D927CB0F21331F001D4494 /* collectsharedcommand.cpp */, 377326640FAF16E0007ABB8B /* consensuscommand.h */, 377326630FAF16E0007ABB8B /* consensuscommand.cpp */, + 379D3D4F0FF90E090068C1C0 /* chimeraseqscommand.h */, + 379D3D500FF90E090068C1C0 /* chimeraseqscommand.cpp */, 37B28F660F27590100808A62 /* deconvolutecommand.h */, 37B28F670F27590100808A62 /* deconvolutecommand.cpp */, 37C753CC0FB3415200DBD02E /* distancecommand.h */, @@ -1103,6 +1110,7 @@ 378598740FDD4C1500EF9D03 /* heatmapsim.cpp in Sources */, 378599100FDD7E8E00EF9D03 /* optionparser.cpp in Sources */, 7E5A17AF0FE57292003C6A03 /* mergefilecommand.cpp in Sources */, + 379D3D510FF90E090068C1C0 /* chimeraseqscommand.cpp in Sources */, ); runOnlyForDeploymentPostprocessing = 0; }; diff --git a/chimeraseqscommand.cpp b/chimeraseqscommand.cpp new file mode 100644 index 0000000..b810b9f --- /dev/null +++ b/chimeraseqscommand.cpp @@ -0,0 +1,230 @@ +/* + * chimeraseqscommand.cpp + * Mothur + * + * Created by Sarah Westcott on 6/29/09. + * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. + * + */ + +#include "chimeraseqscommand.h" + +//*************************************************************************************************************** + +ChimeraSeqsCommand::ChimeraSeqsCommand(string option){ + try { + abort = false; + + //allow user to run help + if(option == "help") { help(); abort = true; } + + else { + //valid paramters for this command + string Array[] = {"fasta", "filter", "correction", "processors", "method" }; + vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); + + OptionParser parser(option); + map parameters = parser.getParameters(); + + ValidParameters validParameter; + + //check to make sure all parameters are valid for command + for (map::iterator it = parameters.begin(); it != parameters.end(); it++) { + if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; } + } + + //check for required parameters + fastafile = validParameter.validFile(parameters, "fasta", true); + if (fastafile == "not open") { abort = true; } + else if (fastafile == "not found") { fastafile = ""; mothurOut("fasta is a required parameter for the chimera.seqs command."); mothurOutEndLine(); abort = true; } + + string temp; + temp = validParameter.validFile(parameters, "filter", false); if (temp == "not found") { temp = "F"; } + filter = isTrue(temp); + + temp = validParameter.validFile(parameters, "correction", false); if (temp == "not found") { temp = "T"; } + correction = isTrue(temp); + + temp = validParameter.validFile(parameters, "processors", true); if (temp == "not found") { temp = "1"; } + convert(temp, processors); + + method = validParameter.validFile(parameters, "method", false); if (method == "not found") { method = "bellerophon"; } + + } + } + catch(exception& e) { + errorOut(e, "ChimeraSeqsCommand", "ChimeraSeqsCommand"); + exit(1); + } +} +//********************************************************************************************************************** + +void ChimeraSeqsCommand::help(){ + try { + mothurOut("The chimera.seqs command reads a fastafile and creates a sorted priority score list of potentially chimeric sequences (ideally, the sequences should already be aligned).\n"); + mothurOut("The chimera.seqs command parameters are fasta, filter, correction, processors and method. fasta is required.\n"); + mothurOut("The filter parameter allows you to specify if you would like to apply a 50% soft filter. The default is false. \n"); + mothurOut("The correction parameter allows you to ..... The default is true. \n"); + mothurOut("The processors parameter allows you to specify how many processors you would like to use. The default is 1. \n"); + mothurOut("The method parameter allows you to specify the method for finding chimeric sequences. The default is bellerophon. \n"); + mothurOut("The chimera.seqs command should be in the following format: \n"); + mothurOut("chimera.seqs(fasta=yourFastaFile, filter=yourFilter, correction=yourCorrection, processors=yourProcessors, method=bellerophon) \n"); + mothurOut("Example: chimera.seqs(fasta=AD.align, filter=True, correction=true, processors=2, method=yourMethod) \n"); + mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n\n"); + } + catch(exception& e) { + errorOut(e, "ChimeraSeqsCommand", "help"); + exit(1); + } +} + +//*************************************************************************************************************** + +ChimeraSeqsCommand::~ChimeraSeqsCommand(){ /* do nothing */ } + +//*************************************************************************************************************** + +int ChimeraSeqsCommand::execute(){ + try{ + + if (abort == true) { return 0; } + + //do soft filter + if (filter) { + string optionString = "fasta=" + fastafile + ", soft=50.0, vertical=F"; + filterSeqs = new FilterSeqsCommand(optionString); + filterSeqs->execute(); + delete filterSeqs; + + //reset fastafile to filtered file + fastafile = getRootName(fastafile) + "filter.fasta"; + } + + //read in sequences + readSeqs(); + + //int numSeqs = seqs.size(); + + //find average midpoint of seqs + midpoint = findAverageMidPoint(); + + //this should be parallelized + //generatePreferences(); + + + //output results to screen + mothurOutEndLine(); + mothurOut("\t\t"); mothurOutEndLine(); + //mothurOut("Minimum:\t" + toString(startPosition[0]) + "\t" + toString(endPosition[0]) + "\t" + toString(seqLength[0]) + "\t" + toString(ambigBases[0]) + "\t" + toString(longHomoPolymer[0])); mothurOutEndLine(); + //mothurOut("2.5%-tile:\t" + toString(startPosition[ptile0_25]) + "\t" + toString(endPosition[ptile0_25]) + "\t" + toString(seqLength[ptile0_25]) + "\t" + toString(ambigBases[ptile0_25]) + "\t"+ toString(longHomoPolymer[ptile0_25])); mothurOutEndLine(); + //mothurOut("25%-tile:\t" + toString(startPosition[ptile25]) + "\t" + toString(endPosition[ptile25]) + "\t" + toString(seqLength[ptile25]) + "\t" + toString(ambigBases[ptile25]) + "\t" + toString(longHomoPolymer[ptile25])); mothurOutEndLine(); + //mothurOut("Median: \t" + toString(startPosition[ptile50]) + "\t" + toString(endPosition[ptile50]) + "\t" + toString(seqLength[ptile50]) + "\t" + toString(ambigBases[ptile50]) + "\t" + toString(longHomoPolymer[ptile50])); mothurOutEndLine(); + //mothurOut("75%-tile:\t" + toString(startPosition[ptile75]) + "\t" + toString(endPosition[ptile75]) + "\t" + toString(seqLength[ptile75]) + "\t" + toString(ambigBases[ptile75]) + "\t" + toString(longHomoPolymer[ptile75])); mothurOutEndLine(); + //mothurOut("97.5%-tile:\t" + toString(startPosition[ptile97_5]) + "\t" + toString(endPosition[ptile97_5]) + "\t" + toString(seqLength[ptile97_5]) + "\t" + toString(ambigBases[ptile97_5]) + "\t" + toString(longHomoPolymer[ptile97_5])); mothurOutEndLine(); + //mothurOut("Maximum:\t" + toString(startPosition[ptile100]) + "\t" + toString(endPosition[ptile100]) + "\t" + toString(seqLength[ptile100]) + "\t" + toString(ambigBases[ptile100]) + "\t" + toString(longHomoPolymer[ptile100])); mothurOutEndLine(); + //mothurOut("# of Seqs:\t" + toString(numSeqs)); mothurOutEndLine(); + + //outSummary.close(); + return 0; + } + catch(exception& e) { + errorOut(e, "ChimeraSeqsCommand", "execute"); + exit(1); + } +} + +//*************************************************************************************************************** +void ChimeraSeqsCommand::readSeqs(){ + try { + ifstream inFASTA; + openInputFile(fastafile, inFASTA); + + //read in seqs and store in vector + while(!inFASTA.eof()){ + Sequence current(inFASTA); + + seqs.push_back(current); + + gobble(inFASTA); + } + inFASTA.close(); + + } + catch(exception& e) { + errorOut(e, "ChimeraSeqsCommand", "readSeqs"); + exit(1); + } +} + + +//*************************************************************************************************************** +int ChimeraSeqsCommand::findAverageMidPoint(){ + try { + int totalMids = 0; + int averageMid = 0; + + //loop through the seqs and find midpoint + for (int i = 0; i < seqs.size(); i++) { + + //get unaligned sequence + seqs[i].setUnaligned(seqs[i].getUnaligned()); //if you read an aligned file the unaligned is really aligned, so we need to make sure its unaligned + + string unaligned = seqs[i].getUnaligned(); + string aligned = seqs[i].getAligned(); + + //find midpoint of this seq + int count = 0; + int thismid = 0; + for (int j = 0; j < aligned.length(); j++) { + + thismid++; + + //if you are part of the unaligned sequence increment + if (isalpha(aligned[j])) { count++; } + + //if you have reached the halfway point stop + if (count >= (unaligned.length() / 2)) { break; } + } + + //add this mid to total + totalMids += thismid; + + } + + averageMid = (totalMids / seqs.size()); + + return averageMid; + + + } + catch(exception& e) { + errorOut(e, "ChimeraSeqsCommand", "findAverageMidPoint"); + exit(1); + } +} + +/*************************************************************************************************************** +int ChimeraSeqsCommand::createSparseMatrix(int startLine, int endLine, SparseMatrix* sparse){ + try { + + for(int i=startLine; icalcDist(seqs.get(i), seqs.get(j)); + double dist = distCalculator->getDist(); + + + + } + + + return 1; + } + catch(exception& e) { + errorOut(e, "ChimeraSeqsCommand", "createSparseMatrix"); + exit(1); + } +} +/**************************************************************************************************/ + diff --git a/chimeraseqscommand.h b/chimeraseqscommand.h new file mode 100644 index 0000000..455ac92 --- /dev/null +++ b/chimeraseqscommand.h @@ -0,0 +1,57 @@ +#ifndef CHIMERACOMMAND_H +#define CHIMERACOMMAND_H + +/* + * chimeraseqscommand.h + * Mothur + * + * Created by Sarah Westcott on 6/29/09. + * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. + * + */ + +#include "mothur.h" +#include "command.hpp" +#include "filterseqscommand.h" +#include "sequence.hpp" + + + +/***********************************************************/ + +class ChimeraSeqsCommand : public Command { +public: + ChimeraSeqsCommand(string); + ~ChimeraSeqsCommand(); + int execute(); + void help(); + +private: + //Dist* distCalculator; + + struct Preference { + string leftParent; + string rightParent; + float score; + + }; + + + bool abort; + string method, fastafile; + bool filter, correction; + int processors, midpoint; + FilterSeqsCommand* filterSeqs; + vector seqs; + vector pref; + + int findAverageMidPoint(); + void readSeqs(); + + +}; + +/***********************************************************/ + +#endif + diff --git a/consensuscommand.cpp b/consensuscommand.cpp index 9a90219..8c34068 100644 --- a/consensuscommand.cpp +++ b/consensuscommand.cpp @@ -17,18 +17,9 @@ ConcensusCommand::ConcensusCommand(string fileroot){ abort = false; filename = fileroot; - //allow user to run help - //if(option == "help") { help(); abort = true; } - //else { - //if (option != "") { mothurOut("There are no valid parameters for the consensus command."); mothurOutEndLine(); abort = true; } - - // //no trees were read - // if (globaldata->gTree.size() == 0) { mothurOut("You must execute the read.tree command, before you may use the consensus command."); mothurOutEndLine(); abort = true; } - //else { - t = globaldata->gTree; - // } - //} + t = globaldata->gTree; + } catch(exception& e) { errorOut(e, "ConcensusCommand", "ConcensusCommand"); @@ -97,9 +88,10 @@ int ConcensusCommand::execute(){ out2 << "Species in Order: " << endl << endl; for (int i = 0; i < treeSet.size(); i++) { out2 << i+1 << ". " << treeSet[i] << endl; } - vector temp; //output sets included out2 << endl << "Sets included in the consensus tree:" << endl << endl; + + vector temp; for (it2 = nodePairsInTree.begin(); it2 != nodePairsInTree.end(); it2++) { //only output pairs not leaves if (it2->first.size() > 1) { @@ -112,6 +104,7 @@ int ConcensusCommand::execute(){ //find spot int index = findSpot(it2->first[i]); temp[index] = "*"; + //temp[index] = it2->first[i] + " "; } //output temp @@ -173,8 +166,8 @@ int ConcensusCommand::buildConcensusTree(vector nodeSet) { //terminate recursion }else if (count == numNodes) { return 0; } else { - leftChildSet = getNextAvailableSet(nodeSet); - rightChildSet = getRestSet(nodeSet, leftChildSet); + //finds best child pair + leftChildSet = getNextAvailableSet(nodeSet, rightChildSet); int left = buildConcensusTree(leftChildSet); int right = buildConcensusTree(rightChildSet); consensusTree->tree[count].setChildren(left, right); @@ -221,6 +214,7 @@ void ConcensusCommand::getSets() { } } + //add each leaf to terminate recursion in consensus //you want the leaves in there but with insignifigant sightings value so it is added last //for each leaf node get descendant info. @@ -238,33 +232,70 @@ void ConcensusCommand::getSets() { } sort(treeSet.begin(), treeSet.end()); + + + map< vector, int> nodePairsCopy = nodePairs; + + + //set initial rating on pairs to sightings + subgroup sightings + while (nodePairsCopy.size() != 0) { + + vector small = getSmallest(nodePairsCopy); + + int subgrouprate = getSubgroupRating(small); + + nodePairsInitialRate[small] = nodePairs[small] + subgrouprate; + + nodePairsCopy.erase(small); + } + } catch(exception& e) { errorOut(e, "ConcensusCommand", "getSets"); exit(1); } } +//********************************************************************************************************************** +vector ConcensusCommand::getSmallest(map< vector, int> nodes) { + try{ + vector smallest = nodes.begin()->first; + int smallsize = smallest.size(); + + for(it2 = nodes.begin(); it2 != nodes.end(); it2++) { + if(it2->first.size() < smallsize) { smallsize = it2->first.size(); smallest = it2->first; } + } + + return smallest; + } + catch(exception& e) { + errorOut(e, "ConcensusCommand", "getSmallest"); + exit(1); + } +} //********************************************************************************************************************** -vector ConcensusCommand::getNextAvailableSet(vector bigset) { +vector ConcensusCommand::getNextAvailableSet(vector bigset, vector& rest) { try { +//cout << "new call " << endl << endl << endl; vector largest; largest.clear(); - int largestSighting = -1; - - //go through the sets - for (it2 = nodePairs.begin(); it2 != nodePairs.end(); it2++) { - //are you a subset of bigset - if (isSubset(bigset, it2->first) == true) { - - //are you the largest. if you are the same size as current largest refer to sighting - if (it2->first.size() > largest.size()) { largest = it2->first; largestSighting = it2->second; } - else if (it2->first.size() == largest.size()) { - if (it2->second > largestSighting) { largest = it2->first; largestSighting = it2->second; } - } - - } + rest.clear(); + + //if you are just 2 groups + if (bigset.size() == 2) { + rest.push_back(bigset[0]); + largest.push_back(bigset[1]); + }else{ + rest = bestSplit[bigset][0]; + largest = bestSplit[bigset][1]; } + + //save for printing out later and for branch lengths + nodePairsInTree[rest] = nodePairs[rest]; + + //delete whatever set you return because it is no longer available + nodePairs.erase(rest); + //save for printing out later and for branch lengths nodePairsInTree[largest] = nodePairs[largest]; @@ -280,6 +311,73 @@ vector ConcensusCommand::getNextAvailableSet(vector bigset) { } } +/**********************************************************************************************************************/ +int ConcensusCommand::getSubgroupRating(vector group) { + try { + map< vector, int>::iterator ittemp; + map< vector< vector > , int >::iterator it3; + int rate = 0; + + // ***********************************************************************************// + //1. this function must be called passing it littlest sets to biggest + // since it the rating is made from your sighting plus you best splits rating + //2. it saves the top pair to use later + // ***********************************************************************************// + + + if (group.size() < 3) { return rate; } + + map< vector, int> possiblePairing; //this is all the subsets of group + + //go through the sets + for (it2 = nodePairs.begin(); it2 != nodePairs.end(); it2++) { + //are you a subset of bigset, then save in possiblePairings + if (isSubset(group, it2->first) == true) { possiblePairing[it2->first] = it2->second; } + } + + map< vector< vector > , int > rating; + + while (possiblePairing.size() != 0) { + + it2 = possiblePairing.begin(); + vector temprest = getRestSet(group, it2->first); + + //is the rest a set available in possiblePairings + ittemp = possiblePairing.find(temprest); + if (ittemp != possiblePairing.end()) { //if the rest is in the possible pairings then add this pair to rating map + vector< vector > temprate; + temprate.push_back(it2->first); temprate.push_back(temprest); + + rating[temprate] = (nodePairsInitialRate[it2->first] + nodePairsInitialRate[temprest]); + + //erase so you dont add 1,2 and 2,1. + possiblePairing.erase(temprest); + } + + possiblePairing.erase(it2); + } + + + it3 = rating.begin(); + rate = it3->second; + vector< vector > topPair = it3->first; + + //choose the split with the best rating + for (it3 = rating.begin(); it3 != rating.end(); it3++) { + + if (it3->second > rate) { rate = it3->second; topPair = it3->first; } + } + + bestSplit[group] = topPair; + + return rate; + } + catch(exception& e) { + errorOut(e, "ConcensusCommand", "getSubgroupRating"); + exit(1); + } +} + //********************************************************************************************************************** vector ConcensusCommand::getRestSet(vector bigset, vector subset) { try { @@ -294,12 +392,6 @@ vector ConcensusCommand::getRestSet(vector bigset, vector ConcensusCommand::getRestSet(vector bigset, vector bigset, vector subset) { try { + + if (subset.size() > bigset.size()) { return false; } + //check if each guy in suset is also in bigset for (int i = 0; i < subset.size(); i++) { bool match = false; diff --git a/consensuscommand.h b/consensuscommand.h index 53b8094..145bea5 100644 --- a/consensuscommand.h +++ b/consensuscommand.h @@ -35,6 +35,8 @@ private: // ie. combos FI and EGK would create nodePairs[vector containing F and I] = 1; nodePairs[vector containing E, G and K] = 1 // if you saw the combo FI again in another tree you would then update nodePairs[vector containing F and I] = 2; // requires vectors to be sorted to find key. + map< vector, vector< vector > > bestSplit; //maps a group to its best split + map< vector, int > nodePairsInitialRate; map< vector, int > nodePairsInTree; map::iterator it; map< vector, int>::iterator it2; @@ -43,7 +45,9 @@ private: int numNodes, numLeaves, count; //count is the next available spot in the tree vector void getSets(); - vector getNextAvailableSet(vector); //gets next largest and highest rated set that is a subset of the set passed in. + int getSubgroupRating(vector); + vector getSmallest(map< vector, int>); + vector getNextAvailableSet(vector, vector&); vector getRestSet(vector, vector); bool isSubset(vector, vector); int findSpot(string); diff --git a/filters.h b/filters.h new file mode 100644 index 0000000..4c39607 --- /dev/null +++ b/filters.h @@ -0,0 +1,102 @@ +#ifndef FILTERS_H +#define FILTERS_H + +/* + * filters.h + * Mothur + * + * Created by Sarah Westcott on 6/29/09. + * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. + * + */ + +#include "mothur.h" +#include "sequence.hpp" + + +/***********************************************************************/ + +class Filters { + +public: + Filters() {}; + ~Filters(){}; + + string getFilter() { return filter; } + void setFilter(string s) { filter = s; } + void setLength(int l) { alignmentLength = l; } + void setSoft(float s) { soft = s; } + void setTrump(float t) { trump = t; } + void setNumSeqs(int num) { numSeqs = num; } + + void initialize() { + a.assign(alignmentLength, 0); + t.assign(alignmentLength, 0); + g.assign(alignmentLength, 0); + c.assign(alignmentLength, 0); + gap.assign(alignmentLength, 0); + } + + void doSoft() { + int threshold = int (soft * numSeqs); + + for(int i=0;i> filter; + + fileHandle.close(); + } + + void getFreqs(Sequence seq) { + + string curAligned = seq.getAligned(); + + for(int j=0;j a, t, g, c, gap; + int alignmentLength, numSeqs; + float soft; + char trump; + +}; + +/***********************************************************************/ + +#endif + diff --git a/filterseqscommand.cpp b/filterseqscommand.cpp index ad491aa..68450fb 100644 --- a/filterseqscommand.cpp +++ b/filterseqscommand.cpp @@ -56,6 +56,13 @@ FilterSeqsCommand::FilterSeqsCommand(string option){ numSeqs = 0; + if (abort == false) { + + if (soft != 0) { F.setSoft(soft); } + if (trump != '*') { F.setTrump(trump); } + + } + } } @@ -90,70 +97,6 @@ void FilterSeqsCommand::help(){ /**************************************************************************************/ -void FilterSeqsCommand::doHard() { - - ifstream fileHandle; - openInputFile(hard, fileHandle); - - fileHandle >> filter; - - fileHandle.close(); - -} - -/**************************************************************************************/ - -void FilterSeqsCommand::doTrump(Sequence seq) { - - string curAligned = seq.getAligned(); - - for(int j = 0; j < alignmentLength; j++) { - if(curAligned[j] == trump){ - filter[j] = '0'; - } - } - -} - -/**************************************************************************************/ - -void FilterSeqsCommand::doVertical() { - - for(int i=0;i a, t, g, c, gap; }; diff --git a/venn.cpp b/venn.cpp index 2c40c25..217dd35 100644 --- a/venn.cpp +++ b/venn.cpp @@ -380,78 +380,105 @@ void Venn::getPic(vector lookup, vector vCalcs //get estimates for numA data = singleCalc->getValues(sabundA); numA = data[0]; + //cout << "num a = " << numA << endl; //get estimates for numB data = singleCalc->getValues(sabundB); numB = data[0]; - + //cout << "num b = " << numB << endl; //get estimates for numC data = singleCalc->getValues(sabundC); numC = data[0]; - + //cout << "num c = " << numC << endl; //get estimates for numD data = singleCalc->getValues(sabundD); numD = data[0]; - +//cout << "num d = " << numD << endl; //get estimates for pairs subset.clear(); subset.push_back(lookup[0]); subset.push_back(lookup[1]); data = vCalcs[i]->getValues(subset); sharedAB = data[0]; - + //cout << "num ab = " << sharedAB << endl; subset.clear(); subset.push_back(lookup[0]); subset.push_back(lookup[2]); data = vCalcs[i]->getValues(subset); sharedAC = data[0]; - + //cout << "num ac = " << sharedAC << endl; subset.clear(); subset.push_back(lookup[0]); subset.push_back(lookup[3]); data = vCalcs[i]->getValues(subset); sharedAD = data[0]; - + //cout << "num ad = " << sharedAD << endl; subset.clear(); subset.push_back(lookup[1]); subset.push_back(lookup[2]); data = vCalcs[i]->getValues(subset); sharedBC = data[0]; - + //cout << "num bc = " << sharedBC << endl; subset.clear(); subset.push_back(lookup[1]); subset.push_back(lookup[3]); data = vCalcs[i]->getValues(subset); sharedBD = data[0]; - + //cout << "num bd = " << sharedBD << endl; subset.clear(); subset.push_back(lookup[2]); subset.push_back(lookup[3]); data = vCalcs[i]->getValues(subset); sharedCD = data[0]; - - + + //cout << "num cd = " << sharedCD << endl; //get estimates for combos of 3 subset.clear(); subset.push_back(lookup[0]); subset.push_back(lookup[1]); subset.push_back(lookup[2]); data = vCalcs[i]->getValues(subset); sharedABC = data[0]; - + //cout << "num abc = " << sharedABC << endl; subset.clear(); subset.push_back(lookup[0]); subset.push_back(lookup[2]); subset.push_back(lookup[3]); data = vCalcs[i]->getValues(subset); sharedACD = data[0]; - + //cout << "num acd = " << sharedACD << endl; subset.clear(); subset.push_back(lookup[1]); subset.push_back(lookup[2]); subset.push_back(lookup[3]); data = vCalcs[i]->getValues(subset); sharedBCD = data[0]; - + //cout << "num bcd = " << sharedBCD << endl; subset.clear(); subset.push_back(lookup[0]); subset.push_back(lookup[1]); subset.push_back(lookup[3]); data = vCalcs[i]->getValues(subset); sharedABD = data[0]; - +//cout << "num abd = " << sharedABD << endl; //get estimate for all four data = vCalcs[i]->getValues(lookup); sharedABCD = data[0]; - - + //cout << "num abcd = " << sharedABCD << endl << endl; + //make adjustments + sharedABC = sharedABC - sharedABCD; + //cout << "num abc = " << sharedABC << endl; + sharedABD = sharedABD - sharedABCD; + //cout << "num abd = " << sharedABD << endl; + sharedACD = sharedACD - sharedABCD; + //cout << "num acd = " << sharedACD << endl; + sharedBCD = sharedBCD - sharedABCD; + //cout << "num bcd = " << sharedBCD << endl; + + sharedAB = sharedAB - sharedABC - sharedABCD - sharedABD; // cout << "num ab = " << sharedAB << endl; + sharedAC = sharedAC - sharedABC - sharedABCD - sharedACD; // cout << "num ac = " << sharedAC << endl; + sharedAD = sharedAD - sharedABD - sharedABCD - sharedACD; // cout << "num ad = " << sharedAD << endl; + + sharedBC = sharedBC - sharedABC - sharedABCD - sharedBCD; // cout << "num bc = " << sharedBC << endl; + sharedBD = sharedBD - sharedABD - sharedABCD - sharedBCD; // cout << "num bd = " << sharedBD << endl; + sharedCD = sharedCD - sharedACD - sharedABCD - sharedBCD; // cout << "num cd = " << sharedCD << endl; + + numA = numA - sharedAB - sharedAC - sharedAD - sharedABCD - sharedABC - sharedACD - sharedABD; + //cout << "num a = " << numA << endl; + numB = numB - sharedAB - sharedBC - sharedBD - sharedABCD - sharedABC - sharedABD - sharedBCD; + //cout << "num b = " << numB << endl; + numC = numC - sharedAC - sharedBC - sharedCD - sharedABCD - sharedABC - sharedACD - sharedBCD; + //cout << "num c = " << numC << endl; + numD = numD - sharedAD - sharedBD - sharedCD - sharedABCD - sharedBCD - sharedACD - sharedABD; + //cout << "num d = " << numD << endl; + //image window outsvg << "\n"; outsvg << "\n"; @@ -478,8 +505,8 @@ void Venn::getPic(vector lookup, vector vCalcs outsvg << "" + toString(sharedBC) + "\n"; outsvg << "" + toString(numD) + "\n"; outsvg << "getGroup().length() / 2)) + "\" y=\"210\">" + lookup[3]->getGroup() + "\n"; - outsvg << "" + toString(sharedAD) + "\n"; - outsvg << "" + toString(sharedBD) + "\n"; + outsvg << "" + toString(sharedAD) + "\n"; + outsvg << "" + toString(sharedBD) + "\n"; outsvg << "" + toString(sharedCD) + "\n"; outsvg << "" + toString(sharedABD) + "\n"; outsvg << "" + toString(sharedBCD) + "\n"; -- 2.39.2