3792948A0F2E258500B9034A /* parsimony.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 379294890F2E258500B9034A /* parsimony.cpp */; };
3799A9500FD6A58C00E33EDE /* distancedb.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 3799A94C0FD6A58C00E33EDE /* distancedb.cpp */; };
3799A9510FD6A58C00E33EDE /* seqsummarycommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 3799A94E0FD6A58C00E33EDE /* seqsummarycommand.cpp */; };
+ 379D3D510FF90E090068C1C0 /* chimeraseqscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 379D3D500FF90E090068C1C0 /* chimeraseqscommand.cpp */; };
37AD4CE40F28AEA300AA2D49 /* sharedlistvector.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 37AD4CE30F28AEA300AA2D49 /* sharedlistvector.cpp */; };
37AD4DBB0F28E2FE00AA2D49 /* tree.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 37AD4DBA0F28E2FE00AA2D49 /* tree.cpp */; };
37AD4DCA0F28F3DD00AA2D49 /* readtree.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 37AD4DC90F28F3DD00AA2D49 /* readtree.cpp */; };
3799A94D0FD6A58C00E33EDE /* distancedb.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = distancedb.hpp; sourceTree = SOURCE_ROOT; };
3799A94E0FD6A58C00E33EDE /* seqsummarycommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = seqsummarycommand.cpp; sourceTree = SOURCE_ROOT; };
3799A94F0FD6A58C00E33EDE /* seqsummarycommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = seqsummarycommand.h; sourceTree = SOURCE_ROOT; };
+ 379D3D4F0FF90E090068C1C0 /* chimeraseqscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = chimeraseqscommand.h; sourceTree = "<group>"; };
+ 379D3D500FF90E090068C1C0 /* chimeraseqscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = chimeraseqscommand.cpp; sourceTree = "<group>"; };
+ 379D3D6B0FF917F40068C1C0 /* filters.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = filters.h; sourceTree = "<group>"; };
37AD4CE20F28AEA300AA2D49 /* sharedlistvector.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = sharedlistvector.h; sourceTree = SOURCE_ROOT; };
37AD4CE30F28AEA300AA2D49 /* sharedlistvector.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = sharedlistvector.cpp; sourceTree = SOURCE_ROOT; };
37AD4DB90F28E2FE00AA2D49 /* tree.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = tree.h; sourceTree = SOURCE_ROOT; };
37C753F40FB34C0300DBD02E /* eachgapignore.h */,
373C69960FC1E63600137ACD /* efron.cpp */,
373C69970FC1E63600137ACD /* efron.h */,
+ 379D3D6B0FF917F40068C1C0 /* filters.h */,
EB9303F70F53517300E8EF26 /* geom.h */,
EB9303F80F53517300E8EF26 /* geom.cpp */,
378C1AF10FB0644D004D63F5 /* goodscoverage.h */,
37D927CB0F21331F001D4494 /* collectsharedcommand.cpp */,
377326640FAF16E0007ABB8B /* consensuscommand.h */,
377326630FAF16E0007ABB8B /* consensuscommand.cpp */,
+ 379D3D4F0FF90E090068C1C0 /* chimeraseqscommand.h */,
+ 379D3D500FF90E090068C1C0 /* chimeraseqscommand.cpp */,
37B28F660F27590100808A62 /* deconvolutecommand.h */,
37B28F670F27590100808A62 /* deconvolutecommand.cpp */,
37C753CC0FB3415200DBD02E /* distancecommand.h */,
378598740FDD4C1500EF9D03 /* heatmapsim.cpp in Sources */,
378599100FDD7E8E00EF9D03 /* optionparser.cpp in Sources */,
7E5A17AF0FE57292003C6A03 /* mergefilecommand.cpp in Sources */,
+ 379D3D510FF90E090068C1C0 /* chimeraseqscommand.cpp in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
--- /dev/null
+/*
+ * chimeraseqscommand.cpp
+ * Mothur
+ *
+ * Created by Sarah Westcott on 6/29/09.
+ * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
+ *
+ */
+
+#include "chimeraseqscommand.h"
+
+//***************************************************************************************************************
+
+ChimeraSeqsCommand::ChimeraSeqsCommand(string option){
+ try {
+ abort = false;
+
+ //allow user to run help
+ if(option == "help") { help(); abort = true; }
+
+ else {
+ //valid paramters for this command
+ string Array[] = {"fasta", "filter", "correction", "processors", "method" };
+ vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
+
+ OptionParser parser(option);
+ map<string,string> parameters = parser.getParameters();
+
+ ValidParameters validParameter;
+
+ //check to make sure all parameters are valid for command
+ for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) {
+ if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
+ }
+
+ //check for required parameters
+ fastafile = validParameter.validFile(parameters, "fasta", true);
+ if (fastafile == "not open") { abort = true; }
+ else if (fastafile == "not found") { fastafile = ""; mothurOut("fasta is a required parameter for the chimera.seqs command."); mothurOutEndLine(); abort = true; }
+
+ string temp;
+ temp = validParameter.validFile(parameters, "filter", false); if (temp == "not found") { temp = "F"; }
+ filter = isTrue(temp);
+
+ temp = validParameter.validFile(parameters, "correction", false); if (temp == "not found") { temp = "T"; }
+ correction = isTrue(temp);
+
+ temp = validParameter.validFile(parameters, "processors", true); if (temp == "not found") { temp = "1"; }
+ convert(temp, processors);
+
+ method = validParameter.validFile(parameters, "method", false); if (method == "not found") { method = "bellerophon"; }
+
+ }
+ }
+ catch(exception& e) {
+ errorOut(e, "ChimeraSeqsCommand", "ChimeraSeqsCommand");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+
+void ChimeraSeqsCommand::help(){
+ try {
+ mothurOut("The chimera.seqs command reads a fastafile and creates a sorted priority score list of potentially chimeric sequences (ideally, the sequences should already be aligned).\n");
+ mothurOut("The chimera.seqs command parameters are fasta, filter, correction, processors and method. fasta is required.\n");
+ mothurOut("The filter parameter allows you to specify if you would like to apply a 50% soft filter. The default is false. \n");
+ mothurOut("The correction parameter allows you to ..... The default is true. \n");
+ mothurOut("The processors parameter allows you to specify how many processors you would like to use. The default is 1. \n");
+ mothurOut("The method parameter allows you to specify the method for finding chimeric sequences. The default is bellerophon. \n");
+ mothurOut("The chimera.seqs command should be in the following format: \n");
+ mothurOut("chimera.seqs(fasta=yourFastaFile, filter=yourFilter, correction=yourCorrection, processors=yourProcessors, method=bellerophon) \n");
+ mothurOut("Example: chimera.seqs(fasta=AD.align, filter=True, correction=true, processors=2, method=yourMethod) \n");
+ mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n\n");
+ }
+ catch(exception& e) {
+ errorOut(e, "ChimeraSeqsCommand", "help");
+ exit(1);
+ }
+}
+
+//***************************************************************************************************************
+
+ChimeraSeqsCommand::~ChimeraSeqsCommand(){ /* do nothing */ }
+
+//***************************************************************************************************************
+
+int ChimeraSeqsCommand::execute(){
+ try{
+
+ if (abort == true) { return 0; }
+
+ //do soft filter
+ if (filter) {
+ string optionString = "fasta=" + fastafile + ", soft=50.0, vertical=F";
+ filterSeqs = new FilterSeqsCommand(optionString);
+ filterSeqs->execute();
+ delete filterSeqs;
+
+ //reset fastafile to filtered file
+ fastafile = getRootName(fastafile) + "filter.fasta";
+ }
+
+ //read in sequences
+ readSeqs();
+
+ //int numSeqs = seqs.size();
+
+ //find average midpoint of seqs
+ midpoint = findAverageMidPoint();
+
+ //this should be parallelized
+ //generatePreferences();
+
+
+ //output results to screen
+ mothurOutEndLine();
+ mothurOut("\t\t"); mothurOutEndLine();
+ //mothurOut("Minimum:\t" + toString(startPosition[0]) + "\t" + toString(endPosition[0]) + "\t" + toString(seqLength[0]) + "\t" + toString(ambigBases[0]) + "\t" + toString(longHomoPolymer[0])); mothurOutEndLine();
+ //mothurOut("2.5%-tile:\t" + toString(startPosition[ptile0_25]) + "\t" + toString(endPosition[ptile0_25]) + "\t" + toString(seqLength[ptile0_25]) + "\t" + toString(ambigBases[ptile0_25]) + "\t"+ toString(longHomoPolymer[ptile0_25])); mothurOutEndLine();
+ //mothurOut("25%-tile:\t" + toString(startPosition[ptile25]) + "\t" + toString(endPosition[ptile25]) + "\t" + toString(seqLength[ptile25]) + "\t" + toString(ambigBases[ptile25]) + "\t" + toString(longHomoPolymer[ptile25])); mothurOutEndLine();
+ //mothurOut("Median: \t" + toString(startPosition[ptile50]) + "\t" + toString(endPosition[ptile50]) + "\t" + toString(seqLength[ptile50]) + "\t" + toString(ambigBases[ptile50]) + "\t" + toString(longHomoPolymer[ptile50])); mothurOutEndLine();
+ //mothurOut("75%-tile:\t" + toString(startPosition[ptile75]) + "\t" + toString(endPosition[ptile75]) + "\t" + toString(seqLength[ptile75]) + "\t" + toString(ambigBases[ptile75]) + "\t" + toString(longHomoPolymer[ptile75])); mothurOutEndLine();
+ //mothurOut("97.5%-tile:\t" + toString(startPosition[ptile97_5]) + "\t" + toString(endPosition[ptile97_5]) + "\t" + toString(seqLength[ptile97_5]) + "\t" + toString(ambigBases[ptile97_5]) + "\t" + toString(longHomoPolymer[ptile97_5])); mothurOutEndLine();
+ //mothurOut("Maximum:\t" + toString(startPosition[ptile100]) + "\t" + toString(endPosition[ptile100]) + "\t" + toString(seqLength[ptile100]) + "\t" + toString(ambigBases[ptile100]) + "\t" + toString(longHomoPolymer[ptile100])); mothurOutEndLine();
+ //mothurOut("# of Seqs:\t" + toString(numSeqs)); mothurOutEndLine();
+
+ //outSummary.close();
+ return 0;
+ }
+ catch(exception& e) {
+ errorOut(e, "ChimeraSeqsCommand", "execute");
+ exit(1);
+ }
+}
+
+//***************************************************************************************************************
+void ChimeraSeqsCommand::readSeqs(){
+ try {
+ ifstream inFASTA;
+ openInputFile(fastafile, inFASTA);
+
+ //read in seqs and store in vector
+ while(!inFASTA.eof()){
+ Sequence current(inFASTA);
+
+ seqs.push_back(current);
+
+ gobble(inFASTA);
+ }
+ inFASTA.close();
+
+ }
+ catch(exception& e) {
+ errorOut(e, "ChimeraSeqsCommand", "readSeqs");
+ exit(1);
+ }
+}
+
+
+//***************************************************************************************************************
+int ChimeraSeqsCommand::findAverageMidPoint(){
+ try {
+ int totalMids = 0;
+ int averageMid = 0;
+
+ //loop through the seqs and find midpoint
+ for (int i = 0; i < seqs.size(); i++) {
+
+ //get unaligned sequence
+ seqs[i].setUnaligned(seqs[i].getUnaligned()); //if you read an aligned file the unaligned is really aligned, so we need to make sure its unaligned
+
+ string unaligned = seqs[i].getUnaligned();
+ string aligned = seqs[i].getAligned();
+
+ //find midpoint of this seq
+ int count = 0;
+ int thismid = 0;
+ for (int j = 0; j < aligned.length(); j++) {
+
+ thismid++;
+
+ //if you are part of the unaligned sequence increment
+ if (isalpha(aligned[j])) { count++; }
+
+ //if you have reached the halfway point stop
+ if (count >= (unaligned.length() / 2)) { break; }
+ }
+
+ //add this mid to total
+ totalMids += thismid;
+
+ }
+
+ averageMid = (totalMids / seqs.size());
+
+ return averageMid;
+
+
+ }
+ catch(exception& e) {
+ errorOut(e, "ChimeraSeqsCommand", "findAverageMidPoint");
+ exit(1);
+ }
+}
+
+/***************************************************************************************************************
+int ChimeraSeqsCommand::createSparseMatrix(int startLine, int endLine, SparseMatrix* sparse){
+ try {
+
+ for(int i=startLine; i<endLine; i++){
+
+ for(int j=0;j<i;j++){
+
+ distCalculator->calcDist(seqs.get(i), seqs.get(j));
+ double dist = distCalculator->getDist();
+
+
+
+ }
+
+
+ return 1;
+ }
+ catch(exception& e) {
+ errorOut(e, "ChimeraSeqsCommand", "createSparseMatrix");
+ exit(1);
+ }
+}
+/**************************************************************************************************/
+
--- /dev/null
+#ifndef CHIMERACOMMAND_H
+#define CHIMERACOMMAND_H
+
+/*
+ * chimeraseqscommand.h
+ * Mothur
+ *
+ * Created by Sarah Westcott on 6/29/09.
+ * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
+ *
+ */
+
+#include "mothur.h"
+#include "command.hpp"
+#include "filterseqscommand.h"
+#include "sequence.hpp"
+
+
+
+/***********************************************************/
+
+class ChimeraSeqsCommand : public Command {
+public:
+ ChimeraSeqsCommand(string);
+ ~ChimeraSeqsCommand();
+ int execute();
+ void help();
+
+private:
+ //Dist* distCalculator;
+
+ struct Preference {
+ string leftParent;
+ string rightParent;
+ float score;
+
+ };
+
+
+ bool abort;
+ string method, fastafile;
+ bool filter, correction;
+ int processors, midpoint;
+ FilterSeqsCommand* filterSeqs;
+ vector<Sequence> seqs;
+ vector<Preference> pref;
+
+ int findAverageMidPoint();
+ void readSeqs();
+
+
+};
+
+/***********************************************************/
+
+#endif
+
abort = false;
filename = fileroot;
- //allow user to run help
- //if(option == "help") { help(); abort = true; }
- //else {
- //if (option != "") { mothurOut("There are no valid parameters for the consensus command."); mothurOutEndLine(); abort = true; }
-
- // //no trees were read
- // if (globaldata->gTree.size() == 0) { mothurOut("You must execute the read.tree command, before you may use the consensus command."); mothurOutEndLine(); abort = true; }
- //else {
- t = globaldata->gTree;
- // }
- //}
+ t = globaldata->gTree;
+
}
catch(exception& e) {
errorOut(e, "ConcensusCommand", "ConcensusCommand");
out2 << "Species in Order: " << endl << endl;
for (int i = 0; i < treeSet.size(); i++) { out2 << i+1 << ". " << treeSet[i] << endl; }
- vector<string> temp;
//output sets included
out2 << endl << "Sets included in the consensus tree:" << endl << endl;
+
+ vector<string> temp;
for (it2 = nodePairsInTree.begin(); it2 != nodePairsInTree.end(); it2++) {
//only output pairs not leaves
if (it2->first.size() > 1) {
//find spot
int index = findSpot(it2->first[i]);
temp[index] = "*";
+ //temp[index] = it2->first[i] + " ";
}
//output temp
//terminate recursion
}else if (count == numNodes) { return 0; }
else {
- leftChildSet = getNextAvailableSet(nodeSet);
- rightChildSet = getRestSet(nodeSet, leftChildSet);
+ //finds best child pair
+ leftChildSet = getNextAvailableSet(nodeSet, rightChildSet);
int left = buildConcensusTree(leftChildSet);
int right = buildConcensusTree(rightChildSet);
consensusTree->tree[count].setChildren(left, right);
}
}
+
//add each leaf to terminate recursion in consensus
//you want the leaves in there but with insignifigant sightings value so it is added last
//for each leaf node get descendant info.
}
sort(treeSet.begin(), treeSet.end());
+
+
+ map< vector<string>, int> nodePairsCopy = nodePairs;
+
+
+ //set initial rating on pairs to sightings + subgroup sightings
+ while (nodePairsCopy.size() != 0) {
+
+ vector<string> small = getSmallest(nodePairsCopy);
+
+ int subgrouprate = getSubgroupRating(small);
+
+ nodePairsInitialRate[small] = nodePairs[small] + subgrouprate;
+
+ nodePairsCopy.erase(small);
+ }
+
}
catch(exception& e) {
errorOut(e, "ConcensusCommand", "getSets");
exit(1);
}
}
+//**********************************************************************************************************************
+vector<string> ConcensusCommand::getSmallest(map< vector<string>, int> nodes) {
+ try{
+ vector<string> smallest = nodes.begin()->first;
+ int smallsize = smallest.size();
+
+ for(it2 = nodes.begin(); it2 != nodes.end(); it2++) {
+ if(it2->first.size() < smallsize) { smallsize = it2->first.size(); smallest = it2->first; }
+ }
+
+ return smallest;
+ }
+ catch(exception& e) {
+ errorOut(e, "ConcensusCommand", "getSmallest");
+ exit(1);
+ }
+}
//**********************************************************************************************************************
-vector<string> ConcensusCommand::getNextAvailableSet(vector<string> bigset) {
+vector<string> ConcensusCommand::getNextAvailableSet(vector<string> bigset, vector<string>& rest) {
try {
+//cout << "new call " << endl << endl << endl;
vector<string> largest; largest.clear();
- int largestSighting = -1;
-
- //go through the sets
- for (it2 = nodePairs.begin(); it2 != nodePairs.end(); it2++) {
- //are you a subset of bigset
- if (isSubset(bigset, it2->first) == true) {
-
- //are you the largest. if you are the same size as current largest refer to sighting
- if (it2->first.size() > largest.size()) { largest = it2->first; largestSighting = it2->second; }
- else if (it2->first.size() == largest.size()) {
- if (it2->second > largestSighting) { largest = it2->first; largestSighting = it2->second; }
- }
-
- }
+ rest.clear();
+
+ //if you are just 2 groups
+ if (bigset.size() == 2) {
+ rest.push_back(bigset[0]);
+ largest.push_back(bigset[1]);
+ }else{
+ rest = bestSplit[bigset][0];
+ largest = bestSplit[bigset][1];
}
+
+ //save for printing out later and for branch lengths
+ nodePairsInTree[rest] = nodePairs[rest];
+
+ //delete whatever set you return because it is no longer available
+ nodePairs.erase(rest);
+
//save for printing out later and for branch lengths
nodePairsInTree[largest] = nodePairs[largest];
}
}
+/**********************************************************************************************************************/
+int ConcensusCommand::getSubgroupRating(vector<string> group) {
+ try {
+ map< vector<string>, int>::iterator ittemp;
+ map< vector< vector<string> > , int >::iterator it3;
+ int rate = 0;
+
+ // ***********************************************************************************//
+ //1. this function must be called passing it littlest sets to biggest
+ // since it the rating is made from your sighting plus you best splits rating
+ //2. it saves the top pair to use later
+ // ***********************************************************************************//
+
+
+ if (group.size() < 3) { return rate; }
+
+ map< vector<string>, int> possiblePairing; //this is all the subsets of group
+
+ //go through the sets
+ for (it2 = nodePairs.begin(); it2 != nodePairs.end(); it2++) {
+ //are you a subset of bigset, then save in possiblePairings
+ if (isSubset(group, it2->first) == true) { possiblePairing[it2->first] = it2->second; }
+ }
+
+ map< vector< vector<string> > , int > rating;
+
+ while (possiblePairing.size() != 0) {
+
+ it2 = possiblePairing.begin();
+ vector<string> temprest = getRestSet(group, it2->first);
+
+ //is the rest a set available in possiblePairings
+ ittemp = possiblePairing.find(temprest);
+ if (ittemp != possiblePairing.end()) { //if the rest is in the possible pairings then add this pair to rating map
+ vector< vector<string> > temprate;
+ temprate.push_back(it2->first); temprate.push_back(temprest);
+
+ rating[temprate] = (nodePairsInitialRate[it2->first] + nodePairsInitialRate[temprest]);
+
+ //erase so you dont add 1,2 and 2,1.
+ possiblePairing.erase(temprest);
+ }
+
+ possiblePairing.erase(it2);
+ }
+
+
+ it3 = rating.begin();
+ rate = it3->second;
+ vector< vector<string> > topPair = it3->first;
+
+ //choose the split with the best rating
+ for (it3 = rating.begin(); it3 != rating.end(); it3++) {
+
+ if (it3->second > rate) { rate = it3->second; topPair = it3->first; }
+ }
+
+ bestSplit[group] = topPair;
+
+ return rate;
+ }
+ catch(exception& e) {
+ errorOut(e, "ConcensusCommand", "getSubgroupRating");
+ exit(1);
+ }
+}
+
//**********************************************************************************************************************
vector<string> ConcensusCommand::getRestSet(vector<string> bigset, vector<string> subset) {
try {
//its not in the subset so put it in the rest
if (inSubset == false) { rest.push_back(bigset[i]); }
}
-
- //save for printing out later and for branch lengths
- nodePairsInTree[rest] = nodePairs[rest];
-
- //delete whatever set you return because it is no longer available
- nodePairs.erase(rest);
return rest;
bool ConcensusCommand::isSubset(vector<string> bigset, vector<string> subset) {
try {
+
+ if (subset.size() > bigset.size()) { return false; }
+
//check if each guy in suset is also in bigset
for (int i = 0; i < subset.size(); i++) {
bool match = false;
// ie. combos FI and EGK would create nodePairs[vector containing F and I] = 1; nodePairs[vector containing E, G and K] = 1
// if you saw the combo FI again in another tree you would then update nodePairs[vector containing F and I] = 2;
// requires vectors to be sorted to find key.
+ map< vector<string>, vector< vector<string> > > bestSplit; //maps a group to its best split
+ map< vector<string>, int > nodePairsInitialRate;
map< vector<string>, int > nodePairsInTree;
map<string, int>::iterator it;
map< vector<string>, int>::iterator it2;
int numNodes, numLeaves, count; //count is the next available spot in the tree vector
void getSets();
- vector<string> getNextAvailableSet(vector<string>); //gets next largest and highest rated set that is a subset of the set passed in.
+ int getSubgroupRating(vector<string>);
+ vector<string> getSmallest(map< vector<string>, int>);
+ vector<string> getNextAvailableSet(vector<string>, vector<string>&);
vector<string> getRestSet(vector<string>, vector<string>);
bool isSubset(vector<string>, vector<string>);
int findSpot(string);
--- /dev/null
+#ifndef FILTERS_H
+#define FILTERS_H
+
+/*
+ * filters.h
+ * Mothur
+ *
+ * Created by Sarah Westcott on 6/29/09.
+ * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
+ *
+ */
+
+#include "mothur.h"
+#include "sequence.hpp"
+
+
+/***********************************************************************/
+
+class Filters {
+
+public:
+ Filters() {};
+ ~Filters(){};
+
+ string getFilter() { return filter; }
+ void setFilter(string s) { filter = s; }
+ void setLength(int l) { alignmentLength = l; }
+ void setSoft(float s) { soft = s; }
+ void setTrump(float t) { trump = t; }
+ void setNumSeqs(int num) { numSeqs = num; }
+
+ void initialize() {
+ a.assign(alignmentLength, 0);
+ t.assign(alignmentLength, 0);
+ g.assign(alignmentLength, 0);
+ c.assign(alignmentLength, 0);
+ gap.assign(alignmentLength, 0);
+ }
+
+ void doSoft() {
+ int threshold = int (soft * numSeqs);
+
+ for(int i=0;i<alignmentLength;i++){
+ if(a[i] < threshold && t[i] < threshold && g[i] < threshold && c[i] < threshold){ filter[i] = 0; }
+ }
+ }
+
+ void doVertical() {
+
+ for(int i=0;i<alignmentLength;i++){
+ if(gap[i] == numSeqs) { filter[i] = '0'; }
+ }
+
+ }
+
+ void doTrump(Sequence seq) {
+
+ string curAligned = seq.getAligned();
+
+ for(int j = 0; j < alignmentLength; j++) {
+ if(curAligned[j] == trump){
+ filter[j] = '0';
+ }
+ }
+
+ }
+
+ void doHard(string hard) {
+ ifstream fileHandle;
+ openInputFile(hard, fileHandle);
+
+ fileHandle >> filter;
+
+ fileHandle.close();
+ }
+
+ void getFreqs(Sequence seq) {
+
+ string curAligned = seq.getAligned();
+
+ for(int j=0;j<alignmentLength;j++){
+ if(toupper(curAligned[j]) == 'A') { a[j]++; }
+ else if(toupper(curAligned[j]) == 'T' || toupper(curAligned[j]) == 'U') { t[j]++; }
+ else if(toupper(curAligned[j]) == 'G') { g[j]++; }
+ else if(toupper(curAligned[j]) == 'C') { c[j]++; }
+ else if(curAligned[j] == '-' || curAligned[j] == '.') { gap[j]++; }
+ }
+ }
+
+protected:
+ string filter;
+ vector<int> a, t, g, c, gap;
+ int alignmentLength, numSeqs;
+ float soft;
+ char trump;
+
+};
+
+/***********************************************************************/
+
+#endif
+
numSeqs = 0;
+ if (abort == false) {
+
+ if (soft != 0) { F.setSoft(soft); }
+ if (trump != '*') { F.setTrump(trump); }
+
+ }
+
}
}
/**************************************************************************************/
-void FilterSeqsCommand::doHard() {
-
- ifstream fileHandle;
- openInputFile(hard, fileHandle);
-
- fileHandle >> filter;
-
- fileHandle.close();
-
-}
-
-/**************************************************************************************/
-
-void FilterSeqsCommand::doTrump(Sequence seq) {
-
- string curAligned = seq.getAligned();
-
- for(int j = 0; j < alignmentLength; j++) {
- if(curAligned[j] == trump){
- filter[j] = '0';
- }
- }
-
-}
-
-/**************************************************************************************/
-
-void FilterSeqsCommand::doVertical() {
-
- for(int i=0;i<alignmentLength;i++){
- if(gap[i] == numSeqs) { filter[i] = '0'; }
- }
-
-}
-
-/**************************************************************************************/
-
-void FilterSeqsCommand::doSoft() {
-
- int threshold = int (soft * numSeqs);
-
- for(int i=0;i<alignmentLength;i++){
- if(a[i] < threshold && t[i] < threshold && g[i] < threshold && c[i] < threshold){ filter[i] = 0; }
- }
-}
-
-/**************************************************************************************/
-
-void FilterSeqsCommand::getFreqs(Sequence seq) {
-
- string curAligned = seq.getAligned();;
-
- for(int j=0;j<alignmentLength;j++){
- if(toupper(curAligned[j]) == 'A') { a[j]++; }
- else if(toupper(curAligned[j]) == 'T' || toupper(curAligned[j]) == 'U') { t[j]++; }
- else if(toupper(curAligned[j]) == 'G') { g[j]++; }
- else if(toupper(curAligned[j]) == 'C') { c[j]++; }
- else if(curAligned[j] == '-' || curAligned[j] == '.') { gap[j]++; }
- }
-
-}
-
-/**************************************************************************************/
-
int FilterSeqsCommand::execute() {
try {
alignmentLength = testSeq.getAlignLength();
inFASTA.seekg(0);
+ F.setLength(alignmentLength);
+
if(soft != 0 || isTrue(vertical)){
- a.assign(alignmentLength, 0);
- t.assign(alignmentLength, 0);
- g.assign(alignmentLength, 0);
- c.assign(alignmentLength, 0);
- gap.assign(alignmentLength, 0);
+ F.initialize();
}
- if(hard.compare("") != 0) { doHard(); }
- else { filter = string(alignmentLength, '1'); }
+ if(hard.compare("") != 0) { F.doHard(hard); }
+ else { F.setFilter(string(alignmentLength, '1')); }
if(trump != '*' || isTrue(vertical) || soft != 0){
while(!inFASTA.eof()){ //read through and create the filter...
Sequence seq(inFASTA);
- if(trump != '*'){ doTrump(seq); }
- if(isTrue(vertical) || soft != 0){ getFreqs(seq); }
+ if(trump != '*'){ F.doTrump(seq); }
+ if(isTrue(vertical) || soft != 0){ F.getFreqs(seq); }
numSeqs++;
cout.flush();
}
}
inFASTA.close();
+ F.setNumSeqs(numSeqs);
+
+
+ if(isTrue(vertical) == 1) { F.doVertical(); }
+ if(soft != 0) { F.doSoft(); }
- if(isTrue(vertical) == 1) { doVertical(); }
- if(soft != 0) { doSoft(); }
+ filter = F.getFilter();
ofstream outFilter;
string filterFile = getRootName(fastafile) + "filter";
*/
#include "command.hpp"
+#include "filters.h"
class Sequence;
class FilterSeqsCommand : public Command {
void help();
private:
- void doHard();
- void doTrump(Sequence);
- void doVertical();
- void doSoft();
- void getFreqs(Sequence);
string vertical, filter, fastafile, hard;
int alignmentLength;
float soft;
int numSeqs;
+ Filters F;
+
vector<int> a, t, g, c, gap;
};
//get estimates for numA
data = singleCalc->getValues(sabundA);
numA = data[0];
+ //cout << "num a = " << numA << endl;
//get estimates for numB
data = singleCalc->getValues(sabundB);
numB = data[0];
-
+ //cout << "num b = " << numB << endl;
//get estimates for numC
data = singleCalc->getValues(sabundC);
numC = data[0];
-
+ //cout << "num c = " << numC << endl;
//get estimates for numD
data = singleCalc->getValues(sabundD);
numD = data[0];
-
+//cout << "num d = " << numD << endl;
//get estimates for pairs
subset.clear();
subset.push_back(lookup[0]); subset.push_back(lookup[1]);
data = vCalcs[i]->getValues(subset);
sharedAB = data[0];
-
+ //cout << "num ab = " << sharedAB << endl;
subset.clear();
subset.push_back(lookup[0]); subset.push_back(lookup[2]);
data = vCalcs[i]->getValues(subset);
sharedAC = data[0];
-
+ //cout << "num ac = " << sharedAC << endl;
subset.clear();
subset.push_back(lookup[0]); subset.push_back(lookup[3]);
data = vCalcs[i]->getValues(subset);
sharedAD = data[0];
-
+ //cout << "num ad = " << sharedAD << endl;
subset.clear();
subset.push_back(lookup[1]); subset.push_back(lookup[2]);
data = vCalcs[i]->getValues(subset);
sharedBC = data[0];
-
+ //cout << "num bc = " << sharedBC << endl;
subset.clear();
subset.push_back(lookup[1]); subset.push_back(lookup[3]);
data = vCalcs[i]->getValues(subset);
sharedBD = data[0];
-
+ //cout << "num bd = " << sharedBD << endl;
subset.clear();
subset.push_back(lookup[2]); subset.push_back(lookup[3]);
data = vCalcs[i]->getValues(subset);
sharedCD = data[0];
-
-
+
+ //cout << "num cd = " << sharedCD << endl;
//get estimates for combos of 3
subset.clear();
subset.push_back(lookup[0]); subset.push_back(lookup[1]); subset.push_back(lookup[2]);
data = vCalcs[i]->getValues(subset);
sharedABC = data[0];
-
+ //cout << "num abc = " << sharedABC << endl;
subset.clear();
subset.push_back(lookup[0]); subset.push_back(lookup[2]); subset.push_back(lookup[3]);
data = vCalcs[i]->getValues(subset);
sharedACD = data[0];
-
+ //cout << "num acd = " << sharedACD << endl;
subset.clear();
subset.push_back(lookup[1]); subset.push_back(lookup[2]); subset.push_back(lookup[3]);
data = vCalcs[i]->getValues(subset);
sharedBCD = data[0];
-
+ //cout << "num bcd = " << sharedBCD << endl;
subset.clear();
subset.push_back(lookup[0]); subset.push_back(lookup[1]); subset.push_back(lookup[3]);
data = vCalcs[i]->getValues(subset);
sharedABD = data[0];
-
+//cout << "num abd = " << sharedABD << endl;
//get estimate for all four
data = vCalcs[i]->getValues(lookup);
sharedABCD = data[0];
-
-
+ //cout << "num abcd = " << sharedABCD << endl << endl;
+ //make adjustments
+ sharedABC = sharedABC - sharedABCD;
+ //cout << "num abc = " << sharedABC << endl;
+ sharedABD = sharedABD - sharedABCD;
+ //cout << "num abd = " << sharedABD << endl;
+ sharedACD = sharedACD - sharedABCD;
+ //cout << "num acd = " << sharedACD << endl;
+ sharedBCD = sharedBCD - sharedABCD;
+ //cout << "num bcd = " << sharedBCD << endl;
+
+ sharedAB = sharedAB - sharedABC - sharedABCD - sharedABD; // cout << "num ab = " << sharedAB << endl;
+ sharedAC = sharedAC - sharedABC - sharedABCD - sharedACD; // cout << "num ac = " << sharedAC << endl;
+ sharedAD = sharedAD - sharedABD - sharedABCD - sharedACD; // cout << "num ad = " << sharedAD << endl;
+
+ sharedBC = sharedBC - sharedABC - sharedABCD - sharedBCD; // cout << "num bc = " << sharedBC << endl;
+ sharedBD = sharedBD - sharedABD - sharedABCD - sharedBCD; // cout << "num bd = " << sharedBD << endl;
+ sharedCD = sharedCD - sharedACD - sharedABCD - sharedBCD; // cout << "num cd = " << sharedCD << endl;
+
+ numA = numA - sharedAB - sharedAC - sharedAD - sharedABCD - sharedABC - sharedACD - sharedABD;
+ //cout << "num a = " << numA << endl;
+ numB = numB - sharedAB - sharedBC - sharedBD - sharedABCD - sharedABC - sharedABD - sharedBCD;
+ //cout << "num b = " << numB << endl;
+ numC = numC - sharedAC - sharedBC - sharedCD - sharedABCD - sharedABC - sharedACD - sharedBCD;
+ //cout << "num c = " << numC << endl;
+ numD = numD - sharedAD - sharedBD - sharedCD - sharedABCD - sharedBCD - sharedACD - sharedABD;
+ //cout << "num d = " << numD << endl;
+
//image window
outsvg << "<svg width=\"100%\" height=\"100%\" viewBox=\"0 0 700 700\" >\n";
outsvg << "<g>\n";
outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString(215 - ((int)toString(sharedBC).length() / 2)) + "\" y=\"190\">" + toString(sharedBC) + "</text>\n";
outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString(150 - ((int)toString(numD).length() / 2)) + "\" y=\"230\">" + toString(numD) + "</text>\n";
outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString(150 - ((int)lookup[3]->getGroup().length() / 2)) + "\" y=\"210\">" + lookup[3]->getGroup() + "</text>\n";
- outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString(240 - ((int)toString(sharedBC).length() / 2)) + "\" y=\"325\">" + toString(sharedAD) + "</text>\n";
- outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString(470 - ((int)toString(sharedBC).length() / 2)) + "\" y=\"325\">" + toString(sharedBD) + "</text>\n";
+ outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString(240 - ((int)toString(sharedAD).length() / 2)) + "\" y=\"325\">" + toString(sharedAD) + "</text>\n";
+ outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString(470 - ((int)toString(sharedBD).length() / 2)) + "\" y=\"325\">" + toString(sharedBD) + "</text>\n";
outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString(350 - ((int)toString(sharedCD).length() / 2)) + "\" y=\"430\">" + toString(sharedCD) + "</text>\n";
outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString(275 - ((int)toString(sharedABD).length() / 2)) + "\" y=\"240\">" + toString(sharedABD) + "</text>\n";
outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString(400 - ((int)toString(sharedBCD).length() / 2)) + "\" y=\"360\">" + toString(sharedBCD) + "</text>\n";