A70B53AC0F4CD7AD0064797E /* getlinecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A70B53A80F4CD7AD0064797E /* getlinecommand.cpp */; };
A70DECD91063D8B40057C03C /* secondarystructurecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A70DECD81063D8B40057C03C /* secondarystructurecommand.cpp */; };
A7283FF81056CAE100D0CC69 /* chimeracheckrdp.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7283FF71056CAE100D0CC69 /* chimeracheckrdp.cpp */; };
+ A73329CF1083B3B3003B10C5 /* getlistcountcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A73329CE1083B3B3003B10C5 /* getlistcountcommand.cpp */; };
A75B887E104C16860083C454 /* ccode.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A75B887B104C16860083C454 /* ccode.cpp */; };
A7B04493106CC3E60046FC83 /* chimeraslayer.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7B04492106CC3E60046FC83 /* chimeraslayer.cpp */; };
A7B0450E106CEEC90046FC83 /* slayer.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7B0450D106CEEC90046FC83 /* slayer.cpp */; };
A70DECD81063D8B40057C03C /* secondarystructurecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = secondarystructurecommand.cpp; sourceTree = SOURCE_ROOT; };
A7283FF61056CAE100D0CC69 /* chimeracheckrdp.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = chimeracheckrdp.h; sourceTree = SOURCE_ROOT; };
A7283FF71056CAE100D0CC69 /* chimeracheckrdp.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = chimeracheckrdp.cpp; sourceTree = SOURCE_ROOT; };
+ A73329CD1083B3B3003B10C5 /* getlistcountcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = getlistcountcommand.h; sourceTree = "<group>"; };
+ A73329CE1083B3B3003B10C5 /* getlistcountcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = getlistcountcommand.cpp; sourceTree = "<group>"; };
A75B887B104C16860083C454 /* ccode.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = ccode.cpp; sourceTree = SOURCE_ROOT; };
A75B887C104C16860083C454 /* ccode.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = ccode.h; sourceTree = SOURCE_ROOT; };
A7B04491106CC3E60046FC83 /* chimeraslayer.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = chimeraslayer.h; sourceTree = SOURCE_ROOT; };
A70B53A60F4CD7AD0064797E /* getlabelcommand.cpp */,
A70B53A90F4CD7AD0064797E /* getlinecommand.h */,
A70B53A80F4CD7AD0064797E /* getlinecommand.cpp */,
+ A73329CD1083B3B3003B10C5 /* getlistcountcommand.h */,
+ A73329CE1083B3B3003B10C5 /* getlistcountcommand.cpp */,
370B88050F8A4EE4005AB382 /* getoturepcommand.h */,
370B88060F8A4EE4005AB382 /* getoturepcommand.cpp */,
3749273D0FD5956B0031C06B /* getrabundcommand.h */,
A7E4A824106A9AD700688F62 /* maligner.cpp in Sources */,
A7B04493106CC3E60046FC83 /* chimeraslayer.cpp in Sources */,
A7B0450E106CEEC90046FC83 /* slayer.cpp in Sources */,
+ A73329CF1083B3B3003B10C5 /* getlistcountcommand.cpp in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
if (candidateSeq->getUnaligned().length() > alignment->getnRows()) {
alignment->resize(candidateSeq->getUnaligned().length()+1);
}
-
+
report.setCandidate(candidateSeq);
Sequence temp = templateDB->findClosestSequence(candidateSeq);
Sequence* templateSeq = &temp;
-
+
report.setTemplate(templateSeq);
report.setSearchParameters(search, templateDB->getSearchScore());
-
+
Nast nast(alignment, candidateSeq, templateSeq);
report.setAlignmentParameters(align, alignment);
-
+
report.setNastParameters(nast);
alignmentFile << '>' << candidateSeq->getName() << '\n' << candidateSeq->getAligned() << endl;
-
+
report.print();
- delete candidateSeq;
+ delete candidateSeq;
+
}
alignmentFile.close();
#include "dist.h"
#include "decalc.h"
+/***********************************************************/
//This class was created using the algorythms described in the
// "Evaluating putative chimeric sequences from PCR-amplified products" paper
//by Juan M. Gonzalez, Johannes Zimmerman and Cesareo Saiz-Jimenez.
virtual void setDivR(float d) { divR = d; }
virtual void setParents(int p) { parents = p; }
virtual void setMinSim(int s) { minSim = s; }
- virtual void setPrint(int p) { printAll = p; }
+ virtual void setIters(int i) { iters = i; }
virtual void setCons(string){};
protected:
- bool filter, correction, svg, printAll;
- int processors, window, increment, numWanted, kmerSize, match, misMatch, minSim, parents;
+ bool filter, correction, svg;
+ int processors, window, increment, numWanted, kmerSize, match, misMatch, minSim, parents, iters;
float divR;
string seqMask, quanfile, filterString, name;
#include "kmerdb.hpp"
#include "database.hpp"
+/***********************************************************/
//This class was created using the algorythms described in
//CHIMERA_CHECK version 2.7 written by Niels Larsen.
else {
//valid paramters for this command
- string Array[] = {"fasta", "filter", "correction", "processors", "method", "window", "increment", "template", "conservation", "quantile", "mask", "numwanted", "ksize", "svg", "name", "match","mismatch", "divergence", "minsim", "parents", "printall" };
+ string Array[] = {"fasta", "filter", "correction", "processors", "method", "window", "increment", "template", "conservation", "quantile", "mask", "numwanted", "ksize", "svg", "name", "match","mismatch", "divergence", "minsim", "parents", "iters" };
vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
OptionParser parser(option);
temp = validParameter.validFile(parameters, "correction", false); if (temp == "not found") { temp = "T"; }
correction = isTrue(temp);
- temp = validParameter.validFile(parameters, "printall", false); if (temp == "not found") { temp = "F"; }
- printAll = isTrue(temp);
-
temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found") { temp = "1"; }
convert(temp, processors);
temp = validParameter.validFile(parameters, "parents", false); if (temp == "not found") { temp = "5"; }
convert(temp, parents);
+
+ temp = validParameter.validFile(parameters, "iters", false); if (temp == "not found") { temp = "1000"; }
+ convert(temp, iters);
temp = validParameter.validFile(parameters, "increment", false);
if ((temp == "not found") && ((method == "chimeracheck") || (method == "chimeraslayer"))) { temp = "10"; }
//"fasta", "filter", "correction", "processors", "method", "window", "increment", "template", "conservation", "quantile", "mask", "numwanted", "ksize", "svg", "name"
//mothurOut("chimera.seqs ASSUMES that your sequences are ALIGNED and if using a template that the template file sequences are the same length as the fasta file sequences.\n\n");
mothurOut("The chimera.seqs command reads a fastafile and creates list of potentially chimeric sequences.\n");
- mothurOut("The chimera.seqs command parameters are fasta, filter, correction, processors, mask, method, window, increment, template, conservation, quantile, numwanted, ksize, svg, name.\n");
+ mothurOut("The chimera.seqs command parameters are fasta, filter, correction, processors, mask, method, window, increment, template, conservation, quantile, numwanted, ksize, svg, name, iters.\n");
mothurOut("The fasta parameter is always required and template is required if using pintail, ccode or chimeracheck.\n");
mothurOut("The filter parameter allows you to specify if you would like to apply a vertical and 50% soft filter. \n");
mothurOut("The correction parameter allows you to put more emphasis on the distance between highly similar sequences and less emphasis on the differences between remote homologs.\n");
mothurOut("The ksize parameter allows you to input kmersize. \n");
mothurOut("The svg parameter allows you to specify whether or not you would like a svg file outputted for each query sequence.\n");
mothurOut("The name parameter allows you to enter a file containing names of sequences you would like .svg files for.\n");
+ mothurOut("The iters parameter allows you to specify the number of bootstrap iters to do with the chimeraslayer method.\n");
mothurOut("NOT ALL PARAMETERS ARE USED BY ALL METHODS. Please look below for method specifics.\n\n");
mothurOut("Details for each method: \n");
mothurOut("\tpintail: \n");
chimera->setDivR(divR);
chimera->setParents(parents);
chimera->setMinSim(minSimilarity);
- chimera->setPrint(printAll);
+ chimera->setIters(iters);
//find chimeras
void ChimeraSlayer::print(ostream& out) {
try {
mothurOutEndLine();
+ mothurOut("Only reporting sequence supported by 90% of bootstrapped results.");
+ mothurOutEndLine();
for (int i = 0; i < querySeqs.size(); i++) {
if (chimeraFlags[i] == "yes") {
- mothurOut(querySeqs[i]->getName() + "\tyes"); mothurOutEndLine();
-
+ if ((chimeraResults[i][0].bsa >= 90.0) || (chimeraResults[i][0].bsb >= 90.0)) {
+ mothurOut(querySeqs[i]->getName() + "\tyes"); mothurOutEndLine();
+ out << querySeqs[i]->getName() << "\tyes" << endl;
+ }else {
+ out << querySeqs[i]->getName() << "\tno" << endl;
+ mothurOut(querySeqs[i]->getName() + "\tno"); mothurOutEndLine();
+ }
+
+ printBlock(chimeraResults[i][0], out, i);
+
+ out << endl;
}else{
out << querySeqs[i]->getName() << "\tno" << endl;
- mothurOut("no");
+ mothurOut(querySeqs[i]->getName() + "\tno"); mothurOutEndLine();
}
}
-/*
-
- my $div_ratio_QLA_QRB = $data_struct->{div_ratio_QLA_QRB};
- my $div_ratio_QRA_QLB = $data_struct->{div_ratio_QLB_QRA};
-
- my $per_id_QLA = $data_struct->{per_id_QLA};
- my $per_id_QRB = $data_struct->{per_id_QRB};
- my $per_id_AB = $data_struct->{per_id_AB};
- my $per_id_QA = $data_struct->{per_id_QA};
- my $per_id_QB = $data_struct->{per_id_QB};
- my $per_id_LAB = $data_struct->{per_id_LAB};
- my $per_id_RAB = $data_struct->{per_id_RAB};
- my $per_id_QRA = $data_struct->{per_id_QRA};
- my $per_id_QLB = $data_struct->{per_id_QLB};
- my $per_id_QLB_QRA = $data_struct->{per_id_QLB_QRA};
- my $per_id_QLA_QRB = $data_struct->{per_id_QLA_QRB};
-
- my $win_left_end5 = $data_struct->{win_left_end5};
- my $win_left_end3 = $data_struct->{win_left_end3};
- my $win_right_end5 = $data_struct->{win_right_end5};
- my $win_right_end3 = $data_struct->{win_right_end3};
- my $Q = $data_struct->{query_alignment};
- my $A = $data_struct->{parent_A_alignment};
- my $B = $data_struct->{parent_B_alignment};
- my $BS_A = $data_struct->{BS_A};
- my $BS_B = $data_struct->{BS_B};
-
- my @Q_chars = @{$Q->{align}};
- my @A_chars = @{$A->{align}};
- my @B_chars = @{$B->{align}};
-
- my $query_acc = $Q->{acc};
- my $A_acc = $A->{acc};
- my $B_acc = $B->{acc};
-
- my $break_left = $Q->{seqPos}->[$win_left_end3];
- my $break_right = $Q->{seqPos}->[$win_right_end5];
-
-
- cout << "//\n## CHIMERA\t" << querySeqs[i]->getName() << "\t" << $break_left-$break_right" << endl
- << "\tDIV_QLARB: ". sprintf("%.3f", $div_ratio_QLA_QRB)
- << "\tBS_QLARB: " . sprintf("%.2f", $BS_A)
- << "\tDIV_QRALB: " . sprintf("%.3f", $div_ratio_QRA_QLB)
- << "\tBS_QRALB: " . sprintf("%.2f", $BS_B)
- << "\t$A_acc\t$B_acc"
- << "\tbreakpoint: $break_left-$break_right\n\n";
-
- ## draw illustration:
-
- print " Per_id parents: " . sprintf("%.2f", $per_id_AB) . "\n\n";
- print " Per_id(Q,A): " . sprintf("%.2f", $per_id_QA) . "\n";
- print "--------------------------------------------------- A: $A_acc\n"
- . " " . sprintf("%.2f", $per_id_QLA) . " " . sprintf("%.2f", $per_id_QRA) . "\n"
- . "~~~~~~~~~~~~~~~~~~~~~~~~\\ /~~~~~~~~~~~~~~~~~~~~~~~~ Q: $query_acc\n"
- . "DivR: " . sprintf("%.3f", $div_ratio_QLA_QRB) . " BS: " . sprintf("%.2f", $BS_A) . " |\n"
- . "Per_id(QLA,QRB): " . sprintf("%.2f", $per_id_QLA_QRB) . " |\n"
- . " |\n"
- . " (L-AB: " . sprintf("%.2f", $per_id_LAB) . ") | (R-AB: " . sprintf("%.2f", $per_id_RAB) . ")\n"
- . " WinL:$win_left_end5-$win_left_end3 | WinR:$win_right_end5-$win_right_end3\n"
- . " |\n"
- . "Per_id(QLB,QRA): " . sprintf("%.2f", $per_id_QLB_QRA) . " |\n"
- . "DivR: " . sprintf("%.3f", $div_ratio_QRA_QLB) . " BS: " . sprintf("%.2f", $BS_B) . " |\n"
- . "~~~~~~~~~~~~~~~~~~~~~~~~/ \\~~~~~~~~~~~~~~~~~~~~~~~~~ Q: $query_acc\n"
- . " " . sprintf("%.2f", $per_id_QLB) . " " . sprintf("%.2f", $per_id_QRB) . "\n"
- . "---------------------------------------------------- B: $B_acc\n";
- print " Per_id(Q,B): ". sprintf("%.2f", $per_id_QB) . "\n\n";
-
- my $deltaL = $per_id_QLA - $per_id_QLB;
- my $deltaR = $per_id_QRA - $per_id_QRB;
-
- print "DeltaL: " . sprintf("%.2f", $deltaL) . " DeltaR: " . sprintf("%.2f", $deltaR) . "\n\n";
-
- unless ($printAlignmentsFlag) { return; }
-
-
- ## build the left windows:
- my @Q_left_win = @Q_chars[$win_left_end5..$win_left_end3];
- my @A_left_win = @A_chars[$win_left_end5..$win_left_end3];
- my @B_left_win = @B_chars[$win_left_end5..$win_left_end3];
-
- &print_alignment($A_acc, \@A_left_win,
- $query_acc, \@Q_left_win,
- $B_acc, \@B_left_win);
-
- print "\t\t** Breakpoint **\n\n";
-
- my @Q_right_win = @Q_chars[$win_right_end5..$win_right_end3];
- my @A_right_win = @A_chars[$win_right_end5..$win_right_end3];
- my @B_right_win = @B_chars[$win_right_end5..$win_right_end3];
-
- &print_alignment($A_acc, \@A_right_win,
- $query_acc, \@Q_right_win,
- $B_acc, \@B_right_win);
-
- return;
-}
-
-
-####
-
- */
}
catch(exception& e) {
chimeraResults.resize(numSeqs);
chimeraFlags.resize(numSeqs, "no");
+ spotMap.resize(numSeqs);
//break up file if needed
int linesPerProcess = numSeqs / processors ;
if (seqMask != "") { decalc = new DeCalculator(); } //to use below
+ //initialize spotMap
+ for (int j = 0; j < numSeqs; j++) {
+ for (int i = 0; i < querySeqs[0]->getAligned().length(); i++) {
+ spotMap[j][i] = i;
+ }
+ }
+
//referenceSeqs, numWanted, matchScore, misMatchPenalty, divR, minSimilarity
maligner = new Maligner(templateSeqs, numWanted, match, misMatch, 1.01, minSim);
- slayer = new Slayer(window, increment, minSim, divR);
+ slayer = new Slayer(window, increment, minSim, divR, iters);
for (int i = 0; i < querySeqs.size(); i++) {
string chimeraFlag = maligner->getResults(querySeqs[i]);
- float percentIdentical = maligner->getPercentID();
+ //float percentIdentical = maligner->getPercentID();
vector<results> Results = maligner->getOutput();
- //cout << querySeqs[i]->getName() << '\t' << chimeraFlag << '\t' << percentIdentical << endl;
+ cout << "Processing sequence: " << i+1 << endl;
- for (int j = 0; j < Results.size(); j++) {
+ //for (int j = 0; j < Results.size(); j++) {
//cout << "regionStart = " << Results[j].regionStart << "\tRegionEnd = " << Results[j].regionEnd << "\tName = " << Results[j].parent << "\tPerQP = " << Results[j].queryToParent << "\tLocalPerQP = " << Results[j].queryToParentLocal << "\tdivR = " << Results[j].divR << endl;
- }
+ //}
- if (chimeraFlag == "yes") {
+ //if (chimeraFlag == "yes") {
//get sequence that were given from maligner results
vector<SeqDist> seqs;
+ map<string, float> removeDups;
+ map<string, float>::iterator itDup;
for (int j = 0; j < Results.size(); j++) {
- Sequence* seq = getSequence(Results[j].parent); //makes copy so you can filter and mask and not effect template
-
- //seq = NULL if error occurred in getSequence
- if (seq == NULL) { break; }
- else {
- SeqDist member;
- member.seq = seq;
- member.dist = (Results[j].regionEnd - Results[j].regionStart + 1) * Results[j].queryToParentLocal;
- seqs.push_back(member);
+ float dist = (Results[j].regionEnd - Results[j].regionStart + 1) * Results[j].queryToParentLocal;
+ //only add if you are not a duplicate
+ itDup = removeDups.find(Results[j].parent);
+ if (itDup == removeDups.end()) { //this is not duplicate
+ removeDups[Results[j].parent] = dist;
+ }else if (dist > itDup->second) { //is this a stronger number for this parent
+ removeDups[Results[j].parent] = dist;
}
}
-
+
+ for (itDup = removeDups.begin(); itDup != removeDups.end(); itDup++) {
+ Sequence* seq = getSequence(itDup->first); //makes copy so you can filter and mask and not effect template
+
+ SeqDist member;
+ member.seq = seq;
+ member.dist = itDup->second;
+
+ seqs.push_back(member);
+ }
+
//limit number of parents to explore - default 5
if (Results.size() > parents) {
//sort by distance
//put seqs into vector to send to slayer
vector<Sequence*> seqsForSlayer;
for (int k = 0; k < seqs.size(); k++) { seqsForSlayer.push_back(seqs[k].seq); }
+//ofstream out;
+//string name = querySeqs[i]->getName();
+//cout << name << endl;
+//name = name.substr(name.find_first_of("|")+1);
+//cout << name << endl;
+//name = name.substr(name.find_first_of("|")+1);
+//cout << name << endl;
+//name = name.substr(0, name.find_last_of("|"));
+//cout << name << endl;
+//string filename = name + ".seqsforslayer";
+//openOutputFile(filename, out);
+//for (int u = 0; u < seqsForSlayer.size(); u++) { seqsForSlayer[u]->printSequence(out); }
+//out.close();
+//filename = name + ".fasta";
+//openOutputFile(filename, out);
+//q->printSequence(out);
+//out.close();
+
//mask then send to slayer...
if (seqMask != "") {
decalc->runMask(seqsForSlayer[k]);
}
+ for (int i = 0; i < numSeqs; i++) {
+ spotMap[i] = decalc->getMaskMap();
+ }
+
}
//send to slayer
//free memory
for (int k = 0; k < seqs.size(); k++) { delete seqs[k].seq; }
- }
+ //}
}
//free memory
}
}
//***************************************************************************************************************
+void ChimeraSlayer::printBlock(data_struct data, ostream& out, int i){
+ try {
+
+ out << "parentA: " << data.parentA.getName() << " parentB: " << data.parentB.getName() << endl;
+ out << "Left Window: " << spotMap[i][data.winLStart] << " " << spotMap[i][data.winLEnd] << endl;
+ out << "Right Window: " << spotMap[i][data.winRStart] << " " << spotMap[i][data.winREnd] << endl;
+
+ out << "Divergence of Query to Leftside ParentA and Rightside ParentB: " << data.divr_qla_qrb << '\t' << "Bootstrap: " << data.bsa << endl;
+ out << "Divergence of Query to Rightside ParentA and Leftside ParentB: " << data.divr_qlb_qra << '\t' << "Bootstrap: " << data.bsb << endl;
+
+ out << "Similarity of parents: " << data.ab << endl;
+ out << "Similarity of query to parentA: " << data.qa << endl;
+ out << "Similarity of query to parentB: " << data.qb << endl;
+
+ //out << "Per_id(QL,A): " << data.qla << endl;
+ //out << "Per_id(QL,B): " << data.qlb << endl;
+ //out << "Per_id(QR,A): " << data.qra << endl;
+ //out << "Per_id(QR,B): " << data.qrb << endl;
+
+ out << "DeltaL: " << (data.qla - data.qlb) << endl;
+ out << "DeltaR: " << (data.qra - data.qrb) << endl;
+ }
+ catch(exception& e) {
+ errorOut(e, "ChimeraSlayer", "printBlock");
+ exit(1);
+ }
+}
+//***************************************************************************************************************
vector<linePair*> lines;
vector<Sequence*> querySeqs;
vector<Sequence*> templateSeqs;
+ vector< map<int, int> > spotMap;
vector< vector<data_struct> > chimeraResults;
vector<string> chimeraFlags;
string fastafile, templateFile;
Sequence* getSequence(string); //find sequence from name
-
-
+ void printBlock(data_struct, ostream&, int i);
};
/************************************************************************/
#include "systemcommand.h"
#include "secondarystructurecommand.h"
#include "getsharedotucommand.h"
+#include "getlistcountcommand.h"
/***********************************************************/
commands["system"] = "system";
commands["align.check"] = "align.check";
commands["get.sharedotu"] = "get.sharedotu";
+ commands["get.listcount"] = "get.listcount";
commands["quit"] = "quit";
}
else if(commandName == "system") { command = new SystemCommand(optionString); }
else if(commandName == "align.check") { command = new AlignCheckCommand(optionString); }
else if(commandName == "get.sharedotu") { command = new GetSharedOTUCommand(optionString); }
+ else if(commandName == "get.listcount") { command = new GetListCountCommand(optionString); }
else { command = new NoCommand(optionString); }
return command;
--- /dev/null
+/*
+ * getlistcountcommand.cpp
+ * Mothur
+ *
+ * Created by westcott on 10/12/09.
+ * Copyright 2009 Schloss Lab. All rights reserved.
+ *
+ */
+
+#include "getlistcountcommand.h"
+
+//**********************************************************************************************************************
+GetListCountCommand::GetListCountCommand(string option){
+ try {
+ globaldata = GlobalData::getInstance();
+ abort = false;
+ allLines = 1;
+ labels.clear();
+
+ //allow user to run help
+ if(option == "help") { help(); abort = true; }
+
+ else {
+ //valid paramters for this command
+ string AlignArray[] = {"list","label"};
+ vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
+
+ OptionParser parser(option);
+ map<string, string> parameters = parser.getParameters();
+
+ ValidParameters validParameter;
+
+ //check to make sure all parameters are valid for command
+ for (map<string, string>::iterator it = parameters.begin(); it != parameters.end(); it++) {
+ if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
+ }
+
+ string ranRead = globaldata->getListFile();
+
+ //check for required parameters
+ listfile = validParameter.validFile(parameters, "list", true);
+ if ((listfile == "not found") && (globaldata->getListFile() == "")) { mothurOut("You must read a listfile before running the get.listcount command."); mothurOutEndLine(); abort = true; }
+ else if ((listfile == "not found") && (globaldata->getListFile() != "")) { listfile = globaldata->getListFile(); }
+ else if (listfile == "not open") { abort = true; }
+ else { globaldata->setListFile(listfile); }
+
+ //check for optional parameter and set defaults
+ // ...at some point should added some additional type checking...
+
+ label = validParameter.validFile(parameters, "label", false);
+ if (label == "not found") { label = ""; }
+ else {
+ if(label != "all") { splitAtDash(label, labels); allLines = 0; }
+ else { allLines = 1; }
+ }
+
+ //if the user has not specified any labels use the ones from read.otu
+ if ((label == "") && (ranRead != "")) {
+ allLines = globaldata->allLines;
+ labels = globaldata->labels;
+ }
+ }
+ }
+ catch(exception& e) {
+ errorOut(e, "GetListCountCommand", "GetListCountCommand");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+
+void GetListCountCommand::help(){
+ try {
+ mothurOut("The get.listcount command can only be executed after a successful read.otu command of a listfile or providing a list file using the list parameter.\n");
+ mothurOut("The get.listcount command parameters are list and label. No parameters are required.\n");
+ mothurOut("The label parameter allows you to select what distance levels you would like a output files created for, and are separated by dashes.\n");
+ mothurOut("The get.listcount command should be in the following format: get.listcount(list=yourlistFile, label=yourLabels).\n");
+ mothurOut("Example get.listcount(list=amazon.fn.list, label=0.10).\n");
+ mothurOut("The default value for label is all lines in your inputfile.\n");
+ mothurOut("The get.listcount command outputs a .otu file for each distance you specify listing the bin number and the names of the sequences in that bin.\n");
+ mothurOut("Note: No spaces between parameter labels (i.e. list), '=' and parameters (i.e.yourListFile).\n\n");
+ }
+ catch(exception& e) {
+ errorOut(e, "GetListCountCommand", "help");
+ exit(1);
+ }
+}
+
+//**********************************************************************************************************************
+
+GetListCountCommand::~GetListCountCommand(){}
+
+//**********************************************************************************************************************
+
+int GetListCountCommand::execute(){
+ try {
+ if (abort == true) { return 0; }
+
+ globaldata->setFormat("list");
+
+ //read list file
+ read = new ReadOTUFile(listfile);
+ read->read(&*globaldata);
+
+ input = globaldata->ginput;
+ list = globaldata->gListVector;
+ string lastLabel = list->getLabel();
+
+ //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
+ set<string> processedLabels;
+ set<string> userLabels = labels;
+
+ while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
+
+ if(allLines == 1 || labels.count(list->getLabel()) == 1){
+
+ process(list);
+
+ processedLabels.insert(list->getLabel());
+ userLabels.erase(list->getLabel());
+ }
+
+ if ((anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
+ delete list;
+ list = input->getListVector(lastLabel);
+
+ process(list);
+
+ processedLabels.insert(list->getLabel());
+ userLabels.erase(list->getLabel());
+ }
+
+ lastLabel = list->getLabel();
+
+ delete list;
+ list = input->getListVector();
+ }
+
+
+ //output error messages about any remaining user labels
+ set<string>::iterator it;
+ bool needToRun = false;
+ for (it = userLabels.begin(); it != userLabels.end(); it++) {
+ mothurOut("Your file does not include the label " + *it);
+ if (processedLabels.count(lastLabel) != 1) {
+ mothurOut(". I will use " + lastLabel + "."); mothurOutEndLine();
+ needToRun = true;
+ }else {
+ mothurOut(". Please refer to " + lastLabel + "."); mothurOutEndLine();
+ }
+ }
+
+ //run last label if you need to
+ if (needToRun == true) {
+ if (list != NULL) { delete list; }
+ list = input->getListVector(lastLabel);
+
+ process(list);
+ delete list;
+ }
+
+ delete read;
+ globaldata->gListVector = NULL;
+
+ return 0;
+ }
+ catch(exception& e) {
+ errorOut(e, "GetListCountCommand", "execute");
+ exit(1);
+ }
+}
+
+//**********************************************************************************************************************
+//return 1 if error, 0 otherwise
+void GetListCountCommand::process(ListVector* list) {
+ try {
+ string binnames, name, sequence;
+ string outputFileName = getRootName(listfile) + list->getLabel() + ".otu";
+ openOutputFile(outputFileName, out);
+
+ mothurOut(list->getLabel()); mothurOutEndLine();
+
+ //for each bin in the list vector
+ for (int i = 0; i < list->getNumBins(); i++) {
+ binnames = list->get(i);
+ out << i+1 << '\t' << binnames << endl;
+ }
+
+ out.close();
+ }
+ catch(exception& e) {
+ errorOut(e, "GetListCountCommand", "process");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+
+
--- /dev/null
+#ifndef GETLISTCOUNTCOMMAND_H
+#define GETLISTCOUNTCOMMAND_H
+/*
+ * getlistcountcommand.h
+ * Mothur
+ *
+ * Created by westcott on 10/12/09.
+ * Copyright 2009 Schloss Lab. All rights reserved.
+ *
+ */
+
+#include "command.hpp"
+#include "inputdata.h"
+#include "listvector.hpp"
+#include "readotu.h"
+
+class GlobalData;
+
+/**********************************************************/
+
+class GetListCountCommand : public Command {
+
+public:
+ GetListCountCommand(string);
+ ~GetListCountCommand();
+ int execute();
+ void help();
+
+private:
+ GlobalData* globaldata;
+ ListVector* list;
+ ReadOTUFile* read;
+ InputData* input;
+
+ bool abort, allLines;
+ set<string> labels; //holds labels to be used
+ string label, listfile;
+ ofstream out;
+
+ void process(ListVector*);
+};
+/**********************************************************/
+
+#endif
delete validCommands;
- mothurOutEndLine(); mothurOut("For further assistance please refer to the Mothur manual on our wiki at http://www.mothur.org/wiki, or contact Pat Schloss at pschloss@umich.edu.\n");
+ mothurOutEndLine(); mothurOut("For further assistance please refer to the Mothur manual on our wiki at http://www.mothur.org/wiki, or contact Pat Schloss at mothur.bugs@gmail.com.\n");
return 0;
}
string Maligner::getResults(Sequence* q) {
try {
+ outputResults.clear();
+
//make copy so trimming doesn't destroy query from calling class - remember to deallocate
query = new Sequence(q->getName(), q->getAligned());
//find closest seqs to query in template - returns copies of seqs so trim does not destroy - remember to deallocate
refSeqs = decalc->findClosest(query, db, numWanted);
-
+
refSeqs = minCoverageFilter(refSeqs);
if (refSeqs.size() < 2) {
mothurOut("Error: ");
mothurOut(toString(e.what()));
- mothurOut(" has occurred in the " + object + " class function " + function + ". Please contact Pat Schloss at pschloss@umich.edu, and be sure to include the mothur.logFile with your inquiry.");
+ mothurOut(" has occurred in the " + object + " class function " + function + ". Please contact Pat Schloss at mothur.bugs@gmail.com, and be sure to include the mothur.logFile with your inquiry.");
mothurOutEndLine();
}
candidateSeq->setAligned(candAln);
return;
}
-
+
int fullAlignIndex = 0;
int pairwiseAlignIndex = 0;
string newTemplateAlign = ""; // this is going to be messy so we want a temporary template
newTemplateAlign += tempAln[i];//
}
- int start, end;
+ int start = 0;
+ int end = candAln.length()-1;
+
for(int i=0;i<candAln.length();i++){
if(candAln[i] == 'Z' || !isalnum(candAln[i])) { candAln[i] = '.'; } // if we padded the alignemnt from
else{ start = i; break; } // blast with Z's, change them to
candAln[i] = toupper(candAln[i]); // everything is upper case
}
-
+
if(candAln.length() != tempAln.length()){ // if the regapped candidate sequence is longer than the official
removeExtraGaps(candAln, tempAln, newTemplateAlign);// template alignment then we need to do steps C-F in Fig.
} // 2 of Desantis et al.
-
+
candidateSeq->setAligned(candAln);
}
catch(exception& e) {
#include "dist.h"
#include "decalc.h"
+/***********************************************************/
//This class was created using the algorythms described in the
// "At Least 1 in 20 16S rRNA Sequence Records Currently Held in the Public Repositories is Estimated To Contain Substantial Anomalies" paper
//by Kevin E. Ashelford 1, Nadia A. Chuzhanova 3, John C. Fry 1, Antonia J. Jones 2 and Andrew J. Weightman 1.
#include "slayer.h"
/***********************************************************************/
-Slayer::Slayer(int win, int increment, int parentThreshold, float div) :
- windowSize(win), windowStep(increment), parentFragmentThreshold(parentThreshold), divRThreshold(div) {}
+Slayer::Slayer(int win, int increment, int parentThreshold, float div, int i) :
+ windowSize(win), windowStep(increment), parentFragmentThreshold(parentThreshold), divRThreshold(div), iters(i){}
/***********************************************************************/
string Slayer::getResults(Sequence* query, vector<Sequence*> refSeqs) {
try {
-cout << "refSeqs = " << refSeqs.size() << endl;
vector<data_struct> all; all.clear();
for (int i = 0; i < refSeqs.size(); i++) {
float snpRateLeft = numSNPSLeft / (float) winSizeLeft;
float snpRateRight = numSNPSRight / (float) winSizeRight;
-
float logR = log(snpRateLeft / snpRateRight) / log(2);
// do not accept excess snp ratio on either side of the break
float BS_A, BS_B;
bootstrapSNPS(snpsLeft, snpsRight, BS_A, BS_B);
-
+
divs[k].bsa = BS_A;
divs[k].bsb = BS_B;
try{
vector<data_struct> data;
-cout << q->getName() << '\t' << q->getAligned().length() << endl;
+
//vertical filter
vector<Sequence*> temp;
temp.push_back(q); temp.push_back(pA); temp.push_back(pB);
- verticalFilter(temp);
+ map<int, int> spots = verticalFilter(temp);
//get these to avoid numerous function calls
string query = q->getAligned();
string parentA = pA->getAligned();
string parentB = pB->getAligned();
int length = query.length();
-cout << q->getName() << '\t' << length << endl;
//check window size
if (length < (2*windowSize+windowStep)) {
member.rab = RAB;
member.qra = QRA;
member.qlb = QLB;
- member.winLStart = 0;
- member.winLEnd = breakpoint;
- member.winRStart = breakpoint+1;
- member.winREnd = length-1;
+ member.winLStart = spots[0];
+ member.winLEnd = spots[breakpoint]; //so breakpoint reflects spot in alignment before filter
+ member.winRStart = spots[breakpoint+1];
+ member.winREnd = spots[length-1];
member.querySeq = *(q);
member.parentA = *(pA);
member.parentB = *(pB);
int numLeft = max(1, int(left.size()/10 +0.5));
int numRight = max(1, int(right.size()/10 + 0.5));
- for (int i = 0; i < 100; i++) {
+ for (int i = 0; i < iters; i++) {
//random sampling with replacement.
vector<snps> selectedLeft;
+
for (int j = 0; j < numLeft; j++) {
int index = int(rand() % left.size());
selectedLeft.push_back(left[index]);
}
-
+
vector<snps> selectedRight;
for (int j = 0; j < numRight; j++) {
int index = int(rand() % right.size());
float QLB = snpQB(selectedLeft);
float QRB = snpQB(selectedRight);
-
+
//in original - not used - not sure why?
//float ALB = snpAB(selectedLeft);
//float ARB = snpAB(selectedRight);
}
- BSA = (float) count_A;
- BSB = (float) count_B;
+ BSA = ((float) count_A / (float) iters) * 100;
+ BSB = ((float) count_B / (float) iters) * 100;
}
catch(exception& e) {
}
}
- float percentID = (numIdentical / data.size()) * 100;
+ float percentID = (numIdentical / (float) data.size()) * 100;
return percentID;
}
}
}
- float percentID = (numIdentical / data.size()) * 100;
+ float percentID = (numIdentical / (float) data.size()) * 100;
return percentID;
}
}
- float percentID = (numIdentical / data.size()) * 100;
+ float percentID = (numIdentical / (float) data.size()) * 100;
return percentID;
}
/***********************************************************************/
//remove columns that contain any gaps
-void Slayer::verticalFilter(vector<Sequence*> seqs) {
+map<int, int> Slayer::verticalFilter(vector<Sequence*> seqs) {
try {
vector<int> gaps; gaps.resize(seqs[0]->getAligned().length(), 0);
//zero out spot where all sequences have blanks
int numColRemoved = 0;
+ int count = 0;
+ map<int, int> maskMap; maskMap.clear();
for(int i = 0; i < seqs[0]->getAligned().length(); i++){
if(gaps[i] != 0) { filterString[i] = '0'; numColRemoved++; }
+ else {
+ maskMap[count] = i;
+ count++;
+ }
}
-
+
//for each sequence
for (int i = 0; i < seqs.size(); i++) {
seqs[i]->setAligned(newAligned);
}
+
+ return maskMap;
}
catch(exception& e) {
errorOut(e, "Slayer", "verticalFilter");
public:
- Slayer(int, int, int, float);
+ Slayer(int, int, int, float, int);
~Slayer() {};
string getResults(Sequence*, vector<Sequence*>);
private:
- int windowSize, windowStep, parentFragmentThreshold;
+ int windowSize, windowStep, parentFragmentThreshold, iters;
float divRThreshold;
vector<data_struct> outputResults;
- void verticalFilter(vector<Sequence*>);
+ map<int, int> verticalFilter(vector<Sequence*>);
float computePercentID(string, string, int, int);
vector<data_struct> runBellerophon(Sequence*, Sequence*, Sequence*);