templateDB = new KmerDB(templateFileName, kmerSize);
}
- if(align == "gotoh") { alignment = new GotohOverlap(gapOpen, gapExtend, match, misMatch, 3000); }
- else if(align == "needleman") { alignment = new NeedlemanOverlap(gapOpen, match, misMatch, 3000); }
+ int longestBase = templateDB->getLongestBase();
+
+ if(align == "gotoh") { alignment = new GotohOverlap(gapOpen, gapExtend, match, misMatch, longestBase); }
+ else if(align == "needleman") { alignment = new NeedlemanOverlap(gapOpen, match, misMatch, longestBase); }
else if(align == "blast") { alignment = new BlastAlignment(gapOpen, gapExtend, match, misMatch); }
else if(align == "noalign") { alignment = new NoAlign(); }
else {
mothurOut(align + " is not a valid alignment option. I will run the command using needleman.");
mothurOutEndLine();
- alignment = new NeedlemanOverlap(gapOpen, match, misMatch, 3000);
+ alignment = new NeedlemanOverlap(gapOpen, match, misMatch, longestBase);
}
string alignFileName = candidateFileName.substr(0,candidateFileName.find_last_of(".")+1) + "align";
inFASTA.close();
lines.push_back(new linePair(0, numFastaSeqs));
-
+
driver(lines[0], alignFileName, reportFileName);
}
ifstream inFASTA;
openInputFile(candidateFileName, inFASTA);
+
inFASTA.seekg(line->start);
-
+
for(int i=0;i<line->numSeqs;i++){
Sequence* candidateSeq = new Sequence(inFASTA);
report.setCandidate(candidateSeq);
-
+
Sequence temp = templateDB->findClosestSequence(candidateSeq);
Sequence* templateSeq = &temp;
report.setSearchParameters(search, templateDB->getSearchScore());
Nast nast(alignment, candidateSeq, templateSeq);
+
report.setAlignmentParameters(align, alignment);
+
report.setNastParameters(nast);
alignmentFile << '>' << candidateSeq->getName() << '\n' << candidateSeq->getAligned() << endl;
/**************************************************************************************************/
Alignment::Alignment(int A) : nCols(A), nRows(A) {
-
- alignment.resize(nRows); // For the Gotoh and Needleman-Wunsch we initialize the dynamic programming
- for(int i=0;i<nRows;i++){ // matrix by initializing a matrix that is A x A. By default we will set A
- alignment[i].resize(nCols); // at 2000 for 16S rRNA gene sequences
- }
-
+ try {
+ alignment.resize(nRows); // For the Gotoh and Needleman-Wunsch we initialize the dynamic programming
+ for(int i=0;i<nRows;i++){ // matrix by initializing a matrix that is A x A. By default we will set A
+ alignment[i].resize(nCols); // at 2000 for 16S rRNA gene sequences
+ }
+ }
+ catch(exception& e) {
+ errorOut(e, "Alignment", "Alignment");
+ exit(1);
+ }
}
/**************************************************************************************************/
void Alignment::traceBack(){ // This traceback routine is used by the dynamic programming algorithms
- // to fill the values of seqAaln and seqBaln
- seqAaln = "";
- seqBaln = "";
- int row = lB-1;
- int column = lA-1;
-// seqAstart = 1;
-// seqAend = column;
-
- AlignmentCell currentCell = alignment[row][column]; // Start the traceback from the bottom-right corner of the
- // matrix
-
- if(currentCell.prevCell == 'x'){ seqAaln = seqBaln = "NOALIGNMENT"; }//If there's an 'x' in the bottom-
- else{ // right corner bail out because it means nothing got aligned
- while(currentCell.prevCell != 'x'){ // while the previous cell isn't an 'x', keep going...
-
- if(currentCell.prevCell == 'u'){ // if the pointer to the previous cell is 'u', go up in the
- seqAaln = '-' + seqAaln; // matrix. this indicates that we need to insert a gap in
- seqBaln = seqB[row] + seqBaln; // seqA and a base in seqB
- currentCell = alignment[--row][column];
- }
- else if(currentCell.prevCell == 'l'){ // if the pointer to the previous cell is 'l', go to the left
- seqBaln = '-' + seqBaln; // in the matrix. this indicates that we need to insert a gap
- seqAaln = seqA[column] + seqAaln; // in seqB and a base in seqA
- currentCell = alignment[row][--column];
- }
- else{
- seqAaln = seqA[column] + seqAaln; // otherwise we need to go diagonally up and to the left,
- seqBaln = seqB[row] + seqBaln; // here we add a base to both alignments
- currentCell = alignment[--row][--column];
+ try {
+ // to fill the values of seqAaln and seqBaln
+ seqAaln = "";
+ seqBaln = "";
+ int row = lB-1;
+ int column = lA-1;
+ // seqAstart = 1;
+ // seqAend = column;
+
+ AlignmentCell currentCell = alignment[row][column]; // Start the traceback from the bottom-right corner of the
+ // matrix
+
+ if(currentCell.prevCell == 'x'){ seqAaln = seqBaln = "NOALIGNMENT"; }//If there's an 'x' in the bottom-
+ else{ // right corner bail out because it means nothing got aligned
+ while(currentCell.prevCell != 'x'){ // while the previous cell isn't an 'x', keep going...
+
+ if(currentCell.prevCell == 'u'){ // if the pointer to the previous cell is 'u', go up in the
+ seqAaln = '-' + seqAaln; // matrix. this indicates that we need to insert a gap in
+ seqBaln = seqB[row] + seqBaln; // seqA and a base in seqB
+ currentCell = alignment[--row][column];
+ }
+ else if(currentCell.prevCell == 'l'){ // if the pointer to the previous cell is 'l', go to the left
+ seqBaln = '-' + seqBaln; // in the matrix. this indicates that we need to insert a gap
+ seqAaln = seqA[column] + seqAaln; // in seqB and a base in seqA
+ currentCell = alignment[row][--column];
+ }
+ else{
+ seqAaln = seqA[column] + seqAaln; // otherwise we need to go diagonally up and to the left,
+ seqBaln = seqB[row] + seqBaln; // here we add a base to both alignments
+ currentCell = alignment[--row][--column];
+ }
}
}
+
+ pairwiseLength = seqAaln.length();
+ seqAstart = 1; seqAend = 0;
+ seqBstart = 1; seqBend = 0;
+
+ for(int i=0;i<seqAaln.length();i++){
+ if(seqAaln[i] != '-' && seqBaln[i] == '-') { seqAstart++; }
+ else if(seqAaln[i] == '-' && seqBaln[i] != '-') { seqBstart++; }
+ else { break; }
+ }
+
+ pairwiseLength -= (seqAstart + seqBstart - 2);
+
+ for(int i=seqAaln.length()-1; i>=0;i--){
+ if(seqAaln[i] != '-' && seqBaln[i] == '-') { seqAend++; }
+ else if(seqAaln[i] == '-' && seqBaln[i] != '-') { seqBend++; }
+ else { break; }
+ }
+ pairwiseLength -= (seqAend + seqBend);
+
+ seqAend = seqA.length() - seqAend - 1;
+ seqBend = seqB.length() - seqBend - 1;
}
-
- pairwiseLength = seqAaln.length();
- seqAstart = 1; seqAend = 0;
- seqBstart = 1; seqBend = 0;
-
- for(int i=0;i<seqAaln.length();i++){
- if(seqAaln[i] != '-' && seqBaln[i] == '-') { seqAstart++; }
- else if(seqAaln[i] == '-' && seqBaln[i] != '-') { seqBstart++; }
- else { break; }
- }
-
- pairwiseLength -= (seqAstart + seqBstart - 2);
-
- for(int i=seqAaln.length()-1; i>=0;i--){
- if(seqAaln[i] != '-' && seqBaln[i] == '-') { seqAend++; }
- else if(seqAaln[i] == '-' && seqBaln[i] != '-') { seqBend++; }
- else { break; }
+ catch(exception& e) {
+ errorOut(e, "Alignment", "traceBack");
+ exit(1);
}
- pairwiseLength -= (seqAend + seqBend);
-
- seqAend = seqA.length() - seqAend - 1;
- seqBend = seqB.length() - seqBend - 1;
-
}
/**************************************************************************************************/
virtual ~Alignment();
virtual void align(string, string) = 0;
+
// float getAlignmentScore();
string getSeqAAln();
string getSeqBAln();
int getCandidateEndPos();
int getTemplateStartPos();
int getTemplateEndPos();
-
+
int getPairwiseLength();
// int getLongestTemplateGap();
void getChimeras();
void print(ostream&);
+ void setCons(string){};
+
private:
Dist* distCalculator;
BlastAlignment(float, float, float, float);
~BlastAlignment();
void align(string, string);
+ void setMatrix(int){};
+
private:
string candidateFileName;
double singles = (double)rank->get(1);
double doubles = (double)rank->get(2);
double chaovar = 0.0000;
-
+//cout << "singles = " << singles << " doubles = " << doubles << " sobs = " << sobs << endl;
double chao = sobs + singles*(singles-1)/(2*(doubles+1));
if(singles > 0 && doubles > 0){
Chimera(){};
Chimera(string);
+ Chimera(string, string);
virtual ~Chimera(){};
virtual void setFilter(bool f) { filter = f; }
virtual void setCorrection(bool c) { correction = c; }
virtual void setProcessors(int p) { processors = p; }
virtual void setWindow(int w) { window = w; }
virtual void setIncrement(int i) { increment = i; }
- virtual void setTemplate(string t) { templateFile = t; }
+
+ virtual void setCons(string) {};
+
//pure functions
virtual void getChimeras() = 0;
bool filter, correction;
int processors, window, increment;
- string templateFile;
-
+
};
else {
//valid paramters for this command
- string Array[] = {"fasta", "filter", "correction", "processors", "method", "window", "increment", "template" };
+ string Array[] = {"fasta", "filter", "correction", "processors", "method", "window", "increment", "template", "conservation" };
vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
OptionParser parser(option);
if (templatefile == "not open") { abort = true; }
else if (templatefile == "not found") { templatefile = ""; }
+ consfile = validParameter.validFile(parameters, "conservation", true);
+ if (consfile == "not open") { abort = true; }
+ else if (consfile == "not found") { consfile = ""; }
+
string temp;
temp = validParameter.validFile(parameters, "filter", false); if (temp == "not found") { temp = "T"; }
temp = validParameter.validFile(parameters, "window", false); if (temp == "not found") { temp = "0"; }
convert(temp, window);
- temp = validParameter.validFile(parameters, "increment", false); if (temp == "not found") { temp = "10"; }
+ temp = validParameter.validFile(parameters, "increment", false); if (temp == "not found") { temp = "25"; }
convert(temp, increment);
method = validParameter.validFile(parameters, "method", false); if (method == "not found") { method = "pintail"; }
- if ((method == "pintail") && (templatefile == "")) { mothurOut("You must provide a template file with the pintail method."); mothurOutEndLine(); abort = true; }
+ if ((method == "pintail") && (templatefile == "") && (consfile == "")) { mothurOut("You must provide a template or conservation file with the pintail method."); mothurOutEndLine(); abort = true; }
}
if (abort == true) { return 0; }
- if (method == "bellerophon") { chimera = new Bellerophon(fastafile); }
- else if (method == "pintail") { chimera = new Pintail(fastafile); }
- else { mothurOut("Not a valid method."); mothurOutEndLine(); return 0; }
+ if (method == "bellerophon") { chimera = new Bellerophon(fastafile); }
+ else if (method == "pintail") { chimera = new Pintail(fastafile, templatefile);
+ if (consfile != "") { chimera->setCons(consfile); }
+ else { chimera->setCons(""); }
+ }else { mothurOut("Not a valid method."); mothurOutEndLine(); return 0; }
//set user options
chimera->setFilter(filter);
chimera->setProcessors(processors);
chimera->setWindow(window);
chimera->setIncrement(increment);
- chimera->setTemplate(templatefile);
-
+
//find chimeras
chimera->getChimeras();
exit(1);
}
}
-
/**************************************************************************************************/
private:
bool abort;
- string method, fastafile, templatefile;
+ string method, fastafile, templatefile, consfile;
bool filter, correction;
int processors, midpoint, averageLeft, averageRight, window, iters, increment;
Chimera* chimera;
Database::Database(string fastaFileName){ // This assumes that the template database is in fasta format, may
// need to alter this in the future?
-
+ longest = 0;
+
ifstream fastaFile;
openInputFile(fastaFileName, fastaFile);
}
}
templateSequences[i] = Sequence(seqName, aligned);
+
+ //necessary because Sequence constructor by default sets whatever it reads to unaligned
+ //the setUnaligned function remove gap char.
+ templateSequences[i].setUnaligned(templateSequences[i].getUnaligned());
+
+ if (templateSequences[i].getUnaligned().length() > longest) { longest = templateSequences[i].getUnaligned().length(); }
+
fastaFile.putback(letter);
}
float Database::getSearchScore() { return searchScore; } // we're assuming that the search is already done
+
/**************************************************************************************************/
+
+int Database::getLongestBase() { return longest+1; }
+
+/**************************************************************************************************/
+
virtual ~Database();
virtual Sequence findClosestSequence(Sequence*) = 0;
virtual float getSearchScore();
+ virtual int getLongestBase();
+
protected:
- int numSeqs;
+ int numSeqs, longest;
float searchScore;
vector<Sequence> templateSequences;
};
string ScriptEngine::getNextCommand(string& commandString) {
try {
string nextcommand = "";
+ int count = 0;
- nextcommand = commandString.substr(0,commandString.find_first_of(';'));
-
+ //go through string until you reach ; or end
+ while (count < commandString.length()) {
+
+ if (commandString[count] == ';') { break; }
+ else { nextcommand += commandString[count]; }
+
+ count++;
+ }
+
+ //if you are not at the end
+ if (count != commandString.length()) { commandString = commandString.substr(count+1, commandString.length()); }
+ else { commandString = ""; }
- if ((commandString.find_first_of(';')+1) <= commandString.length()) {
- commandString = commandString.substr(commandString.find_first_of(';')+1, commandString.length());
- }else { commandString = ""; } //you have reached the last command.
//get rid of spaces in between commands if any
if (commandString.length() > 0) {
GotohOverlap::GotohOverlap(float gO, float gE, float m, float mm, int r) :
gapOpen(gO), gapExtend(gE), match(m), mismatch(mm), Alignment(r) {
- for(int i=1;i<nCols;i++){ // we initialize the dynamic programming matrix by setting the pointers in
- alignment[0][i].prevCell = 'l'; // the first row to the left
- alignment[0][i].cValue = 0;
- alignment[0][i].dValue = 0;
+ try {
+ for(int i=1;i<nCols;i++){ // we initialize the dynamic programming matrix by setting the pointers in
+ alignment[0][i].prevCell = 'l'; // the first row to the left
+ alignment[0][i].cValue = 0;
+ alignment[0][i].dValue = 0;
+ }
+
+ for(int i=1;i<nRows;i++){ // we initialize the dynamic programming matrix by setting the pointers in
+ alignment[i][0].prevCell = 'u'; // the first column upward
+ alignment[i][0].cValue = 0;
+ alignment[i][0].iValue = 0;
+ }
+
}
-
- for(int i=1;i<nRows;i++){ // we initialize the dynamic programming matrix by setting the pointers in
- alignment[i][0].prevCell = 'u'; // the first column upward
- alignment[i][0].cValue = 0;
- alignment[i][0].iValue = 0;
+ catch(exception& e) {
+ errorOut(e, "GotohOverlap", "GotohOverlap");
+ exit(1);
}
}
/**************************************************************************************************/
void GotohOverlap::align(string A, string B){
-
- seqA = ' ' + A; lA = seqA.length(); // the algorithm requires that the first character be a dummy value
- seqB = ' ' + B; lB = seqB.length(); // the algorithm requires that the first character be a dummy value
-
- for(int i=1;i<lB;i++){ // the recursion here is shown in Webb and Miller, Fig. 1A. Note that
- for(int j=1;j<lA;j++){ // if we need to conserve on space we should see Fig. 1B, which is linear
- // in space, which I think is unnecessary
- float diagonal;
- if(seqB[i] == seqA[j]) { diagonal = alignment[i-1][j-1].cValue + match; }
- else { diagonal = alignment[i-1][j-1].cValue + mismatch; }
-
- alignment[i][j].iValue = max(alignment[i][j-1].iValue, alignment[i][j-1].cValue + gapOpen) + gapExtend;
- alignment[i][j].dValue = max(alignment[i-1][j].dValue, alignment[i-1][j].cValue + gapOpen) + gapExtend;
-
- if(alignment[i][j].iValue > alignment[i][j].dValue){
- if(alignment[i][j].iValue > diagonal){
- alignment[i][j].cValue = alignment[i][j].iValue;
- alignment[i][j].prevCell = 'l';
+ try {
+ seqA = ' ' + A; lA = seqA.length(); // the algorithm requires that the first character be a dummy value
+ seqB = ' ' + B; lB = seqB.length(); // the algorithm requires that the first character be a dummy value
+
+ for(int i=1;i<lB;i++){ // the recursion here is shown in Webb and Miller, Fig. 1A. Note that
+ for(int j=1;j<lA;j++){ // if we need to conserve on space we should see Fig. 1B, which is linear
+ // in space, which I think is unnecessary
+ float diagonal;
+ if(seqB[i] == seqA[j]) { diagonal = alignment[i-1][j-1].cValue + match; }
+ else { diagonal = alignment[i-1][j-1].cValue + mismatch; }
+
+ alignment[i][j].iValue = max(alignment[i][j-1].iValue, alignment[i][j-1].cValue + gapOpen) + gapExtend;
+ alignment[i][j].dValue = max(alignment[i-1][j].dValue, alignment[i-1][j].cValue + gapOpen) + gapExtend;
+
+ if(alignment[i][j].iValue > alignment[i][j].dValue){
+ if(alignment[i][j].iValue > diagonal){
+ alignment[i][j].cValue = alignment[i][j].iValue;
+ alignment[i][j].prevCell = 'l';
+ }
+ else{
+ alignment[i][j].cValue = diagonal;
+ alignment[i][j].prevCell = 'd';
+ }
}
else{
- alignment[i][j].cValue = diagonal;
- alignment[i][j].prevCell = 'd';
+ if(alignment[i][j].dValue > diagonal){
+ alignment[i][j].cValue = alignment[i][j].dValue;
+ alignment[i][j].prevCell = 'u';
+ }
+ else{
+ alignment[i][j].cValue = diagonal;
+ alignment[i][j].prevCell = 'd';
+ }
}
+
}
- else{
- if(alignment[i][j].dValue > diagonal){
- alignment[i][j].cValue = alignment[i][j].dValue;
- alignment[i][j].prevCell = 'u';
- }
- else{
- alignment[i][j].cValue = diagonal;
- alignment[i][j].prevCell = 'd';
- }
- }
-
}
+ Overlap over;
+ over.setOverlap(alignment, lA, lB, 0); // Fix the gaps at the ends of the sequences
+ traceBack(); // Construct the alignment and set seqAaln and seqBaln
+
+ }
+ catch(exception& e) {
+ errorOut(e, "GotohOverlap", "align");
+ exit(1);
}
- Overlap over;
- over.setOverlap(alignment, lA, lB, 0); // Fix the gaps at the ends of the sequences
- traceBack(); // Construct the alignment and set seqAaln and seqBaln
}
/**************************************************************************************************/
public:
GotohOverlap(float, float, float, float, int);
void align(string, string);
+
~GotohOverlap() {}
private:
/**************************************************************************************************/
KmerDB::KmerDB(string fastaFileName, int kSize) : Database(fastaFileName), kmerSize(kSize) {
-
- string kmerDBName = fastaFileName.substr(0,fastaFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
- ifstream kmerFileTest(kmerDBName.c_str());
-
- int power4s[14] = { 1, 4, 16, 64, 256, 1024, 4096, 16384, 65536, 262144, 1048576, 4194304, 16777216, 67108864 };
-
- maxKmer = power4s[kmerSize];
- kmerLocations.resize(maxKmer+1);
+ try {
- if(!kmerFileTest){ // if we can open the kmer db file, then read it in...
- mothurOut("Generating the " + kmerDBName + " database...\t"); cout.flush();
- generateKmerDB(kmerDBName);
- }
- else{ // ...otherwise generate it.
- mothurOut("Reading in the " + kmerDBName + " database...\t"); cout.flush();
- readKmerDB(kmerDBName, kmerFileTest);
+ string kmerDBName = fastaFileName.substr(0,fastaFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
+ ifstream kmerFileTest(kmerDBName.c_str());
+
+ int power4s[14] = { 1, 4, 16, 64, 256, 1024, 4096, 16384, 65536, 262144, 1048576, 4194304, 16777216, 67108864 };
+
+ maxKmer = power4s[kmerSize];
+ kmerLocations.resize(maxKmer+1);
+
+ if(!kmerFileTest){ // if we can open the kmer db file, then read it in...
+ mothurOut("Generating the " + kmerDBName + " database...\t"); cout.flush();
+ generateKmerDB(kmerDBName);
+ }
+ else{ // ...otherwise generate it.
+ mothurOut("Reading in the " + kmerDBName + " database...\t"); cout.flush();
+ readKmerDB(kmerDBName, kmerFileTest);
+ }
+ mothurOut("DONE."); mothurOutEndLine(); mothurOutEndLine(); cout.flush();
+
}
- mothurOut("DONE."); mothurOutEndLine(); mothurOutEndLine(); cout.flush();
+ catch(exception& e) {
+ errorOut(e, "KmerDB", "KmerDB");
+ exit(1);
+ }
}
/**************************************************************************************************/
/**************************************************************************************************/
Sequence KmerDB::findClosestSequence(Sequence* candidateSeq){
+ try {
- Kmer kmer(kmerSize);
-
- searchScore = 0;
- int maxSequence = 0;
-
- vector<int> matches(numSeqs, 0); // a record of the sequences with shared kmers
- vector<int> timesKmerFound(kmerLocations.size()+1, 0); // a record of the kmers that we have already found
-
- int numKmers = candidateSeq->getNumBases() - kmerSize + 1;
-
- for(int i=0;i<numKmers;i++){
- int kmerNumber = kmer.getKmerNumber(candidateSeq->getUnaligned(), i); // go through the query sequence and get a kmer number
- if(timesKmerFound[kmerNumber] == 0){ // if we haven't seen it before...
- for(int j=0;j<kmerLocations[kmerNumber].size();j++){//increase the count for each sequence that also has
- matches[kmerLocations[kmerNumber][j]]++; // that kmer
+ Kmer kmer(kmerSize);
+
+ searchScore = 0;
+ int maxSequence = 0;
+
+ vector<int> matches(numSeqs, 0); // a record of the sequences with shared kmers
+ vector<int> timesKmerFound(kmerLocations.size()+1, 0); // a record of the kmers that we have already found
+
+ int numKmers = candidateSeq->getNumBases() - kmerSize + 1;
+
+ for(int i=0;i<numKmers;i++){
+ int kmerNumber = kmer.getKmerNumber(candidateSeq->getUnaligned(), i); // go through the query sequence and get a kmer number
+ if(timesKmerFound[kmerNumber] == 0){ // if we haven't seen it before...
+ for(int j=0;j<kmerLocations[kmerNumber].size();j++){//increase the count for each sequence that also has
+ matches[kmerLocations[kmerNumber][j]]++; // that kmer
+ }
}
+ timesKmerFound[kmerNumber] = 1; // ok, we've seen the kmer now
}
- timesKmerFound[kmerNumber] = 1; // ok, we've seen the kmer now
- }
-
- for(int i=0;i<numSeqs;i++){ // find the db sequence that shares the most kmers with
- if(matches[i] > searchScore){ // the query sequence
- searchScore = matches[i];
- maxSequence = i;
+
+ for(int i=0;i<numSeqs;i++){ // find the db sequence that shares the most kmers with
+ if(matches[i] > searchScore){ // the query sequence
+ searchScore = matches[i];
+ maxSequence = i;
+ }
}
+
+ searchScore = 100 * searchScore / (float) numKmers; // return the Sequence object corresponding to the db
+
+ return templateSequences[maxSequence]; // sequence with the most shared kmers.
+
}
-
- searchScore = 100 * searchScore / (float) numKmers; // return the Sequence object corresponding to the db
- return templateSequences[maxSequence]; // sequence with the most shared kmers.
+ catch(exception& e) {
+ errorOut(e, "KmerDB", "findClosestSequence");
+ exit(1);
+ }
}
/**************************************************************************************************/
void KmerDB::generateKmerDB(string kmerDBName){
+ try {
- Kmer kmer(kmerSize);
-
- for(int i=0;i<numSeqs;i++){ // for all of the template sequences...
-
- string seq = templateSequences[i].getUnaligned(); // ...take the unaligned sequence...
- int numKmers = seq.length() - kmerSize + 1;
-
- vector<int> seenBefore(maxKmer+1,0);
- for(int j=0;j<numKmers;j++){ // ...step though the sequence and get each kmer...
- int kmerNumber = kmer.getKmerNumber(seq, j);
- if(seenBefore[kmerNumber] == 0){
- kmerLocations[kmerNumber].push_back(i); // ...insert the sequence index into kmerLocations for
- } // the appropriate kmer number
- seenBefore[kmerNumber] = 1;
- }
- }
-
- ofstream kmerFile; // once we have the kmerLocations folder print it out
- openOutputFile(kmerDBName, kmerFile); // to a file
-
- for(int i=0;i<maxKmer;i++){ // step through all of the possible kmer numbers
- kmerFile << i << ' ' << kmerLocations[i].size(); // print the kmer number and the number of sequences with
- for(int j=0;j<kmerLocations[i].size();j++){ // that kmer. then print out the indices of the sequences
- kmerFile << ' ' << kmerLocations[i][j]; // with that kmer.
+ Kmer kmer(kmerSize);
+
+ for(int i=0;i<numSeqs;i++){ // for all of the template sequences...
+
+ string seq = templateSequences[i].getUnaligned(); // ...take the unaligned sequence...
+ int numKmers = seq.length() - kmerSize + 1;
+
+ vector<int> seenBefore(maxKmer+1,0);
+ for(int j=0;j<numKmers;j++){ // ...step though the sequence and get each kmer...
+ int kmerNumber = kmer.getKmerNumber(seq, j);
+ if(seenBefore[kmerNumber] == 0){
+ kmerLocations[kmerNumber].push_back(i); // ...insert the sequence index into kmerLocations for
+ } // the appropriate kmer number
+ seenBefore[kmerNumber] = 1;
+ }
}
- kmerFile << endl;
+
+ ofstream kmerFile; // once we have the kmerLocations folder print it out
+ openOutputFile(kmerDBName, kmerFile); // to a file
+
+ for(int i=0;i<maxKmer;i++){ // step through all of the possible kmer numbers
+ kmerFile << i << ' ' << kmerLocations[i].size(); // print the kmer number and the number of sequences with
+ for(int j=0;j<kmerLocations[i].size();j++){ // that kmer. then print out the indices of the sequences
+ kmerFile << ' ' << kmerLocations[i][j]; // with that kmer.
+ }
+ kmerFile << endl;
+ }
+ kmerFile.close();
+
}
- kmerFile.close();
+ catch(exception& e) {
+ errorOut(e, "KmerDB", "generateKmerDB");
+ exit(1);
+ }
}
/**************************************************************************************************/
void KmerDB::readKmerDB(string kmerDBName, ifstream& kmerDBFile){
-
- kmerDBFile.seekg(0); // start at the beginning of the file
-
- string seqName;
- int seqNumber;
+ try {
- for(int i=0;i<maxKmer;i++){
- int numValues;
- kmerDBFile >> seqName >> numValues;
+ kmerDBFile.seekg(0); // start at the beginning of the file
+
+ string seqName;
+ int seqNumber;
- for(int j=0;j<numValues;j++){ // for each kmer number get the...
- kmerDBFile >> seqNumber; // 1. number of sequences with the kmer number
- kmerLocations[i].push_back(seqNumber); // 2. sequence indices
+ for(int i=0;i<maxKmer;i++){
+ int numValues;
+ kmerDBFile >> seqName >> numValues;
+
+ for(int j=0;j<numValues;j++){ // for each kmer number get the...
+ kmerDBFile >> seqNumber; // 1. number of sequences with the kmer number
+ kmerLocations[i].push_back(seqNumber); // 2. sequence indices
+ }
}
+ kmerDBFile.close();
+
}
- kmerDBFile.close();
+ catch(exception& e) {
+ errorOut(e, "KmerDB", "readKmerDB");
+ exit(1);
+ }
}
/**************************************************************************************************/
/**************************************************************************************************/
Nast::Nast(Alignment* method, Sequence* cand, Sequence* temp) : alignment(method), candidateSeq(cand), templateSeq(temp) {
- maxInsertLength = 0;
- pairwiseAlignSeqs(); // This is part A in Fig. 2 of DeSantis et al.
- regapSequences(); // This is parts B-F in Fig. 2 of DeSantis et al.
+ try {
+ maxInsertLength = 0;
+ pairwiseAlignSeqs(); // This is part A in Fig. 2 of DeSantis et al.
+ regapSequences(); // This is parts B-F in Fig. 2 of DeSantis et al.
+
+ }
+ catch(exception& e) {
+ errorOut(e, "Nast", "Nast");
+ exit(1);
+ }
+
}
/**************************************************************************************************/
void Nast::pairwiseAlignSeqs(){ // Here we call one of the pairwise alignment methods to align our unaligned candidate
// and template sequences
- alignment->align(candidateSeq->getUnaligned(), templateSeq->getUnaligned());
+ try {
+
+ alignment->align(candidateSeq->getUnaligned(), templateSeq->getUnaligned());
- string candAln = alignment->getSeqAAln();
- string tempAln = alignment->getSeqBAln();
+ string candAln = alignment->getSeqAAln();
+ string tempAln = alignment->getSeqBAln();
- if(candAln == ""){
- candidateSeq->setPairwise("");
- templateSeq->setPairwise(templateSeq->getUnaligned());
- }
- else{
- if(tempAln[0] == '-'){
- int pairwiseAlignmentLength = tempAln.length(); // we need to make sure that the candidate sequence alignment
- for(int i=0;i<pairwiseAlignmentLength;i++){ // starts where the template sequence alignment starts, if it
- if(isalpha(tempAln[i])){ // starts early, we nuke the beginning of the candidate
- candAln = candAln.substr(i); // sequence
- tempAln = tempAln.substr(i);
- break;
+ if(candAln == ""){
+
+ candidateSeq->setPairwise("");
+ templateSeq->setPairwise(templateSeq->getUnaligned());
+
+ }
+ else{
+
+ if(tempAln[0] == '-'){
+ int pairwiseAlignmentLength = tempAln.length(); // we need to make sure that the candidate sequence alignment
+ for(int i=0;i<pairwiseAlignmentLength;i++){ // starts where the template sequence alignment starts, if it
+ if(isalpha(tempAln[i])){ // starts early, we nuke the beginning of the candidate
+ candAln = candAln.substr(i); // sequence
+ tempAln = tempAln.substr(i);
+ break;
+ }
}
}
- }
-
- int pairwiseAlignmentLength = tempAln.length();
- if(tempAln[pairwiseAlignmentLength-1] == '-'){ // we need to make sure that the candidate sequence alignment
- for(int i=pairwiseAlignmentLength-1; i>=0; i--){// ends where the template sequence alignment ends, if it runs
- if(isalpha(tempAln[i])){ // long, we nuke the end of the candidate sequence
- candAln = candAln.substr(0,i+1);
- tempAln = tempAln.substr(0,i+1);
- break;
- }
+
+ int pairwiseAlignmentLength = tempAln.length();
+ if(tempAln[pairwiseAlignmentLength-1] == '-'){ // we need to make sure that the candidate sequence alignment
+ for(int i=pairwiseAlignmentLength-1; i>=0; i--){// ends where the template sequence alignment ends, if it runs
+ if(isalpha(tempAln[i])){ // long, we nuke the end of the candidate sequence
+ candAln = candAln.substr(0,i+1);
+ tempAln = tempAln.substr(0,i+1);
+ break;
+ }
+ }
}
+
}
+
+ candidateSeq->setPairwise(candAln); // set the pairwise sequences in the Sequence objects for
+ templateSeq->setPairwise(tempAln); // the candidate and template sequences
+
}
- candidateSeq->setPairwise(candAln); // set the pairwise sequences in the Sequence objects for
- templateSeq->setPairwise(tempAln); // the candidate and template sequences
-
+ catch(exception& e) {
+ errorOut(e, "Nast", "pairwiseAlignSeqs");
+ exit(1);
+ }
}
/**************************************************************************************************/
void Nast::removeExtraGaps(string& candAln, string tempAln, string newTemplateAlign){
// here we do steps C-F of Fig. 2 from DeSantis et al.
+ try {
- int longAlignmentLength = newTemplateAlign.length();
-
- for(int i=0; i<longAlignmentLength; i++){ // use the long alignment as the standard
- int rightIndex, rightRoom, leftIndex, leftRoom;
-
- // Part C of Fig. 2 from DeSantis et al.
- if((isalpha(newTemplateAlign[i]) != isalpha(tempAln[i]))){ //if there is a discrepancy between the regapped
-
-
-
- // Part D of Fig. 2 from DeSantis et al. // template sequence and the official template sequence
- for(leftIndex=i-1;leftIndex>=0;leftIndex--){ // then we've got problems...
- if(!isalpha(candAln[leftIndex])){
- leftRoom = 1; //count how far it is to the nearest gap on the LEFT side of the anomaly
- while(leftIndex-leftRoom>=0 && !isalpha(candAln[leftIndex-leftRoom])) { leftRoom++; }
- break;
- }
- }
-
-
- for(rightIndex=i+1;rightIndex<longAlignmentLength;rightIndex++){
- if(!isalpha(candAln[rightIndex])){
- rightRoom = 1; //count how far it is to the nearest gap on the RIGHT side of the anomaly
- while(rightIndex+rightRoom<longAlignmentLength && !isalpha(candAln[rightIndex+rightRoom])) { rightRoom++; }
- break;
- }
- }
-
- int insertLength = 0; // figure out how long the anomaly is
- while(!isalpha(newTemplateAlign[i + insertLength])) { insertLength++; }
- if(insertLength > maxInsertLength){ maxInsertLength = insertLength; }
+ int longAlignmentLength = newTemplateAlign.length();
+
+ for(int i=0; i<longAlignmentLength; i++){ // use the long alignment as the standard
+ int rightIndex, rightRoom, leftIndex, leftRoom;
- if((leftRoom + rightRoom) >= insertLength){
- // Parts D & E from Fig. 2 of DeSantis et al.
- if((i-leftIndex) <= (rightIndex-i)){ // the left gap is closer - > move stuff left there's
- if(leftRoom >= insertLength){ // enough room to the left to move
- string leftTemplateString = newTemplateAlign.substr(0,i);
- string rightTemplateString = newTemplateAlign.substr(i+insertLength);
- newTemplateAlign = leftTemplateString + rightTemplateString;
- longAlignmentLength = newTemplateAlign.length();
-
- string leftCandidateString = candAln.substr(0,leftIndex-insertLength+1);
- string rightCandidateString = candAln.substr(leftIndex+1);
- candAln = leftCandidateString + rightCandidateString;
+ // Part C of Fig. 2 from DeSantis et al.
+ if((isalpha(newTemplateAlign[i]) != isalpha(tempAln[i]))){ //if there is a discrepancy between the regapped
+
+ rightRoom = 0; leftRoom = 0;
+
+ // Part D of Fig. 2 from DeSantis et al. // template sequence and the official template sequence
+ for(leftIndex=i-1;leftIndex>0;leftIndex--){ // then we've got problems...
+ if(!isalpha(candAln[leftIndex])){
+ leftRoom = 1; //count how far it is to the nearest gap on the LEFT side of the anomaly
+ while(leftIndex-leftRoom>=0 && !isalpha(candAln[leftIndex-leftRoom])) { leftRoom++; }
+ break;
}
- else{ // not enough room to the left, have to steal some space to
- string leftTemplateString = newTemplateAlign.substr(0,i); // the right
- string rightTemplateString = newTemplateAlign.substr(i+insertLength);
- newTemplateAlign = leftTemplateString + rightTemplateString;
- longAlignmentLength = newTemplateAlign.length();
-
- string leftCandidateString = candAln.substr(0,leftIndex-leftRoom+1);
- string insertString = candAln.substr(leftIndex+1,rightIndex-leftIndex-1);
- string rightCandidateString = candAln.substr(rightIndex+(insertLength-leftRoom));
- candAln = leftCandidateString + insertString + rightCandidateString;
+ }
+
+
+ for(rightIndex=i+1;rightIndex<longAlignmentLength-1;rightIndex++){
+ if(!isalpha(candAln[rightIndex])){
+ rightRoom = 1; //count how far it is to the nearest gap on the RIGHT side of the anomaly
+ while(rightIndex+rightRoom<longAlignmentLength && !isalpha(candAln[rightIndex+rightRoom])) { rightRoom++; }
+ break;
}
}
- else{ // the right gap is closer - > move stuff right there's
- if(rightRoom >= insertLength){ // enough room to the right to move
- string leftTemplateString = newTemplateAlign.substr(0,i);
- string rightTemplateString = newTemplateAlign.substr(i+insertLength);
- newTemplateAlign = leftTemplateString + rightTemplateString;
- longAlignmentLength = newTemplateAlign.length();
-
- string leftCandidateString = candAln.substr(0,rightIndex);
- string rightCandidateString = candAln.substr(rightIndex+insertLength);
- candAln = leftCandidateString + rightCandidateString;
-
+
+ int insertLength = 0; // figure out how long the anomaly is
+ while(!isalpha(newTemplateAlign[i + insertLength])) { insertLength++; }
+ if(insertLength > maxInsertLength){ maxInsertLength = insertLength; }
+
+ if((leftRoom + rightRoom) >= insertLength){
+
+ // Parts D & E from Fig. 2 of DeSantis et al.
+ if((i-leftIndex) <= (rightIndex-i)){ // the left gap is closer - > move stuff left there's
+
+ if(leftRoom >= insertLength){ // enough room to the left to move
+
+ string leftTemplateString = newTemplateAlign.substr(0,i);
+ string rightTemplateString = newTemplateAlign.substr(i+insertLength);
+ newTemplateAlign = leftTemplateString + rightTemplateString;
+ longAlignmentLength = newTemplateAlign.length();
+
+ string leftCandidateString = candAln.substr(0,leftIndex-insertLength+1);
+ string rightCandidateString = candAln.substr(leftIndex+1);
+ candAln = leftCandidateString + rightCandidateString;
+
+ }
+ else{ // not enough room to the left, have to steal some space to
+ string leftTemplateString = newTemplateAlign.substr(0,i); // the right
+ string rightTemplateString = newTemplateAlign.substr(i+insertLength);
+ newTemplateAlign = leftTemplateString + rightTemplateString;
+ longAlignmentLength = newTemplateAlign.length();
+
+ string leftCandidateString = candAln.substr(0,leftIndex-leftRoom+1);
+ string insertString = candAln.substr(leftIndex+1,rightIndex-leftIndex-1);
+ string rightCandidateString = candAln.substr(rightIndex+(insertLength-leftRoom));
+ candAln = leftCandidateString + insertString + rightCandidateString;
+
+ }
}
- else{ // not enough room to the right, have to steal some
- // space to the left lets move left and then right...
- string leftTemplateString = newTemplateAlign.substr(0,i);
- string rightTemplateString = newTemplateAlign.substr(i+insertLength);
- newTemplateAlign = leftTemplateString + rightTemplateString;
- longAlignmentLength = newTemplateAlign.length();
-
- string leftCandidateString = candAln.substr(0,leftIndex-(insertLength-rightRoom)+1);
- string insertString = candAln.substr(leftIndex+1,rightIndex-leftIndex-1);
- string rightCandidateString = candAln.substr(rightIndex+rightRoom);
- candAln = leftCandidateString + insertString + rightCandidateString;
-
+ else{ // the right gap is closer - > move stuff right there's
+ if(rightRoom >= insertLength){ // enough room to the right to move
+
+ string leftTemplateString = newTemplateAlign.substr(0,i);
+ string rightTemplateString = newTemplateAlign.substr(i+insertLength);
+ newTemplateAlign = leftTemplateString + rightTemplateString;
+ longAlignmentLength = newTemplateAlign.length();
+
+ string leftCandidateString = candAln.substr(0,rightIndex);
+ string rightCandidateString = candAln.substr(rightIndex+insertLength);
+ candAln = leftCandidateString + rightCandidateString;
+
+ }
+ else{ // not enough room to the right, have to steal some
+ // space to the left lets move left and then right...
+ string leftTemplateString = newTemplateAlign.substr(0,i);
+ string rightTemplateString = newTemplateAlign.substr(i+insertLength);
+ newTemplateAlign = leftTemplateString + rightTemplateString;
+ longAlignmentLength = newTemplateAlign.length();
+
+ string leftCandidateString = candAln.substr(0,leftIndex-(insertLength-rightRoom)+1);
+ string insertString = candAln.substr(leftIndex+1,rightIndex-leftIndex-1);
+ string rightCandidateString = candAln.substr(rightIndex+rightRoom);
+ candAln = leftCandidateString + insertString + rightCandidateString;
+
+ }
}
}
- }
- else{ // there could be a case where there isn't enough room in
- string leftTemplateString = newTemplateAlign.substr(0,i); // either direction to move stuff
- string rightTemplateString = newTemplateAlign.substr(i+leftRoom+rightRoom);
- newTemplateAlign = leftTemplateString + rightTemplateString;
- longAlignmentLength = newTemplateAlign.length();
+ else{
+ // there could be a case where there isn't enough room in
+ string leftTemplateString = newTemplateAlign.substr(0,i); // either direction to move stuff
+ string rightTemplateString = newTemplateAlign.substr(i+leftRoom+rightRoom);
+ newTemplateAlign = leftTemplateString + rightTemplateString;
+ longAlignmentLength = newTemplateAlign.length();
+
+ string leftCandidateString = candAln.substr(0,leftIndex-leftRoom+1);
+ string insertString = candAln.substr(leftIndex+1,rightIndex-leftIndex-1);
+ string rightCandidateString = candAln.substr(rightIndex+rightRoom);
+ candAln = leftCandidateString + insertString + rightCandidateString;
+
+ }
- string leftCandidateString = candAln.substr(0,leftIndex-leftRoom+1);
- string insertString = candAln.substr(leftIndex+1,rightIndex-leftIndex-1);
- string rightCandidateString = candAln.substr(rightIndex+rightRoom);
- candAln = leftCandidateString + insertString + rightCandidateString;
- }
-
- i -= insertLength;
- }
+ i -= insertLength;
+ }
+ }
}
-
+ catch(exception& e) {
+ errorOut(e, "Nast", "removeExtraGaps");
+ exit(1);
+ }
+
}
/**************************************************************************************************/
void Nast::regapSequences(){ //This is essentially part B in Fig 2. of DeSantis et al.
+ try {
- string candPair = candidateSeq->getPairwise();
- string candAln = "";
-
- string tempPair = templateSeq->getPairwise();
- string tempAln = templateSeq->getAligned(); // we use the template aligned sequence as our guide
-
- int pairwiseLength = candPair.length();
- int fullAlignLength = tempAln.length();
-
- if(candPair == ""){
- for(int i=0;i<fullAlignLength;i++) { candAln += '.'; }
- candidateSeq->setAligned(candAln);
- return;
- }
-
- int fullAlignIndex = 0;
- int pairwiseAlignIndex = 0;
- string newTemplateAlign = ""; // this is going to be messy so we want a temporary template
- // alignment string
- while(tempAln[fullAlignIndex] == '.' || tempAln[fullAlignIndex] == '-'){
- candAln += '.'; // add the initial '-' and '.' to the candidate and template
- newTemplateAlign += tempAln[fullAlignIndex];// pairwise sequences
- fullAlignIndex++;
- }
-
- string lastLoop = "";
-
- while(pairwiseAlignIndex<pairwiseLength){
- if(isalpha(tempPair[pairwiseAlignIndex]) && isalpha(tempAln[fullAlignIndex])
- && isalpha(candPair[pairwiseAlignIndex])){
- // the template and candidate pairwise and template aligned have characters
- // need to add character onto the candidatSeq.aligned sequence
-
- candAln += candPair[pairwiseAlignIndex];
- newTemplateAlign += tempPair[pairwiseAlignIndex];//
-
- pairwiseAlignIndex++;
- fullAlignIndex++;
+ string candPair = candidateSeq->getPairwise();
+ string candAln = "";
+
+ string tempPair = templateSeq->getPairwise();
+ string tempAln = templateSeq->getAligned(); // we use the template aligned sequence as our guide
+
+ int pairwiseLength = candPair.length();
+ int fullAlignLength = tempAln.length();
+
+ if(candPair == ""){
+ for(int i=0;i<fullAlignLength;i++) { candAln += '.'; }
+ candidateSeq->setAligned(candAln);
+ return;
}
- else if(isalpha(tempPair[pairwiseAlignIndex]) && !isalpha(tempAln[fullAlignIndex])
- && isalpha(candPair[pairwiseAlignIndex])){
- // the template pairwise and candidate pairwise are characters and the template aligned is a gap
- // need to insert gaps into the candidateSeq.aligned sequence
-
- candAln += '-';
- newTemplateAlign += '-';//
+
+ int fullAlignIndex = 0;
+ int pairwiseAlignIndex = 0;
+ string newTemplateAlign = ""; // this is going to be messy so we want a temporary template
+ // alignment string
+ while(tempAln[fullAlignIndex] == '.' || tempAln[fullAlignIndex] == '-'){
+ candAln += '.'; // add the initial '-' and '.' to the candidate and template
+ newTemplateAlign += tempAln[fullAlignIndex];// pairwise sequences
fullAlignIndex++;
}
- else if(!isalpha(tempPair[pairwiseAlignIndex]) && isalpha(tempAln[fullAlignIndex])
- && isalpha(candPair[pairwiseAlignIndex])){
- // the template pairwise is a gap and the template aligned and pairwise sequences have characters
- // this is the alpha scenario. add character to the candidateSeq.aligned sequence without progressing
- // further through the tempAln sequence.
-
- candAln += candPair[pairwiseAlignIndex];
- newTemplateAlign += '-';//
- pairwiseAlignIndex++;
- }
- else if(isalpha(tempPair[pairwiseAlignIndex]) && isalpha(tempAln[fullAlignIndex])
- && !isalpha(candPair[pairwiseAlignIndex])){
- // the template pairwise and full alignment are characters and the candidate sequence has a gap
- // should not be a big deal, just add the gap position to the candidateSeq.aligned sequence;
-
- candAln += candPair[pairwiseAlignIndex];
- newTemplateAlign += tempAln[fullAlignIndex];//
- fullAlignIndex++;
- pairwiseAlignIndex++;
+
+ string lastLoop = "";
+
+ while(pairwiseAlignIndex<pairwiseLength){
+ if(isalpha(tempPair[pairwiseAlignIndex]) && isalpha(tempAln[fullAlignIndex])
+ && isalpha(candPair[pairwiseAlignIndex])){
+ // the template and candidate pairwise and template aligned have characters
+ // need to add character onto the candidatSeq.aligned sequence
+
+ candAln += candPair[pairwiseAlignIndex];
+ newTemplateAlign += tempPair[pairwiseAlignIndex];//
+
+ pairwiseAlignIndex++;
+ fullAlignIndex++;
+ }
+ else if(isalpha(tempPair[pairwiseAlignIndex]) && !isalpha(tempAln[fullAlignIndex])
+ && isalpha(candPair[pairwiseAlignIndex])){
+ // the template pairwise and candidate pairwise are characters and the template aligned is a gap
+ // need to insert gaps into the candidateSeq.aligned sequence
+
+ candAln += '-';
+ newTemplateAlign += '-';//
+ fullAlignIndex++;
+ }
+ else if(!isalpha(tempPair[pairwiseAlignIndex]) && isalpha(tempAln[fullAlignIndex])
+ && isalpha(candPair[pairwiseAlignIndex])){
+ // the template pairwise is a gap and the template aligned and pairwise sequences have characters
+ // this is the alpha scenario. add character to the candidateSeq.aligned sequence without progressing
+ // further through the tempAln sequence.
+
+ candAln += candPair[pairwiseAlignIndex];
+ newTemplateAlign += '-';//
+ pairwiseAlignIndex++;
+ }
+ else if(isalpha(tempPair[pairwiseAlignIndex]) && isalpha(tempAln[fullAlignIndex])
+ && !isalpha(candPair[pairwiseAlignIndex])){
+ // the template pairwise and full alignment are characters and the candidate sequence has a gap
+ // should not be a big deal, just add the gap position to the candidateSeq.aligned sequence;
+
+ candAln += candPair[pairwiseAlignIndex];
+ newTemplateAlign += tempAln[fullAlignIndex];//
+ fullAlignIndex++;
+ pairwiseAlignIndex++;
+ }
+ else if(!isalpha(tempPair[pairwiseAlignIndex]) && !isalpha(tempAln[fullAlignIndex])
+ && isalpha(candPair[pairwiseAlignIndex])){
+ // the template pairwise and aligned are gaps while the candidate pairwise has a character
+ // this would be an insertion, go ahead and add the character->seems to be the opposite of the alpha scenario
+
+ candAln += candPair[pairwiseAlignIndex];
+ newTemplateAlign += tempAln[fullAlignIndex];//
+ pairwiseAlignIndex++;
+ fullAlignIndex++;
+ }
+ else if(isalpha(tempPair[pairwiseAlignIndex]) && !isalpha(tempAln[fullAlignIndex])
+ && !isalpha(candPair[pairwiseAlignIndex])){
+ // template pairwise has a character, but its full aligned sequence and candidate sequence have gaps
+ // this would happen like we need to add a gap. basically the opposite of the alpha situation
+
+ newTemplateAlign += tempAln[fullAlignIndex];//
+ candAln += "-";
+ fullAlignIndex++;
+ }
+ else if(!isalpha(tempPair[pairwiseAlignIndex]) && isalpha(tempAln[fullAlignIndex])
+ && !isalpha(candPair[pairwiseAlignIndex])){
+ // template and candidate pairwise are gaps and the template aligned is not a gap this should not be possible
+ // would skip the gaps and not progress through full alignment sequence
+ // not tested yet
+
+ mothurOut("We're into D " + toString(fullAlignIndex) + " " + toString(pairwiseAlignIndex)); mothurOutEndLine();
+ pairwiseAlignIndex++;
+ }
+ else{
+ // everything has a gap - not possible
+ // not tested yet
+
+ mothurOut("We're into F " + toString(fullAlignIndex) + " " + toString(pairwiseAlignIndex)); mothurOutEndLine();
+ pairwiseAlignIndex++;
+ fullAlignIndex++;
+ }
}
- else if(!isalpha(tempPair[pairwiseAlignIndex]) && !isalpha(tempAln[fullAlignIndex])
- && isalpha(candPair[pairwiseAlignIndex])){
- // the template pairwise and aligned are gaps while the candidate pairwise has a character
- // this would be an insertion, go ahead and add the character->seems to be the opposite of the alpha scenario
-
- candAln += candPair[pairwiseAlignIndex];
- newTemplateAlign += tempAln[fullAlignIndex];//
- pairwiseAlignIndex++;
- fullAlignIndex++;
+
+ for(int i=fullAlignIndex;i<fullAlignLength;i++){
+ candAln += '.';
+ newTemplateAlign += tempAln[i];//
}
- else if(isalpha(tempPair[pairwiseAlignIndex]) && !isalpha(tempAln[fullAlignIndex])
- && !isalpha(candPair[pairwiseAlignIndex])){
- // template pairwise has a character, but its full aligned sequence and candidate sequence have gaps
- // this would happen like we need to add a gap. basically the opposite of the alpha situation
-
- newTemplateAlign += tempAln[fullAlignIndex];//
- candAln += "-";
- fullAlignIndex++;
+
+ int start, end;
+ for(int i=0;i<candAln.length();i++){
+ if(candAln[i] == 'Z' || !isalnum(candAln[i])) { candAln[i] = '.'; } // if we padded the alignemnt from
+ else{ start = i; break; } // blast with Z's, change them to
+ } // '.' characters
+
+ for(int i=candAln.length()-1;i>=0;i--){ // ditto.
+ if(candAln[i] == 'Z' || !isalnum(candAln[i])) { candAln[i] = '.'; }
+ else{ end = i; break; }
}
- else if(!isalpha(tempPair[pairwiseAlignIndex]) && isalpha(tempAln[fullAlignIndex])
- && !isalpha(candPair[pairwiseAlignIndex])){
- // template and candidate pairwise are gaps and the template aligned is not a gap this should not be possible
- // would skip the gaps and not progress through full alignment sequence
- // not tested yet
-
- mothurOut("We're into D " + toString(fullAlignIndex) + " " + toString(pairwiseAlignIndex)); mothurOutEndLine();
- pairwiseAlignIndex++;
+
+ for(int i=start;i<=end;i++){ // go through the candidate alignment sequence and make sure that
+ candAln[i] = toupper(candAln[i]); // everything is upper case
}
- else{
- // everything has a gap - not possible
- // not tested yet
-
- mothurOut("We're into F " + toString(fullAlignIndex) + " " + toString(pairwiseAlignIndex)); mothurOutEndLine();
- pairwiseAlignIndex++;
- fullAlignIndex++;
- }
- }
-
- for(int i=fullAlignIndex;i<fullAlignLength;i++){
- candAln += '.';
- newTemplateAlign += tempAln[i];//
- }
-
- int start, end;
- for(int i=0;i<candAln.length();i++){
- if(candAln[i] == 'Z' || !isalnum(candAln[i])) { candAln[i] = '.'; } // if we padded the alignemnt from
- else{ start = i; break; } // blast with Z's, change them to
- } // '.' characters
-
- for(int i=candAln.length()-1;i>=0;i--){ // ditto.
- if(candAln[i] == 'Z' || !isalnum(candAln[i])) { candAln[i] = '.'; }
- else{ end = i; break; }
- }
-
- for(int i=start;i<=end;i++){ // go through the candidate alignment sequence and make sure that
- candAln[i] = toupper(candAln[i]); // everything is upper case
+
+
+ if(candAln.length() != tempAln.length()){ // if the regapped candidate sequence is longer than the official
+ removeExtraGaps(candAln, tempAln, newTemplateAlign);// template alignment then we need to do steps C-F in Fig.
+ } // 2 of Desantis et al.
+
+
+ candidateSeq->setAligned(candAln);
}
-
-
- if(candAln.length() != tempAln.length()){ // if the regapped candidate sequence is longer than the official
- removeExtraGaps(candAln, tempAln, newTemplateAlign);// template alignment then we need to do steps C-F in Fig.
- } // 2 of Desantis et al.
-
-
- candidateSeq->setAligned(candAln);
+ catch(exception& e) {
+ errorOut(e, "Nast", "regapSequences");
+ exit(1);
+ }
}
/**************************************************************************************************/
float Nast::getSimilarityScore(){
-
- string cand = candidateSeq->getAligned();
- string temp = templateSeq->getAligned();
- int alignmentLength = temp.length();
- int mismatch = 0;
- int denominator = 0;
+ try {
- for(int i=0;i<alignmentLength;i++){
- if(cand[i] == '-' && temp[i] == '-'){
-
- }
- else if(cand[i] != '.' && temp[i] != '.'){
- denominator++;
-
- if(cand[i] != temp[i]){
- mismatch++;
+ string cand = candidateSeq->getAligned();
+ string temp = templateSeq->getAligned();
+ int alignmentLength = temp.length();
+ int mismatch = 0;
+ int denominator = 0;
+
+ for(int i=0;i<alignmentLength;i++){
+ if(cand[i] == '-' && temp[i] == '-'){
+
+ }
+ else if(cand[i] != '.' && temp[i] != '.'){
+ denominator++;
+
+ if(cand[i] != temp[i]){
+ mismatch++;
+ }
}
}
+ float similarity = 100 * (1. - mismatch / (float)denominator);
+ if(denominator == 0){ similarity = 0.0000; }
+
+ return similarity;
+
}
- float similarity = 100 * (1. - mismatch / (float)denominator);
- if(denominator == 0){ similarity = 0.0000; }
-
- return similarity;
+ catch(exception& e) {
+ errorOut(e, "Nast", "getSimilarityScore");
+ exit(1);
+ }
}
/**************************************************************************************************/
NeedlemanOverlap::NeedlemanOverlap(float gO, float m, float mm, int r) :// note that we don't have a gap extend
gap(gO), match(m), mismatch(mm), Alignment(r) { // the gap openning penalty is assessed for
- // every gapped position
- for(int i=1;i<nCols;i++){
- alignment[0][i].prevCell = 'l'; // initialize first row by pointing all poiters to the left
- alignment[0][i].cValue = 0; // and the score to zero
- }
+ try { // every gapped position
+ for(int i=1;i<nCols;i++){
+ alignment[0][i].prevCell = 'l'; // initialize first row by pointing all poiters to the left
+ alignment[0][i].cValue = 0; // and the score to zero
+ }
+
+ for(int i=1;i<nRows;i++){
+ alignment[i][0].prevCell = 'u'; // initialize first column by pointing all poiters upwards
+ alignment[i][0].cValue = 0; // and the score to zero
+ }
- for(int i=1;i<nRows;i++){
- alignment[i][0].prevCell = 'u'; // initialize first column by pointing all poiters upwards
- alignment[i][0].cValue = 0; // and the score to zero
}
+ catch(exception& e) {
+ errorOut(e, "NeedlemanOverlap", "NeedlemanOverlap");
+ exit(1);
+ }
+
+
}
-
/**************************************************************************************************/
NeedlemanOverlap::~NeedlemanOverlap(){ /* do nothing */ }
/**************************************************************************************************/
void NeedlemanOverlap::align(string A, string B){
-
- seqA = ' ' + A; lA = seqA.length(); // algorithm requires a dummy space at the beginning of each string
- seqB = ' ' + B; lB = seqB.length(); // algorithm requires a dummy space at the beginning of each string
-
- for(int i=1;i<lB;i++){ // This code was largely translated from Perl code provided in Ex 3.1
- for(int j=1;j<lA;j++){ // of the O'Reilly BLAST book. I found that the example output had a
- // number of errors
- float diagonal;
- if(seqB[i] == seqA[j]) { diagonal = alignment[i-1][j-1].cValue + match; }
- else { diagonal = alignment[i-1][j-1].cValue + mismatch; }
-
- float up = alignment[i-1][j].cValue + gap;
- float left = alignment[i][j-1].cValue + gap;
-
- if(diagonal >= up){
- if(diagonal >= left){
- alignment[i][j].cValue = diagonal;
- alignment[i][j].prevCell = 'd';
- }
- else{
- alignment[i][j].cValue = left;
- alignment[i][j].prevCell = 'l';
- }
- }
- else{
- if(up >= left){
- alignment[i][j].cValue = up;
- alignment[i][j].prevCell = 'u';
+ try {
+ seqA = ' ' + A; lA = seqA.length(); // algorithm requires a dummy space at the beginning of each string
+ seqB = ' ' + B; lB = seqB.length(); // algorithm requires a dummy space at the beginning of each string
+
+ if (lA > nRows) { mothurOut("Your one of your candidate sequences is longer than you longest template sequence."); mothurOutEndLine(); }
+
+ for(int i=1;i<lB;i++){ // This code was largely translated from Perl code provided in Ex 3.1
+ for(int j=1;j<lA;j++){ // of the O'Reilly BLAST book. I found that the example output had a
+ // number of errors
+ float diagonal;
+ if(seqB[i] == seqA[j]) { diagonal = alignment[i-1][j-1].cValue + match; }
+ else { diagonal = alignment[i-1][j-1].cValue + mismatch; }
+
+ float up = alignment[i-1][j].cValue + gap;
+ float left = alignment[i][j-1].cValue + gap;
+
+ if(diagonal >= up){
+ if(diagonal >= left){
+ alignment[i][j].cValue = diagonal;
+ alignment[i][j].prevCell = 'd';
+ }
+ else{
+ alignment[i][j].cValue = left;
+ alignment[i][j].prevCell = 'l';
+ }
}
else{
- alignment[i][j].cValue = left;
- alignment[i][j].prevCell = 'l';
+ if(up >= left){
+ alignment[i][j].cValue = up;
+ alignment[i][j].prevCell = 'u';
+ }
+ else{
+ alignment[i][j].cValue = left;
+ alignment[i][j].prevCell = 'l';
+ }
}
}
}
+ Overlap over;
+ over.setOverlap(alignment, lA, lB, 0); // Fix gaps at the beginning and end of the sequences
+ traceBack(); // Traceback the alignment to populate seqAaln and seqBaln
+
+ }
+ catch(exception& e) {
+ errorOut(e, "NeedlemanOverlap", "align");
+ exit(1);
}
- Overlap over;
- over.setOverlap(alignment, lA, lB, 0); // Fix gaps at the beginning and end of the sequences
- traceBack(); // Traceback the alignment to populate seqAaln and seqBaln
+
}
/**************************************************************************************************/
*/
#include "pintail.h"
-#include "ignoregaps.h"
+#include "eachgapdist.h"
//***************************************************************************************************************
-Pintail::Pintail(string name) {
- try {
- fastafile = name;
- }
- catch(exception& e) {
- errorOut(e, "Pintail", "Pintail");
- exit(1);
- }
-}
+Pintail::Pintail(string filename, string temp) { fastafile = filename; templateFile = temp; }
//***************************************************************************************************************
Pintail::~Pintail() {
void Pintail::print(ostream& out) {
try {
- for (itCoef = DE.begin(); itCoef != DE.end(); itCoef++) {
+ for (int i = 0; i < querySeqs.size(); i++) {
- out << itCoef->first->getName() << '\t' << itCoef->second << endl;
+ out << querySeqs[i]->getName() << '\t' << "div: " << deviation[i] << "\tstDev: " << DE[i] << endl;
out << "Observed\t";
- itObsDist = obsDistance.find(itCoef->first);
- for (int i = 0; i < itObsDist->second.size(); i++) { out << itObsDist->second[i] << '\t'; }
+ for (int j = 0; j < obsDistance[i].size(); j++) { out << obsDistance[i][j] << '\t'; }
out << endl;
out << "Expected\t";
- itExpDist = expectedDistance.find(itCoef->first);
- for (int i = 0; i < itExpDist->second.size(); i++) { out << itExpDist->second[i] << '\t'; }
+ for (int m = 0; m < expectedDistance[i].size(); m++) { out << expectedDistance[i][m] << '\t'; }
out << endl;
}
-
-
}
catch(exception& e) {
errorOut(e, "Pintail", "print");
void Pintail::getChimeras() {
try {
- distCalculator = new ignoreGaps();
-
//read in query sequences and subject sequences
mothurOut("Reading sequences and template file... "); cout.flush();
querySeqs = readSeqs(fastafile);
int numSeqs = querySeqs.size();
- //if window is set to default
- if (window == 0) { if (querySeqs[0]->getAligned().length() > 800) { setWindow(200); }
- else{ setWindow((querySeqs[0]->getAligned().length() / 4)); } }
- else if (window > (querySeqs[0]->getAligned().length() / 4)) {
- mothurOut("You have selected to large a window size for you sequences. I will choose a smaller window."); mothurOutEndLine();
- setWindow((querySeqs[0]->getAligned().length() / 4));
- }
-
- //calculate number of iters
- iters = (querySeqs[0]->getAligned().length() - window + 1) / increment;
-cout << "length = " << querySeqs[0]->getAligned().length() << " window = " << window << " increment = " << increment << " iters = " << iters << endl;
+ obsDistance.resize(numSeqs);
+ expectedDistance.resize(numSeqs);
+ seqCoef.resize(numSeqs);
+ DE.resize(numSeqs);
+ Qav.resize(numSeqs);
+ bestfit.resize(numSeqs);
+ trim.resize(numSeqs);
+ deviation.resize(numSeqs);
+ windowSizes.resize(numSeqs, window);
+
+ //break up file if needed
int linesPerProcess = processors / numSeqs;
- //find breakup of sequences for all times we will Parallelize
- if (processors == 1) { lines.push_back(new linePair(0, numSeqs)); }
- else {
- //fill line pairs
- for (int i = 0; i < (processors-1); i++) {
- lines.push_back(new linePair((i*linesPerProcess), ((i*linesPerProcess) + linesPerProcess)));
+ #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+ //find breakup of sequences for all times we will Parallelize
+ if (processors == 1) { lines.push_back(new linePair(0, numSeqs)); }
+ else {
+ //fill line pairs
+ for (int i = 0; i < (processors-1); i++) {
+ lines.push_back(new linePair((i*linesPerProcess), ((i*linesPerProcess) + linesPerProcess)));
+ }
+ //this is necessary to get remainder of processors / numSeqs so you don't miss any lines at the end
+ int i = processors - 1;
+ lines.push_back(new linePair((i*linesPerProcess), numSeqs));
}
- //this is necessary to get remainder of processors / numSeqs so you don't miss any lines at the end
- int i = processors - 1;
- lines.push_back(new linePair((i*linesPerProcess), numSeqs));
- }
-
- //map query sequences to their most similiar sequences in the template - Parallelized
- mothurOut("Finding closest sequence in template to each sequence... "); cout.flush();
- if (processors == 1) { findPairs(lines[0]->start, lines[0]->end); }
- else { createProcessesPairs(); }
- mothurOut("Done."); mothurOutEndLine();
-
- //find Oqs for each sequence - the observed distance in each window - Parallelized
- mothurOut("Calculating observed percentage differences for each sequence... "); cout.flush();
- if (processors == 1) { calcObserved(lines[0]->start, lines[0]->end); }
- else { createProcessesObserved(); }
- mothurOut("Done."); mothurOutEndLine();
-
+ #else
+ lines.push_back(new linePair(0, numSeqs));
+ #endif
+
+ distcalculator = new eachGapDist();
+
+ if (processors == 1) {
+ mothurOut("Finding closest sequence in template to each sequence... "); cout.flush();
+ bestfit = findPairs(lines[0]->start, lines[0]->end);
+for (int m = 0; m < templateSeqs.size(); m++) {
+ if (templateSeqs[m]->getName() == "198806") { bestfit[0] = *(templateSeqs[m]); }
+ if (templateSeqs[m]->getName() == "198806") { bestfit[1] = *(templateSeqs[m]); }
+ if (templateSeqs[m]->getName() == "108139") { bestfit[2] = *(templateSeqs[m]); }
+}
+
+for (int j = 0; j < bestfit.size(); j++) {//cout << querySeqs[j]->getName() << '\t' << "length = " << querySeqs[j]->getAligned().length() << '\t' << bestfit[j].getName() << " length = " << bestfit[j].getAligned().length() << endl;
+ //chops off beginning and end of sequences so they both start and end with a base
+ trimSeqs(querySeqs[j], bestfit[j], j);
+//cout << "NEW SEQ PAIR" << querySeqs[j]->getAligned() << endl << "IN THE MIDDLE" << endl << bestfit[j].getAligned() << endl;
+
+}
+
+ mothurOut("Done."); mothurOutEndLine();
+
+ windows = findWindows(lines[0]->start, lines[0]->end);
+ } else { createProcessesSpots(); }
+
//find P
- mothurOut("Calculating expected percentage differences for each sequence... "); cout.flush();
- vector<float> probabilityProfile = calcFreq(templateSeqs);
+ if (consfile == "") { probabilityProfile = calcFreq(templateSeqs); }
+ else { probabilityProfile = readFreq(); }
//make P into Q
for (int i = 0; i < probabilityProfile.size(); i++) { probabilityProfile[i] = 1 - probabilityProfile[i]; }
-
- //find Qav
- averageProbability = findQav(probabilityProfile);
-
- //find Coefficient - maps a sequence to its coefficient
- seqCoef = getCoef(averageProbability);
-
- //find Eqs for each sequence - the expected distance in each window - Parallelized
- if (processors == 1) { calcExpected(lines[0]->start, lines[0]->end); }
- else { createProcessesExpected(); }
- mothurOut("Done."); mothurOutEndLine();
- //find deviation - Parallelized
- mothurOut("Finding deviation from expected... "); cout.flush();
- if (processors == 1) { calcDE(lines[0]->start, lines[0]->end); }
- else { createProcessesDE(); }
- mothurOut("Done."); mothurOutEndLine();
+ if (processors == 1) {
+
+ mothurOut("Calculating observed distance... "); cout.flush();
+ obsDistance = calcObserved(lines[0]->start, lines[0]->end);
+ mothurOut("Done."); mothurOutEndLine();
+
+ mothurOut("Finding variability... "); cout.flush();
+ Qav = findQav(lines[0]->start, lines[0]->end);
+for (int i = 0; i < Qav.size(); i++) {
+cout << querySeqs[i]->getName() << " = ";
+for (int u = 0; u < Qav[i].size();u++) { cout << Qav[i][u] << '\t'; }
+cout << endl << endl;
+}
+
+
+ mothurOut("Done."); mothurOutEndLine();
+
+ mothurOut("Calculating alpha... "); cout.flush();
+ seqCoef = getCoef(lines[0]->start, lines[0]->end);
+for (int i = 0; i < seqCoef.size(); i++) {
+cout << querySeqs[i]->getName() << " coef = " << seqCoef[i] << endl;
+}
+
+ mothurOut("Done."); mothurOutEndLine();
+
+ mothurOut("Calculating expected distance... "); cout.flush();
+ expectedDistance = calcExpected(lines[0]->start, lines[0]->end);
+ mothurOut("Done."); mothurOutEndLine();
+
+ mothurOut("Finding deviation... "); cout.flush();
+ DE = calcDE(lines[0]->start, lines[0]->end);
+ deviation = calcDist(lines[0]->start, lines[0]->end);
+ mothurOut("Done."); mothurOutEndLine();
+
+
+
+ }
+ else { createProcesses(); }
-
+ delete distcalculator;
+
//free memory
for (int i = 0; i < lines.size(); i++) { delete lines[i]; }
- delete distCalculator;
+
}
catch(exception& e) {
}
}
+//***************************************************************************************************************
+//num is query's spot in querySeqs
+void Pintail::trimSeqs(Sequence* query, Sequence& subject, int num) {
+ try {
+
+ string q = query->getAligned();
+ string s = subject.getAligned();
+
+ int front = 0;
+ for (int i = 0; i < q.length(); i++) {
+ if (isalpha(q[i]) && isalpha(s[i])) { front = i; break; }
+ }
+
+ q = q.substr(front, q.length());
+ s = s.substr(front, s.length());
+
+ int back = 0;
+ for (int i = q.length(); i >= 0; i--) {
+ if (isalpha(q[i]) && isalpha(s[i])) { back = i; break; }
+ }
+
+ q = q.substr(0, back);
+ s = s.substr(0, back);
+
+ trim[num][front] = back;
+
+ //save
+ query->setAligned(q);
+ query->setUnaligned(q);
+ subject.setAligned(s);
+ subject.setUnaligned(s);
+ }
+ catch(exception& e) {
+ errorOut(e, "Pintail", "trimSeqs");
+ exit(1);
+ }
+}
+
+//***************************************************************************************************************
+
+vector<float> Pintail::readFreq() {
+ try {
+
+ ifstream in;
+ openInputFile(consfile, in);
+
+ vector<float> prob;
+
+ //read in probabilities and store in vector
+ int pos; float num;
+
+ while(!in.eof()){
+
+ in >> pos >> num;
+
+ prob.push_back(num);
+
+ gobble(in);
+ }
+
+ in.close();
+ return prob;
+
+ }
+ catch(exception& e) {
+ errorOut(e, "Pintail", "readFreq");
+ exit(1);
+ }
+}
+
//***************************************************************************************************************
//calculate the distances from each query sequence to all sequences in the template to find the closest sequence
-void Pintail::findPairs(int start, int end) {
+vector<Sequence> Pintail::findPairs(int start, int end) {
try {
+ vector<Sequence> seqsMatches; seqsMatches.resize(end-start);
+
for(int i = start; i < end; i++){
float smallest = 10000.0;
Sequence temp = *(templateSeqs[j]);
- distCalculator->calcDist(query, temp);
- float dist = distCalculator->getDist();
+ distcalculator->calcDist(query, temp);
+ float dist = distcalculator->getDist();
if (dist < smallest) {
-
- bestfit[querySeqs[i]] = templateSeqs[j];
+ seqsMatches[i] = *(templateSeqs[j]);
smallest = dist;
}
}
}
+ return seqsMatches;
+
}
catch(exception& e) {
errorOut(e, "Pintail", "findPairs");
exit(1);
}
}
+
//***************************************************************************************************************
-void Pintail::calcObserved(int start, int end) {
+//find the window breaks for each sequence
+vector< vector<int> > Pintail::findWindows(int start, int end) {
try {
+
+ vector< vector<int> > win; win.resize(end-start);
+
+ //for each sequence
+ int count = 0;
+ for(int i = start; i < end; i++){
+
+ //if window is set to default
+ if (windowSizes[i] == 0) { if (querySeqs[i]->getAligned().length() > 1200) { windowSizes[i] = 300; }
+ else{ windowSizes[i] = (querySeqs[i]->getAligned().length() / 4); } }
+ else if (windowSizes[i] > (querySeqs[i]->getAligned().length() / 4)) {
+ mothurOut("You have selected to large a window size for sequence " + querySeqs[i]->getName() + ". I will choose an appropriate window size."); mothurOutEndLine();
+ windowSizes[i] = (querySeqs[i]->getAligned().length() / 4);
+ }
-
+ //cout << "length = " << querySeqs[i]->getAligned().length() << " window = " << windowSizes[i] << " increment = " << increment << endl;
+
+
+ string seq = querySeqs[i]->getAligned();
+ int numBases = querySeqs[i]->getUnaligned().length();
+ int spot = 0;
+
+ //find location of first base
+ for (int j = 0; j < seq.length(); j++) {
+ if (isalpha(seq[j])) { spot = j; break; }
+ }
+
+ //save start of seq
+ win[count].push_back(spot);
+
+
+ //move ahead increment bases at a time until all bases are in a window
+ int countBases = 0;
+ int totalBases = 0; //used to eliminate window of blanks at end of sequence
+ for (int m = spot; m < seq.length(); m++) {
+
+ //count number of bases you see
+ if (isalpha(seq[m])) { countBases++; totalBases++; }
+
+ //if you have seen enough bases to make a new window
+ if (countBases >= increment) {
+ win[count].push_back(m); //save spot in alignment
+ countBases = 0; //reset bases you've seen in this window
+ }
+
+ //no need to continue if all your bases are in a window
+ if (totalBases == numBases) { break; }
+ }
+
+ count++;
+ }
+
+
+
+ return win;
+
+ }
+ catch(exception& e) {
+ errorOut(e, "Pintail", "findWindows");
+ exit(1);
+ }
+}
+
+//***************************************************************************************************************
+vector< vector<float> > Pintail::calcObserved(int start, int end) {
+ try {
+
+ vector< vector<float> > temp;
+ temp.resize(end-start);
+
+ int count = 0;
for(int i = start; i < end; i++){
- itBest = bestfit.find(querySeqs[i]);
- Sequence* query;
- Sequence* subject;
+ Sequence* query = querySeqs[i];
+ Sequence subject = bestfit[i];
- if (itBest != bestfit.end()) {
- query = itBest->first;
- subject = itBest->second;
- }else{ mothurOut("Error in calcObserved"); mothurOutEndLine(); }
-//cout << query->getName() << '\t' << subject->getName() << endl;
-
int startpoint = 0;
- for (int m = 0; m < iters; m++) {
+ for (int m = 0; m < windows[i].size(); m++) {
- string seqFrag = query->getAligned().substr(startpoint, window);
- string seqFragsub = subject->getAligned().substr(startpoint, window);
+ string seqFrag = query->getAligned().substr(windows[i][startpoint], windowSizes[i]);
+ string seqFragsub = subject.getAligned().substr(windows[i][startpoint], windowSizes[i]);
int diff = 0;
for (int b = 0; b < seqFrag.length(); b++) {
- //if this is not a gap
- if ((isalpha(seqFrag[b])) && (isalpha(seqFragsub[b]))) {
+ //if either the query or subject is not a gap
+ if ((isalpha(seqFrag[b])) || (isalpha(seqFragsub[b]))) {
//and they are different - penalize
if (seqFrag[b] != seqFragsub[b]) { diff++; }
}
}
//percentage of mismatched bases
- float dist = diff / (float)seqFrag.length();
+ float dist;
+ dist = diff / (float) seqFrag.length() * 100;
- obsDistance[query].push_back(dist);
+ temp[count].push_back(dist);
- startpoint += increment;
+ startpoint++;
}
+
+ count++;
}
+ return temp;
}
catch(exception& e) {
errorOut(e, "Pintail", "calcObserved");
exit(1);
}
}
+//***************************************************************************************************************
+vector<float> Pintail::calcDist(int start, int end) {
+ try {
+
+ vector<float> temp;
+
+ for(int i = start; i < end; i++){
+
+ Sequence* query = querySeqs[i];
+ Sequence subject = bestfit[i];
+
+ string seqFrag = query->getAligned();
+ string seqFragsub = subject.getAligned();
+
+ int diff = 0;
+ for (int b = 0; b < seqFrag.length(); b++) {
+
+ //if either the query or subject is not a gap
+ if ((isalpha(seqFrag[b])) || (isalpha(seqFragsub[b]))) {
+ //and they are different - penalize
+ if (seqFrag[b] != seqFragsub[b]) { diff++; }
+ }
+ }
+
+ //percentage of mismatched bases
+ float dist;
+ dist = diff / (float) seqFrag.length() * 100;
+
+ temp.push_back(dist);
+ }
+
+ return temp;
+ }
+ catch(exception& e) {
+ errorOut(e, "Pintail", "calcDist");
+ exit(1);
+ }
+}
//***************************************************************************************************************
-void Pintail::calcExpected(int start, int end) {
+vector< vector<float> > Pintail::calcExpected(int start, int end) {
try {
-
+ vector< vector<float> > temp; temp.resize(end-start);
+
//for each sequence
+ int count = 0;
for(int i = start; i < end; i++){
- itCoef = seqCoef.find(querySeqs[i]);
- float coef = itCoef->second;
+ float coef = seqCoef[i];
//for each window
vector<float> queryExpected;
- for (int m = 0; m < iters; m++) {
- float expected = averageProbability[m] * coef;
+ for (int m = 0; m < windows[i].size(); m++) {
+ float expected = Qav[i][m] * coef;
queryExpected.push_back(expected);
//cout << "average variabilty over window = " << averageProbability[m] << " coef = " << coef << " ei = " << expected << '\t' << "window = " << m << endl;
}
- expectedDistance[querySeqs[i]] = queryExpected;
+ temp[count] = queryExpected;
+
+ count++;
}
+
+ return temp;
}
catch(exception& e) {
}
}
//***************************************************************************************************************
-void Pintail::calcDE(int start, int end) {
+vector<float> Pintail::calcDE(int start, int end) {
try {
+ vector<float> temp; temp.resize(end-start);
//for each sequence
+ int count = 0;
for(int i = start; i < end; i++){
- itObsDist = obsDistance.find(querySeqs[i]);
- vector<float> obs = itObsDist->second;
+ vector<float> obs = obsDistance[i];
+ vector<float> exp = expectedDistance[i];
- itExpDist = expectedDistance.find(querySeqs[i]);
- vector<float> exp = itExpDist->second;
// cout << "difference between obs and exp = " << abs(obs[m] - exp[m]) << endl;
//for each window
float sum = 0.0; //sum = sum from 1 to m of (oi-ei)^2
- for (int m = 0; m < iters; m++) { sum += ((obs[m] - exp[m]) * (obs[m] - exp[m])); }
+ for (int m = 0; m < windows[i].size(); m++) { sum += ((obs[m] - exp[m]) * (obs[m] - exp[m])); }
- float de = sqrt((sum / (iters - 1)));
+ float de = sqrt((sum / (windows[i].size() - 1)));
- DE[querySeqs[i]] = de;
+ temp[count] = de;
+ count++;
}
-
+
+ return temp;
}
catch(exception& e) {
errorOut(e, "Pintail", "calcDE");
try {
vector<float> prob;
+ string freqfile = getRootName(templateFile) + "probability";
+ ofstream outFreq;
+
+ openOutputFile(freqfile, outFreq);
//at each position in the sequence
for (int i = 0; i < seqs[0]->getAligned().length(); i++) {
//find base with highest frequency
int highest = 0;
- for (int m = 0; m < freq.size(); m++) { if (freq[m] > highest) { highest = freq[m]; } }
-
- //add in gaps - so you can effectively "ignore them"
- highest += gaps;
+ for (int m = 0; m < freq.size(); m++) { if (freq[m] > highest) { highest = freq[m]; } }
- float highFreq = highest / (float) seqs.size();
+ float highFreq;
+ //if ( (seqs.size() - gaps) == 0 ) { highFreq = 1.0; }
+ //else { highFreq = highest / (float) (seqs.size() - gaps); }
+ highFreq = highest / (float) seqs.size();
+cout << i << '\t' << highFreq << endl;
float Pi;
- Pi = (highFreq - 0.25) / 0.75;
+ Pi = (highFreq - 0.25) / 0.75;
+
+ //saves this for later
+ outFreq << i << '\t' << Pi << endl;
prob.push_back(Pi);
}
+ outFreq.close();
+
return prob;
}
}
}
//***************************************************************************************************************
-vector<float> Pintail::findQav(vector<float> prob) {
+vector< vector<float> > Pintail::findQav(int start, int end) {
try {
- vector<float> averages;
+ vector< vector<float> > averages;
+ map<int, int>::iterator it;
- //for each window find average
- int startpoint = 0;
- for (int m = 0; m < iters; m++) {
-
- float average = 0.0;
- for (int i = startpoint; i < (startpoint+window); i++) { average += prob[i]; }
-
- average = average / window;
-//cout << average << endl;
- //save this windows average
- averages.push_back(average);
+ for(int i = start; i < end; i++){
- startpoint += increment;
+ //for each window find average
+ vector<float> temp;
+ for (int m = 0; m < windows[i].size(); m++) {
+
+ float average = 0.0;
+
+ it = trim[i].begin(); //trim[i] is a map of where this sequence was trimmed
+
+ //while you are in the window for this sequence
+ for (int j = windows[i][m]+it->first; j < (windows[i][m]+windowSizes[i]); j++) { average += probabilityProfile[j]; }
+
+ average = average / windowSizes[i];
+ //cout << average << endl;
+ //save this windows average
+ temp.push_back(average);
+ }
+
+ //save this qav
+ averages.push_back(temp);
}
return averages;
}
}
//***************************************************************************************************************
-map<Sequence*, float> Pintail::getCoef(vector<float> prob) {
+vector<float> Pintail::getCoef(int start, int end) {
try {
- map<Sequence*, float> coefs;
+ vector<float> coefs;
+ coefs.resize(end-start);
- //find average prob
- float probAverage = 0.0;
- for (int i = 0; i < prob.size(); i++) { probAverage += prob[i]; }
- probAverage = probAverage / (float) prob.size();
-cout << "(sum of ai) / m = " << probAverage << endl;
//find a coef for each sequence
- map<Sequence*, vector<float> >::iterator it;
- for (it = obsDistance.begin(); it != obsDistance.end(); it++) {
-
- vector<float> temp = it->second;
- Sequence* tempSeq = it->first;
+ int count = 0;
+ for(int i = start; i < end; i++){
+
+ //find average prob for this seqs windows
+ float probAverage = 0.0;
+ for (int j = 0; j < Qav[i].size(); j++) { probAverage += Qav[i][j]; }
+ probAverage = probAverage / (float) Qav[i].size();
+ cout << "(sum of ai) / m = " << probAverage << endl;
+
+ vector<float> temp = obsDistance[i];
//find observed average
float obsAverage = 0.0;
- for (int i = 0; i < temp.size(); i++) { obsAverage += temp[i]; }
+ for (int j = 0; j < temp.size(); j++) { obsAverage += temp[j]; }
obsAverage = obsAverage / (float) temp.size();
-cout << tempSeq->getName() << '\t' << obsAverage << endl;
+cout << "(sum of oi) / m = " << obsAverage << endl;
float coef = obsAverage / probAverage;
-cout << tempSeq->getName() << '\t' << "coef = " << coef << endl;
+
//save this sequences coefficient
- coefs[tempSeq] = coef;
+ coefs[count] = coef;
+
+ count++;
}
}
}
+
/**************************************************************************************************/
-void Pintail::createProcessesPairs() {
+void Pintail::createProcessesSpots() {
try {
#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
int process = 0;
vector<int> processIDS;
+ vector< vector<int> > win; win.resize(querySeqs.size());
+ vector< map <int, int> > t; t.resize(querySeqs.size());
//loop through and create all the processes you want
while (process != processors) {
processIDS.push_back(pid);
process++;
}else if (pid == 0){
- findPairs(lines[process]->start, lines[process]->end);
+
+ vector<Sequence> tempbest;
+ tempbest = findPairs(lines[process]->start, lines[process]->end);
+ int count = 0;
+ for (int i = lines[process]->start; i < lines[process]->end; i++) {
+ bestfit[i] = tempbest[count];
+
+ //chops off beginning and end of sequences so they both start and end with a base
+ trimSeqs(querySeqs[i], bestfit[i], i);
+ t[i] = trim[i];
+
+ count++;
+ }
+
+
+
+ vector< vector<int> > temp = findWindows(lines[process]->start, lines[process]->end);
+
+ //move into best
+ count = 0;
+ for (int i = lines[process]->start; i < lines[process]->end; i++) {
+ win[i] = temp[count];
+ count++;
+ }
+
exit(0);
}else { mothurOut("unable to spawn the necessary processes."); mothurOutEndLine(); exit(0); }
}
wait(&temp);
}
+ windows = win;
+ trim = t;
#else
- findPairs(lines[0]->start, lines[0]->end);
+ windows = findWindows(lines[0]->start, lines[0]->end);
#endif
}
catch(exception& e) {
- errorOut(e, "Pintail", "createProcessesPairs");
+ errorOut(e, "Pintail", "createProcessesSpots");
exit(1);
}
}
+
/**************************************************************************************************/
-void Pintail::createProcessesObserved() {
+void Pintail::createProcesses() {
try {
#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
int process = 0;
vector<int> processIDS;
- //loop through and create all the processes you want
- while (process != processors) {
- int pid = fork();
-
- if (pid > 0) {
- processIDS.push_back(pid);
- process++;
- }else if (pid == 0){
- calcObserved(lines[process]->start, lines[process]->end);
- exit(0);
- }else { mothurOut("unable to spawn the necessary processes."); mothurOutEndLine(); exit(0); }
- }
+ vector< vector<float> > exp; exp.resize(querySeqs.size());
+ vector<float> de; de.resize(querySeqs.size());
+ vector< vector<float> > obs; obs.resize(querySeqs.size());
- //force parent to wait until all the processes are done
- for (int i=0;i<processors;i++) {
- int temp = processIDS[i];
- wait(&temp);
- }
-
-#else
- calcObserved(lines[0]->start, lines[0]->end);
-
-#endif
- }
- catch(exception& e) {
- errorOut(e, "Pintail", "createProcessesObserved");
- exit(1);
- }
-}
-
-//***************************************************************************************************************
-
-void Pintail::createProcessesExpected() {
- try {
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
- int process = 0;
- vector<int> processIDS;
//loop through and create all the processes you want
while (process != processors) {
processIDS.push_back(pid);
process++;
}else if (pid == 0){
- calcExpected(lines[process]->start, lines[process]->end);
- exit(0);
- }else { mothurOut("unable to spawn the necessary processes."); mothurOutEndLine(); exit(0); }
- }
-
- //force parent to wait until all the processes are done
- for (int i=0;i<processors;i++) {
- int temp = processIDS[i];
- wait(&temp);
- }
-
-#else
- calcExpected(lines[0]->start, lines[0]->end);
+
+ vector< vector<float> > temp;
+ vector<float> tempde;
+ int count = 0;
+
+
+ temp = calcObserved(lines[process]->start, lines[process]->end);
+ count = 0;
+ for (int i = lines[process]->start; i < lines[process]->end; i++) {
+ obs[i] = temp[count];
+ count++;
+ }
-#endif
- }
- catch(exception& e) {
- errorOut(e, "Pintail", "createProcessesExpected");
- exit(1);
- }
-}
+ temp = findQav(lines[process]->start, lines[process]->end);
+ count = 0;
+ for (int i = lines[process]->start; i < lines[process]->end; i++) {
+ Qav[i] = temp[count];
+ count++;
+ }
+
+ tempde = getCoef(lines[process]->start, lines[process]->end);
+ count = 0;
+ for (int i = lines[process]->start; i < lines[process]->end; i++) {
+ seqCoef[i] = tempde[count];
+ count++;
+ }
+
+ temp = calcExpected(lines[process]->start, lines[process]->end);
+ count = 0;
+ for (int i = lines[process]->start; i < lines[process]->end; i++) {
+ exp[i] = temp[count];
+ count++;
+ }
-/**************************************************************************************************/
+
+ tempde = calcDE(lines[process]->start, lines[process]->end);
+ count = 0;
+ for (int i = lines[process]->start; i < lines[process]->end; i++) {
+ de[i] = tempde[count];
+ count++;
+ }
-void Pintail::createProcessesDE() {
- try {
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
- int process = 0;
- vector<int> processIDS;
-
- //loop through and create all the processes you want
- while (process != processors) {
- int pid = fork();
-
- if (pid > 0) {
- processIDS.push_back(pid);
- process++;
- }else if (pid == 0){
- calcDE(lines[process]->start, lines[process]->end);
exit(0);
}else { mothurOut("unable to spawn the necessary processes."); mothurOutEndLine(); exit(0); }
}
wait(&temp);
}
+ obsDistance = obs;
+ expectedDistance = exp;
+ DE = de;
+
#else
- calcDE(lines[0]->start, lines[0]->end);
+ bestfit = findPairs(lines[0]->start, lines[0]->end);
+ obsDistance = calcObserved(lines[0]->start, lines[0]->end);
+ Qav = findQav(lines[0]->start, lines[0]->end);
+ seqCoef = getCoef(lines[0]->start, lines[0]->end);
+ expectedDistance = calcExpected(lines[0]->start, lines[0]->end);
+ DE = calcDE(lines[0]->start, lines[0]->end);
#endif
}
catch(exception& e) {
- errorOut(e, "Pintail", "createProcessesDE");
+ errorOut(e, "Pintail", "createProcesses");
exit(1);
}
}
class Pintail : public Chimera {
public:
- Pintail(string);
+ Pintail(string, string);
~Pintail();
void getChimeras();
void print(ostream&);
+ void setCons(string c) { consfile = c; }
+
private:
linePair(int i, int j) : start(i), end(j) {}
};
-
- Dist* distCalculator;
- string fastafile;
+ Dist* distcalculator;
int iters;
+ string fastafile, templateFile, consfile;
vector<linePair*> lines;
vector<Sequence*> querySeqs;
vector<Sequence*> templateSeqs;
- map<Sequence*, Sequence*> bestfit; //maps a query sequence to its most similiar sequence in the template
- map<Sequence*, Sequence*>::iterator itBest;
+ vector<Sequence> bestfit; //bestfit[0] matches queryseqs[0]...
- map<Sequence*, vector<float> > obsDistance; //maps a query sequence to its observed distance at each window
- map<Sequence*, vector<float> > expectedDistance; //maps a query sequence to its expected distance at each window
- map<Sequence*, vector<float> >::iterator itObsDist;
- map<Sequence*, vector<float> >::iterator itExpDist;
+ vector< vector<float> > obsDistance; //obsDistance[0] is the vector of observed distances for queryseqs[0]...
+ vector< vector<float> > expectedDistance; //expectedDistance[0] is the vector of expected distances for queryseqs[0]...
+ vector<float> deviation; //deviation[0] is the percentage of mismatched pairs over the whole seq between querySeqs[0] and its best match.
+ vector< vector<int> > windows; // windows[0] is a vector containing the starting spot in queryseqs[0] aligned sequence for each window.
+ //this is needed so you can move by bases and not just spots in the alignment
+ vector< map<int, int> > trim; //trim[0] is the start and end position of trimmed querySeqs[0]. Used to find the variability over each sequence window.
+
+ vector<int> windowSizes; //windowSizes[0] = window size of querySeqs[0]
- vector<float> averageProbability; //Qav
- map<Sequence*, float> seqCoef; //maps a sequence to its coefficient
- map<Sequence*, float> DE; //maps a sequence to its deviation
- map<Sequence*, float>::iterator itCoef;
+ vector< vector<float> > Qav; //Qav[0] is the vector of average variablility for queryseqs[0]...
+ vector<float> seqCoef; //seqCoef[0] is the coeff for queryseqs[0]...
+ vector<float> DE; //DE[0] is the deviaation for queryseqs[0]...
+ vector<float> probabilityProfile;
vector<Sequence*> readSeqs(string);
- vector<float> findQav(vector<float>);
+ void trimSeqs(Sequence*, Sequence&, int);
+ vector<float> readFreq();
+ vector< vector<float> > findQav(int, int);
vector<float> calcFreq(vector<Sequence*>);
- map<Sequence*, float> getCoef(vector<float>);
+ vector<float> getCoef(int, int);
- void findPairs(int, int);
- void calcObserved(int, int);
- void calcExpected(int, int);
- void calcDE(int, int);
+ vector<Sequence> findPairs(int, int);
+ vector< vector<int> > findWindows(int, int);
+ vector< vector<float> > calcObserved(int, int);
+ vector< vector<float> > calcExpected(int, int);
+ vector<float> calcDE(int, int);
+ vector<float> calcDist(int, int);
- void createProcessesPairs();
- void createProcessesObserved();
- void createProcessesExpected();
- void createProcessesDE();
+ void createProcessesSpots();
+ void createProcesses();
+
+
};
//create and initialize trees to 0.
initialTree(numGroups);
- //loop through vectors calculating the f11, f1A, f2A, f1B, f2B, S12 values
+
for (int i = 0; i < shared[0]->size(); i++) {
//get bin values and calc shared
bool sharedByAll = true;
//calculate chao1, (numleaves-1) because numleaves contains the ++ values.
bool bias;
for(int i=0;i<numLeaves;i++){
- if (f2leaves[i]->lvalue == 0) { bias = true;}// break;}
+ if ((f2leaves[i]->lvalue == 0) || (f2leaves[i]->rvalue == 0)) { bias = true; }// break;}
}
if(bias){
if (i != (numLeaves-1)) {
rightvalue = (float)(f1leaves[i]->rvalue * (f1leaves[i]->rvalue - 1)) / (float)((pow(2, (float)f2leaves[i]->rcoef)) * (f2leaves[i]->rvalue + 1));
}else{
+ //add in sobs
rightvalue = (float)(f1leaves[i]->rvalue);
}
Chao += leftvalue + rightvalue;
if (i != (numLeaves-1)) {
rightvalue = (float)(f1leaves[i]->rvalue * f1leaves[i]->rvalue) / (float)((pow(2, (float)f2leaves[i]->rcoef)) * f2leaves[i]->rvalue);
}else{
+ //add in sobs
rightvalue = (float)(f1leaves[i]->rvalue);
}
Chao += leftvalue + rightvalue;
delete f2leaves[i];
}
-
+
data[0] = Chao;
return data;
}
singleCalc = new Sobs();
}else if (vCalcs[i]->getName() == "sharedchao") {
singleCalc = new Chao1();
- }else if (vCalcs[i]->getName() == "sharedace") {
- singleCalc = new Ace(10);
- }
+ }//else if (vCalcs[i]->getName() == "sharedace") {
+ //singleCalc = new Ace(10);
+ //}
//get estimates for numA
vector<double> numA = singleCalc->getValues(sabundA);
//make a file for each calculator
for(int i=0;i<vCalcs.size();i++){
- //in essence you want to run it like a single
- if (vCalcs[i]->getName() == "sharedsobs") {
- singleCalc = new Sobs();
- }else if (vCalcs[i]->getName() == "sharedchao") {
- singleCalc = new Chao1();
- }else if (vCalcs[i]->getName() == "sharedace") {
- singleCalc = new Ace(10);
- }
+
+ string filenamesvg = getRootName(globaldata->inputFileName) + lookup[0]->getLabel() + ".venn." + vCalcs[i]->getName() + ".svg";
+ openOutputFile(filenamesvg, outsvg);
- //get estimates for numA
- vector<double> numA = singleCalc->getValues(sabundA);
+ if (vCalcs[i]->getName() == "sharedace") {
+
+ singleCalc = new Ace(10);
+
+ //get estimates for numA
+ vector<double> numA = singleCalc->getValues(sabundA);
- //get estimates for numB
- vector<double> numB = singleCalc->getValues(sabundB);
+ //get estimates for numB
+ vector<double> numB = singleCalc->getValues(sabundB);
- //get estimates for numC
- vector<double> numC = singleCalc->getValues(sabundC);
+ //get estimates for numC
+ vector<double> numC = singleCalc->getValues(sabundC);
-
- string filenamesvg = getRootName(globaldata->inputFileName) + lookup[0]->getLabel() + ".venn." + vCalcs[i]->getName() + ".svg";
- openOutputFile(filenamesvg, outsvg);
- //get estimates for sharedAB, sharedAC and sharedBC
- subset.clear();
- subset.push_back(lookup[0]); subset.push_back(lookup[1]);
- vector<double> sharedAB = vCalcs[i]->getValues(subset);
-
- subset.clear();
- subset.push_back(lookup[0]); subset.push_back(lookup[2]);
- vector<double> sharedAC = vCalcs[i]->getValues(subset);
-
- subset.clear();
- subset.push_back(lookup[1]); subset.push_back(lookup[2]);
- vector<double> sharedBC = vCalcs[i]->getValues(subset);
-
- vector<double> sharedAwithBC;
- vector<double> sharedBwithAC;
- vector<double> sharedCwithAB;
-
- //find possible sharedABC values
- float sharedABC1 = 0.0; float sharedABC2 = 0.0; float sharedABC3 = 0.0; float sharedABC = 0.0;
+ //get estimates for sharedAB, sharedAC and sharedBC
+ subset.clear();
+ subset.push_back(lookup[0]); subset.push_back(lookup[1]);
+ vector<double> sharedAB = vCalcs[i]->getValues(subset);
+
+ subset.clear();
+ subset.push_back(lookup[0]); subset.push_back(lookup[2]);
+ vector<double> sharedAC = vCalcs[i]->getValues(subset);
+
+ subset.clear();
+ subset.push_back(lookup[1]); subset.push_back(lookup[2]);
+ vector<double> sharedBC = vCalcs[i]->getValues(subset);
+
+ vector<double> sharedAwithBC;
+ vector<double> sharedBwithAC;
+ vector<double> sharedCwithAB;
+
+ //find possible sharedABC values
+ float sharedABC1 = 0.0; float sharedABC2 = 0.0; float sharedABC3 = 0.0; float sharedABC = 0.0;
- if (vCalcs[i]->getMultiple() == false) {
- //merge BC and estimate with shared with A
- SharedRAbundVector* merge = new SharedRAbundVector();
- for (int j = 0; j < lookup[1]->size(); j++) {
- merge->push_back((lookup[1]->getAbundance(j) + lookup[2]->getAbundance(j)), j, "");
- }
+ if (vCalcs[i]->getMultiple() == false) {
+ //merge BC and estimate with shared with A
+ SharedRAbundVector* merge = new SharedRAbundVector();
+ for (int j = 0; j < lookup[1]->size(); j++) {
+ merge->push_back((lookup[1]->getAbundance(j) + lookup[2]->getAbundance(j)), j, "");
+ }
+
+ subset.clear();
+ subset.push_back(lookup[0]); subset.push_back(merge);
+ sharedAwithBC = vCalcs[i]->getValues(subset);
- subset.clear();
- subset.push_back(lookup[0]); subset.push_back(merge);
- sharedAwithBC = vCalcs[i]->getValues(subset);
-
- delete merge;
- //merge AC and estimate with shared with B
- merge = new SharedRAbundVector();
- for (int j = 0; j < lookup[0]->size(); j++) {
- merge->push_back((lookup[0]->getAbundance(j) + lookup[2]->getAbundance(j)), j, "");
- }
+ delete merge;
+ //merge AC and estimate with shared with B
+ merge = new SharedRAbundVector();
+ for (int j = 0; j < lookup[0]->size(); j++) {
+ merge->push_back((lookup[0]->getAbundance(j) + lookup[2]->getAbundance(j)), j, "");
+ }
+
+ subset.clear();
+ subset.push_back(merge); subset.push_back(lookup[1]);
+ sharedBwithAC = vCalcs[i]->getValues(subset);
- subset.clear();
- subset.push_back(merge); subset.push_back(lookup[1]);
- sharedBwithAC = vCalcs[i]->getValues(subset);
+ delete merge;
+ //merge AB and estimate with shared with C
+ merge = new SharedRAbundVector();
+ for (int j = 0; j < lookup[0]->size(); j++) {
+ merge->push_back((lookup[0]->getAbundance(j) + lookup[1]->getAbundance(j)), j, "");
+ }
+
+ subset.clear();
+ subset.push_back(lookup[2]); subset.push_back(merge);
+ sharedCwithAB = vCalcs[i]->getValues(subset);
+ delete merge;
+
+ sharedABC1 = sharedAB[0] + sharedAC[0] - sharedAwithBC[0];
+ sharedABC2 = sharedAB[0] + sharedBC[0] - sharedBwithAC[0];
+ sharedABC3 = sharedAC[0] + sharedBC[0] - sharedCwithAB[0];
+
+ //if any of the possible m's are - throw them out
+ if (sharedABC1 < 0.00001) { sharedABC1 = 0; }
+ if (sharedABC2 < 0.00001) { sharedABC2 = 0; }
+ if (sharedABC3 < 0.00001) { sharedABC3 = 0; }
- delete merge;
- //merge AB and estimate with shared with C
- merge = new SharedRAbundVector();
- for (int j = 0; j < lookup[0]->size(); j++) {
- merge->push_back((lookup[0]->getAbundance(j) + lookup[1]->getAbundance(j)), j, "");
+ //sharedABC is the minimum of the 3 possibilities
+ if ((sharedABC1 < sharedABC2) && (sharedABC1 < sharedABC3)) { sharedABC = sharedABC1; }
+ else if ((sharedABC2 < sharedABC1) && (sharedABC2 < sharedABC3)) { sharedABC = sharedABC2; }
+ else if ((sharedABC3 < sharedABC1) && (sharedABC3 < sharedABC2)) { sharedABC = sharedABC3; }
+ }else{
+ vector<double> data = vCalcs[i]->getValues(lookup);
+ sharedABC = data[0];
+ sharedAwithBC.push_back(sharedAB[0] + sharedAC[0] - sharedABC);
+ sharedBwithAC.push_back(sharedAB[0] + sharedBC[0] - sharedABC);
+ sharedCwithAB.push_back(sharedAC[0] + sharedBC[0] - sharedABC);
}
+
+ //image window
+ outsvg << "<svg width=\"100%\" height=\"100%\" viewBox=\"0 0 800 800\" >\n";
+ outsvg << "<g>\n";
+
+ //draw circles
+ outsvg << "<rect fill=\"white\" stroke=\"white\" x=\"0\" y=\"0\" width=\"800\" height=\"800\"/>";
+ outsvg << "<text fill=\"black\" class=\"seri\" x=\"265\" y=\"30\">Venn Diagram at distance " + lookup[0]->getLabel() + "</text>\n";
+ outsvg << "<circle fill=\"rgb(255,0,0)\" opacity=\".3\" stroke=\"black\" cx=\"230\" cy=\"200\" r=\"150\"/>";
+ outsvg << "<circle fill=\"rgb(0,255,0)\" opacity=\".3\" stroke=\"black\" cx=\"455\" cy=\"200\" r=\"150\"/>";
+ outsvg << "<circle fill=\"rgb(0,0,255)\" opacity=\".3\" stroke=\"black\" cx=\"343\" cy=\"400\" r=\"150\"/>";
+
+ //place labels within overlaps
+ outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString(200 - ((int)toString(numA[0]-sharedAwithBC[0]).length() / 2)) + "\" y=\"170\">" + toString(numA[0]-sharedAwithBC[0]) + "</text>\n";
+ outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString(200 - ((int)lookup[0]->getGroup().length() / 2)) + "\" y=\"150\">" + lookup[0]->getGroup() + "</text>\n";
+ outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString(343 - ((int)toString(sharedAB[0] - sharedABC).length() / 2)) + "\" y=\"170\">" + toString(sharedAB[0] - sharedABC) + "</text>\n";
+ outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString(485 - ((int)toString(numB[0]-sharedBwithAC[0]).length() / 2)) + "\" y=\"170\">" + toString(numB[0]-sharedBwithAC[0]) + "</text>\n";
+ outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString(485 - ((int)lookup[1]->getGroup().length() / 2)) + "\" y=\"150\">" + lookup[1]->getGroup() + "</text>\n";
+ outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString(268 - ((int)toString(sharedAC[0] - sharedABC).length() / 2)) + "\" y=\"305\">" + toString(sharedAC[0] - sharedABC) + "</text>\n";
+ outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString(343 - ((int)toString(numC[0]-sharedCwithAB[0]).length() / 2)) + "\" y=\"430\">" + toString(numC[0]-sharedCwithAB[0]) + "</text>\n";
+ outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString(343 - ((int)lookup[2]->getGroup().length() / 2)) + "\" y=\"410\">" + lookup[2]->getGroup() + "</text>\n";
+ outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString(408 - ((int)toString(sharedBC[0] - sharedABC).length() / 2)) + "\" y=\"305\">" + toString(sharedBC[0] - sharedABC) + "</text>\n";
+ outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString(343 - ((int)toString(sharedABC).length() / 2)) + "\" y=\"280\">" + toString(sharedABC) + "</text>\n";
+ outsvg << "<text fill=\"black\" class=\"seri\" x=\"175\" y=\"660\">The number of sepecies shared between groups " + lookup[0]->getGroup() + " and " + lookup[1]->getGroup() + " is " + toString(sharedAB[0]) + "</text>\n";
+ outsvg << "<text fill=\"black\" class=\"seri\" x=\"175\" y=\"680\">The number of sepecies shared between groups " + lookup[0]->getGroup() + " and " + lookup[2]->getGroup() + " is " + toString(sharedAC[0]) + "</text>\n";
+ outsvg << "<text fill=\"black\" class=\"seri\" x=\"175\" y=\"700\">The number of sepecies shared between groups " + lookup[1]->getGroup() + " and " + lookup[2]->getGroup() + " is " + toString(sharedBC[0]) + "</text>\n";
+ outsvg << "<text fill=\"black\" class=\"seri\" x=\"175\" y=\"720\">The number of sepecies shared between groups " + lookup[0]->getGroup() + " and combined groups " + lookup[1]->getGroup() + lookup[2]->getGroup() + " is " + toString(sharedAwithBC[0]) + "</text>\n";
+ outsvg << "<text fill=\"black\" class=\"seri\" x=\"175\" y=\"740\">The number of sepecies shared between groups " + lookup[1]->getGroup() + " and combined groups " + lookup[0]->getGroup() + lookup[2]->getGroup() + " is " + toString(sharedBwithAC[0]) + "</text>\n";
+ outsvg << "<text fill=\"black\" class=\"seri\" x=\"175\" y=\"760\">The number of sepecies shared between groups " + lookup[2]->getGroup() + " and combined groups " + lookup[0]->getGroup() + lookup[1]->getGroup() + " is " + toString(sharedCwithAB[0]) + "</text>\n";
+ outsvg << "<text fill=\"black\" class=\"seri\" x=\"175\" y=\"580\">The number of species in group " + lookup[0]->getGroup() + " is " + toString(numA[0]);
+ if (numA.size() == 3) {
+ outsvg << " the lci is " + toString(numA[1]) + " and the hci is " + toString(numA[2]) + "</text>\n";
+ }else { outsvg << "</text>\n"; }
+
+ outsvg << "<text fill=\"black\" class=\"seri\" x=\"175\" y=\"600\">The number of species in group " + lookup[1]->getGroup() + " is " + toString(numB[0]);
+ if (numB.size() == 3) {
+ outsvg << " the lci is " + toString(numB[1]) + " and the hci is " + toString(numB[2]) + "</text>\n";
+ }else { outsvg << "</text>\n"; }
+
+ outsvg << "<text fill=\"black\" class=\"seri\" x=\"175\" y=\"620\">The number of species in group " + lookup[2]->getGroup() + " is " + toString(numC[0]);
+ if (numC.size() == 3) {
+ outsvg << " the lci is " + toString(numC[1]) + " and the hci is " + toString(numC[2]) + "</text>\n";
+ }else { outsvg << "</text>\n"; }
+
+ outsvg << "<text fill=\"black\" class=\"seri\" x=\"175\" y=\"640\">The total richness of all the groups is " + toString(numA[0] + numB[0] + numC[0] - sharedAB[0] - sharedAC[0] - sharedBC[0] + sharedABC) + "</text>\n";
+ outsvg << "<text fill=\"black\" class=\"seri\" x=\"175\" y=\"780\">The total shared richness is " + toString(sharedABC) + "</text>\n";
+
+ delete singleCalc;
+
+ }else { //sharedchao and sharedsobs are multigroup
+
+ vector<SharedRAbundVector*> subset;
+
+ //get estimates for numA
+ subset.push_back(lookup[0]);
+ vector<double> numA = vCalcs[i]->getValues(subset);
+
+ //get estimates for numB
subset.clear();
- subset.push_back(lookup[2]); subset.push_back(merge);
- sharedCwithAB = vCalcs[i]->getValues(subset);
- delete merge;
-
- sharedABC1 = sharedAB[0] + sharedAC[0] - sharedAwithBC[0];
- sharedABC2 = sharedAB[0] + sharedBC[0] - sharedBwithAC[0];
- sharedABC3 = sharedAC[0] + sharedBC[0] - sharedCwithAB[0];
-
- //if any of the possible m's are - throw them out
- if (sharedABC1 < 0.00001) { sharedABC1 = 0; }
- if (sharedABC2 < 0.00001) { sharedABC2 = 0; }
- if (sharedABC3 < 0.00001) { sharedABC3 = 0; }
-
- //sharedABC is the minimum of the 3 possibilities
- if ((sharedABC1 < sharedABC2) && (sharedABC1 < sharedABC3)) { sharedABC = sharedABC1; }
- else if ((sharedABC2 < sharedABC1) && (sharedABC2 < sharedABC3)) { sharedABC = sharedABC2; }
- else if ((sharedABC3 < sharedABC1) && (sharedABC3 < sharedABC2)) { sharedABC = sharedABC3; }
- }else{
- vector<double> data = vCalcs[i]->getValues(lookup);
- sharedABC = data[0];
- sharedAwithBC.push_back(sharedAB[0] + sharedAC[0] - sharedABC);
- sharedBwithAC.push_back(sharedAB[0] + sharedBC[0] - sharedABC);
- sharedCwithAB.push_back(sharedAC[0] + sharedBC[0] - sharedABC);
- }
-
- //image window
- outsvg << "<svg width=\"100%\" height=\"100%\" viewBox=\"0 0 800 800\" >\n";
- outsvg << "<g>\n";
+ subset.push_back(lookup[1]);
+ vector<double> numB = vCalcs[i]->getValues(subset);
+
+ //get estimates for numC
+ subset.clear();
+ subset.push_back(lookup[2]);
+ vector<double> numC = vCalcs[i]->getValues(subset);
- //draw circles
- outsvg << "<rect fill=\"white\" stroke=\"white\" x=\"0\" y=\"0\" width=\"800\" height=\"800\"/>";
- outsvg << "<text fill=\"black\" class=\"seri\" x=\"265\" y=\"30\">Venn Diagram at distance " + lookup[0]->getLabel() + "</text>\n";
- outsvg << "<circle fill=\"rgb(255,0,0)\" opacity=\".3\" stroke=\"black\" cx=\"230\" cy=\"200\" r=\"150\"/>";
- outsvg << "<circle fill=\"rgb(0,255,0)\" opacity=\".3\" stroke=\"black\" cx=\"455\" cy=\"200\" r=\"150\"/>";
- outsvg << "<circle fill=\"rgb(0,0,255)\" opacity=\".3\" stroke=\"black\" cx=\"343\" cy=\"400\" r=\"150\"/>";
+ subset.clear();
+ subset.push_back(lookup[0]); subset.push_back(lookup[1]);
+ vector<double> sharedab = vCalcs[i]->getValues(subset);
+
+ subset.clear();
+ subset.push_back(lookup[0]); subset.push_back(lookup[2]);
+ vector<double> sharedac = vCalcs[i]->getValues(subset);
+
+ subset.clear();
+ subset.push_back(lookup[1]); subset.push_back(lookup[2]);
+ vector<double> sharedbc = vCalcs[i]->getValues(subset);
+
+ subset.clear();
+ subset.push_back(lookup[0]); subset.push_back(lookup[1]); subset.push_back(lookup[2]);
+ vector<double> sharedabc = vCalcs[i]->getValues(subset);
+
+ //image window
+ outsvg << "<svg width=\"100%\" height=\"100%\" viewBox=\"0 0 800 800\" >\n";
+ outsvg << "<g>\n";
- //place labels within overlaps
- outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString(200 - ((int)toString(numA[0]-sharedAwithBC[0]).length() / 2)) + "\" y=\"170\">" + toString(numA[0]-sharedAwithBC[0]) + "</text>\n";
- outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString(200 - ((int)lookup[0]->getGroup().length() / 2)) + "\" y=\"150\">" + lookup[0]->getGroup() + "</text>\n";
- outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString(343 - ((int)toString(sharedAB[0] - sharedABC).length() / 2)) + "\" y=\"170\">" + toString(sharedAB[0] - sharedABC) + "</text>\n";
- outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString(485 - ((int)toString(numB[0]-sharedBwithAC[0]).length() / 2)) + "\" y=\"170\">" + toString(numB[0]-sharedBwithAC[0]) + "</text>\n";
- outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString(485 - ((int)lookup[1]->getGroup().length() / 2)) + "\" y=\"150\">" + lookup[1]->getGroup() + "</text>\n";
- outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString(268 - ((int)toString(sharedAC[0] - sharedABC).length() / 2)) + "\" y=\"305\">" + toString(sharedAC[0] - sharedABC) + "</text>\n";
- outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString(343 - ((int)toString(numC[0]-sharedCwithAB[0]).length() / 2)) + "\" y=\"430\">" + toString(numC[0]-sharedCwithAB[0]) + "</text>\n";
- outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString(343 - ((int)lookup[2]->getGroup().length() / 2)) + "\" y=\"410\">" + lookup[2]->getGroup() + "</text>\n";
- outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString(408 - ((int)toString(sharedBC[0] - sharedABC).length() / 2)) + "\" y=\"305\">" + toString(sharedBC[0] - sharedABC) + "</text>\n";
- outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString(343 - ((int)toString(sharedABC).length() / 2)) + "\" y=\"280\">" + toString(sharedABC) + "</text>\n";
-
- outsvg << "<text fill=\"black\" class=\"seri\" x=\"175\" y=\"660\">The number of sepecies shared between groups " + lookup[0]->getGroup() + " and " + lookup[1]->getGroup() + " is " + toString(sharedAB[0]) + "</text>\n";
- outsvg << "<text fill=\"black\" class=\"seri\" x=\"175\" y=\"680\">The number of sepecies shared between groups " + lookup[0]->getGroup() + " and " + lookup[2]->getGroup() + " is " + toString(sharedAC[0]) + "</text>\n";
- outsvg << "<text fill=\"black\" class=\"seri\" x=\"175\" y=\"700\">The number of sepecies shared between groups " + lookup[1]->getGroup() + " and " + lookup[2]->getGroup() + " is " + toString(sharedBC[0]) + "</text>\n";
- outsvg << "<text fill=\"black\" class=\"seri\" x=\"175\" y=\"720\">The number of sepecies shared between groups " + lookup[0]->getGroup() + " and combined groups " + lookup[1]->getGroup() + lookup[2]->getGroup() + " is " + toString(sharedAwithBC[0]) + "</text>\n";
- outsvg << "<text fill=\"black\" class=\"seri\" x=\"175\" y=\"740\">The number of sepecies shared between groups " + lookup[1]->getGroup() + " and combined groups " + lookup[0]->getGroup() + lookup[2]->getGroup() + " is " + toString(sharedBwithAC[0]) + "</text>\n";
- outsvg << "<text fill=\"black\" class=\"seri\" x=\"175\" y=\"760\">The number of sepecies shared between groups " + lookup[2]->getGroup() + " and combined groups " + lookup[0]->getGroup() + lookup[1]->getGroup() + " is " + toString(sharedCwithAB[0]) + "</text>\n";
- outsvg << "<text fill=\"black\" class=\"seri\" x=\"175\" y=\"580\">The number of species in group " + lookup[0]->getGroup() + " is " + toString(numA[0]);
- if (numA.size() == 3) {
- outsvg << " the lci is " + toString(numA[1]) + " and the hci is " + toString(numA[2]) + "</text>\n";
- }else { outsvg << "</text>\n"; }
-
- outsvg << "<text fill=\"black\" class=\"seri\" x=\"175\" y=\"600\">The number of species in group " + lookup[1]->getGroup() + " is " + toString(numB[0]);
- if (numB.size() == 3) {
- outsvg << " the lci is " + toString(numB[1]) + " and the hci is " + toString(numB[2]) + "</text>\n";
- }else { outsvg << "</text>\n"; }
-
- outsvg << "<text fill=\"black\" class=\"seri\" x=\"175\" y=\"620\">The number of species in group " + lookup[2]->getGroup() + " is " + toString(numC[0]);
- if (numC.size() == 3) {
- outsvg << " the lci is " + toString(numC[1]) + " and the hci is " + toString(numC[2]) + "</text>\n";
- }else { outsvg << "</text>\n"; }
+ //draw circles
+ outsvg << "<rect fill=\"white\" stroke=\"white\" x=\"0\" y=\"0\" width=\"800\" height=\"800\"/>";
+ outsvg << "<text fill=\"black\" class=\"seri\" x=\"265\" y=\"30\">Venn Diagram at distance " + lookup[0]->getLabel() + "</text>\n";
+ outsvg << "<circle fill=\"rgb(255,0,0)\" opacity=\".3\" stroke=\"black\" cx=\"230\" cy=\"200\" r=\"150\"/>";
+ outsvg << "<circle fill=\"rgb(0,255,0)\" opacity=\".3\" stroke=\"black\" cx=\"455\" cy=\"200\" r=\"150\"/>";
+ outsvg << "<circle fill=\"rgb(0,0,255)\" opacity=\".3\" stroke=\"black\" cx=\"343\" cy=\"400\" r=\"150\"/>";
- outsvg << "<text fill=\"black\" class=\"seri\" x=\"175\" y=\"640\">The total richness of all the groups is " + toString(numA[0] + numB[0] + numC[0] - sharedAB[0] - sharedAC[0] - sharedBC[0] + sharedABC) + "</text>\n";
- outsvg << "<text fill=\"black\" class=\"seri\" x=\"175\" y=\"780\">The total shared richness is " + toString(sharedABC) + "</text>\n";
+ //place labels within overlaps
+ outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString(200 - ((int)toString(numA[0]-sharedab[0]-sharedac[0]+sharedabc[0]).length() / 2)) + "\" y=\"170\">" + toString(numA[0]-sharedab[0]-sharedac[0]+sharedabc[0]) + "</text>\n";
+ outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString(200 - ((int)lookup[0]->getGroup().length() / 2)) + "\" y=\"150\">" + lookup[0]->getGroup() + "</text>\n";
+ outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString(343 - ((int)toString(sharedab[0] - sharedabc[0]).length() / 2)) + "\" y=\"170\">" + toString(sharedab[0] - sharedabc[0]) + "</text>\n";
+ outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString(485 - ((int)toString(numB[0]-sharedab[0]-sharedbc[0]+sharedabc[0]).length() / 2)) + "\" y=\"170\">" + toString(numB[0]-sharedab[0]-sharedbc[0]+sharedabc[0]) + "</text>\n";
+ outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString(485 - ((int)lookup[1]->getGroup().length() / 2)) + "\" y=\"150\">" + lookup[1]->getGroup() + "</text>\n";
+ outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString(268 - ((int)toString(sharedac[0] - sharedabc[0]).length() / 2)) + "\" y=\"305\">" + toString(sharedac[0] - sharedabc[0]) + "</text>\n";
+ outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString(343 - ((int)toString(numC[0]-sharedac[0]-sharedbc[0]+sharedabc[0]).length() / 2)) + "\" y=\"430\">" + toString(numC[0]-sharedac[0]-sharedbc[0]+sharedabc[0]) + "</text>\n";
+ outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString(343 - ((int)lookup[2]->getGroup().length() / 2)) + "\" y=\"410\">" + lookup[2]->getGroup() + "</text>\n";
+ outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString(408 - ((int)toString(sharedbc[0] - sharedabc[0]).length() / 2)) + "\" y=\"305\">" + toString(sharedbc[0] - sharedabc[0]) + "</text>\n";
+ outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString(343 - ((int)toString(sharedabc[0]).length() / 2)) + "\" y=\"280\">" + toString(sharedabc[0]) + "</text>\n";
+ outsvg << "<text fill=\"black\" class=\"seri\" x=\"175\" y=\"660\">The number of sepecies shared between groups " + lookup[0]->getGroup() + " and " + lookup[1]->getGroup() + " is " + toString(sharedab[0]) + "</text>\n";
+ outsvg << "<text fill=\"black\" class=\"seri\" x=\"175\" y=\"680\">The number of sepecies shared between groups " + lookup[0]->getGroup() + " and " + lookup[2]->getGroup() + " is " + toString(sharedac[0]) + "</text>\n";
+ outsvg << "<text fill=\"black\" class=\"seri\" x=\"175\" y=\"700\">The number of sepecies shared between groups " + lookup[1]->getGroup() + " and " + lookup[2]->getGroup() + " is " + toString(sharedbc[0]) + "</text>\n";
+ outsvg << "<text fill=\"black\" class=\"seri\" x=\"175\" y=\"580\">The number of species in group " + lookup[0]->getGroup() + " is " + toString(numA[0]);
+ if (numA.size() == 3) {
+ outsvg << " the lci is " + toString(numA[1]) + " and the hci is " + toString(numA[2]) + "</text>\n";
+ }else { outsvg << "</text>\n"; }
+
+ outsvg << "<text fill=\"black\" class=\"seri\" x=\"175\" y=\"600\">The number of species in group " + lookup[1]->getGroup() + " is " + toString(numB[0]);
+ if (numB.size() == 3) {
+ outsvg << " the lci is " + toString(numB[1]) + " and the hci is " + toString(numB[2]) + "</text>\n";
+ }else { outsvg << "</text>\n"; }
+
+ outsvg << "<text fill=\"black\" class=\"seri\" x=\"175\" y=\"620\">The number of species in group " + lookup[2]->getGroup() + " is " + toString(numC[0]);
+ if (numC.size() == 3) {
+ outsvg << " the lci is " + toString(numC[1]) + " and the hci is " + toString(numC[2]) + "</text>\n";
+ }else { outsvg << "</text>\n"; }
+
+ outsvg << "<text fill=\"black\" class=\"seri\" x=\"175\" y=\"640\">The total richness of all the groups is " + toString(numA[0] + numB[0] + numC[0] - sharedab[0] - sharedac[0] - sharedbc[0] + sharedabc[0]) + "</text>\n";
+ outsvg << "<text fill=\"black\" class=\"seri\" x=\"175\" y=\"720\">The total shared richness is " + toString(sharedabc[0]) + "</text>\n";
+
+
+ }
+
+
//close file
outsvg << "</g>\n</svg>\n";
outsvg.close();
- delete singleCalc;
+
}
//get estimate for all four
data = vCalcs[i]->getValues(lookup);
sharedABCD = data[0];
- //cout << "num abcd = " << sharedABCD << endl << endl;
+ //cout << "num abcd = " << sharedABCD << endl << endl;
+
+
+
+ //image window
+ outsvg << "<svg width=\"100%\" height=\"100%\" viewBox=\"0 0 700 800\" >\n";
+ outsvg << "<g>\n";
+ outsvg << "<rect fill=\"white\" stroke=\"white\" x=\"0\" y=\"0\" width=\"700\" height=\"800\"/>";
+ outsvg << "<text fill=\"black\" class=\"seri\" x=\"265\" y=\"30\">Venn Diagram at distance " + lookup[0]->getLabel() + "</text>\n";
+
+ outsvg << "<text fill=\"black\" class=\"seri\" x=\"175\" y=\"490\">The number of species in group " + lookup[0]->getGroup() + " is " + toString(numA) + "</text>\n";
+ outsvg << "<text fill=\"black\" class=\"seri\" x=\"175\" y=\"510\">The number of species in group " + lookup[1]->getGroup() + " is " + toString(numB) + "</text>\n";
+ outsvg << "<text fill=\"black\" class=\"seri\" x=\"175\" y=\"530\">The number of species in group " + lookup[2]->getGroup() + " is " + toString(numC) + "</text>\n";
+ outsvg << "<text fill=\"black\" class=\"seri\" x=\"175\" y=\"550\">The number of species in group " + lookup[3]->getGroup() + " is " + toString(numD) + "</text>\n";
+
+ outsvg << "<text fill=\"black\" class=\"seri\" x=\"175\" y=\"570\">The number of sepecies shared between groups " + lookup[0]->getGroup() + " and " + lookup[1]->getGroup() + " is " + toString(sharedAB) + "</text>\n";
+ outsvg << "<text fill=\"black\" class=\"seri\" x=\"175\" y=\"590\">The number of sepecies shared between groups " + lookup[0]->getGroup() + " and " + lookup[2]->getGroup() + " is " + toString(sharedAC) + "</text>\n";
+ outsvg << "<text fill=\"black\" class=\"seri\" x=\"175\" y=\"610\">The number of sepecies shared between groups " + lookup[0]->getGroup() + " and " + lookup[3]->getGroup() + " is " + toString(sharedAD) + "</text>\n";
+ outsvg << "<text fill=\"black\" class=\"seri\" x=\"175\" y=\"630\">The number of sepecies shared between groups " + lookup[1]->getGroup() + " and " + lookup[2]->getGroup() + " is " + toString(sharedBC) + "</text>\n";
+ outsvg << "<text fill=\"black\" class=\"seri\" x=\"175\" y=\"650\">The number of sepecies shared between groups " + lookup[1]->getGroup() + " and " + lookup[3]->getGroup() + " is " + toString(sharedBD) + "</text>\n";
+ outsvg << "<text fill=\"black\" class=\"seri\" x=\"175\" y=\"670\">The number of sepecies shared between groups " + lookup[2]->getGroup() + " and " + lookup[3]->getGroup() + " is " + toString(sharedCD) + "</text>\n";
+
+ outsvg << "<text fill=\"black\" class=\"seri\" x=\"175\" y=\"690\">The number of sepecies shared between groups " + lookup[0]->getGroup() + ", " + lookup[1]->getGroup() + " and " + lookup[2]->getGroup() + " is " + toString(sharedABC) + "</text>\n";
+ outsvg << "<text fill=\"black\" class=\"seri\" x=\"175\" y=\"710\">The number of sepecies shared between groups " + lookup[0]->getGroup() + ", " + lookup[1]->getGroup() + " and " + lookup[3]->getGroup() + " is " + toString(sharedABD) + "</text>\n";
+ outsvg << "<text fill=\"black\" class=\"seri\" x=\"175\" y=\"730\">The number of sepecies shared between groups " + lookup[0]->getGroup() + ", " + lookup[2]->getGroup() + " and " + lookup[3]->getGroup() + " is " + toString(sharedACD) + "</text>\n";
+ outsvg << "<text fill=\"black\" class=\"seri\" x=\"175\" y=\"750\">The number of sepecies shared between groups " + lookup[1]->getGroup() + ", " + lookup[2]->getGroup() + " and " + lookup[3]->getGroup() + " is " + toString(sharedBCD) + "</text>\n";
+
//make adjustments
sharedABC = sharedABC - sharedABCD;
//cout << "num abc = " << sharedABC << endl;
sharedBCD = sharedBCD - sharedABCD;
//cout << "num bcd = " << sharedBCD << endl;
- sharedAB = sharedAB - sharedABC - sharedABCD - sharedABD; // cout << "num ab = " << sharedAB << endl;
- sharedAC = sharedAC - sharedABC - sharedABCD - sharedACD; // cout << "num ac = " << sharedAC << endl;
- sharedAD = sharedAD - sharedABD - sharedABCD - sharedACD; // cout << "num ad = " << sharedAD << endl;
-
- sharedBC = sharedBC - sharedABC - sharedABCD - sharedBCD; // cout << "num bc = " << sharedBC << endl;
+ sharedAB = sharedAB - sharedABC - sharedABCD - sharedABD; //cout << "num ab = " << sharedAB << endl;
+ sharedAC = sharedAC - sharedABC - sharedABCD - sharedACD; //cout << "num ac = " << sharedAC << endl;
+ sharedAD = sharedAD - sharedABD - sharedABCD - sharedACD; //cout << "num ad = " << sharedAD << endl;
+ sharedBC = sharedBC - sharedABC - sharedABCD - sharedBCD; //cout << "num bc = " << sharedBC << endl;
sharedBD = sharedBD - sharedABD - sharedABCD - sharedBCD; // cout << "num bd = " << sharedBD << endl;
- sharedCD = sharedCD - sharedACD - sharedABCD - sharedBCD; // cout << "num cd = " << sharedCD << endl;
+ sharedCD = sharedCD - sharedACD - sharedABCD - sharedBCD; //cout << "num cd = " << sharedCD << endl;
numA = numA - sharedAB - sharedAC - sharedAD - sharedABCD - sharedABC - sharedACD - sharedABD;
//cout << "num a = " << numA << endl;
numD = numD - sharedAD - sharedBD - sharedCD - sharedABCD - sharedBCD - sharedACD - sharedABD;
//cout << "num d = " << numD << endl;
- //image window
- outsvg << "<svg width=\"100%\" height=\"100%\" viewBox=\"0 0 700 700\" >\n";
- outsvg << "<g>\n";
-
//draw circles
- outsvg << "<rect fill=\"white\" stroke=\"white\" x=\"0\" y=\"0\" width=\"700\" height=\"700\"/>";
- outsvg << "<text fill=\"black\" class=\"seri\" x=\"265\" y=\"30\">Venn Diagram at distance " + lookup[0]->getLabel() + "</text>\n";
outsvg << "<ellipse fill=\"red\" stroke=\"black\" opacity=\".35\" transform=\"rotate(-45 355 215) \" cx=\"355\" cy=\"215\" rx=\"200\" ry=\"115\"/>\n ";
outsvg << "<ellipse fill=\"green\" stroke=\"black\" opacity=\".35\" transform=\"rotate(+45 355 215) \" cx=\"355\" cy=\"215\" rx=\"200\" ry=\"115\"/>\n ";
outsvg << "<ellipse fill=\"blue\" stroke=\"black\" opacity=\".35\" transform=\"rotate(-40 440 315) \" cx=\"440\" cy=\"315\" rx=\"200\" ry=\"115\"/>\n ";
//A = red, B = green, C = blue, D = yellow
//place labels within overlaps
- outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString(460 - ((int)toString(numA).length() / 2)) + "\" y=\"110\">" + toString(numA) + "</text>\n";
+ outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString(460 - ((int)toString(numA).length() / 2)) + "\" y=\"110\">" + toString(numA) + "</text>\n";
outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString(460 - ((int)lookup[0]->getGroup().length() / 2)) + "\" y=\"90\">" + lookup[0]->getGroup() + "</text>\n";
outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString(350 - ((int)toString(sharedAB).length() / 2)) + "\" y=\"160\">" + toString(sharedAB) + "</text>\n";
- outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString(250 - ((int)toString(numB).length() / 2)) + "\" y=\"110\">" + toString(numB) + "</text>\n";
+ outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString(250 - ((int)toString(numB).length() / 2)) + "\" y=\"110\">" + toString(numB) + "</text>\n";
outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString(250 - ((int)lookup[1]->getGroup().length() / 2)) + "\" y=\"90\">" + lookup[1]->getGroup() + "</text>\n";
outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString(490 - ((int)toString(sharedAC).length() / 2)) + "\" y=\"190\">" + toString(sharedAC) + "</text>\n";
outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString(550 - ((int)toString(numC).length() / 2)) + "\" y=\"230\">" + toString(numC) + "</text>\n";
outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString(550 - ((int)lookup[2]->getGroup().length() / 2)) + "\" y=\"210\">" + lookup[2]->getGroup() + "</text>\n";
- outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString(215 - ((int)toString(sharedBC).length() / 2)) + "\" y=\"190\">" + toString(sharedBC) + "</text>\n";
+ outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString(215 - ((int)toString(sharedBD).length() / 2)) + "\" y=\"190\">" + toString(sharedBD) + "</text>\n";
outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString(150 - ((int)toString(numD).length() / 2)) + "\" y=\"230\">" + toString(numD) + "</text>\n";
outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString(150 - ((int)lookup[3]->getGroup().length() / 2)) + "\" y=\"210\">" + lookup[3]->getGroup() + "</text>\n";
outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString(240 - ((int)toString(sharedAD).length() / 2)) + "\" y=\"325\">" + toString(sharedAD) + "</text>\n";
- outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString(470 - ((int)toString(sharedBD).length() / 2)) + "\" y=\"325\">" + toString(sharedBD) + "</text>\n";
+ outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString(470 - ((int)toString(sharedBC).length() / 2)) + "\" y=\"325\">" + toString(sharedBC) + "</text>\n";
outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString(350 - ((int)toString(sharedCD).length() / 2)) + "\" y=\"430\">" + toString(sharedCD) + "</text>\n";
outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString(275 - ((int)toString(sharedABD).length() / 2)) + "\" y=\"240\">" + toString(sharedABD) + "</text>\n";
outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString(400 - ((int)toString(sharedBCD).length() / 2)) + "\" y=\"360\">" + toString(sharedBCD) + "</text>\n";
outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString(305 - ((int)toString(sharedACD).length() / 2)) + "\" y=\"360\">" + toString(sharedACD) + "</text>\n";
outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString(440 - ((int)toString(sharedABC).length() / 2)) + "\" y=\"240\">" + toString(sharedABC) + "</text>\n";
outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString(350 - ((int)toString(sharedABCD).length() / 2)) + "\" y=\"320\">" + toString(sharedABCD) + "</text>\n";
- outsvg << "<text fill=\"black\" class=\"seri\" x=\"250\" y=\"490\">The total richness of all the groups is " + toString((float)(numA + numB + numC + numD + sharedAB + sharedAC + sharedAD + sharedBC + sharedBD + sharedCD + sharedABC + sharedABD + sharedACD + sharedBCD + sharedABCD)) + "</text>\n";
-
+
+
+
+ outsvg << "<text fill=\"black\" class=\"seri\" x=\"175\" y=\"770\">The total richness of all the groups is " + toString((float)(numA + numB + numC + numD + sharedAB + sharedAC + sharedAD + sharedBC + sharedBD + sharedCD + sharedABC + sharedABD + sharedACD + sharedBCD + sharedABCD)) + "</text>\n";
+
+
outsvg << "</g>\n</svg>\n";
outsvg.close();
delete singleCalc;