From 1244c4907c07baea86b0f0676d098a29d2e95a39 Mon Sep 17 00:00:00 2001
From: westcott <westcott>
Date: Wed, 9 Sep 2009 20:20:23 +0000
Subject: [PATCH] continued work on chimeras and fixed bug in trim.seqs and
 reverse.seqs that was due to the fact that we changed the sequence class so
 that all seqs now have an aligned version.  that change was made to fix
 another bug.

---
 Mothur.xcodeproj/project.pbxproj |  12 +-
 aligncommand.cpp                 |   5 +
 alignedsimilarity.cpp            | 543 -------------------------------
 alignedsimilarity.h              |  67 ----
 alignment.cpp                    |  15 +
 alignment.hpp                    |   2 +
 ccode.cpp                        |  11 -
 ccode.h                          |   1 -
 chimera.h                        |   7 +-
 chimeracheckrdp.cpp              | 295 +++++++++++++++++
 chimeracheckrdp.h                |  72 ++++
 chimeraseqscommand.cpp           |  28 +-
 chimeraseqscommand.h             |   2 +-
 database.cpp                     |   1 +
 kmer.cpp                         |  26 ++
 kmer.hpp                         |   2 +-
 kmerdb.hpp                       |   1 +
 nast.cpp                         |  16 +-
 needlemanoverlap.cpp             |   7 +-
 needlemanoverlap.hpp             |   1 +
 sequence.cpp                     |   1 +
 trimseqscommand.cpp              |   7 +-
 22 files changed, 465 insertions(+), 657 deletions(-)
 delete mode 100644 alignedsimilarity.cpp
 delete mode 100644 alignedsimilarity.h
 create mode 100644 chimeracheckrdp.cpp
 create mode 100644 chimeracheckrdp.h
diff --git a/Mothur.xcodeproj/project.pbxproj b/Mothur.xcodeproj/project.pbxproj
index 75ef0d5..6aa6b27 100644
--- a/Mothur.xcodeproj/project.pbxproj
+++ b/Mothur.xcodeproj/project.pbxproj
@@ -162,7 +162,7 @@
 		A70B53AA0F4CD7AD0064797E /* getgroupcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A70B53A40F4CD7AD0064797E /* getgroupcommand.cpp */; };
 		A70B53AB0F4CD7AD0064797E /* getlabelcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A70B53A60F4CD7AD0064797E /* getlabelcommand.cpp */; };
 		A70B53AC0F4CD7AD0064797E /* getlinecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A70B53A80F4CD7AD0064797E /* getlinecommand.cpp */; };
-		A75B887D104C16860083C454 /* alignedsimilarity.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A75B8879104C16860083C454 /* alignedsimilarity.cpp */; };
+		A7283FF81056CAE100D0CC69 /* chimeracheckrdp.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7283FF71056CAE100D0CC69 /* chimeracheckrdp.cpp */; };
 		A75B887E104C16860083C454 /* ccode.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A75B887B104C16860083C454 /* ccode.cpp */; };
 		EB1216880F619B83004A865F /* bergerparker.cpp in Sources */ = {isa = PBXBuildFile; fileRef = EB1216870F619B83004A865F /* bergerparker.cpp */; };
 		EB1216E50F61ACFB004A865F /* bstick.cpp in Sources */ = {isa = PBXBuildFile; fileRef = EB1216E40F61ACFB004A865F /* bstick.cpp */; };
@@ -515,8 +515,8 @@
 		A70B53A70F4CD7AD0064797E /* getlabelcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = getlabelcommand.h; sourceTree = SOURCE_ROOT; };
 		A70B53A80F4CD7AD0064797E /* getlinecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = getlinecommand.cpp; sourceTree = SOURCE_ROOT; };
 		A70B53A90F4CD7AD0064797E /* getlinecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = getlinecommand.h; sourceTree = SOURCE_ROOT; };
-		A75B8879104C16860083C454 /* alignedsimilarity.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = alignedsimilarity.cpp; sourceTree = SOURCE_ROOT; };
-		A75B887A104C16860083C454 /* alignedsimilarity.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = alignedsimilarity.h; sourceTree = SOURCE_ROOT; };
+		A7283FF61056CAE100D0CC69 /* chimeracheckrdp.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = chimeracheckrdp.h; sourceTree = "<group>"; };
+		A7283FF71056CAE100D0CC69 /* chimeracheckrdp.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = chimeracheckrdp.cpp; sourceTree = "<group>"; };
 		A75B887B104C16860083C454 /* ccode.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = ccode.cpp; sourceTree = SOURCE_ROOT; };
 		A75B887C104C16860083C454 /* ccode.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = ccode.h; sourceTree = SOURCE_ROOT; };
 		C6859E8B029090EE04C91782 /* Mothur.1 */ = {isa = PBXFileReference; lastKnownFileType = text.man; path = Mothur.1; sourceTree = "<group>"; };
@@ -649,12 +649,12 @@
 			children = (
 				374F2FD51006320200E97C66 /* chimera.h */,
 				372095C1103196D70004D347 /* chimera.cpp */,
-				A75B887A104C16860083C454 /* alignedsimilarity.h */,
-				A75B8879104C16860083C454 /* alignedsimilarity.cpp */,
 				374F2FE9100634B600E97C66 /* bellerophon.h */,
 				374F2FEA100634B600E97C66 /* bellerophon.cpp */,
 				A75B887C104C16860083C454 /* ccode.h */,
 				A75B887B104C16860083C454 /* ccode.cpp */,
+				A7283FF61056CAE100D0CC69 /* chimeracheckrdp.h */,
+				A7283FF71056CAE100D0CC69 /* chimeracheckrdp.cpp */,
 				372A55531017922B00C5194B /* decalc.h */,
 				372A55541017922B00C5194B /* decalc.cpp */,
 				374F301110063B6F00E97C66 /* pintail.h */,
@@ -1177,8 +1177,8 @@
 				374F301310063B6F00E97C66 /* pintail.cpp in Sources */,
 				372A55551017922B00C5194B /* decalc.cpp in Sources */,
 				372095C2103196D70004D347 /* chimera.cpp in Sources */,
-				A75B887D104C16860083C454 /* alignedsimilarity.cpp in Sources */,
 				A75B887E104C16860083C454 /* ccode.cpp in Sources */,
+				A7283FF81056CAE100D0CC69 /* chimeracheckrdp.cpp in Sources */,
 			);
 			runOnlyForDeploymentPostprocessing = 0;
 		};
diff --git a/aligncommand.cpp b/aligncommand.cpp
index 958f29b..da807bd 100644
--- a/aligncommand.cpp
+++ b/aligncommand.cpp
@@ -272,6 +272,11 @@ int AlignCommand::driver(linePair* line, string alignFName, string reportFName){
 		for(int i=0;i<line->numSeqs;i++){
 			
 			Sequence* candidateSeq = new Sequence(inFASTA);
+
+			if (candidateSeq->getUnaligned().length() > alignment->getnRows()) {
+				alignment->resize(candidateSeq->getUnaligned().length()+1);
+			}
+			
 			report.setCandidate(candidateSeq);
 	
 			Sequence temp = templateDB->findClosestSequence(candidateSeq);
diff --git a/alignedsimilarity.cpp b/alignedsimilarity.cpp
deleted file mode 100644
index 0ed9ce3..0000000
--- a/alignedsimilarity.cpp
+++ /dev/null
@@ -1,543 +0,0 @@
-/*
- *  alignedsimilarity.cpp
- *  Mothur
- *
- *  Created by westcott on 8/18/09.
- *  Copyright 2009 Schloss Lab. All rights reserved.
- *
- */
-
-#include "alignedsimilarity.h"
-#include "ignoregaps.h"
-
-
-//***************************************************************************************************************
-AlignSim::AlignSim(string filename, string temp) {  fastafile = filename;  templateFile = temp;  }
-//***************************************************************************************************************
-
-AlignSim::~AlignSim() {
-	try {
-		for (int i = 0; i < querySeqs.size(); i++)		{  delete querySeqs[i];		}
-		for (int i = 0; i < templateSeqs.size(); i++)	{  delete templateSeqs[i];	}
-		delete distCalc;
-	}
-	catch(exception& e) {
-		errorOut(e, "AlignSim", "~AlignSim");
-		exit(1);
-	}
-}	
-//***************************************************************************************************************
-void AlignSim::print(ostream& out) {
-	try {
-		
-		mothurOutEndLine();
-		
-		for (int i = 0; i < querySeqs.size(); i++) {
-			
-			int j = 0;  float largest = -10;
-			//find largest sim value
-			for (int k = 0; k < IS[i].size(); k++) {
-				//is this score larger
-				if (IS[i][k].score > largest) {
-					j = k;
-					largest = IS[i][k].score;
-				}
-			}
-			
-			//find parental similarity
-			distCalc->calcDist(*(IS[i][j].leftParent), *(IS[i][j].rightParent));
-			float dist = distCalc->getDist();
-			
-			//convert to similarity
-			dist = (1 - dist) * 100;
-
-			//warn about parental similarity - if its above 82% may not detect a chimera 
-			if (dist >= 82) { mothurOut("When the chimeras parental similarity is above 82%, detection rates drop signifigantly.");  mothurOutEndLine(); }
-			
-			int index = ceil(dist);
-			
-			if (index == 0) { index=1;  }
-	
-			//is your DE value higher than the 95%
-			string chimera;
-			if (IS[i][j].score > quantile[index-1][4])		{	chimera = "Yes";	}
-			else										{	chimera = "No";		}			
-			
-			out << querySeqs[i]->getName() <<  "\tparental similarity: " << dist << "\tIS: " << IS[i][j].score << "\tbreakpoint: " << IS[i][j].midpoint << "\tchimera flag: " << chimera << endl;
-			
-			if (chimera == "Yes") {
-				mothurOut(querySeqs[i]->getName() + "\tparental similarity: " + toString(dist) + "\tIS: " + toString(IS[i][j].score) + "\tbreakpoint: " + toString(IS[i][j].midpoint) + "\tchimera flag: " + chimera); mothurOutEndLine();
-			}
-			out << "Improvement Score\t";
-			
-			for (int r = 0; r < IS[i].size(); r++) {  out << IS[i][r].score << '\t';  }
-			out << endl;
-		}
-	}
-	catch(exception& e) {
-		errorOut(e, "AlignSim", "print");
-		exit(1);
-	}
-}
-
-//***************************************************************************************************************
-void AlignSim::getChimeras() {
-	try {
-		
-		//read in query sequences and subject sequences
-		mothurOut("Reading sequences and template file... "); cout.flush();
-		querySeqs = readSeqs(fastafile);
-		templateSeqs = readSeqs(templateFile);
-		mothurOut("Done."); mothurOutEndLine();
-		
-		int numSeqs = querySeqs.size();
-		
-		IS.resize(numSeqs);
-		
-		//break up file if needed
-		int linesPerProcess = numSeqs / processors ;
-		
-		#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
-			//find breakup of sequences for all times we will Parallelize
-			if (processors == 1) {   lines.push_back(new linePair(0, numSeqs));  }
-			else {
-				//fill line pairs
-				for (int i = 0; i < (processors-1); i++) {			
-					lines.push_back(new linePair((i*linesPerProcess), ((i*linesPerProcess) + linesPerProcess)));
-				}
-				//this is necessary to get remainder of processors / numSeqs so you don't miss any lines at the end
-				int i = processors - 1;
-				lines.push_back(new linePair((i*linesPerProcess), numSeqs));
-			}
-			
-			//find breakup of templatefile for quantiles
-			if (processors == 1) {   templateLines.push_back(new linePair(0, templateSeqs.size()));  }
-			else { 
-				for (int i = 0; i < processors; i++) {
-					templateLines.push_back(new linePair());
-					templateLines[i]->start = int (sqrt(float(i)/float(processors)) * templateSeqs.size());
-					templateLines[i]->end = int (sqrt(float(i+1)/float(processors)) * templateSeqs.size());
-				}
-			}
-		#else
-			lines.push_back(new linePair(0, numSeqs));
-			templateLines.push_back(new linePair(0, templateSeqs.size()));
-		#endif
-	
-	
-		distCalc = new ignoreGaps();
-		
-		//find window breaks
-		windowBreak = findWindows();
-//for (int i = 0; i < windowBreak.size(); i++) { cout << windowBreak[i] << '\t';  }
-//cout << endl;
-
-		mothurOut("Finding the IS values for your sequences..."); cout.flush();
-		if (processors == 1) { 
-			IS = findIS(lines[0]->start, lines[0]->end, querySeqs);
-		}else {		IS = createProcessesIS(querySeqs, lines);		}
-		mothurOut("Done."); mothurOutEndLine();
-		
-		//quantiles are used to determine whether the de values found indicate a chimera
-		if (quanfile != "") {  quantile = readQuantiles();  }
-		else {
-			
-			mothurOut("Calculating quantiles for your template.  This can take a while...  I will output the quantiles to a .alignedsimilarity.quan file that you can input them using the quantiles parameter next time you run this command.  Providing the .quan file will dramatically improve speed.    "); cout.flush();
-			//get IS quantiles as a reference to these IS scores
-			//you are assuming the template is free of chimeras and therefore will give you a baseline as to the scores you would like to see
-			if (processors == 1) { 
-				templateIS = findIS(templateLines[0]->start, templateLines[0]->end, templateSeqs);
-			}else {		templateIS = createProcessesIS(templateSeqs, templateLines);		}
-//cout << "here" << endl;			
-			ofstream out4;
-			string o;
-			
-			o = getRootName(templateFile) + "alignedsimilarity.quan";
-			
-			openOutputFile(o, out4);
-			
-			//adjust quantiles
-			//construct table
-			quantile.resize(100);
-		
-			for (int i = 0; i < templateIS.size(); i++) {
-				
-				if (templateIS[i].size() != 0) {
-					int j = 0; float score = -1000;
-					//find highest IS score
-	//cout << templateIS[i].size() << endl;
-					for (int k = 0; k < templateIS[i].size(); k++) {
-						if (templateIS[i][k].score > score) {
-							score = templateIS[i][k].score;
-							j = k;
-						}
-					}
-	//cout << j << endl;		
-					//find similarity of parents
-					distCalc->calcDist(*(templateIS[i][j].leftParent), *(templateIS[i][j].rightParent));
-					float dist = distCalc->getDist();
-			
-					//convert to similarity
-					dist = (1 - dist) * 100;
-	//	cout << dist << endl;	
-					int index = ceil(dist);
-	//	cout << "index = " << index << endl;	
-					if (index == 0) {	index = 1;	}
-					quantile[index-1].push_back(templateIS[i][j].score);
-				}
-			}
-		
-		
-			for (int i = 0; i < quantile.size(); i++) {
-				vector<float> temp;
-				
-				if (quantile[i].size() == 0) {
-					//in case this is not a distance found in your template files
-					for (int g = 0; g < 6; g++) {
-						temp.push_back(0.0);
-					}
-				}else{
-					
-					sort(quantile[i].begin(), quantile[i].end());
-					
-					//save 10%
-					temp.push_back(quantile[i][int(quantile[i].size() * 0.10)]);
-					//save 25%
-					temp.push_back(quantile[i][int(quantile[i].size() * 0.25)]);
-					//save 50%
-					temp.push_back(quantile[i][int(quantile[i].size() * 0.5)]);
-					//save 75%
-					temp.push_back(quantile[i][int(quantile[i].size() * 0.75)]);
-					//save 95%
-					temp.push_back(quantile[i][int(quantile[i].size() * 0.95)]);
-					//save 99%
-					temp.push_back(quantile[i][int(quantile[i].size() * 0.99)]);
-					
-				}
-				
-				//output quan value
-				out4 << i+1 << '\t';				
-				for (int u = 0; u < temp.size(); u++) {   out4 << temp[u] << '\t'; }
-				out4 << endl;
-				
-				quantile[i] = temp;
-				
-			}
-			
-			out4.close();
-			
-			mothurOut("Done."); mothurOutEndLine();
-
-		}
-		
-		//free memory
-		for (int i = 0; i < lines.size(); i++)					{	delete lines[i];				}
-		for (int i = 0; i < templateLines.size(); i++)			{	delete templateLines[i];		}
-			
-	}
-	catch(exception& e) {
-		errorOut(e, "AlignSim", "getChimeras");
-		exit(1);
-	}
-}
-//***************************************************************************************************************
-vector<int> AlignSim::findWindows() {
-	try {
-		
-		vector<int> win; 
-		
-		if (increment > querySeqs[0]->getAligned().length()) {  mothurOut("You have selected an increment larger than the length of your sequences.  I will use the default of 25.");  increment = 25; }
-		
-		for (int m = increment;  m < (querySeqs[0]->getAligned().length() - increment); m+=increment) {  win.push_back(m);  }
-
-		return win;
-	
-	}
-	catch(exception& e) {
-		errorOut(e, "AlignSim", "findWindows");
-		exit(1);
-	}
-}
-
-//***************************************************************************************************************
-vector< vector<sim>  > AlignSim::findIS(int start, int end, vector<Sequence*> seqs) {
-	try {
-		
-		vector< vector<sim> >  isValues;
-		isValues.resize(seqs.size());
-		
-		//for each sequence
-		for(int i = start; i < end; i++){
-			
-			vector<sim> temp;  temp.resize(windowBreak.size());
-			
-			mothurOut("Finding IS value for sequence " + toString(i)); mothurOutEndLine();
-			
-			//for each window
-			for (int m = 0; m < windowBreak.size(); m++) {
-				
-				vector<Sequence*> closest;  //left, right, overall
-				vector<int> similarity; //left+right and overall
-				
-				//find closest left, closest right and closest overall
-				closest = findClosestSides(seqs[i], windowBreak[m], similarity, i);
-				
-				int totalNumBases = seqs[i]->getUnaligned().length();
-				
-				//IS = left+right-overall
-				float is = ((similarity[0]+similarity[1]) / (float) totalNumBases) - (similarity[2] / (float) totalNumBases);
-				
-				///save IS, leftparent, rightparent, breakpoint
-				temp[m].leftParent = closest[0];
-				temp[m].rightParent = closest[1];
-				temp[m].score = is;
-				temp[m].midpoint = windowBreak[m];
-//cout << is << '\t';
-			}
-//cout << endl;			
-			isValues[i] = temp;
-		
-		}
-		
-		return isValues;
-	
-	}
-	catch(exception& e) {
-		errorOut(e, "AlignSim", "findIS");
-		exit(1);
-	}
-}
-//***************************************************************************************************************
-vector<Sequence*> AlignSim::findClosestSides(Sequence* seq, int breakpoint, vector<int>& sim, int i) {
-	try{
-	
-		vector<Sequence*> closest;
-		
-		Sequence query, queryLeft, queryRight;
-		string frag = seq->getAligned();
-		string fragLeft = frag.substr(0, breakpoint);
-		string fragRight = frag.substr(breakpoint, frag.length());
-		
-		//get pieces
-		query = *(seq);  
-		queryLeft = *(seq);  
-		queryRight = *(seq);
-		queryLeft.setAligned(fragLeft);
-		queryRight.setAligned(fragRight);
-		
-		//initialize
-		Sequence* overall = templateSeqs[0];
-		Sequence* left = templateSeqs[0];
-		Sequence* right = templateSeqs[0];
-		
-		float smallestOverall, smallestLeft, smallestRight;
-		smallestOverall = 1000;  smallestLeft = 1000;  smallestRight = 1000;
-		
-		//go through the templateSeqs and search for the closest
-		for(int j = 0; j < templateSeqs.size(); j++){
-			
-			//so you don't pick yourself
-			if (j != i) {
-				Sequence temp, tempLeft, tempRight;
-				string fragTemp = templateSeqs[j]->getAligned();
-				string fragTempLeft = fragTemp.substr(0, breakpoint);
-				string fragTempRight = fragTemp.substr(breakpoint, fragTemp.length());
-				
-				//get pieces
-				temp = *(templateSeqs[j]); 
-				tempLeft = *(templateSeqs[j]); 
-				tempRight = *(templateSeqs[j]);
-				tempLeft.setAligned(fragTempLeft);
-				tempRight.setAligned(fragTempRight);
-				
-				//find overall dist
-				distCalc->calcDist(query, temp);
-				float dist = distCalc->getDist();	
-				
-				if (dist < smallestOverall) { 
-					overall = templateSeqs[j];
-					smallestOverall = dist;
-				}
-				
-				//find left dist
-				distCalc->calcDist(queryLeft, tempLeft);
-				dist = distCalc->getDist();
-				
-				if (dist < smallestLeft) { 
-					left = templateSeqs[j];
-					smallestLeft = dist;
-				}
-				
-				//find left dist
-				distCalc->calcDist(queryRight, tempRight);
-				dist = distCalc->getDist();
-				
-				if (dist < smallestRight) { 
-					right = templateSeqs[j];
-					smallestRight = dist;
-				}
-			}
-
-		}
-			
-		closest.push_back(left);
-		closest.push_back(right);
-		closest.push_back(overall);
-		
-		//fill sim with number of matched bases
-		Sequence tempL = *(left);
-		string tempLFrag = tempL.getAligned();
-		tempL.setAligned(tempLFrag.substr(0, breakpoint));
-		
-		int bothMatches = findNumMatchedBases(queryLeft, tempL);
-		sim.push_back(bothMatches);
-
-		Sequence tempR = *(right);
-		string tempRFrag = tempR.getAligned();
-		tempR.setAligned(tempRFrag.substr(breakpoint, tempRFrag.length()));
-		
-		bothMatches = findNumMatchedBases(queryRight, tempR);
-		sim.push_back(bothMatches);
-
-		bothMatches = findNumMatchedBases(query, *(overall));
-		sim.push_back(bothMatches);
-			
-		return closest;
-
-
-	}
-	catch(exception& e) {
-		errorOut(e, "AlignSim", "findClosestSides");
-		exit(1);
-	}
-}
-//***************************************************************************************************************
-
-int AlignSim::findNumMatchedBases(Sequence seq, Sequence temp) {
-	try{
-	
-		int num = 0;
-		
-		string query = seq.getAligned();
-		string subject = temp.getAligned();
-//cout << seq.getName() << endl << endl;
-//cout << temp.getName()  << endl << endl;
-		
-		for (int i = 0; i < query.length(); i++) {
-			//count matches if query[i] is a base and subject is same bases or gap
-			if (isalpha(query[i])) {
-				if(!isalpha(subject[i])) {  num++;  } 
-				else if (query[i] == subject[i])  { num++;  }
-				else {  }
-			}
-		}
-//cout << "num = " << num << endl;		
-		return num;
-	}
-	catch(exception& e) {
-		errorOut(e, "AlignSim", "findNumMatchedBases");
-		exit(1);
-	}
-}
-
-/**************************************************************************************************/
-vector< vector<sim> > AlignSim::createProcessesIS(vector<Sequence*> seqs, vector<linePair*> linesToProcess) {
-	try {
-	vector< vector<sim> > localIs; localIs.resize(seqs.size());
-	
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
-		int process = 0;
-		vector<int> processIDS;
-		
-		//loop through and create all the processes you want
-		while (process != processors) {
-			int pid = fork();
-			
-			if (pid > 0) {
-				processIDS.push_back(pid);  
-				process++;
-			}else if (pid == 0){
-				
-				mothurOut("Finding IS values for sequences " + toString(linesToProcess[process]->start) + " to " + toString(linesToProcess[process]->end)); mothurOutEndLine();
-				localIs = findIS(linesToProcess[process]->start, linesToProcess[process]->end, seqs);
-				mothurOut("Done finding IS values for sequences " +  toString(linesToProcess[process]->start) + " to " + toString(linesToProcess[process]->end)); mothurOutEndLine();
-				
-				//write out data to file so parent can read it
-				ofstream out;
-				string s = toString(getpid()) + ".temp";
-				openOutputFile(s, out);
-				
-				//output pairs
-				for (int i = linesToProcess[process]->start; i < linesToProcess[process]->end; i++) {
-					 out << localIs[i].size() << endl;
-					 for (int j = 0; j < localIs[i].size(); j++) {
-						localIs[i][j].leftParent->printSequence(out);
-						localIs[i][j].rightParent->printSequence(out);
-						out << ">" << '\t' << localIs[i][j].score << '\t' << localIs[i][j].midpoint << endl;
-					 }
-				}
-				out.close();
-				
-				exit(0);
-			}else { mothurOut("unable to spawn the necessary processes."); mothurOutEndLine(); exit(0); }
-		}
-		
-		//force parent to wait until all the processes are done
-		for (int i=0;i<processors;i++) { 
-			int temp = processIDS[i];
-			wait(&temp);
-		}
-		
-		//get data created by processes
-		for (int i=0;i<processors;i++) { 
-			ifstream in;
-			string s = toString(processIDS[i]) + ".temp";
-			openInputFile(s, in);
-			
-			//get pairs
-			for (int k = linesToProcess[i]->start; k < linesToProcess[i]->end; k++) {
-				int size;
-				in >> size;
-				gobble(in);
-				
-				vector<sim> tempVector;
-				
-				for (int j = 0; j < size; j++) {
-				
-					sim temp;
-					
-					temp.leftParent = new Sequence(in);
-					gobble(in);
-	//temp.leftParent->printSequence(cout);				
-					temp.rightParent = new Sequence(in);
-					gobble(in);
-//temp.rightParent->printSequence(cout);
-					string throwaway;
-					in >> throwaway >> temp.score >> temp.midpoint;
-	//cout << temp.score << '\t' << temp.midpoint << endl;				
-					gobble(in);
-					
-					tempVector.push_back(temp);
-				}
-				
-				localIs[k] = tempVector;
-			}
-			
-			in.close();
-			remove(s.c_str());
-		}
-			
-	
-#else
-		localIs = findIS(linesToProcess[0]->start, linesToProcess[0]->end, seqs);
-#endif	
-
-		return localIs;
-	}
-	catch(exception& e) {
-		errorOut(e, "AlignSim", "createProcessesIS");
-		exit(1);
-	}
-}
-
-//***************************************************************************************************************
diff --git a/alignedsimilarity.h b/alignedsimilarity.h
deleted file mode 100644
index 716b8b7..0000000
--- a/alignedsimilarity.h
+++ /dev/null
@@ -1,67 +0,0 @@
-#ifndef ALIGNSIM_H
-#define ALIGNSIM_H
-
-/*
- *  alignedsimilarity.h
- *  Mothur
- *
- *  Created by westcott on 8/18/09.
- *  Copyright 2009 Schloss Lab. All rights reserved.
- *
- */
-
-
-#include "chimera.h"
-#include "dist.h"
-
-//This class was created using the algorythms described in the 
-//"Evaluation of Nearest Neighbor Methods for Detection of Chimeric Small-Subunit rRna Sequences" paper 
-//by J.F. Robison-Cox 1, M.M. Bateson 2 and D.M. Ward 2
-
-
-/***********************************************************/
-
-class AlignSim : public Chimera {
-	
-	public:
-		AlignSim(string, string);	
-		~AlignSim();
-		
-		void getChimeras();
-		void print(ostream&);
-		
-		void setCons(string){};
-		void setQuantiles(string q) { quanfile = q; };
-		
-		
-	private:
-		
-		Dist* distCalc;
-		vector<linePair*> lines;
-		vector<linePair*> templateLines;
-		
-		vector<Sequence*> querySeqs;
-		vector<Sequence*> templateSeqs;
-		
-		vector< vector<sim> > IS;  //IS[0] is the vector os sim values for each window for querySeqs[0]
-		vector< vector<sim> > templateIS;  //templateIS[0] is the vector os sim values for each window for templateSeqs[0]
-		vector<int> windowBreak;
-		
-		string fastafile, templateFile;
-		int iters;
-		
-		vector< vector<sim> > findIS(int, int, vector<Sequence*>);
-		vector<Sequence*> findClosestSides(Sequence*, int, vector<int>&, int);
-		int findNumMatchedBases(Sequence, Sequence);
-		vector<int> findWindows();
-		
-		vector< vector<float> > quantile;
-		
-		vector< vector<sim> > createProcessesIS(vector<Sequence*>, vector<linePair*>);
-		
-};
-
-/***********************************************************/
-
-#endif
-
diff --git a/alignment.cpp b/alignment.cpp
index 25b2760..2734c38 100644
--- a/alignment.cpp
+++ b/alignment.cpp
@@ -31,7 +31,22 @@ Alignment::Alignment(int A) : nCols(A), nRows(A) {
 		exit(1);
 	}
 }
+/**************************************************************************************************/
+void Alignment::resize(int A) {
+	try {
+		nCols = A;
+		nRows = A;
 
+		alignment.resize(nRows);			
+		for(int i=0;i<nRows;i++){			
+			alignment[i].resize(nCols);		
+		}	
+	}
+	catch(exception& e) {
+		errorOut(e, "Alignment", "resize");
+		exit(1);
+	}
+}
 /**************************************************************************************************/
 
 void Alignment::traceBack(){			//	This traceback routine is used by the dynamic programming algorithms
diff --git a/alignment.hpp b/alignment.hpp
index 77b650c..6dd7f0a 100644
--- a/alignment.hpp
+++ b/alignment.hpp
@@ -36,6 +36,8 @@ public:
 	int getTemplateEndPos();
 	
 	int getPairwiseLength();
+	void resize(int);
+	int getnRows() { return nRows; }
 //	int getLongestTemplateGap();
 
 protected:
diff --git a/ccode.cpp b/ccode.cpp
index ec2e355..0f1dd50 100644
--- a/ccode.cpp
+++ b/ccode.cpp
@@ -177,18 +177,8 @@ void Ccode::getChimeras() {
 				lines.push_back(new linePair((i*linesPerProcess), numSeqs));
 			}
 			
-			//find breakup of templatefile for quantiles
-			if (processors == 1) {   templateLines.push_back(new linePair(0, templateSeqs.size()));  }
-			else { 
-				for (int i = 0; i < processors; i++) {
-					templateLines.push_back(new linePair());
-					templateLines[i]->start = int (sqrt(float(i)/float(processors)) * templateSeqs.size());
-					templateLines[i]->end = int (sqrt(float(i+1)/float(processors)) * templateSeqs.size());
-				}
-			}
 		#else
 			lines.push_back(new linePair(0, numSeqs));
-			templateLines.push_back(new linePair(0, templateSeqs.size()));
 		#endif
 	
 		distCalc = new eachGapDist();
@@ -305,7 +295,6 @@ void Ccode::getChimeras() {
 		
 		//free memory
 		for (int i = 0; i < lines.size(); i++)					{	delete lines[i];				}
-		for (int i = 0; i < templateLines.size(); i++)			{	delete templateLines[i];		}
 		delete distCalc;
 		delete decalc;
 			
diff --git a/ccode.h b/ccode.h
index 2888aea..bc1aad8 100644
--- a/ccode.h
+++ b/ccode.h
@@ -42,7 +42,6 @@ class Ccode : public Chimera {
 		
 		
 		vector<linePair*> lines;
-		vector<linePair*> templateLines;
 		vector<Sequence*> querySeqs;
 		vector<Sequence*> templateSeqs;
 		vector< map<int, int> > spotMap;
diff --git a/chimera.h b/chimera.h
index 366068b..8d7e08d 100644
--- a/chimera.h
+++ b/chimera.h
@@ -39,8 +39,8 @@ inline bool compareSeqDist(SeqDist left, SeqDist right){
 //********************************************************************************************************************
 
 struct sim {
-		Sequence* leftParent;
-		Sequence* rightParent; 
+		string leftParent;
+		string rightParent; 
 		float score;  
 		int midpoint;
 };
@@ -68,6 +68,7 @@ class Chimera {
 		virtual void setWindow(int w)			{	window = w;			}
 		virtual void setIncrement(int i)		{	increment = i;		}
 		virtual void setNumWanted(int n)		{	numWanted = n;		}
+		virtual void setKmerSize(int k)			{	kmerSize = k;		}
 		
 		virtual void setCons(string){};
 		virtual void setQuantiles(string){};
@@ -85,7 +86,7 @@ class Chimera {
 	protected:
 		
 		bool filter, correction;
-		int processors, window, increment, numWanted;
+		int processors, window, increment, numWanted, kmerSize;
 		string seqMask, quanfile, filterString;
 			
 
diff --git a/chimeracheckrdp.cpp b/chimeracheckrdp.cpp
new file mode 100644
index 0000000..73bc1a0
--- /dev/null
+++ b/chimeracheckrdp.cpp
@@ -0,0 +1,295 @@
+/*
+ *  chimeracheckrdp.cpp
+ *  Mothur
+ *
+ *  Created by westcott on 9/8/09.
+ *  Copyright 2009 Schloss Lab. All rights reserved.
+ *
+ */
+
+#include "chimeracheckrdp.h"
+		
+//***************************************************************************************************************
+ChimeraCheckRDP::ChimeraCheckRDP(string filename, string temp) {  fastafile = filename;  templateFile = temp;  }
+//***************************************************************************************************************
+
+ChimeraCheckRDP::~ChimeraCheckRDP() {
+	try {
+		for (int i = 0; i < querySeqs.size(); i++)		{  delete querySeqs[i];		}
+		delete templateDB;
+		delete kmer;
+	}
+	catch(exception& e) {
+		errorOut(e, "ChimeraCheckRDP", "~AlignSim");
+		exit(1);
+	}
+}	
+//***************************************************************************************************************
+void ChimeraCheckRDP::print(ostream& out) {
+	try {
+		
+		mothurOutEndLine();
+	/*	
+		for (int i = 0; i < querySeqs.size(); i++) {
+			
+			int j = 0;  float largest = -10;
+			//find largest sim value
+			for (int k = 0; k < IS[i].size(); k++) {
+				//is this score larger
+				if (IS[i][k].score > largest) {
+					j = k;
+					largest = IS[i][k].score;
+				}
+			}
+			
+			//find parental similarity
+			distCalc->calcDist(*(IS[i][j].leftParent), *(IS[i][j].rightParent));
+			float dist = distCalc->getDist();
+			
+			//convert to similarity
+			dist = (1 - dist) * 100;
+
+			//warn about parental similarity - if its above 82% may not detect a chimera 
+			if (dist >= 82) { mothurOut("When the chimeras parental similarity is above 82%, detection rates drop signifigantly.");  mothurOutEndLine(); }
+			
+			int index = ceil(dist);
+			
+			if (index == 0) { index=1;  }
+	
+			//is your DE value higher than the 95%
+			string chimera;
+			if (IS[i][j].score > quantile[index-1][4])		{	chimera = "Yes";	}
+			else										{	chimera = "No";		}			
+			
+			out << querySeqs[i]->getName() <<  "\tparental similarity: " << dist << "\tIS: " << IS[i][j].score << "\tbreakpoint: " << IS[i][j].midpoint << "\tchimera flag: " << chimera << endl;
+			
+			if (chimera == "Yes") {
+				mothurOut(querySeqs[i]->getName() + "\tparental similarity: " + toString(dist) + "\tIS: " + toString(IS[i][j].score) + "\tbreakpoint: " + toString(IS[i][j].midpoint) + "\tchimera flag: " + chimera); mothurOutEndLine();
+			}
+			out << "Improvement Score\t";
+			
+			for (int r = 0; r < IS[i].size(); r++) {  out << IS[i][r].score << '\t';  }
+			out << endl;
+		}*/
+	}
+	catch(exception& e) {
+		errorOut(e, "ChimeraCheckRDP", "print");
+		exit(1);
+	}
+}
+
+//***************************************************************************************************************
+void ChimeraCheckRDP::getChimeras() {
+	try {
+		
+		//read in query sequences and subject sequences
+		mothurOut("Reading sequences and template file... "); cout.flush();
+		querySeqs = readSeqs(fastafile);
+		//templateSeqs = readSeqs(templateFile);
+		templateDB = new KmerDB(templateFile, kmerSize);
+		mothurOut("Done."); mothurOutEndLine();
+		
+		int numSeqs = querySeqs.size();
+		
+		IS.resize(numSeqs);
+		closest.resize(numSeqs);
+		
+		//break up file if needed
+		int linesPerProcess = numSeqs / processors ;
+		
+		#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+			//find breakup of sequences for all times we will Parallelize
+			if (processors == 1) {   lines.push_back(new linePair(0, numSeqs));  }
+			else {
+				//fill line pairs
+				for (int i = 0; i < (processors-1); i++) {			
+					lines.push_back(new linePair((i*linesPerProcess), ((i*linesPerProcess) + linesPerProcess)));
+				}
+				//this is necessary to get remainder of processors / numSeqs so you don't miss any lines at the end
+				int i = processors - 1;
+				lines.push_back(new linePair((i*linesPerProcess), numSeqs));
+			}
+			
+		#else
+			lines.push_back(new linePair(0, numSeqs));
+		#endif
+		
+		kmer = new Kmer(kmerSize);
+		
+		//find closest seq to each querySeq
+		for (int i = 0; i < querySeqs.size(); i++) {
+			closest[i] = templateDB->findClosestSequence(querySeqs[i]);  
+		}
+		
+		//fill seqKmerInfo for query seqs
+		for (int i = 0; i < querySeqs.size(); i++) {
+			seqKmerInfo[querySeqs[i]->getName()] = kmer->getKmerCounts(querySeqs[i]->getUnaligned());
+		}
+		
+		//fill seqKmerInfo for closest
+		for (int i = 0; i < closest.size(); i++) {
+			seqKmerInfo[closest[i].getName()] = kmer->getKmerCounts(closest[i].getUnaligned());
+		}
+
+		
+		//for each query find IS value - this should be paralellized, 
+		//but paralellizing may cause you to have to recalculate some seqKmerInfo since the separate processes don't share memory after they split
+		for (int i = 0; i < querySeqs.size(); i++) {
+			IS[i] = findIS(i);  //fills seqKmerInfo
+		}
+		
+		
+		//free memory
+		for (int i = 0; i < lines.size(); i++)					{	delete lines[i];	}
+	
+			
+	}
+	catch(exception& e) {
+		errorOut(e, "ChimeraCheckRDP", "getChimeras");
+		exit(1);
+	}
+}
+//***************************************************************************************************************
+vector<sim> ChimeraCheckRDP::findIS(int query) {
+	try {
+		
+		vector<sim>  isValues;
+		string queryName = querySeqs[query]->getName();
+		string seq = querySeqs[query]->getUnaligned();
+		
+		mothurOut("Finding IS values for sequence " + query); mothurOutEndLine();
+		
+		//find total kmers you have in common with closest[query] by looking at the last entry in the vector of maps for each
+		int nTotal = calcKmers(seqKmerInfo[queryName][(seqKmerInfo[queryName].size()-1)], seqKmerInfo[closest[query].getName()][(seqKmerInfo[closest[query].getName()].size()-1)]);
+		
+		//you don't want the starting point to be virtually at hte end so move it in 10%
+		int start = seq.length() / 10;
+			
+		//for each window
+		for (int m = start; m < (seq.length() - start); m+=increment) {
+			
+			sim temp;
+			
+			string fragLeft = seq.substr(0, m);  //left side of breakpoint
+			string fragRight = seq.substr(m, seq.length());  //right side of breakpoint
+			
+			//make a sequence of the left side and right side
+			Sequence* left = new Sequence(queryName, fragLeft);
+			Sequence* right = new Sequence(queryName, fragRight);
+			
+			//find seqs closest to each fragment
+			Sequence closestLeft = templateDB->findClosestSequence(left); 
+			Sequence closestRight = templateDB->findClosestSequence(right); 
+			
+			map<int, int>::iterator itleft;
+			map<int, int>::iterator itleftclose;
+			
+			//get kmer in the closest seqs
+			//if it's not found calc kmer info and save, otherwise use already calculated data
+			//left
+			it = seqKmerInfo.find(closestLeft.getName());
+			if (it == seqKmerInfo.end()) {  //you have to calc it
+				seqKmerInfo[closestLeft.getName()] = kmer->getKmerCounts(closestLeft.getUnaligned());
+			}
+			
+			//right
+			it = seqKmerInfo.find(closestRight.getName());
+			if (it == seqKmerInfo.end()) {  //you have to calc it
+				seqKmerInfo[closestRight.getName()] = kmer->getKmerCounts(closestRight.getUnaligned());
+			}
+			
+			//right side is tricky - since the counts grow on eachother to find the correct counts of only the right side you must subtract the counts of the left side
+			//iterate through left sides map to subtract the number of times you saw things before you got the the right side
+			map<int, int> rightside;
+			for (itleft = seqKmerInfo[queryName][m-kmerSize].begin(); itleft != seqKmerInfo[queryName][m-kmerSize].end(); itleft++) {
+				int howManyTotal = seqKmerInfo[queryName][seqKmerInfo[queryName].size()-1][itleft->first];   //times that kmer was seen in total
+				
+				//itleft->second is times it was seen in left side, so howmanytotal - leftside should give you right side
+				int howmanyright = howManyTotal - itleft->second;
+				
+				//if any were seen just on the right add that ammount to map
+				if (howmanyright > 0) {
+					rightside[itleft->first] = howmanyright;
+				}
+			}
+			
+			//iterate through left side of the seq closest to the right fragment of query to subtract the number you saw before you reached the right side of the closest right
+			//this way you can get the map for just the fragment you want to compare and not hte whole sequence
+			map<int, int> rightsideclose;
+			for (itleftclose = seqKmerInfo[closestRight.getName()][m-kmerSize].begin(); itleftclose != seqKmerInfo[closestRight.getName()][m-kmerSize].end(); itleftclose++) {
+				int howManyTotal = seqKmerInfo[closestRight.getName()][seqKmerInfo[closestRight.getName()].size()-1][itleftclose->first];   //times that kmer was seen in total
+				
+				//itleft->second is times it was seen in left side, so howmanytotal - leftside should give you right side
+				int howmanyright = howManyTotal - itleftclose->second;
+				
+				//if any were seen just on the right add that ammount to map
+				if (howmanyright > 0) {
+					rightsideclose[itleftclose->first] = howmanyright;
+				}
+			}
+
+			int nLeft = calcKmers(seqKmerInfo[closestLeft.getName()][m-kmerSize], seqKmerInfo[queryName][m-kmerSize]);
+			int nRight = calcKmers(rightsideclose, rightside);
+			
+			int is = nLeft + nRight - nTotal;
+			
+			//save IS, leftparent, rightparent, breakpoint
+			temp.leftParent = closestLeft.getName();
+			temp.rightParent = closestRight.getName();
+			temp.score = is;
+			temp.midpoint = m;
+			
+			isValues.push_back(temp);
+			
+			delete left;
+			delete right;
+		}	
+		
+		return isValues;
+	
+	}
+	catch(exception& e) {
+		errorOut(e, "ChimeraCheckRDP", "findIS");
+		exit(1);
+	}
+}
+
+//***************************************************************************************************************
+//find the smaller map and iterate through it and count kmers in common
+int ChimeraCheckRDP::calcKmers(map<int, int> query, map<int, int> subject) {
+	try{
+		
+		int common = 0;
+		map<int, int>::iterator small;
+		map<int, int>::iterator large;
+		
+		if (query.size() < subject.size()) {
+		
+			for (small = query.begin(); small != query.end(); small++) {
+				large = subject.find(small->first);
+				
+				//if you found it they have that kmer in common
+				if (large != subject.end()) {	common++;	}
+			}
+			
+		}else { 
+		 
+			for (small = subject.begin(); small != subject.end(); small++) {
+				large = query.find(small->first);
+				
+				//if you found it they have that kmer in common
+				if (large != query.end()) {		common++;	}
+			}
+		}
+		
+		return common;
+		
+	}
+	catch(exception& e) {
+		errorOut(e, "ChimeraCheckRDP", "calcKmers");
+		exit(1);
+	}
+}
+
+//***************************************************************************************************************
+
diff --git a/chimeracheckrdp.h b/chimeracheckrdp.h
new file mode 100644
index 0000000..e31cae7
--- /dev/null
+++ b/chimeracheckrdp.h
@@ -0,0 +1,72 @@
+#ifndef CHIMERACHECK_H
+#define CHIMERACHECK_H
+
+/*
+ *  chimeracheckrdp.h
+ *  Mothur
+ *
+ *  Created by westcott on 9/8/09.
+ *  Copyright 2009 Schloss Lab. All rights reserved.
+ *
+ */
+
+
+#include "chimera.h"
+#include "kmer.hpp"
+#include "kmerdb.hpp"
+#include "database.hpp"
+
+//This class was created using the algorythms described in 
+//CHIMERA_CHECK version 2.7 written by Niels Larsen. 
+
+/***********************************************************/
+
+class ChimeraCheckRDP : public Chimera {
+	
+	public:
+		ChimeraCheckRDP(string, string);	
+		~ChimeraCheckRDP();
+		
+		void getChimeras();
+		void print(ostream&);
+		
+		void setCons(string){};
+		void setQuantiles(string q) {};
+		
+		
+	private:
+		
+		vector<linePair*> lines;
+		vector<Sequence*> querySeqs;
+		Database* templateDB;
+		Kmer* kmer;
+		
+		vector< vector<sim> > IS;  //IS[0] is the vector of IS values for each window for querySeqs[0]
+		
+		//map of vector of maps- I know its a little convaluted but I am trying to save time 
+		//I think that since the window is only sliding 10 bases there is a good probability that the closest seq to each fragment
+		//will be the same for several windows so I want to save the vector of maps containing its kmer info rather than regenerating it.
+		//So...
+		map<string, vector< map<int, int> > > seqKmerInfo;  // outer map - sequence name -> kmer info 
+															// kmer info: inner vector of maps - each entry in the vector is a map of the kmers up to that spot in the unaligned seq
+															//example:  seqKmerInfo["ecoli"][50] = map containing the kmers found in the first 50 + kmersize characters of ecoli.
+															//i chose to store the kmers numbers in a map so you wouldn't have to check for dupilcate entries and could easily find the 
+															//kmers 2 seqs had in common.  There may be a better way to do this thats why I am leaving so many comments...
+		map<string, vector< map<int, int> > >:: iterator it;
+		map<int, int>::iterator it2;
+		
+		vector<Sequence> closest;		//closest[0] is the closest overall seq to querySeqs[0].
+		
+		string fastafile, templateFile;
+		
+		
+		vector<sim> findIS(int);
+		int calcKmers(map<int, int>, map<int, int>);		
+		vector< vector<sim> > createProcessesIS(vector<Sequence*>, vector<linePair*>);
+		
+};
+
+/***********************************************************/
+
+#endif
+
diff --git a/chimeraseqscommand.cpp b/chimeraseqscommand.cpp
index 6af8aa3..e8fa34d 100644
--- a/chimeraseqscommand.cpp
+++ b/chimeraseqscommand.cpp
@@ -10,8 +10,8 @@
 #include "chimeraseqscommand.h"
 #include "bellerophon.h"
 #include "pintail.h"
-#include "alignedsimilarity.h"
 #include "ccode.h"
+#include "chimeracheckrdp.h"
 
 
 //***************************************************************************************************************
@@ -25,7 +25,7 @@ ChimeraSeqsCommand::ChimeraSeqsCommand(string option){
 		
 		else {
 			//valid paramters for this command
-			string Array[] =  {"fasta", "filter", "correction", "processors", "method", "window", "increment", "template", "conservation", "quantile", "mask", "numwanted" };
+			string Array[] =  {"fasta", "filter", "correction", "processors", "method", "window", "increment", "template", "conservation", "quantile", "mask", "numwanted", "ksize" };
 			vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
 			
 			OptionParser parser(option);
@@ -63,7 +63,9 @@ ChimeraSeqsCommand::ChimeraSeqsCommand(string option){
 				if (ableToOpen == 1) { abort = true; }
 				in.close();
 			}
-
+			
+			method = validParameter.validFile(parameters, "method", false);			if (method == "not found") { method = "pintail"; }
+			
 			string temp;
 			temp = validParameter.validFile(parameters, "filter", false);			if (temp == "not found") { temp = "F"; }
 			filter = isTrue(temp);
@@ -74,16 +76,21 @@ ChimeraSeqsCommand::ChimeraSeqsCommand(string option){
 			temp = validParameter.validFile(parameters, "processors", false);		if (temp == "not found") { temp = "1"; }
 			convert(temp, processors);
 			
+			temp = validParameter.validFile(parameters, "ksize", false);			if (temp == "not found") { temp = "7"; }
+			convert(temp, ksize);
+			
 			temp = validParameter.validFile(parameters, "window", false);			if (temp == "not found") { temp = "0"; }
 			convert(temp, window);
 					
-			temp = validParameter.validFile(parameters, "increment", false);			if (temp == "not found") { temp = "25"; }
+			temp = validParameter.validFile(parameters, "increment", false);		
+			if ((temp == "not found") && (method == "chimeracheck")) { temp = "10"; }
+			else if (temp == "not found") { temp = "25"; }
 			convert(temp, increment);
 			
-			temp = validParameter.validFile(parameters, "numwanted", false);			if (temp == "not found") { temp = "20"; }
+			temp = validParameter.validFile(parameters, "numwanted", false);		if (temp == "not found") { temp = "20"; }
 			convert(temp, numwanted);
 
-			method = validParameter.validFile(parameters, "method", false);		if (method == "not found") { method = "pintail"; }
+			
 			
 			if (((method == "pintail") || (method == "alignsim")) && (templatefile == "")) { mothurOut("You must provide a template file with the pintail and alignsim methods."); mothurOutEndLine(); abort = true;  }
 			
@@ -136,10 +143,10 @@ int ChimeraSeqsCommand::execute(){
 		
 		if (abort == true) { return 0; }
 		
-		if (method == "bellerophon")	{		chimera = new Bellerophon(fastafile);				}
-		else if (method == "pintail")	{		chimera = new Pintail(fastafile, templatefile);		}
-		//else if (method == "alignsim")	{		chimera = new AlignSim(fastafile, templatefile);	}
-		else if (method == "ccode")		{		chimera = new Ccode(fastafile, templatefile);		}
+		if (method == "bellerophon")			{		chimera = new Bellerophon(fastafile);						}
+		else if (method == "pintail")			{		chimera = new Pintail(fastafile, templatefile);				}
+		else if (method == "ccode")				{		chimera = new Ccode(fastafile, templatefile);				}
+		else if (method == "chimeracheck")		{		chimera = new ChimeraCheckRDP(fastafile, templatefile);		}
 		else { mothurOut("Not a valid method."); mothurOutEndLine(); return 0;		}
 		
 		//set user options
@@ -160,6 +167,7 @@ int ChimeraSeqsCommand::execute(){
 		chimera->setWindow(window);
 		chimera->setIncrement(increment);
 		chimera->setNumWanted(numwanted);
+		chimera->setKmerSize(ksize);
 				
 		//find chimeras
 		chimera->getChimeras();
diff --git a/chimeraseqscommand.h b/chimeraseqscommand.h
index 6274c48..b2bce81 100644
--- a/chimeraseqscommand.h
+++ b/chimeraseqscommand.h
@@ -30,7 +30,7 @@ private:
 	bool abort;
 	string method, fastafile, templatefile, consfile, quanfile, maskfile;
 	bool filter, correction;
-	int processors, midpoint, averageLeft, averageRight, window, iters, increment, numwanted;
+	int processors, midpoint, averageLeft, averageRight, window, iters, increment, numwanted, ksize;
 	Chimera* chimera;
 	
 	
diff --git a/database.cpp b/database.cpp
index 9784ef9..f84c9e1 100644
--- a/database.cpp
+++ b/database.cpp
@@ -92,3 +92,4 @@ int Database::getLongestBase()	{	return longest+1;		}
 
 /**************************************************************************************************/
 
+
diff --git a/kmer.cpp b/kmer.cpp
index b3bf022..6ee84d3 100644
--- a/kmer.cpp
+++ b/kmer.cpp
@@ -38,6 +38,32 @@ string Kmer::getKmerString(string sequence){	//	Calculate kmer for each position
 	
 	return kmerString;	
 }
+/**************************************************************************************************/
+
+vector< map<int, int> > Kmer::getKmerCounts(string sequence){	//	Calculate kmer for each position in the sequence, save info in a map
+	int length = sequence.length();				//	so you know at each spot in the sequence what kmers were found
+	int nKmers = length - kmerSize + 1;			//	
+	vector< map<int, int> > counts; counts.resize(nKmers);  // a map kmer counts for each spot
+	 map<int, int>::iterator it;
+	
+	for(int i=0;i<nKmers;i++){					//	Go though sequence and get the number between 0 and maxKmer for that
+		if (i == 0) {
+			int kmerNumber = getKmerNumber(sequence, i);//	kmer. 
+			counts[i][kmerNumber] = 1;			// add this kmer if not already there
+		}else { 
+			//your count is everything that came before and whatever you find now
+			counts[i] = counts[i-1];
+			int kmerNumber = getKmerNumber(sequence, i);//	kmer.  
+			
+			it = counts[i].find(kmerNumber);
+			if (it!= counts[i].end()) {   counts[i][kmerNumber]++;  }  //increment number of times you have seen this kmer
+			else {   counts[i][kmerNumber] = 1;   }  // add this kmer since not already there
+		}
+	}
+	
+	return counts;	
+}
+	
 	
 /**************************************************************************************************/
 
diff --git a/kmer.hpp b/kmer.hpp
index 31ca9fb..d5b67d6 100644
--- a/kmer.hpp
+++ b/kmer.hpp
@@ -21,7 +21,7 @@ public:
 	string getKmerString(string);
 	int getKmerNumber(string, int);
 	string getKmerBases(int);
-	
+	vector< map<int, int> > getKmerCounts(string sequence);  //for use in chimeraCheck
 	
 private:
 	char getASCII(int);
diff --git a/kmerdb.hpp b/kmerdb.hpp
index 5019858..d50c8b2 100644
--- a/kmerdb.hpp
+++ b/kmerdb.hpp
@@ -20,6 +20,7 @@
  */
 
 #include "mothur.h"
+#include "database.hpp"
 
 class KmerDB : public Database {
 	
diff --git a/nast.cpp b/nast.cpp
index 804e3c1..294940c 100644
--- a/nast.cpp
+++ b/nast.cpp
@@ -94,7 +94,7 @@ void Nast::removeExtraGaps(string& candAln, string tempAln, string newTemplateAl
 	try {
 	
 		int longAlignmentLength = newTemplateAlign.length();	
-		
+	
 		for(int i=0; i<longAlignmentLength; i++){				//	use the long alignment as the standard
 			int rightIndex, rightRoom, leftIndex, leftRoom;
 			
@@ -123,7 +123,7 @@ void Nast::removeExtraGaps(string& candAln, string tempAln, string newTemplateAl
 				int insertLength = 0;							//	figure out how long the anomaly is
 				while(!isalpha(newTemplateAlign[i + insertLength]))	{	insertLength++;	}
 				if(insertLength > maxInsertLength){	maxInsertLength = insertLength;	}
-					
+		
 				if((leftRoom + rightRoom) >= insertLength){
 	
 					//	Parts D & E from Fig. 2 of DeSantis et al.
@@ -168,6 +168,7 @@ void Nast::removeExtraGaps(string& candAln, string tempAln, string newTemplateAl
 									
 						}
 						else{									//	not enough room to the right, have to steal some 
+			
 							//	space to the left lets move left and then right...
 							string leftTemplateString = newTemplateAlign.substr(0,i);
 							string rightTemplateString = newTemplateAlign.substr(i+insertLength);
@@ -183,7 +184,7 @@ void Nast::removeExtraGaps(string& candAln, string tempAln, string newTemplateAl
 					}
 				}
 				else{
-													//	there could be a case where there isn't enough room in
+			//	there could be a case where there isn't enough room in
 					string leftTemplateString = newTemplateAlign.substr(0,i);			//	either direction to move stuff
 					string rightTemplateString = newTemplateAlign.substr(i+leftRoom+rightRoom);
 					newTemplateAlign = leftTemplateString + rightTemplateString;
@@ -193,9 +194,9 @@ void Nast::removeExtraGaps(string& candAln, string tempAln, string newTemplateAl
 					string insertString = candAln.substr(leftIndex+1,rightIndex-leftIndex-1);
 					string rightCandidateString = candAln.substr(rightIndex+rightRoom);
 					candAln = leftCandidateString + insertString + rightCandidateString;
-	
+
 				}
-				
+			
 				i -= insertLength;
 			} 
 		}
@@ -337,12 +338,11 @@ void Nast::regapSequences(){	//This is essentially part B in Fig 2. of DeSantis
 			candAln[i] = toupper(candAln[i]);			//	everything is upper case
 		}
 		
-		
+	
 		if(candAln.length() != tempAln.length()){		//	if the regapped candidate sequence is longer than the official
 			removeExtraGaps(candAln, tempAln, newTemplateAlign);//	template alignment then we need to do steps C-F in Fig.
 		}												//	2 of Desantis et al.
-		
-		
+			
 		candidateSeq->setAligned(candAln);
 	}
 	catch(exception& e) {
diff --git a/needlemanoverlap.cpp b/needlemanoverlap.cpp
index b14d8e3..80ba4ca 100644
--- a/needlemanoverlap.cpp
+++ b/needlemanoverlap.cpp
@@ -42,8 +42,6 @@ gap(gO), match(m), mismatch(mm), Alignment(r) {							//	the gap openning penalt
 		errorOut(e, "NeedlemanOverlap", "NeedlemanOverlap");
 		exit(1);
 	}
-																
-																		
 }
 /**************************************************************************************************/
 
@@ -56,7 +54,7 @@ void NeedlemanOverlap::align(string A, string B){
 		seqA = ' ' + A;	lA = seqA.length();		//	algorithm requires a dummy space at the beginning of each string
 		seqB = ' ' + B;	lB = seqB.length();		//	algorithm requires a dummy space at the beginning of each string
 
-		if (lA > nRows) { mothurOut("Your one of your candidate sequences is longer than you longest template sequence."); mothurOutEndLine();  }
+		if (lA > nRows) { mothurOut("One of your candidate sequences is longer than you longest template sequence. Your longest template sequence is " + toString(nRows) + ". Your candidate is " + toString(lA) + "."); mothurOutEndLine();  }
 		
 		for(int i=1;i<lB;i++){					//	This code was largely translated from Perl code provided in Ex 3.1 
 			for(int j=1;j<lA;j++){				//	of the O'Reilly BLAST book.  I found that the example output had a
@@ -90,10 +88,11 @@ void NeedlemanOverlap::align(string A, string B){
 				}
 			}
 		}
+
 		Overlap over;						
 		over.setOverlap(alignment, lA, lB, 0);		//	Fix gaps at the beginning and end of the sequences
 		traceBack();								//	Traceback the alignment to populate seqAaln and seqBaln
-		
+	
 	}
 	catch(exception& e) {
 		errorOut(e, "NeedlemanOverlap", "align");
diff --git a/needlemanoverlap.hpp b/needlemanoverlap.hpp
index a5e2140..ea7bee5 100644
--- a/needlemanoverlap.hpp
+++ b/needlemanoverlap.hpp
@@ -32,6 +32,7 @@ public:
 	NeedlemanOverlap(float, float, float, int);
 	~NeedlemanOverlap();
 	void align(string, string);
+
 	
 private:	
 	float gap;
diff --git a/sequence.cpp b/sequence.cpp
index 10a9648..b2c2b53 100644
--- a/sequence.cpp
+++ b/sequence.cpp
@@ -291,6 +291,7 @@ void Sequence::reverseComplement(){
 		else						{	temp += 'N';	}
 	}
 	unaligned = temp;
+	aligned = temp;
 	
 }
 
diff --git a/trimseqscommand.cpp b/trimseqscommand.cpp
index 2a982c7..455cdc2 100644
--- a/trimseqscommand.cpp
+++ b/trimseqscommand.cpp
@@ -160,7 +160,7 @@ int TrimSeqsCommand::execute(){
 		if(qFileName != "")	{	openInputFile(qFileName, qFile);	}
 		
 		bool success;
-		
+			
 		while(!inFASTA.eof()){
 			Sequence currSeq(inFASTA);
 			string origSeq = currSeq.getUnaligned();
@@ -173,6 +173,7 @@ int TrimSeqsCommand::execute(){
 				if(!success)			{	trashCode += 'q';								}
 			}
 			if(barcodes.size() != 0){
+	
 				success = stripBarcode(currSeq, group);
 				if(!success){	trashCode += 'b';	}
 			}
@@ -186,7 +187,8 @@ int TrimSeqsCommand::execute(){
 			}
 			if(minLength > 0 || maxLength > 0){
 				success = cullLength(currSeq);
-				if(!success){	trashCode += 'l';	}
+			if ((currSeq.getUnaligned().length() > 300) && (success)) {  cout << "too long " << currSeq.getUnaligned().length() << endl;  }
+				if(!success){	trashCode += 'l'; }
 			}
 			if(maxHomoP > 0){
 				success = cullHomoP(currSeq);
@@ -200,6 +202,7 @@ int TrimSeqsCommand::execute(){
 			if(flip){	currSeq.reverseComplement();	}		// should go last			
 			
 			if(trashCode.length() == 0){
+				currSeq.setAligned(currSeq.getUnaligned());  //this is because of a modification we made to the sequence class to fix a bug.  all seqs have an aligned version, which is the version that gets printed.
 				currSeq.printSequence(outFASTA);
 				if(barcodes.size() != 0){
 					outGroups << currSeq.getName() << '\t' << groupVector[group] << endl;
-- 
2.39.2