]> git.donarmstrong.com Git - mothur.git/commitdiff
working on chimeras
authorwestcott <westcott>
Fri, 12 Feb 2010 19:50:54 +0000 (19:50 +0000)
committerwestcott <westcott>
Fri, 12 Feb 2010 19:50:54 +0000 (19:50 +0000)
44 files changed:
Mothur.xcodeproj/project.pbxproj
aligncommand.cpp
bellerophon.cpp
blastdb.cpp
blastdb.hpp
ccode.cpp
ccode.h
chimera.cpp
chimera.h
chimeracheckrdp.cpp
chimeracheckrdp.h
chimerarealigner.cpp [new file with mode: 0644]
chimerarealigner.h [new file with mode: 0644]
chimeraseqscommand.cpp
chimeraseqscommand.h
chimeraslayer.cpp
chimeraslayer.h
commandfactory.cpp
database.hpp
decalc.cpp
decalc.h
engine.cpp
globaldata.cpp
maligner.cpp
maligner.h
mergefilecommand.cpp
mothur.cpp
mothur.h
nast.cpp
otuhierarchycommand.cpp
otuhierarchycommand.h
pintail.cpp
pintail.h
readtree.cpp
readtreecommand.cpp
slayer.cpp
slayer.h
tree.cpp
tree.h
treemap.cpp
treenode.cpp
treenode.h
unweighted.cpp
weighted.cpp

index 21dbf8dfbb074de24a0b70e523bd95e56d414695..51a592b21cdd2b1e18b1f7357b1b34993276c5c2 100644 (file)
                A7E0AAB610D2886D00CF407B /* mgclustercommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E0AAB510D2886D00CF407B /* mgclustercommand.cpp */; };
                A7E4A783106913F900688F62 /* getsharedotucommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E4A782106913F900688F62 /* getsharedotucommand.cpp */; };
                A7E4A824106A9AD700688F62 /* maligner.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E4A823106A9AD700688F62 /* maligner.cpp */; };
+               A7E8A22F1125939F0011D39C /* chimerarealigner.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E8A22E1125939F0011D39C /* chimerarealigner.cpp */; };
                A7F5759710CEBC0600E20E47 /* libreadline.a in Frameworks */ = {isa = PBXBuildFile; fileRef = A7F5759610CEBC0600E20E47 /* libreadline.a */; };
                EB1216880F619B83004A865F /* bergerparker.cpp in Sources */ = {isa = PBXBuildFile; fileRef = EB1216870F619B83004A865F /* bergerparker.cpp */; };
                EB1216E50F61ACFB004A865F /* bstick.cpp in Sources */ = {isa = PBXBuildFile; fileRef = EB1216E40F61ACFB004A865F /* bstick.cpp */; };
                A7E4A782106913F900688F62 /* getsharedotucommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = getsharedotucommand.cpp; sourceTree = SOURCE_ROOT; };
                A7E4A822106A9AD700688F62 /* maligner.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = maligner.h; sourceTree = SOURCE_ROOT; };
                A7E4A823106A9AD700688F62 /* maligner.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = maligner.cpp; sourceTree = SOURCE_ROOT; };
+               A7E8A22D1125939F0011D39C /* chimerarealigner.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = chimerarealigner.h; sourceTree = SOURCE_ROOT; };
+               A7E8A22E1125939F0011D39C /* chimerarealigner.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = chimerarealigner.cpp; sourceTree = SOURCE_ROOT; };
                A7F5759610CEBC0600E20E47 /* libreadline.a */ = {isa = PBXFileReference; lastKnownFileType = archive.ar; name = libreadline.a; path = "../readline-6.0/libreadline.a"; sourceTree = SOURCE_ROOT; };
                C6859E8B029090EE04C91782 /* Mothur.1 */ = {isa = PBXFileReference; lastKnownFileType = text.man; path = Mothur.1; sourceTree = "<group>"; };
                EB1216860F619B83004A865F /* bergerparker.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = bergerparker.h; sourceTree = SOURCE_ROOT; };
                                A75B887B104C16860083C454 /* ccode.cpp */,
                                A7283FF61056CAE100D0CC69 /* chimeracheckrdp.h */,
                                A7283FF71056CAE100D0CC69 /* chimeracheckrdp.cpp */,
+                               A7E8A22D1125939F0011D39C /* chimerarealigner.h */,
+                               A7E8A22E1125939F0011D39C /* chimerarealigner.cpp */,
                                A7B04491106CC3E60046FC83 /* chimeraslayer.h */,
                                A7B04492106CC3E60046FC83 /* chimeraslayer.cpp */,
                                372A55531017922B00C5194B /* decalc.h */,
                                A704E8A31106045D00870157 /* otuhierarchycommand.cpp in Sources */,
                                A70B00C8110885EB002F453A /* setdircommand.cpp in Sources */,
                                A794201111107897003AECCD /* distancedb.cpp in Sources */,
+                               A7E8A22F1125939F0011D39C /* chimerarealigner.cpp in Sources */,
                        );
                        runOnlyForDeploymentPostprocessing = 0;
                };
index 7782d1eac5354261c6727e240a33ae2d89984224..708a85472dc53fecec1f022ce86512a82d620c30 100644 (file)
@@ -218,6 +218,7 @@ int AlignCommand::execute(){
                        string alignFileName = outputDir + getRootName(getSimpleName(candidateFileNames[s])) + "align";
                        string reportFileName = outputDir + getRootName(getSimpleName(candidateFileNames[s])) + "align.report";
                        string accnosFileName = outputDir + getRootName(getSimpleName(candidateFileNames[s])) + "flip.accnos";
+                       bool hasAccnos = true;
                        
                        int numFastaSeqs = 0;
                        for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
@@ -307,7 +308,7 @@ int AlignCommand::execute(){
                                                mothurOut(" If you set the flip parameter to true mothur will try aligning the reverse compliment as well."); 
                                        }else{  mothurOut(" If the reverse compliment proved to be better it was reported.");  }
                                        mothurOutEndLine();
-                               }
+                               }else{ hasAccnos = false;  }
                        }
 #else
                        ifstream inFASTA;
@@ -320,7 +321,7 @@ int AlignCommand::execute(){
                        driver(lines[0], alignFileName, reportFileName, accnosFileName, candidateFileNames[s]);
                        
                        //delete accnos file if its blank else report to user
-                       if (isBlank(accnosFileName)) {  remove(accnosFileName.c_str());  }
+                       if (isBlank(accnosFileName)) {  remove(accnosFileName.c_str());  hasAccnos = false; }
                        else { 
                                mothurOut("Some of you sequences generated alignments that eliminated too many bases, a list is provided in " + accnosFileName + ".");
                                if (!flip) {
@@ -331,7 +332,10 @@ int AlignCommand::execute(){
                        
 #endif
                        
-                       
+                       mothurOut("Output File Names: " + alignFileName + ", " + reportFileName);
+                       if (hasAccnos)  {       mothurOut(", " + accnosFileName + ".");         }
+                       else                    {       mothurOut(".");                                                         }
+                       mothurOutEndLine();
                        
                        mothurOut("It took " + toString(time(NULL) - start) + " secs to align " + toString(numFastaSeqs) + " sequences.");
                        mothurOutEndLine();
index 7c64d4703a4bd3700e0757052c392639f16aa7eb..afb88d2ea29e7b59e971fb6cee9c95f8ffd6e475 100644 (file)
@@ -230,7 +230,7 @@ int Bellerophon::getChimeras() {
                sort(pref.begin(), pref.end(), comparePref);
                
                return 0;
-
+               
        }
        catch(exception& e) {
                errorOut(e, "Bellerophon", "getChimeras");
index 6b45a48ce379168e82300209a7668c3e07e4d0ab..50a0401b2cb8780765ba56a3ba2c025380c4e212 100644 (file)
@@ -81,6 +81,55 @@ vector<int> BlastDB::findClosestSequences(Sequence* seq, int n) {
 
 }
 /**************************************************************************************************/
+//assumes you have added all the template sequences using the addSequence function and run generateDB.
+map<int, float> BlastDB::findClosest(Sequence* seq, int n) {
+       try{
+               map<int, float> topMatches;
+               
+               ofstream queryFile;
+               openOutputFile(queryFileName, queryFile);
+               queryFile << '>' << seq->getName() << endl;
+               queryFile << seq->getUnaligned() << endl;
+               queryFile.close();
+                               
+               //      the goal here is to quickly survey the database to find the closest match.  To do this we are using the default
+               //      wordsize used in megablast.  I'm sure we're sacrificing accuracy for speed, but anyother way would take way too
+               //      long.  With this setting, it seems comparable in speed to the suffix tree approach.
+       
+               string blastCommand = path + "blast/bin/blastall -p blastn -d " + dbFileName + " -m 8 -W 28 -b " + toString(n) + " -v " + toString(n);
+               blastCommand += (" -i " + queryFileName + " -o " + blastFileName);
+               system(blastCommand.c_str());
+               
+               ifstream m8FileHandle;
+               openInputFile(blastFileName, m8FileHandle);
+       
+               string dummy;
+               int templateAccession;
+               gobble(m8FileHandle);
+//string name = seq->getName();
+//ofstream out;
+//openOutputFileAppend(name, out);     
+               while(!m8FileHandle.eof()){
+                       m8FileHandle >> dummy >> templateAccession >> searchScore;
+//out << dummy << '\t' <<  templateAccession   << '\t' << searchScore << endl;
+                       //get rest of junk in line
+                       while (!m8FileHandle.eof())     {       char c = m8FileHandle.get(); 
+                       //out << c; 
+                       if (c == 10 || c == 13){        break;  }       } 
+                       
+                       gobble(m8FileHandle);
+                       topMatches[templateAccession] = searchScore;
+               }
+               m8FileHandle.close();
+//out.close();         
+               return topMatches;
+       }
+       catch(exception& e) {
+               errorOut(e, "BlastDB", "findClosest");
+               exit(1);
+       }
+}
+/**************************************************************************************************/
 void BlastDB::addSequence(Sequence seq) {
        try {
        
@@ -103,7 +152,7 @@ void BlastDB::addSequence(Sequence seq) {
 void BlastDB::generateDB() {
        try {
        
-               mothurOut("Generating the temporary BLAST database...\t");      cout.flush();
+               //mothurOut("Generating the temporary BLAST database...\t");    cout.flush();
                
                path = globaldata->argv;
                path = path.substr(0, (path.find_last_of('m')));
@@ -111,7 +160,7 @@ void BlastDB::generateDB() {
                string formatdbCommand = path + "blast/bin/formatdb -p F -o T -i " + dbFileName;        //      format the database, -o option gives us the ability
                system(formatdbCommand.c_str());                                                                //      to get the right sequence names, i think. -p F
                                                                                                                                        //      option tells formatdb that seqs are DNA, not prot
-               mothurOut("DONE."); mothurOutEndLine(); mothurOutEndLine(); cout.flush();
+               //mothurOut("DONE."); mothurOutEndLine();       mothurOutEndLine(); cout.flush();
        }
        catch(exception& e) {
                errorOut(e, "BlastDB", "generateDB");
index c718bb43fb29562b755cdd44e37fa893c7b57f25..fea55c8256e561af5477356b3c5bc545b30d6960 100644 (file)
@@ -23,6 +23,7 @@ public:
        void generateDB();
        void addSequence(Sequence);
        vector<int> findClosestSequences(Sequence*, int);
+       map<int, float> findClosest(Sequence*, int); //template index -> searchscore
 
 private:
        string dbFileName;
index c474d7a0ad7cb54729f809cf9921ed234f48d5db..bfcd44d8239a30f8ad18db73f0c06268dc0e7474 100644 (file)
--- a/ccode.cpp
+++ b/ccode.cpp
 
 
 //***************************************************************************************************************
-Ccode::Ccode(string filename, string temp, string o) {  fastafile = filename;  templateFile = temp;  outputDir = o; }
+Ccode::Ccode(string filename, string o) {  
+       fastafile = filename;  outputDir = o; 
+       distCalc = new eachGapDist();
+       decalc = new DeCalculator();
+       
+       mapInfo = outputDir + getRootName(getSimpleName(fastafile)) + "mapinfo";
+       ofstream out2;
+       openOutputFile(mapInfo, out2);
+               
+       out2 << "Place in masked, filtered and trimmed sequence\tPlace in original alignment" << endl;
+       out2.close();
+}
 //***************************************************************************************************************
-
 Ccode::~Ccode() {
-       try {
-               for (int i = 0; i < querySeqs.size(); i++)              {  delete querySeqs[i];         }
-               for (int i = 0; i < templateSeqs.size(); i++)   {  delete templateSeqs[i];      }
-       }
-       catch(exception& e) {
-               errorOut(e, "Ccode", "~Ccode");
-               exit(1);
-       }
+       delete distCalc;
+       delete decalc;
 }      
 //***************************************************************************************************************
+void Ccode::printHeader(ostream& out) {
+       out << "For full window mapping info refer to " << mapInfo << endl << endl;
+}
+//***************************************************************************************************************
 void Ccode::print(ostream& out) {
        try {
                
                mothurOutEndLine();
                
-               string mapInfo = outputDir + getRootName(getSimpleName(fastafile)) + "mapinfo";
                ofstream out2;
-               openOutputFile(mapInfo, out2);
-               
-               out2 << "Place in masked, filtered and trimmed sequence\tPlace in original alignment" << endl;
+               openOutputFileAppend(mapInfo, out2);
                
-               for (int j = 0; j < querySeqs.size(); j++) {
-                       out2 << querySeqs[j]->getName() << endl;
-                       for (it = spotMap[j].begin(); it!= spotMap[j].end(); it++) {
-                               out2 << it->first << '\t' << it->second << endl;
-                       }
+               out2 << querySeq->getName() << endl;
+               for (it = spotMap.begin(); it!= spotMap.end(); it++) {
+                       out2 << it->first << '\t' << it->second << endl;
                }
                out2.close();
                
                out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
                
-               out << "For full window mapping info refer to " << mapInfo << endl << endl;
-               
-               for (int i = 0; i < querySeqs.size(); i++) {
-               
-                       out << querySeqs[i]->getName() << endl << endl << "Reference sequences used and distance to query:" << endl;
+               out << querySeq->getName() << endl << endl << "Reference sequences used and distance to query:" << endl;
                        
-                       for (int j = 0; j < closest[i].size(); j++) {
-                               out << setprecision(3) << closest[i][j].seq->getName() << '\t' << closest[i][j].dist << endl;
-                       }
-                       out << endl << endl;
+               for (int j = 0; j < closest.size(); j++) {
+                       out << setprecision(3) << closest[j].seq->getName() << '\t' << closest[j].dist << endl;
+               }
+               out << endl << endl;
                
-                       //for each window
-                       //window mapping info.
-                       out << "Mapping information: ";
-                       //you mask and did not filter
-                       if ((seqMask != "") && (!filter)) { out << "mask and trim."; }
+               //for each window
+               //window mapping info.
+               out << "Mapping information: ";
+               //you mask and did not filter
+               if ((seqMask != "") && (!filter)) { out << "mask and trim."; }
                                
-                       //you filtered and did not mask
-                       if ((seqMask == "") && (filter)) { out << "filter and trim."; }
+               //you filtered and did not mask
+               if ((seqMask == "") && (filter)) { out << "filter and trim."; }
                                
-                       //you masked and filtered
-                       if ((seqMask != "") && (filter)) { out << "mask, filter and trim."; }
+               //you masked and filtered
+               if ((seqMask != "") && (filter)) { out << "mask, filter and trim."; }
                        
-                       out << endl << "Window\tStartPos\tEndPos" << endl;
-                       it = trim[i].begin();
+               out << endl << "Window\tStartPos\tEndPos" << endl;
+               it = trim.begin();
                        
-                       for (int k = 0; k < windows[i].size()-1; k++) {
-                               out << k+1 << '\t' << spotMap[i][windows[i][k]-it->first] << '\t' << spotMap[i][windows[i][k]-it->first+windowSizes[i]] << endl;
-                       }
+               for (int k = 0; k < windows.size()-1; k++) {
+                       out << k+1 << '\t' << spotMap[windows[k]-it->first] << '\t' << spotMap[windows[k]-it->first+windowSizes] << endl;
+               }
                        
-                       out << windows[i].size() << '\t' << spotMap[i][windows[i][windows[i].size()-1]-it->first] << '\t' << spotMap[i][it->second-it->first-1] << endl;
-                       out << endl;
+               out << windows.size() << '\t' << spotMap[windows[windows.size()-1]-it->first] << '\t' << spotMap[it->second-it->first-1] << endl;
+               out << endl;
                        
-                       out << "Window\tAvgQ\t(sdQ)\tAvgR\t(sdR)\tRatio\tAnova" << endl;
-                       for (int k = 0; k < windows[i].size(); k++) {
-                               float ds = averageQuery[i][k] / averageRef[i][k]; 
-                               out << k+1 << '\t' << averageQuery[i][k] << '\t' << sdQuery[i][k] << '\t' << averageRef[i][k] << '\t'<< sdRef[i][k] << '\t' << ds << '\t' << anova[i][k] << endl;
-                       }
-                       out << endl;
+               out << "Window\tAvgQ\t(sdQ)\tAvgR\t(sdR)\tRatio\tAnova" << endl;
+               for (int k = 0; k < windows.size(); k++) {
+                       float ds = averageQuery[k] / averageRef[k]; 
+                       out << k+1 << '\t' << averageQuery[k] << '\t' << sdQuery[k] << '\t' << averageRef[k] << '\t'<< sdRef[k] << '\t' << ds << '\t' << anova[k] << endl;
+               }
+               out << endl;
                        
-                       //varRef
-                       //varQuery
-                       /* F test for differences among variances.
-                       * varQuery is expected to be higher or similar than varRef */
-                       //float fs = varQuery[query][i] / varRef[query][i];     /* F-Snedecor, test for differences of variances */
+               //varRef
+               //varQuery
+               /* F test for differences among variances.
+               * varQuery is expected to be higher or similar than varRef */
+               //float fs = varQuery[query] / varRef[query];   /* F-Snedecor, test for differences of variances */
                        
-                       bool results = false;   
+               bool results = false;   
                                                
-                       //confidence limit, t - Student, anova
-                       out << "Window\tConfidenceLimit\tt-Student\tAnova" << endl;
+               //confidence limit, t - Student, anova
+               out << "Window\tConfidenceLimit\tt-Student\tAnova" << endl;
                        
-                       for (int k = 0; k < windows[i].size(); k++) {
-                               string temp = "";
-                               if (isChimericConfidence[i][k]) {  temp += "*\t"; }
-                               else { temp += "\t"; }
+               for (int k = 0; k < windows.size(); k++) {
+                       string temp = "";
+                       if (isChimericConfidence[k]) {  temp += "*\t"; }
+                       else { temp += "\t"; }
                                
-                               if (isChimericTStudent[i][k]) {  temp += "*\t"; }
-                               else { temp += "\t"; }
+                       if (isChimericTStudent[k]) {  temp += "*\t"; }
+                       else { temp += "\t"; }
                                
-                               if (isChimericANOVA[i][k]) {  temp += "*\t"; }
-                               else { temp += "\t"; }
+                       if (isChimericANOVA[k]) {  temp += "*\t"; }
+                       else { temp += "\t"; }
                        
-                               out << k+1 << '\t' << temp << endl;
+                       out << k+1 << '\t' << temp << endl;
                                
-                               if (temp == "*\t*\t*\t") {  results = true;  }
-                       }
-                       out << endl;    
+                       if (temp == "*\t*\t*\t") {  results = true;  }
+               }
+               out << endl;    
                        
-                       if (results) {
-                               mothurOut(querySeqs[i]->getName() + " was found have at least one chimeric window."); mothurOutEndLine();
-                       }
+               if (results) {
+                       mothurOut(querySeq->getName() + " was found have at least one chimeric window."); mothurOutEndLine();
                }
+               
        }
        catch(exception& e) {
                errorOut(e, "Ccode", "print");
                exit(1);
        }
 }
-
 //***************************************************************************************************************
-int Ccode::getChimeras() {
+int Ccode::getChimeras(Sequence* query) {
        try {
-               
-               //read in query sequences and subject sequences
-               mothurOut("Reading sequences and template file... "); cout.flush();
-               querySeqs = readSeqs(fastafile);
-               templateSeqs = readSeqs(templateFile);
-               mothurOut("Done."); mothurOutEndLine();
-               
-               int numSeqs = querySeqs.size();
-               
-               if (unaligned) { mothurOut("Your sequences need to be aligned when you use the bellerophon ccode."); mothurOutEndLine(); return 1;  }
-               
-               closest.resize(numSeqs);
-               
-               refCombo.resize(numSeqs, 0);
-               sumRef.resize(numSeqs); 
-               varRef.resize(numSeqs); 
-               varQuery.resize(numSeqs); 
-               sdRef.resize(numSeqs); 
-               sdQuery.resize(numSeqs);     
-               sumQuery.resize(numSeqs);
-               sumSquaredRef.resize(numSeqs); 
-               sumSquaredQuery.resize(numSeqs); 
-               averageRef.resize(numSeqs);
-               averageQuery.resize(numSeqs);
-               anova.resize(numSeqs);
-               isChimericConfidence.resize(numSeqs);
-               isChimericTStudent.resize(numSeqs);
-               isChimericANOVA.resize(numSeqs);
-               trim.resize(numSeqs);
-               spotMap.resize(numSeqs);
-               windowSizes.resize(numSeqs, window);
-               windows.resize(numSeqs);
-               
-               //break up file if needed
-               int linesPerProcess = numSeqs / processors ;
-               
-               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
-                       //find breakup of sequences for all times we will Parallelize
-                       if (processors == 1) {   lines.push_back(new linePair(0, numSeqs));  }
-                       else {
-                               //fill line pairs
-                               for (int i = 0; i < (processors-1); i++) {                      
-                                       lines.push_back(new linePair((i*linesPerProcess), ((i*linesPerProcess) + linesPerProcess)));
-                               }
-                               //this is necessary to get remainder of processors / numSeqs so you don't miss any lines at the end
-                               int i = processors - 1;
-                               lines.push_back(new linePair((i*linesPerProcess), numSeqs));
-                       }
-                       
-               #else
-                       lines.push_back(new linePair(0, numSeqs));
-               #endif
        
-               distCalc = new eachGapDist();
-               decalc = new DeCalculator();
-               
-               //find closest
-               if (processors == 1) { 
-                       mothurOut("Finding top matches for sequences... "); cout.flush();
-                       closest = findClosest(lines[0]->start, lines[0]->end, numWanted);
-                       mothurOut("Done."); mothurOutEndLine();
-               }else {         createProcessesClosest();               }
+               closest.clear();
+               refCombo = 0;
+               sumRef.clear(); 
+               varRef.clear(); 
+               varQuery.clear(); 
+               sdRef.clear(); 
+               sdQuery.clear();     
+               sumQuery.clear();
+               sumSquaredRef.clear(); 
+               sumSquaredQuery.clear(); 
+               averageRef.clear();
+               averageQuery.clear();
+               anova.clear();
+               isChimericConfidence.clear();
+               isChimericTStudent.clear();
+               isChimericANOVA.clear();
+               trim.clear();
+               spotMap.clear();
+               windowSizes = window;
+               windows.clear();
 
-               //initialize spotMap
-               for (int j = 0; j < numSeqs; j++) {
-                       for (int i = 0; i < querySeqs[0]->getAligned().length(); i++) {
-                               spotMap[j][i] = i;
-                       }
-               }
+       
+               querySeq = query;
+               
+               //find closest matches to query
+               closest = findClosest(query, numWanted);
                
+               //initialize spotMap
+               for (int i = 0; i < query->getAligned().length(); i++) {        spotMap[i] = i;         }
+       
                //mask sequences if the user wants to 
                if (seqMask != "") {
                        decalc->setMask(seqMask);
                        
-                       //mask querys
-                       for (int i = 0; i < querySeqs.size(); i++) {
-                               decalc->runMask(querySeqs[i]);
-                       }
-               
-                       //mask templates
-                       for (int i = 0; i < templateSeqs.size(); i++) {
-                               decalc->runMask(templateSeqs[i]);
-                       }
+                       decalc->runMask(query);
                        
-                       for (int i = 0; i < numSeqs; i++) {
-                               spotMap[i] = decalc->getMaskMap();
-                       }
+                       //mask closest
+                       for (int i = 0; i < closest.size(); i++) {      decalc->runMask(closest[i].seq);        }
+                       
+                       spotMap = decalc->getMaskMap();
                }
                
                if (filter) {
-                       vector<Sequence*> temp = templateSeqs;
-                       for (int i = 0; i < querySeqs.size(); i++) { temp.push_back(querySeqs[i]);  }
+                       vector<Sequence*> temp;
+                       for (int i = 0; i < closest.size(); i++) { temp.push_back(closest[i].seq);  }
+                       temp.push_back(query);  
                        
                        createFilter(temp);
                
-                       runFilter(querySeqs);
-                       runFilter(templateSeqs);
+                       for (int i = 0; i < temp.size(); i++) { runFilter(temp[i]);  }
                        
                        //update spotMap
                        map<int, int> newMap;
                        int spot = 0;
-                       int j = 0;
                        
                        for (int i = 0; i < filterString.length(); i++) {
                                if (filterString[i] == '1') {
                                        //add to newMap
-                                       newMap[spot] = spotMap[j][i];
+                                       newMap[spot] = spotMap[i];
                                        spot++;  
                                }
                        }
-                       
-                       for (int i = 0; i < numSeqs; i++) {
-                               spotMap[i] = newMap;
-                       }
+                       spotMap = newMap;
                }
 
                //trim sequences - this follows ccodes remove_extra_gaps 
-               for (int i = 0; i < querySeqs.size(); i++) {
-                       trimSequences(i);
-               }
+               trimSequences(query);
+               
                
                //windows are equivalent to words - ccode paper recommends windows are between 5% and 20% on alignment length().  
                //Our default will be 10% and we will warn if user tries to use a window above or below these recommendations
-               for (int i = 0; i < querySeqs.size(); i++) {
-                       windows[i] = findWindows(i);  
-               }
+               windows = findWindows();  
+               
 
                //remove sequences that are more than 20% different and less than 0.5% different - may want to allow user to specify this later 
-               if (processors == 1) { 
-                       for (int i = 0; i < closest.size(); i++) {
-                               removeBadReferenceSeqs(closest[i], i);
-                       }
-               }else {         createProcessesRemoveBad();             }
-
+               removeBadReferenceSeqs(closest);
                
-               if (processors == 1) { 
-                       //find the averages for each querys references
-                       for (int i = 0; i < numSeqs; i++) {
-                               getAverageRef(closest[i], i);  //fills sumRef[i], averageRef[i], sumSquaredRef[i] and refCombo[i].
-                       }
-                       
-                       //find the averages for the query 
-                       for (int i = 0; i < numSeqs; i++) {
-                               getAverageQuery(closest[i], i);  //fills sumQuery[i], averageQuery[i], sumSquaredQuery[i].
-                       }
-               }else {         createProcessesAverages();              }
                
-               if (processors == 1) { 
-                       //find the averages for each querys references 
-                       for (int i = 0; i < numSeqs; i++) {
-                               findVarianceRef(i);  //fills varRef[i] and sdRef[i] also sets minimum error rate to 0.001 to avoid divide by 0.
-                       }
-                       
-                       //find the averages for the query 
-                       for (int i = 0; i < numSeqs; i++) {
-                               findVarianceQuery(i);  //fills varQuery[i] and sdQuery[i] also sets minimum error rate to 0.001 to avoid divide by 0.
-                       }
-               }else {         createProcessesVariances();             }
+               //find the averages for each querys references
+               getAverageRef(closest);  //fills sumRef, averageRef, sumSquaredRef and refCombo.
+               getAverageQuery(closest, query);  //fills sumQuery, averageQuery, sumSquaredQuery.
+                                       
                
-               if (processors == 1) { 
-                       for (int i = 0; i < numSeqs; i++) {
-                               determineChimeras(i);  //fills anova, isChimericConfidence[i], isChimericTStudent[i] and isChimericANOVA[i]. 
-                       }
-               }else {         createProcessesDetermine();             }
+               //find the averages for each querys references 
+               findVarianceRef();  //fills varRef and sdRef also sets minimum error rate to 0.001 to avoid divide by 0.
+                       
+                       
+               //find the averages for the query 
+               findVarianceQuery();  //fills varQuery and sdQuery also sets minimum error rate to 0.001 to avoid divide by 0.
+                                       
+               determineChimeras();  //fills anova, isChimericConfidence, isChimericTStudent and isChimericANOVA. 
                
                //free memory
-               for (int i = 0; i < lines.size(); i++)                                  {       delete lines[i];                                }
-               delete distCalc;
-               delete decalc;
+               for (int i = 0; i < closest.size(); i++) {  delete closest[i].seq;  }
                
                return 0;
        }
@@ -309,17 +233,17 @@ int Ccode::getChimeras() {
 }
 /***************************************************************************************************************/
 //ccode algo says it does this to "Removes the initial and final gaps to avoid biases due to incomplete sequences."
-void Ccode::trimSequences(int query) {
+void Ccode::trimSequences(Sequence* query) {
        try {
                
                int frontPos = 0;  //should contain first position in all seqs that is not a gap character
-               int rearPos = querySeqs[query]->getAligned().length();
+               int rearPos = query->getAligned().length();
                
                //********find first position in closest seqs that is a non gap character***********//
                //find first position all query seqs that is a non gap character
-               for (int i = 0; i < closest[query].size(); i++) {
+               for (int i = 0; i < closest.size(); i++) {
                        
-                       string aligned = closest[query][i].seq->getAligned();
+                       string aligned = closest[i].seq->getAligned();
                        int pos = 0;
                        
                        //find first spot in this seq
@@ -335,7 +259,7 @@ void Ccode::trimSequences(int query) {
                }
                
                //find first position all querySeq[query] that is a non gap character
-               string aligned = querySeqs[query]->getAligned();
+               string aligned = query->getAligned();
                int pos = 0;
                        
                //find first spot in this seq
@@ -351,9 +275,9 @@ void Ccode::trimSequences(int query) {
                
                
                //********find last position in closest seqs that is a non gap character***********//
-               for (int i = 0; i < closest[query].size(); i++) {
+               for (int i = 0; i < closest.size(); i++) {
                        
-                       string aligned = closest[query][i].seq->getAligned();
+                       string aligned = closest[i].seq->getAligned();
                        int pos = aligned.length();
                        
                        //find first spot in this seq
@@ -369,7 +293,7 @@ void Ccode::trimSequences(int query) {
                }
                
                //find last position all querySeqs[query] that is a non gap character
-               aligned = querySeqs[query]->getAligned();
+               aligned = query->getAligned();
                pos = aligned.length();
                
                //find first spot in this seq
@@ -391,7 +315,7 @@ void Ccode::trimSequences(int query) {
                tempTrim[frontPos] = rearPos;
                
                //save trimmed locations
-               trim[query] = tempTrim;
+               trim = tempTrim;
                                                
                //update spotMask
                map<int, int> newMap;
@@ -399,56 +323,45 @@ void Ccode::trimSequences(int query) {
                
                for (int i = frontPos; i < rearPos; i++) {
                        //add to newMap
-//cout << query << '\t' << i << '\t' << spotMap[query][i] << endl;
-                       newMap[spot] = spotMap[query][i];
+                       newMap[spot] = spotMap[i];
                        spot++;  
                }
-               
-               //for (it = newMap.begin(); it!=newMap.end(); it++) {
-                       //cout << query << '\t' << it->first << '\t' << it->second << endl;
-               //}
-               
-               spotMap[query] = newMap;
-
-               
+               spotMap = newMap;
        }
        catch(exception& e) {
                errorOut(e, "Ccode", "trimSequences");
                exit(1);
        }
-
 }
 /***************************************************************************************************************/
-vector<int> Ccode::findWindows(int query) {
+vector<int> Ccode::findWindows() {
        try {
                
                vector<int> win; 
-               it = trim[query].begin();
+               it = trim.begin();
                
                int length = it->second - it->first;
        
                //default is wanted = 10% of total length
-               if (windowSizes[query] > length) { 
+               if (windowSizes > length) { 
                        mothurOut("You have slected a window larger than your sequence length after all filters, masks and trims have been done. I will use the default 10% of sequence length.");
-                       windowSizes[query] = length / 10;
-               }else if (windowSizes[query] == 0) { windowSizes[query] = length / 10;  }
-               else if (windowSizes[query] > (length * 0.20)) {
+                       windowSizes = length / 10;
+               }else if (windowSizes == 0) { windowSizes = length / 10;  }
+               else if (windowSizes > (length * 0.20)) {
                        mothurOut("You have selected a window that is larger than 20% of your sequence length.  This is not recommended, but I will continue anyway."); mothurOutEndLine();
-               }else if (windowSizes[query] < (length * 0.05)) {
+               }else if (windowSizes < (length * 0.05)) {
                        mothurOut("You have selected a window that is smaller than 5% of your sequence length.  This is not recommended, but I will continue anyway."); mothurOutEndLine();
                }
                
                //save starting points of each window
-               for (int m = it->first;  m < (it->second-windowSizes[query]); m+=windowSizes[query]) {  win.push_back(m);  }
+               for (int m = it->first;  m < (it->second-windowSizes); m+=windowSizes) {  win.push_back(m);  }
                
                //save last window
                if (win[win.size()-1] < (it->first+length)) {
-                       win.push_back(win[win.size()-1]+windowSizes[query]); // ex. string length is 115, window is 25, without this you would get 0, 25, 50, 75
+                       win.push_back(win[win.size()-1]+windowSizes); // ex. string length is 115, window is 25, without this you would get 0, 25, 50, 75
                }                                                                                                                                                                                                       //with this you would get 1,25,50,75,100
                
-
                return win;
-       
        }
        catch(exception& e) {
                errorOut(e, "Ccode", "findWindows");
@@ -512,7 +425,7 @@ int Ccode::getDiff(string seqA, string seqB) {
 }
 //***************************************************************************************************************
 //tried to make this look most like ccode original implementation
-void Ccode::removeBadReferenceSeqs(vector<SeqDist>& seqs, int query) {
+void Ccode::removeBadReferenceSeqs(vector<SeqDist>& seqs) {
        try {
                
                vector< vector<int> > numDiffBases;
@@ -520,7 +433,7 @@ void Ccode::removeBadReferenceSeqs(vector<SeqDist>& seqs, int query) {
                //initialize to 0
                for (int i = 0; i < numDiffBases.size(); i++) { numDiffBases[i].resize(seqs.size(),0); }
                
-               it = trim[query].begin();
+               it = trim.begin();
                int length = it->second - it->first;
                
                //calc differences from each sequence to everyother seq in the set
@@ -576,9 +489,8 @@ void Ccode::removeBadReferenceSeqs(vector<SeqDist>& seqs, int query) {
                        seqs = goodSeqs;
                        
                }else { //warn, but dont remove any
-                       mothurOut(querySeqs[query]->getName() + " does not have an adaquate number of reference sequences that are within 20% and 0.5% similarity.  I will continue, but please check."); mothurOutEndLine();  
+                       mothurOut(querySeq->getName() + " does not have an adaquate number of reference sequences that are within 20% and 0.5% similarity.  I will continue, but please check."); mothurOutEndLine();  
                }
-                       
 
        }
        catch(exception& e) {
@@ -587,46 +499,33 @@ void Ccode::removeBadReferenceSeqs(vector<SeqDist>& seqs, int query) {
        }
 }
 //***************************************************************************************************************
-vector< vector<SeqDist> > Ccode::findClosest(int start, int end, int numWanted) {
+//makes copy of templateseq for filter
+vector<SeqDist>  Ccode::findClosest(Sequence* q, int numWanted) {
        try{
        
-               vector< vector<SeqDist> > topMatches;  topMatches.resize(querySeqs.size());
-       
-               float smallestOverall, smallestLeft, smallestRight;
-               smallestOverall = 1000;  smallestLeft = 1000;  smallestRight = 1000;
+               vector<SeqDist>  topMatches;  
                
-               //for each sequence in querySeqs - find top matches to use as reference
-               for(int j = start; j < end; j++){
-                       
-                       Sequence query = *(querySeqs[j]);
+               Sequence query = *(q);
                        
-                       vector<SeqDist> distances;
+               //calc distance to each sequence in template seqs
+               for (int i = 0; i < templateSeqs.size(); i++) {
                        
-                       //calc distance to each sequence in template seqs
-                       for (int i = 0; i < templateSeqs.size(); i++) {
-                       
-                               Sequence ref = *(templateSeqs[i]); 
+                       Sequence ref = *(templateSeqs[i]); 
                                        
-                               //find overall dist
-                               distCalc->calcDist(query, ref);
-                               float dist = distCalc->getDist();       
+                       //find overall dist
+                       distCalc->calcDist(query, ref);
+                       float dist = distCalc->getDist();       
                                
-                               //save distance
-                               SeqDist temp;
-                               temp.seq = templateSeqs[i];
-                               temp.dist = dist;
+                       //save distance
+                       SeqDist temp;
+                       temp.seq = new Sequence(templateSeqs[i]->getName(), templateSeqs[i]->getAligned());
+                       temp.dist = dist;
 
-                               distances.push_back(temp);
-                       }
-                       
-                       sort(distances.begin(), distances.end(), compareSeqDist);
-                       
-                       //save the number of top matches wanted
-                       for (int h = 0; h < numWanted; h++) {
-                               topMatches[j].push_back(distances[h]);
-                       }
+                       topMatches.push_back(temp);
                }
                        
+               sort(topMatches.begin(), topMatches.end(), compareSeqDist);
+                       
                return topMatches;
 
        }
@@ -637,21 +536,21 @@ vector< vector<SeqDist> > Ccode::findClosest(int start, int end, int numWanted)
 }
 /**************************************************************************************************/
 //find the distances from each reference sequence to every other reference sequence for each window for this query
-void Ccode::getAverageRef(vector<SeqDist> ref, int query) {
+void Ccode::getAverageRef(vector<SeqDist> ref) {
        try {
                
-               vector< vector< vector<int> > > diffs;  //diffs[0][1][2] is the number of differences between ref seq 0 and ref seq 1 at window 2.
+               vector< vector< vector<int> > >  diffs;  //diffs[0][1][2] is the number of differences between ref seq 0 and ref seq 1 at window 2.
                
                //initialize diffs vector
                diffs.resize(ref.size());
                for (int i = 0; i < diffs.size(); i++) {  
                        diffs[i].resize(ref.size());
                        for (int j = 0; j < diffs[i].size(); j++) {
-                               diffs[i][j].resize(windows[query].size(), 0);
+                               diffs[i][j].resize(windows.size(), 0);
                        }
                }
                
-               it = trim[query].begin();
+               it = trim.begin();
                                
                //find the distances from each reference sequence to every other reference sequence for each window for this query              
                for (int i = 0; i < ref.size(); i++) {
@@ -663,23 +562,23 @@ void Ccode::getAverageRef(vector<SeqDist> ref, int query) {
                        
                                string refJ = ref[j].seq->getAligned();
                        
-                               for (int k = 0; k < windows[query].size(); k++) {
+                               for (int k = 0; k < windows.size(); k++) {
                                        
                                        string refIWindowk, refJWindowk;
                                        
-                                       if (k < windows[query].size()-1) {
+                                       if (k < windows.size()-1) {
                                                //get window strings
-                                               refIWindowk = refI.substr(windows[query][k], windowSizes[query]);
-                                               refJWindowk = refJ.substr(windows[query][k], windowSizes[query]);
+                                               refIWindowk = refI.substr(windows[k], windowSizes);
+                                               refJWindowk = refJ.substr(windows[k], windowSizes);
                                        }else { //last window may be smaller than rest - see findwindows
                                                //get window strings
-                                               refIWindowk = refI.substr(windows[query][k], (it->second-windows[query][k]));
-                                               refJWindowk = refJ.substr(windows[query][k], (it->second-windows[query][k]));
+                                               refIWindowk = refI.substr(windows[k], (it->second-windows[k]));
+                                               refJWindowk = refJ.substr(windows[k], (it->second-windows[k]));
                                        }
                                        
                                        //find differences
                                        int diff = getDiff(refIWindowk, refJWindowk);
-//cout <<  i << '\t' << j << '\t' << k << '\t' << diff << endl;
+
                                        //save differences in [i][j][k] and [j][i][k] since they are the same
                                        diffs[i][j][k] = diff;
                                        diffs[j][i][k] = diff;
@@ -691,20 +590,20 @@ void Ccode::getAverageRef(vector<SeqDist> ref, int query) {
                }//i
                
                //initialize sumRef for this query 
-               sumRef[query].resize(windows[query].size(), 0);
-               sumSquaredRef[query].resize(windows[query].size(), 0);
-               averageRef[query].resize(windows[query].size(), 0);
+               sumRef.resize(windows.size(), 0);
+               sumSquaredRef.resize(windows.size(), 0);
+               averageRef.resize(windows.size(), 0);
                
                //find the sum of the differences for hte reference sequences
                for (int i = 0; i < diffs.size(); i++) {
                        for (int j = 0; j < i; j++) {
                        
                                //increment this querys reference sequences combos
-                               refCombo[query]++;
+                               refCombo++;
                                
                                for (int k = 0; k < diffs[i][j].size(); k++) {
-                                       sumRef[query][k] += diffs[i][j][k];
-                                       sumSquaredRef[query][k] += (diffs[i][j][k]*diffs[i][j][k]);
+                                       sumRef[k] += diffs[i][j][k];
+                                       sumSquaredRef[k] += (diffs[i][j][k]*diffs[i][j][k]);
                                }//k
                                
                        }//j
@@ -712,8 +611,8 @@ void Ccode::getAverageRef(vector<SeqDist> ref, int query) {
 
                
                //find the average of the differences for the references for each window
-               for (int i = 0; i < windows[query].size(); i++) {
-                       averageRef[query][i] = sumRef[query][i] / (float) refCombo[query];
+               for (int i = 0; i < windows.size(); i++) {
+                       averageRef[i] = sumRef[i] / (float) refCombo;
                }
                
        }
@@ -723,7 +622,7 @@ void Ccode::getAverageRef(vector<SeqDist> ref, int query) {
        }
 }
 /**************************************************************************************************/
-void Ccode::getAverageQuery (vector<SeqDist> ref, int query) {
+void Ccode::getAverageQuery (vector<SeqDist> ref, Sequence* query) {
        try {
        
                vector< vector<int> >  diffs;  //diffs[1][2] is the number of differences between querySeqs[query] and ref seq 1 at window 2.
@@ -731,35 +630,35 @@ void Ccode::getAverageQuery (vector<SeqDist> ref, int query) {
                //initialize diffs vector
                diffs.resize(ref.size());
                for (int j = 0; j < diffs.size(); j++) {
-                       diffs[j].resize(windows[query].size(), 0);
+                       diffs[j].resize(windows.size(), 0);
                }
                
-               it = trim[query].begin();
+               it = trim.begin();
                                                        
-               string refQuery = querySeqs[query]->getAligned();
+               string refQuery = query->getAligned();
                         
                //j<i, so you don't find distances from i to j and then j to i.
                for (int j = 0; j < ref.size(); j++) {
                         
                         string refJ = ref[j].seq->getAligned();
                         
-                        for (int k = 0; k < windows[query].size(); k++) {
+                        for (int k = 0; k < windows.size(); k++) {
                                        
                                        string QueryWindowk, refJWindowk;
                                        
-                                       if (k < windows[query].size()-1) {
+                                       if (k < windows.size()-1) {
                                                //get window strings
-                                               QueryWindowk = refQuery.substr(windows[query][k], windowSizes[query]);
-                                               refJWindowk = refJ.substr(windows[query][k], windowSizes[query]);                                       
+                                               QueryWindowk = refQuery.substr(windows[k], windowSizes);
+                                               refJWindowk = refJ.substr(windows[k], windowSizes);                                     
                                        }else { //last window may be smaller than rest - see findwindows
                                                //get window strings
-                                               QueryWindowk = refQuery.substr(windows[query][k], (it->second-windows[query][k]));
-                                               refJWindowk = refJ.substr(windows[query][k], (it->second-windows[query][k]));
+                                               QueryWindowk = refQuery.substr(windows[k], (it->second-windows[k]));
+                                               refJWindowk = refJ.substr(windows[k], (it->second-windows[k]));
                                        }
                                        
                                        //find differences
                                        int diff = getDiff(QueryWindowk, refJWindowk);
-//cout  << j << '\t' << k << '\t' << diff << endl;                                             
+                                       
                                        //save differences 
                                        diffs[j][k] = diff;
                         
@@ -768,25 +667,23 @@ void Ccode::getAverageQuery (vector<SeqDist> ref, int query) {
                         
                
                //initialize sumRef for this query 
-               sumQuery[query].resize(windows[query].size(), 0);
-               sumSquaredQuery[query].resize(windows[query].size(), 0);
-               averageQuery[query].resize(windows[query].size(), 0);
+               sumQuery.resize(windows.size(), 0);
+               sumSquaredQuery.resize(windows.size(), 0);
+               averageQuery.resize(windows.size(), 0);
                
                //find the sum of the differences 
                for (int j = 0; j < diffs.size(); j++) {
                        for (int k = 0; k < diffs[j].size(); k++) {
-                               sumQuery[query][k] += diffs[j][k];
-                               sumSquaredQuery[query][k] += (diffs[j][k]*diffs[j][k]);
+                               sumQuery[k] += diffs[j][k];
+                               sumSquaredQuery[k] += (diffs[j][k]*diffs[j][k]);
                        }//k
                }//j
                
                
                //find the average of the differences for the references for each window
-               for (int i = 0; i < windows[query].size(); i++) {
-                       averageQuery[query][i] = sumQuery[query][i] / (float) ref.size();
+               for (int i = 0; i < windows.size(); i++) {
+                       averageQuery[i] = sumQuery[i] / (float) ref.size();
                }
-
-       
        }
        catch(exception& e) {
                errorOut(e, "Ccode", "getAverageQuery");
@@ -794,23 +691,23 @@ void Ccode::getAverageQuery (vector<SeqDist> ref, int query) {
        }
 }
 /**************************************************************************************************/
-void Ccode::findVarianceRef (int query) {
+void Ccode::findVarianceRef() {
        try {
                
-               varRef[query].resize(windows[query].size(), 0);
-               sdRef[query].resize(windows[query].size(), 0);
+               varRef.resize(windows.size(), 0);
+               sdRef.resize(windows.size(), 0);
                
                //for each window
-               for (int i = 0; i < windows[query].size(); i++) {
-                       varRef[query][i] = (sumSquaredRef[query][i] - ((sumRef[query][i]*sumRef[query][i])/(float)refCombo[query])) / (float)(refCombo[query]-1);
-                       sdRef[query][i] = sqrt(varRef[query][i]);
+               for (int i = 0; i < windows.size(); i++) {
+                       varRef[i] = (sumSquaredRef[i] - ((sumRef[i]*sumRef[i])/(float)refCombo)) / (float)(refCombo-1);
+                       sdRef[i] = sqrt(varRef[i]);
                        
                        //set minimum error rate to 0.001 - to avoid potential divide by zero - not sure if this is necessary but it follows ccode implementation
-                       if (averageRef[query][i] < 0.001)                       {       averageRef[query][i] = 0.001;           }
-                       if (sumRef[query][i] < 0.001)                           {       sumRef[query][i] = 0.001;                       }
-                       if (varRef[query][i] < 0.001)                           {       varRef[query][i] = 0.001;                       }
-                       if (sumSquaredRef[query][i] < 0.001)            {       sumSquaredRef[query][i] = 0.001;        }
-                       if (sdRef[query][i] < 0.001)                            {       sdRef[query][i] = 0.001;                        }
+                       if (averageRef[i] < 0.001)                      {       averageRef[i] = 0.001;          }
+                       if (sumRef[i] < 0.001)                          {       sumRef[i] = 0.001;                      }
+                       if (varRef[i] < 0.001)                          {       varRef[i] = 0.001;                      }
+                       if (sumSquaredRef[i] < 0.001)           {       sumSquaredRef[i] = 0.001;       }
+                       if (sdRef[i] < 0.001)                           {       sdRef[i] = 0.001;                       }
                                
                }
        }
@@ -820,22 +717,22 @@ void Ccode::findVarianceRef (int query) {
        }
 }
 /**************************************************************************************************/
-void Ccode::findVarianceQuery (int query) {
+void Ccode::findVarianceQuery() {
        try {
-               varQuery[query].resize(windows[query].size(), 0);
-               sdQuery[query].resize(windows[query].size(), 0);
+               varQuery.resize(windows.size(), 0);
+               sdQuery.resize(windows.size(), 0);
                
                //for each window
-               for (int i = 0; i < windows[query].size(); i++) {
-                       varQuery[query][i] = (sumSquaredQuery[query][i] - ((sumQuery[query][i]*sumQuery[query][i])/(float) closest[query].size())) / (float) (closest[query].size()-1);
-                       sdQuery[query][i] = sqrt(varQuery[query][i]);
+               for (int i = 0; i < windows.size(); i++) {
+                       varQuery[i] = (sumSquaredQuery[i] - ((sumQuery[i]*sumQuery[i])/(float) closest.size())) / (float) (closest.size()-1);
+                       sdQuery[i] = sqrt(varQuery[i]);
                        
                        //set minimum error rate to 0.001 - to avoid potential divide by zero - not sure if this is necessary but it follows ccode implementation
-                       if (averageQuery[query][i] < 0.001)                     {       averageQuery[query][i] = 0.001;         }
-                       if (sumQuery[query][i] < 0.001)                         {       sumQuery[query][i] = 0.001;                     }
-                       if (varQuery[query][i] < 0.001)                         {       varQuery[query][i] = 0.001;                     }
-                       if (sumSquaredQuery[query][i] < 0.001)          {       sumSquaredQuery[query][i] = 0.001;      }
-                       if (sdQuery[query][i] < 0.001)                          {       sdQuery[query][i] = 0.001;                      }
+                       if (averageQuery[i] < 0.001)                    {       averageQuery[i] = 0.001;                }
+                       if (sumQuery[i] < 0.001)                                {       sumQuery[i] = 0.001;                    }
+                       if (varQuery[i] < 0.001)                                {       varQuery[i] = 0.001;                    }
+                       if (sumSquaredQuery[i] < 0.001)         {       sumSquaredQuery[i] = 0.001;     }
+                       if (sdQuery[i] < 0.001)                         {       sdQuery[i] = 0.001;                     }
                }
 
        }
@@ -845,44 +742,44 @@ void Ccode::findVarianceQuery (int query) {
        }
 }
 /**************************************************************************************************/
-void Ccode::determineChimeras (int query) {
+void Ccode::determineChimeras() {
        try {
                
-               isChimericConfidence[query].resize(windows[query].size(), false);
-               isChimericTStudent[query].resize(windows[query].size(), false);
-               isChimericANOVA[query].resize(windows[query].size(), false);
-               anova[query].resize(windows[query].size());
+               isChimericConfidence.resize(windows.size(), false);
+               isChimericTStudent.resize(windows.size(), false);
+               isChimericANOVA.resize(windows.size(), false);
+               anova.resize(windows.size());
 
                
                //for each window
-               for (int i = 0; i < windows[query].size(); i++) {
+               for (int i = 0; i < windows.size(); i++) {
                
                        //get confidence limits
-                       float t = getT(closest[query].size()-1);  //how many seqs you are comparing to this querySeq
-                       float dsUpper = (averageQuery[query][i] + (t * sdQuery[query][i])) / averageRef[query][i]; 
-                       float dsLower = (averageQuery[query][i] - (t * sdQuery[query][i])) / averageRef[query][i]; 
-//cout << t << '\t' << "ds upper = " << dsUpper << " dsLower = " << dsLower << endl;                   
-                       if ((dsUpper > 1.0) && (dsLower > 1.0) && (averageQuery[query][i] > averageRef[query][i])) {  /* range does not include 1 */
-                                       isChimericConfidence[query][i] = true;   /* significantly higher at P<0.05 */ 
-//cout << i << " made it here" << endl;
+                       float t = getT(closest.size()-1);  //how many seqs you are comparing to this querySeq
+                       float dsUpper = (averageQuery[i] + (t * sdQuery[i])) / averageRef[i]; 
+                       float dsLower = (averageQuery[i] - (t * sdQuery[i])) / averageRef[i]; 
+               
+                       if ((dsUpper > 1.0) && (dsLower > 1.0) && (averageQuery[i] > averageRef[i])) {  /* range does not include 1 */
+                                       isChimericConfidence[i] = true;   /* significantly higher at P<0.05 */ 
+
                        }
                        
                        //student t test
-                       int degreeOfFreedom = refCombo[query] + closest[query].size() - 2;
-                       float denomForT = (((refCombo[query]-1) * varQuery[query][i] + (closest[query].size() - 1) * varRef[query][i]) / (float) degreeOfFreedom) * ((refCombo[query] + closest[query].size()) / (float) (refCombo[query] * closest[query].size()));    /* denominator, without sqrt(), for ts calculations */
+                       int degreeOfFreedom = refCombo + closest.size() - 2;
+                       float denomForT = (((refCombo-1) * varQuery[i] + (closest.size() - 1) * varRef[i]) / (float) degreeOfFreedom) * ((refCombo + closest.size()) / (float) (refCombo * closest.size()));    /* denominator, without sqrt(), for ts calculations */
                                
-                       float ts = fabs((averageQuery[query][i] - averageRef[query][i]) / (sqrt(denomForT)));  /* value of ts for t-student test */     
+                       float ts = fabs((averageQuery[i] - averageRef[i]) / (sqrt(denomForT)));  /* value of ts for t-student test */   
                        t = getT(degreeOfFreedom);
-//cout << i << '\t' << t << '\t' << ts << endl;                        
-                       if ((ts >= t) && (averageQuery[query][i] > averageRef[query][i])) {   
-                               isChimericTStudent[query][i] = true;   /* significantly higher at P<0.05 */ 
+       
+                       if ((ts >= t) && (averageQuery[i] > averageRef[i])) {   
+                               isChimericTStudent[i] = true;   /* significantly higher at P<0.05 */ 
                        }
                
                        //anova test
-                       float value1 = sumQuery[query][i] + sumRef[query][i];
-                       float value2 = sumSquaredQuery[query][i] + sumSquaredRef[query][i];
-                       float value3 = ((sumQuery[query][i]*sumQuery[query][i]) / (float) (closest[query].size())) + ((sumRef[query][i] * sumRef[query][i]) / (float) refCombo[query]);
-                       float value4 = (value1 * value1) / ( (float) (closest[query].size() + refCombo[query]) );
+                       float value1 = sumQuery[i] + sumRef[i];
+                       float value2 = sumSquaredQuery[i] + sumSquaredRef[i];
+                       float value3 = ((sumQuery[i]*sumQuery[i]) / (float) (closest.size())) + ((sumRef[i] * sumRef[i]) / (float) refCombo);
+                       float value4 = (value1 * value1) / ( (float) (closest.size() + refCombo) );
                        float value5 = value2 - value4;
                        float value6 = value3 - value4;
                        float value7 = value5 - value6;
@@ -891,13 +788,13 @@ void Ccode::determineChimeras (int query) {
                        
                        float f = getF(degreeOfFreedom);
                        
-                        if ((anovaValue >= f) && (averageQuery[query][i] > averageRef[query][i]))  {
-                      isChimericANOVA[query][i] = true;   /* significant P<0.05 */
+                        if ((anovaValue >= f) && (averageQuery[i] > averageRef[i]))  {
+                      isChimericANOVA[i] = true;   /* significant P<0.05 */
                }
                        
                        if (isnan(anovaValue) || isinf(anovaValue)) { anovaValue = 0.0; }
                        
-                       anova[query][i] = anovaValue;
+                       anova[i] = anovaValue;
                }
                
        }
@@ -1009,597 +906,6 @@ float Ccode::getF(int numseq) {
                exit(1);
        }
 }
-
-/**************************************************************************************************/
-void Ccode::createProcessesClosest() {
-       try {
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
-               int process = 0;
-               vector<int> processIDS;
-               
-               //loop through and create all the processes you want
-               while (process != processors) {
-                       int pid = fork();
-                       
-                       if (pid > 0) {
-                               processIDS.push_back(pid);  
-                               process++;
-                       }else if (pid == 0){
-                               
-                               mothurOut("Finding top matches for sequences " + toString(lines[process]->start) + " to " + toString(lines[process]->end)); mothurOutEndLine();
-                               closest = findClosest(lines[process]->start, lines[process]->end, numWanted);
-                               mothurOut("Done finding top matches for sequences " +  toString(lines[process]->start) + " to " + toString(lines[process]->end)); mothurOutEndLine();
-                               
-                               //write out data to file so parent can read it
-                               ofstream out;
-                               string s = toString(getpid()) + ".temp";
-                               openOutputFile(s, out);
-                                                       
-                               //output pairs
-                               for (int i = lines[process]->start; i < lines[process]->end; i++) {
-                                        for (int j = 0; j < closest[i].size(); j++) {
-                                               closest[i][j].seq->printSequence(out);
-                                        }
-                               }
-                               out << ">" << endl; //to stop sequence read
-                               
-                               for (int i = lines[process]->start; i < lines[process]->end; i++) {
-                                        for (int j = 0; j < closest[i].size(); j++) {
-                                               out << closest[i][j].dist << '\t';
-                                        }
-                                        out << endl;
-                               }
-                               out.close();
-                               exit(0);
-                       }else { mothurOut("unable to spawn the necessary processes."); mothurOutEndLine(); exit(0); }
-               }
-               
-               //force parent to wait until all the processes are done
-               for (int i=0;i<processors;i++) { 
-                       int temp = processIDS[i];
-                       wait(&temp);
-               }
-               
-               //get data created by processes
-               for (int i=0;i<processors;i++) { 
-                       ifstream in;
-                       string s = toString(processIDS[i]) + ".temp";
-                       openInputFile(s, in);
-                       
-                       vector< vector<Sequence*> > tempClosest;  tempClosest.resize(querySeqs.size());
-                       //get pairs
-                       for (int k = lines[i]->start; k < lines[i]->end; k++) {
-                               vector<Sequence*> tempVector;
-       
-                               for (int j = 0; j < numWanted; j++) {
-                               
-                                       Sequence* temp = new Sequence(in);
-                                       gobble(in);
-                                               
-                                       tempVector.push_back(temp);
-                               }
-                               
-                               tempClosest[k] = tempVector;
-                       }
-                       
-                       string junk;
-                       in >> junk;  gobble(in);  // to get ">"
-               
-                       vector< vector<float> > dists; dists.resize(querySeqs.size());
-                       
-                       for (int k = lines[i]->start; k < lines[i]->end; k++) {
-                               dists[k].resize(numWanted);
-                               for (int j = 0; j < numWanted; j++) {
-                                       in >> dists[k][j];
-                               }
-                               gobble(in);
-                       
-                       } 
-
-                       for (int k = lines[i]->start; k < lines[i]->end; k++) {
-                               closest[k].resize(numWanted);
-                               for (int j = 0; j < closest[k].size(); j++) {
-                                       closest[k][j].seq = tempClosest[k][j];
-                                       closest[k][j].dist = dists[k][j]; 
-                               }
-                       } 
-
-                       in.close();
-                       remove(s.c_str());
-               }
-                       
-       
-#else
-               closest = findClosest(lines[0]->start, lines[0]->end, numWanted);
-#endif 
-
-       }
-       catch(exception& e) {
-               errorOut(e, "Ccode", "createProcessesClosest");
-               exit(1);
-       }
-}
-
-//***************************************************************************************************************
-void Ccode::createProcessesRemoveBad() {
-       try {
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
-               int process = 0;
-               vector<int> processIDS;
-               
-               //loop through and create all the processes you want
-               while (process != processors) {
-                       int pid = fork();
-                       
-                       if (pid > 0) {
-                               processIDS.push_back(pid);  
-                               process++;
-                       }else if (pid == 0){
-                                                       
-                               for (int i = lines[process]->start; i < lines[process]->end; i++) {
-                                       removeBadReferenceSeqs(closest[i], i);
-                               }
-                               
-                               //write out data to file so parent can read it
-                               ofstream out;
-                               string s = toString(getpid()) + ".temp";
-                               openOutputFile(s, out);
-                               
-                               //output pairs
-                               for (int i = lines[process]->start; i < lines[process]->end; i++) {
-                                       out << closest[i].size() << endl;
-                                        for (int j = 0; j < closest[i].size(); j++) {
-                                               closest[i][j].seq->printSequence(out);
-                                        }
-                                        out << ">" << endl; //to stop sequence read
-                               }
-                               
-                               for (int i = lines[process]->start; i < lines[process]->end; i++) {
-                                        for (int j = 0; j < closest[i].size(); j++) {
-                                               out << closest[i][j].dist << '\t';
-                                        }
-                                        out << endl;
-                               }
-
-                               out.close();
-                               
-                               exit(0);
-                       }else { mothurOut("unable to spawn the necessary processes."); mothurOutEndLine(); exit(0); }
-               }
-               
-               //force parent to wait until all the processes are done
-               for (int i=0;i<processors;i++) { 
-                       int temp = processIDS[i];
-                       wait(&temp);
-               }
-       
-               //get data created by processes
-               for (int i=0;i<processors;i++) { 
-                       ifstream in;
-                       string s = toString(processIDS[i]) + ".temp";
-                       openInputFile(s, in);
-               
-                       vector< vector<Sequence*> > tempClosest;  tempClosest.resize(querySeqs.size());
-                       vector<int> sizes; 
-                       //get pairs
-                       for (int k = lines[i]->start; k < lines[i]->end; k++) {
-               
-                               int num;
-                               in >> num; 
-                               sizes.push_back(num);
-                               gobble(in);
-                               
-                               vector<Sequence*> tempVector;
-                               
-                               for (int j = 0; j < num; j++) {
-                               
-                                       Sequence* temp = new Sequence(in);
-                                       gobble(in);
-                                               
-                                       tempVector.push_back(temp);
-                               }
-                               string junk;
-                               in >> junk;  gobble(in);  // to get ">"
-
-                               tempClosest[k] = tempVector;
-                       }
-                       
-                       vector< vector<float> > dists; dists.resize(querySeqs.size());
-                       int count = 0;
-                       for (int k = lines[i]->start; k < lines[i]->end; k++) {
-                               dists[k].resize(sizes[count]);
-                               for (int j = 0; j < sizes[count]; j++) {
-                                       in >> dists[k][j];
-                               }
-                               gobble(in);
-                               count++;
-                       } 
-                       
-                       count = 0;
-                       for (int k = lines[i]->start; k < lines[i]->end; k++) {
-                               for (int j = 0; j < sizes[count]; j++) {
-                                       closest[k][j].seq = tempClosest[k][j];
-                                       closest[k][j].dist = dists[k][j]; 
-                               }
-                               count++;
-                       } 
-
-                       in.close();
-                       remove(s.c_str());
-               }
-#else
-               for (int i = 0; i < closest.size(); i++) {
-                       removeBadReferenceSeqs(closest[i], i);
-               }
-#endif         
-       
-       }
-       catch(exception& e) {
-               errorOut(e, "Ccode", "createProcessesRemoveBad");
-               exit(1);
-       }
-}
-//***************************************************************************************************************
-void Ccode::createProcessesAverages() {
-       try {
-       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
-               int process = 0;
-               vector<int> processIDS;
-               
-               //loop through and create all the processes you want
-               while (process != processors) {
-                       int pid = fork();
-                       
-                       if (pid > 0) {
-                               processIDS.push_back(pid);  
-                               process++;
-                       }else if (pid == 0){
-                                                       
-                               //find the averages for each querys references 
-                               for (int i = lines[process]->start; i < lines[process]->end; i++) {
-                                       getAverageRef(closest[i], i);  //fills sumRef[i], averageRef[i], sumSquaredRef[i] and refCombo[i].
-                               }
-                               
-                               //find the averages for the query 
-                               for (int i = lines[process]->start; i < lines[process]->end; i++) {
-                                       getAverageQuery(closest[i], i);  //fills sumQuery[i], averageQuery[i], sumSquaredQuery[i].
-                               }
-                               
-                               
-                               //write out data to file so parent can read it
-                               ofstream out;
-                               string s = toString(getpid()) + ".temp";
-                               openOutputFile(s, out);
-                               
-                               //output pairs
-                               for (int i = lines[process]->start; i < lines[process]->end; i++) {
-                                       for (int j = 0; j < windows[i].size(); j++) {
-                                               out << sumRef[i][j] << '\t';
-                                       }
-                                       out << endl;
-                                       for (int j = 0; j < windows[i].size(); j++) {
-                                               out << averageRef[i][j] << '\t';
-                                       }
-                                       out << endl;
-                                       for (int j = 0; j < windows[i].size(); j++) {
-                                               out << sumSquaredRef[i][j] << '\t';
-                                       }
-                                       out << endl;
-                                       for (int j = 0; j < windows[i].size(); j++) {
-                                               out << sumQuery[i][j] << '\t';
-                                       }
-                                       out << endl;
-                                       for (int j = 0; j < windows[i].size(); j++) {
-                                               out << averageQuery[i][j] << '\t';
-                                       }
-                                       out << endl;
-                                       for (int j = 0; j < windows[i].size(); j++) {
-                                               out << sumSquaredQuery[i][j] << '\t';
-                                       }
-                                       out << endl;
-                                       out << refCombo[i] << endl;
-                               }
-
-                               out.close();
-                               
-                               exit(0);
-                       }else { mothurOut("unable to spawn the necessary processes."); mothurOutEndLine(); exit(0); }
-               }
-               
-               //force parent to wait until all the processes are done
-               for (int i=0;i<processors;i++) { 
-                       int temp = processIDS[i];
-                       wait(&temp);
-               }
-       
-               //get data created by processes
-               for (int i=0;i<processors;i++) { 
-                       ifstream in;
-                       string s = toString(processIDS[i]) + ".temp";
-                       openInputFile(s, in);
-               
-                       //get pairs
-                       for (int k = lines[i]->start; k < lines[i]->end; k++) {
-                               sumRef[k].resize(windows[k].size());
-                               averageRef[k].resize(windows[k].size());
-                               sumSquaredRef[k].resize(windows[k].size());
-                               averageQuery[k].resize(windows[k].size());
-                               sumQuery[k].resize(windows[k].size());
-                               sumSquaredQuery[k].resize(windows[k].size());
-                               
-                               for (int j = 0; j < windows[k].size(); j++) {
-                                       in >> sumRef[k][j];
-                               }
-                               gobble(in);
-                               for (int j = 0; j < windows[k].size(); j++) {
-                                       in >> averageRef[k][j];
-                               }
-                               gobble(in);
-                               for (int j = 0; j < windows[k].size(); j++) {
-                                       in >> sumSquaredRef[k][j];
-                               }
-                               gobble(in);
-                               for (int j = 0; j < windows[k].size(); j++) {
-                                       in >> sumQuery[k][j];
-                               }
-                               gobble(in);
-                               for (int j = 0; j < windows[k].size(); j++) {
-                                       in >> averageQuery[k][j];
-                               }
-                               gobble(in);
-                               for (int j = 0; j < windows[k].size(); j++) {
-                                       in >> sumSquaredQuery[k][j];
-                               }
-                               gobble(in);
-                               in >> refCombo[k];
-                               gobble(in);
-                       }
-                       
-                       in.close();
-                       remove(s.c_str());
-               }
-#else
-               //find the averages for each querys references 
-               for (int i = 0; i < querySeqs.size(); i++) {
-                       getAverageRef(closest[i], i);  //fills sumRef[i], averageRef[i], sumSquaredRef[i] and refCombo[i].
-               }
-       
-               //find the averages for the query 
-               for (int i = 0; i < querySeqs.size(); i++) {
-                       getAverageQuery(closest[i], i);  //fills sumQuery[i], averageQuery[i], sumSquaredQuery[i].
-               }
-
-#endif         
-       
-       }
-       catch(exception& e) {
-               errorOut(e, "Ccode", "createProcessesAverages");
-               exit(1);
-       }
-}
-//***************************************************************************************************************
-void Ccode::createProcessesVariances() {
-       try {
-       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
-               int process = 0;
-               vector<int> processIDS;
-               
-               //loop through and create all the processes you want
-               while (process != processors) {
-                       int pid = fork();
-                       
-                       if (pid > 0) {
-                               processIDS.push_back(pid);  
-                               process++;
-                       }else if (pid == 0){
-                                                       
-                               //find the averages for each querys references 
-                               for (int i = lines[process]->start; i < lines[process]->end; i++) {
-                                       findVarianceRef(i);  //fills varRef[i] and sdRef[i] also sets minimum error rate to 0.001 to avoid divide by 0.
-                               }
-                               
-                               //find the averages for the query 
-                               for (int i = lines[process]->start; i < lines[process]->end; i++) {
-                                       findVarianceQuery(i);  //fills varQuery[i] and sdQuery[i] also sets minimum error rate to 0.001 to avoid divide by 0.
-                               }
-                               
-                               
-                               //write out data to file so parent can read it
-                               ofstream out;
-                               string s = toString(getpid()) + ".temp";
-                               openOutputFile(s, out);
-                               
-                               //output pairs
-                               for (int i = lines[process]->start; i < lines[process]->end; i++) {
-                                       for (int j = 0; j < windows[i].size(); j++) {
-                                               out << varRef[i][j] << '\t';
-                                       }
-                                       out << endl;
-                                       for (int j = 0; j < windows[i].size(); j++) {
-                                               out << sdRef[i][j] << '\t';
-                                       }
-                                       out << endl;
-                                       for (int j = 0; j < windows[i].size(); j++) {
-                                               out << varQuery[i][j] << '\t';
-                                       }
-                                       out << endl;
-                                       for (int j = 0; j < windows[i].size(); j++) {
-                                               out << sdQuery[i][j] << '\t';
-                                       }
-                                       out << endl;
-                               }
-
-                               out.close();
-                               
-                               exit(0);
-                       }else { mothurOut("unable to spawn the necessary processes."); mothurOutEndLine(); exit(0); }
-               }
-               
-               //force parent to wait until all the processes are done
-               for (int i=0;i<processors;i++) { 
-                       int temp = processIDS[i];
-                       wait(&temp);
-               }
-       
-               //get data created by processes
-               for (int i=0;i<processors;i++) { 
-                       ifstream in;
-                       string s = toString(processIDS[i]) + ".temp";
-                       openInputFile(s, in);
-               
-                       //get pairs
-                       for (int k = lines[i]->start; k < lines[i]->end; k++) {
-                               varRef[k].resize(windows[k].size());
-                               sdRef[k].resize(windows[k].size());
-                               varQuery[k].resize(windows[k].size());
-                               sdQuery[k].resize(windows[k].size());
-                                                               
-                               for (int j = 0; j < windows[k].size(); j++) {
-                                       in >> varRef[k][j];
-                               }
-                               gobble(in);
-                               for (int j = 0; j < windows[k].size(); j++) {
-                                       in >> sdRef[k][j];
-                               }
-                               gobble(in);
-                               for (int j = 0; j < windows[k].size(); j++) {
-                                       in >> varQuery[k][j];
-                               }
-                               gobble(in);
-                               for (int j = 0; j < windows[k].size(); j++) {
-                                       in >> sdQuery[k][j];
-                               }
-                               gobble(in);
-                       }
-                       
-                       in.close();
-                       remove(s.c_str());
-               }
-#else
-                       //find the averages for each querys references 
-                       for (int i = 0; i < querySeqs.size(); i++) {
-                               findVarianceRef(i);  //fills varRef[i] and sdRef[i] also sets minimum error rate to 0.001 to avoid divide by 0.
-                       }
-                       
-                       //find the averages for the query 
-                       for (int i = 0; i < querySeqs.size(); i++) {
-                               findVarianceQuery(i);  //fills v arQuery[i] and sdQuery[i] also sets minimum error rate to 0.001 to avoid divide by 0.
-                       }
-#endif         
-       }
-       catch(exception& e) {
-               errorOut(e, "Ccode", "createProcessesVariances");
-               exit(1);
-       }
-}
-//***************************************************************************************************************
-void Ccode::createProcessesDetermine() {
-       try {
-       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
-               int process = 0;
-               vector<int> processIDS;
-               
-               //loop through and create all the processes you want
-               while (process != processors) {
-                       int pid = fork();
-                       
-                       if (pid > 0) {
-                               processIDS.push_back(pid);  
-                               process++;
-                       }else if (pid == 0){
-                                                       
-                               //find the averages for each querys references 
-                               for (int i = lines[process]->start; i < lines[process]->end; i++) {
-                                       determineChimeras(i);  //fills anova, isChimericConfidence[i], isChimericTStudent[i] and isChimericANOVA[i]. 
-                               }
-
-                               //write out data to file so parent can read it
-                               ofstream out;
-                               string s = toString(getpid()) + ".temp";
-                               openOutputFile(s, out);
-                               
-                               //output pairs
-                               for (int i = lines[process]->start; i < lines[process]->end; i++) {
-                                       for (int j = 0; j < windows[i].size(); j++) {
-                                               out << anova[i][j] << '\t';
-                                       }
-                                       out << endl;
-                                       for (int j = 0; j < windows[i].size(); j++) {
-                                               out << isChimericConfidence[i][j] << '\t';
-                                       }
-                                       out << endl;
-                                       for (int j = 0; j < windows[i].size(); j++) {
-                                               out << isChimericTStudent[i][j] << '\t';
-                                       }
-                                       out << endl;
-                                       for (int j = 0; j < windows[i].size(); j++) {
-                                               out << isChimericANOVA[i][j] << '\t';
-                                       }
-                                       out << endl;
-                               }
-
-                               out.close();
-                               
-                               exit(0);
-                       }else { mothurOut("unable to spawn the necessary processes."); mothurOutEndLine(); exit(0); }
-               }
-               
-               //force parent to wait until all the processes are done
-               for (int i=0;i<processors;i++) { 
-                       int temp = processIDS[i];
-                       wait(&temp);
-               }
-       
-               //get data created by processes
-               for (int i=0;i<processors;i++) { 
-                       ifstream in;
-                       string s = toString(processIDS[i]) + ".temp";
-                       openInputFile(s, in);
-               
-                       //get pairs
-                       for (int k = lines[i]->start; k < lines[i]->end; k++) {
-                               anova[k].resize(windows[k].size());
-                               isChimericConfidence[k].resize(windows[k].size(), false);
-                               isChimericTStudent[k].resize(windows[k].size(), false);
-                               isChimericANOVA[k].resize(windows[k].size(), false);
-                               int tempBool;
-                                                               
-                               for (int j = 0; j < windows[k].size(); j++) {
-                                       in >> anova[k][j];
-                               }
-                               gobble(in);
-                               for (int j = 0; j < windows[k].size(); j++) {
-                                       in >> tempBool;
-                                       if (tempBool == 1) {  isChimericConfidence[k][j] = true;  }
-                               }
-                               gobble(in);
-                               for (int j = 0; j < windows[k].size(); j++) {
-                                       in >> tempBool;
-                                       if (tempBool == 1) {  isChimericTStudent[k][j] = true;  }
-                               }
-                               gobble(in);
-                               for (int j = 0; j < windows[k].size(); j++) {
-                                       in >> tempBool;
-                                       if (tempBool == 1) {  isChimericANOVA[k][j] = true;  }
-                               }
-                               gobble(in);
-                       }
-                       
-                       in.close();
-                       remove(s.c_str());
-               }
-       #else
-                       for (int i = 0; i < querySeqs.size(); i++) {
-                               determineChimeras(i);  //fills anova, isChimericConfidence[i], isChimericTStudent[i] and isChimericANOVA[i].
-                       }
-       #endif          
-
-       }
-       catch(exception& e) {
-               errorOut(e, "Ccode", "createProcessesDetermine");
-               exit(1);
-       }
-}
 //***************************************************************************************************************
 
 
diff --git a/ccode.h b/ccode.h
index 52592f9c8e19ef4ef80e26b62199e3c9daf489d8..c60b1302f9802af48e3d34f847ab67e8d425cf52 100644 (file)
--- a/ccode.h
+++ b/ccode.h
 class Ccode : public Chimera {
        
        public:
-               Ccode(string, string, string);  
+               Ccode(string, string);  
                ~Ccode();
                
-               int getChimeras();
+               int getChimeras(Sequence* query);
                void print(ostream&);
-               
-               void setCons(string c)          {}
-               void setQuantiles(string q) {}
-               
-               
+               void printHeader(ostream&);             
        private:
        
                Dist* distCalc;
                DeCalculator* decalc;
                int iters;
-               string fastafile, templateFile;
+               string fastafile, mapInfo;
                
+               Sequence* querySeq;
                
-               vector<linePair*> lines;
-               vector<Sequence*> querySeqs;
-               vector<Sequence*> templateSeqs;
-               vector< map<int, int> > spotMap;
+               map<int, int> spotMap;
                map<int, int>::iterator it;
                
-               vector< vector<int> > windows; //windows[0] is the vector of window breaks for querySeqs[0]
-               vector<int> windowSizes;  //windowSizes[0] is the size of the windows for querySeqs[0]
-               vector< map<int, int> > trim;  //trim[0] is the map containing the starting and ending positions for querySeqs[0] set of seqs
-               vector< vector<SeqDist> > closest;  //closest[0] is a vector of sequence at are closest to queryseqs[0]...
-               vector< vector<float> > averageRef;  //averageRef[0] is the average distance at each window for the references for querySeqs[0]
-               vector< vector<float> > averageQuery;  //averageQuery[0] is the average distance at each winow for the query for querySeqs[0]
-               vector< vector<float> >  sumRef;  //sumRef[0] is the sum of distances at each window for the references for querySeqs[0]
-               vector< vector<float> >  sumSquaredRef;  //sumSquaredRef[0] is the sum of squared distances at each window for the references for querySeqs[0]
-               vector< vector<float> > sumQuery;  //sumQuery[0] is the sum of distances at each window for the comparison of query to references for querySeqs[0]
-               vector< vector<float> >  sumSquaredQuery;  //sumSquaredQuery[0] is the sum of squared distances at each window for the comparison of query to references for querySeqs[0]
-               vector< vector<float> > varRef;  //varRef[0] is the variance among references seqs at each window for querySeqs[0]
-               vector< vector<float> > varQuery;  //varQuery[0] is the variance among references and querySeqs[0] at each window
-               vector< vector<float> > sdRef;  //sdRef[0] is the standard deviation of references seqs at each window for querySeqs[0]
-               vector< vector<float> > sdQuery;  //sdQuery[0] is the standard deviation of references and querySeqs[0] at each window
-               vector< vector<float> > anova;  //anova[0] is the vector of anova scores for each window for querySeqs[0]
-               vector<int> refCombo;  //refCombo[0] is the number of reference sequences combinations for querySeqs[0]
-               vector< vector<bool> > isChimericConfidence;  //isChimericConfidence[0] indicates whether querySeqs[0] is chimeric at a given window according to the confidence limits
-               vector< vector<bool> > isChimericTStudent;  //isChimericConfidence[0] indicates whether querySeqs[0] is chimeric at a given window according to the confidence limits
-               vector< vector<bool> > isChimericANOVA;  //isChimericConfidence[0] indicates whether querySeqs[0] is chimeric at a given window according to the confidence limits
+               vector<int>  windows; //windows is the vector of window breaks for query
+               int windowSizes;  //windowSizes is the size of the windows for query
+               map<int, int> trim;  //trim is the map containing the starting and ending positions for query
+               vector<SeqDist>  closest;  //closest is a vector of sequence at are closest to query
+               vector<float>  averageRef;  //averageRef is the average distance at each window for the references for query
+               vector<float>  averageQuery;  //averageQuery is the average distance at each winow for the query for query
+               vector<float>   sumRef;  //sumRef is the sum of distances at each window for the references for query
+               vector<float>   sumSquaredRef;  //sumSquaredRef is the sum of squared distances at each window for the references for query
+               vector<float> sumQuery;  //sumQuery is the sum of distances at each window for the comparison of query to references for query
+               vector<float>  sumSquaredQuery;  //sumSquaredQuery is the sum of squared distances at each window for the comparison of query to references for query
+               vector<float> varRef;  //varRef is the variance among references seqs at each window for query
+               vector<float> varQuery;  //varQuery is the variance among references and query at each window
+               vector<float> sdRef;  //sdRef is the standard deviation of references seqs at each window for query
+               vector<float> sdQuery;  //sdQuery is the standard deviation of references and query at each window
+               vector<float> anova;  //anova is the vector of anova scores for each window for query
+               int refCombo;  //refCombo is the number of reference sequences combinations for query
+               vector<bool>  isChimericConfidence;  //isChimericConfidence indicates whether query is chimeric at a given window according to the confidence limits
+               vector<bool>  isChimericTStudent;  //isChimericConfidence indicates whether query is chimeric at a given window according to the confidence limits
+               vector<bool>  isChimericANOVA;  //isChimericConfidence indicates whether query is chimeric at a given window according to the confidence limits
                
-               vector< vector<SeqDist> > findClosest(int, int, int); 
-               void removeBadReferenceSeqs(vector<SeqDist>&, int);  //removes sequences from closest that are to different of too similar to eachother. 
-               void trimSequences(int);
-               vector<int> findWindows(int);
-               void getAverageRef(vector<SeqDist>, int);               //fills sumRef[i], averageRef[i], sumSquaredRef[i] and refCombo[i].
-               void getAverageQuery (vector<SeqDist>, int);    //fills sumQuery[i], averageQuery[i], sumSquaredQuery[i].
-               void findVarianceRef (int);                                             //fills varRef[i] and sdRef[i] also sets minimum error rate to 0.001 to avoid divide by 0.
-               void findVarianceQuery (int);                                   //fills varQuery[i] and sdQuery[i]
-               void determineChimeras (int);                                   //fills anova, isChimericConfidence[i], isChimericTStudent[i] and isChimericANOVA[i].
+               vector<SeqDist>  findClosest(Sequence*, int); 
+               void removeBadReferenceSeqs(vector<SeqDist>&);  //removes sequences from closest that are to different of too similar to eachother. 
+               void trimSequences(Sequence*);
+               vector<int> findWindows();
+               void getAverageRef(vector<SeqDist>);            //fills sumRef, averageRef, sumSquaredRef and refCombo.
+               void getAverageQuery (vector<SeqDist>, Sequence*);      //fills sumQuery, averageQuery, sumSquaredQuery.
+               void findVarianceRef ();                                                //fills varRef and sdRef also sets minimum error rate to 0.001 to avoid divide by 0.
+               void findVarianceQuery ();                                      //fills varQuery and sdQuery
+               void determineChimeras ();                                      //fills anova, isChimericConfidence, isChimericTStudent and isChimericANOVA.
                
                int getDiff(string, string);  //return number of mismatched bases, a gap to base is not counted as a mismatch
                float getT(int); 
                float getF(int); 
-               
-               void createProcessesClosest();
-               void createProcessesRemoveBad();
-               void createProcessesAverages();
-               void createProcessesVariances();
-               void createProcessesDetermine();
-                               
 };
 
 /***********************************************************/
index 4ae4991c8d78ebe7a90920972175554dd83b6d36..bf16de4e34b795694d1a894ea1005f32353be110 100644 (file)
@@ -13,7 +13,7 @@
 //this is a vertical soft filter
 void Chimera::createFilter(vector<Sequence*> seqs) {
        try {
-               
+               filterString = "";
                int threshold = int (0.5 * seqs.size());
 //cout << "threshhold = " << threshold << endl;
                
@@ -60,33 +60,29 @@ void Chimera::createFilter(vector<Sequence*> seqs) {
        }
 }
 //***************************************************************************************************************
-void Chimera::runFilter(vector<Sequence*> seqs) {
+void Chimera::runFilter(Sequence* seq) {
        try {
                
-               //for each sequence
-               for (int i = 0; i < seqs.size(); i++) {
-               
-                       string seqAligned = seqs[i]->getAligned();
-                       string newAligned = "";
+               string seqAligned = seq->getAligned();
+               string newAligned = "";
                        
-                       for (int j = 0; j < seqAligned.length(); j++) {
-                               //if this spot is a gap
-                               if (filterString[j] == '1') { newAligned += seqAligned[j]; }
-                       }
-                       
-                       seqs[i]->setAligned(newAligned);
+               for (int j = 0; j < seqAligned.length(); j++) {
+                       //if this spot is a gap
+                       if (filterString[j] == '1') { newAligned += seqAligned[j]; }
                }
-               
+                       
+               seq->setAligned(newAligned);
        }
        catch(exception& e) {
                errorOut(e, "Chimera", "runFilter");
                exit(1);
        }
 }
-
 //***************************************************************************************************************
 vector<Sequence*> Chimera::readSeqs(string file) {
        try {
+       
+               mothurOut("Reading sequences... "); cout.flush();
                ifstream in;
                openInputFile(file, in);
                vector<Sequence*> container;
@@ -108,6 +104,8 @@ vector<Sequence*> Chimera::readSeqs(string file) {
                }
                
                in.close();
+               mothurOut("Done."); mothurOutEndLine();
+               
                return container;
        }
        catch(exception& e) {
@@ -187,6 +185,31 @@ vector< vector<float> > Chimera::readQuantiles() {
        }
 }
 //***************************************************************************************************************
+Sequence* Chimera::getSequence(string name) {
+       try{
+               Sequence* temp;
+               
+               //look through templateSeqs til you find it
+               int spot = -1;
+               for (int i = 0; i < templateSeqs.size(); i++) {
+                       if (name == templateSeqs[i]->getName()) {  
+                               spot = i;
+                               break;
+                       }
+               }
+               
+               if(spot == -1) { mothurOut("Error: Could not find sequence."); mothurOutEndLine(); return NULL; }
+               
+               temp = new Sequence(templateSeqs[spot]->getName(), templateSeqs[spot]->getAligned());
+               
+               return temp;
+       }
+       catch(exception& e) {
+               errorOut(e, "Chimera", "getSequence");
+               exit(1);
+       }
+}
+//***************************************************************************************************************
 
 
 
index 8b3b0eb713beb96cc391f18a733029298508145a..a968a02fa61cb06d7bef2d92386db5d55cf020f3 100644 (file)
--- a/chimera.h
+++ b/chimera.h
@@ -14,7 +14,7 @@
 #include "mothur.h"
 #include "sequence.hpp"
 
-
+/***********************************************************************/
 struct Preference {
                string name;
                vector<string> leftParent; //keep the name of closest left associated with the two scores
@@ -25,12 +25,52 @@ struct Preference {
                int midpoint;
 
 };
+/***********************************************************************/
+struct score_struct {
+       int prev;
+       int score;
+       int row;
+       int col;
+};
+/***********************************************************************/
+struct trace_struct {
+       int col;
+       int oldCol;
+       int row;
+};
+/***********************************************************************/
+struct results {
+       int regionStart;
+       int regionEnd;
+       int nastRegionStart;
+       int nastRegionEnd;
+       string parent;
+       float queryToParent;
+       float queryToParentLocal;
+       float divR;
+};
+/***********************************************************************/
+struct rank {
+               int num;
+               float score;
+               rank(int n, float s) : num(n), score(s) {}
+};
+/***********************************************************************/
 
 struct SeqDist {
        Sequence* seq;
        float dist;
 };
-
+//********************************************************************************************************************
+//sorts highest to lowest
+inline bool compareMembers(rank left, rank right){
+       return (left.score > right.score);      
+} 
+//********************************************************************************************************************
+//sorts lowest to highest
+inline bool compareRegionStart(results left, results right){
+       return (left.nastRegionStart < right.nastRegionStart);  
+} 
 //********************************************************************************************************************
 //sorts lowest to highest
 inline bool compareSeqDist(SeqDist left, SeqDist right){
@@ -59,8 +99,8 @@ class Chimera {
        public:
        
                Chimera(){};
+               Chimera(string);
                Chimera(string, string);
-               Chimera(string, string, string);
                virtual ~Chimera(){};
                virtual void setFilter(bool f)                  {       filter = f;                     }
                virtual void setCorrection(bool c)              {       correction = c;         }
@@ -76,30 +116,37 @@ class Chimera {
                virtual void setDivR(float d)                   {       divR = d;                       }
                virtual void setParents(int p)                  {       parents = p;            }
                virtual void setMinSim(int s)                   {       minSim = s;                     }
+               virtual void setMinCoverage(int c)              {       minCov = c;                     }
+               virtual void setMinBS(int b)                    {       minBS = b;                      }
+               virtual void setMinSNP(int s)                   {       minSNP = s;                     }
                virtual void setIters(int i)                    {       iters = i;                      }
-
+               virtual void setTemplateSeqs(vector<Sequence*> t)       {       templateSeqs = t;       }
+               virtual bool getUnaligned()                             {       return unaligned;                       }
+               virtual void setTemplateFile(string t)  {   templateFileName = t;       }
                
                virtual void setCons(string){};
                virtual void setQuantiles(string){};
+               virtual void doPrep(){};
                virtual vector<Sequence*> readSeqs(string);
                virtual vector< vector<float> > readQuantiles();
                virtual void setMask(string);
-               virtual void runFilter(vector<Sequence*>);
+               virtual void runFilter(Sequence*);
                virtual void createFilter(vector<Sequence*>);
                
+               virtual void printHeader(ostream&){};
+               virtual int getChimeras(Sequence*){ return 0; }
+               virtual int getChimeras(){ return 0; }
+               virtual void print(ostream&){}; 
                
-               //pure functions
-               virtual int getChimeras() = 0;  
-               virtual void print(ostream&) = 0;       
                
        protected:
                
+               vector<Sequence*> templateSeqs;
                bool filter, correction, svg, unaligned;
-               int processors, window, increment, numWanted, kmerSize, match, misMatch, minSim, parents, iters;
+               int processors, window, increment, numWanted, kmerSize, match, misMatch, minSim, minCov, minBS, minSNP, parents, iters;
                float divR;
-               string seqMask, quanfile, filterString, name, outputDir;
-                       
-
+               string seqMask, quanfile, filterString, name, outputDir, templateFileName;
+               Sequence* getSequence(string);  //find sequence from name       
 };
 
 /***********************************************************************/
index 04c143fecc50207a682f033567372959bcb831be..39ed4870fd1f6668839b3abf6299b537dc4c3ebf 100644 (file)
 #include "chimeracheckrdp.h"
                
 //***************************************************************************************************************
-ChimeraCheckRDP::ChimeraCheckRDP(string filename, string temp, string o) {  fastafile = filename;  templateFile = temp; outputDir = o;  }
+ChimeraCheckRDP::ChimeraCheckRDP(string filename,  string o) {  fastafile = filename;  outputDir = o;  }
 //***************************************************************************************************************
 
 ChimeraCheckRDP::~ChimeraCheckRDP() {
        try {
-               for (int i = 0; i < querySeqs.size(); i++)              {  delete querySeqs[i];         }
                delete templateDB;
                delete kmer;
        }
@@ -28,113 +27,65 @@ ChimeraCheckRDP::~ChimeraCheckRDP() {
 void ChimeraCheckRDP::print(ostream& out) {
        try {
                
-               mothurOutEndLine();
-               
-               //vector<bool> isChimeric;  isChimeric.resize(querySeqs.size(), false);
+               mothurOut("Processing: " + querySeq->getName()); mothurOutEndLine();
                
-               for (int i = 0; i < querySeqs.size(); i++) {
+               out << querySeq->getName() << endl;
+               out << "IS scores: " << '\t';
                        
-                               out << querySeqs[i]->getName() << endl;
-                               out << "IS scores: " << '\t';
-                               
-                               //int lastChimericWindowFound = 0;
-                               
-                               for (int k = 0; k < IS[i].size(); k++) {
-                                       out << IS[i][k].score << '\t'; 
-                                       //if (IS[i][k].score > chimeraCutoff) {  isChimeric[i] = true;   lastChimericWindowFound = k;           }                       
-                               }
-                               out << endl;
-                               //if (isChimeric[i]) { 
-                                       //mothurOut(querySeqs[i]->getName() + "\tIS: " + toString(IS[i][lastChimericWindowFound].score) + "\tbreakpoint: " + toString(IS[i][lastChimericWindowFound].midpoint) + "\tleft parent: " + IS[i][lastChimericWindowFound].leftParent + "\tright parent: " + IS[i][lastChimericWindowFound].rightParent); mothurOutEndLine();
-                                       //out << endl << "chimera: YES" << endl;
-                               //}else{
-                                       //out << endl << "chimera: NO" << endl;
-                               //}
+               for (int k = 0; k < IS.size(); k++) {
+                       out << IS[k].score << '\t'; 
+               }
+               out << endl;
+               
+               if (svg) {
+                       if (name != "") { //if user has specific names
+                               map<string, string>::iterator it = names.find(querySeq->getName());
                                
-                               if (svg) {
-                                       
-                                       if (name != "") { //if user has specific names
-                                               map<string, string>::iterator it = names.find(querySeqs[i]->getName());
-                                       
-                                               if (it != names.end()) { //user wants pic of this
-                                                       makeSVGpic(IS[i], i);  //zeros out negative results
-                                               }
-                                       }else{//output them all
-                                               makeSVGpic(IS[i], i);  //zeros out negative results
-                                       }
+                               if (it != names.end()) { //user wants pic of this
+                                       makeSVGpic(IS);  //zeros out negative results
                                }
+                       }else{//output them all
+                               makeSVGpic(IS);  //zeros out negative results
+                       }
                }
-               
-               mothurOut("This method does not determine if a sequence is chimeric, but allows you to make that determination based on the IS values."); mothurOutEndLine();
        }
        catch(exception& e) {
                errorOut(e, "ChimeraCheckRDP", "print");
                exit(1);
        }
 }
-
 //***************************************************************************************************************
-int ChimeraCheckRDP::getChimeras() {
+void ChimeraCheckRDP::doPrep() {
        try {
-               
-               //read in query sequences and subject sequences
-               mothurOutEndLine();
-               mothurOut("Reading query sequences... "); cout.flush();
-               querySeqs = readSeqs(fastafile);
-               mothurOut("Done."); 
-               //templateSeqs = readSeqs(templateFile);
-               templateDB = new AlignmentDB(templateFile, "kmer", kmerSize, 0.0,0.0,0.0,0.0);
+               templateDB = new AlignmentDB(templateFileName, "kmer", kmerSize, 0.0,0.0,0.0,0.0);
                mothurOutEndLine();
                
-               int numSeqs = querySeqs.size();
-               
-               IS.resize(numSeqs);
-               closest.resize(numSeqs);
-               
-               //break up file if needed
-               int linesPerProcess = numSeqs / processors ;
-               
-               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
-                       //find breakup of sequences for all times we will Parallelize
-                       if (processors == 1) {   lines.push_back(new linePair(0, numSeqs));  }
-                       else {
-                               //fill line pairs
-                               for (int i = 0; i < (processors-1); i++) {                      
-                                       lines.push_back(new linePair((i*linesPerProcess), ((i*linesPerProcess) + linesPerProcess)));
-                               }
-                               //this is necessary to get remainder of processors / numSeqs so you don't miss any lines at the end
-                               int i = processors - 1;
-                               lines.push_back(new linePair((i*linesPerProcess), numSeqs));
-                       }
-                       
-               #else
-                       lines.push_back(new linePair(0, numSeqs));
-               #endif
-               
                kmer = new Kmer(kmerSize);
                
                if (name != "") { 
                        readName(name);  //fills name map with names of seqs the user wants to have .svg for.  
                }
+       }
+       catch(exception& e) {
+               errorOut(e, "ChimeraCheckRDP", "doPrep");
+               exit(1);
+       }
+}
+//***************************************************************************************************************
+int ChimeraCheckRDP::getChimeras(Sequence* query) {
+       try {
                
-               //find closest seq to each querySeq
-               for (int i = 0; i < querySeqs.size(); i++) {
-                       closest[i] = templateDB->findClosestSequence(querySeqs[i]);  
-               }
-
-               //for each query find IS value  
-               if (processors == 1) {
-                       for (int i = 0; i < querySeqs.size(); i++) {
-                               IS[i] = findIS(i); 
-                       }
-               }else {         createProcessesIS();    }
-               
+               IS.clear();
+                               
+               querySeq = query;
+                       
+               closest = templateDB->findClosestSequence(query);  
+       
+               IS = findIS(); 
+                                       
                //determine chimera report cutoff - window score above 95%
                //getCutoff();  - not very acurate predictor
                
-               //free memory
-               for (int i = 0; i < lines.size(); i++)                                  {       delete lines[i];        }
-       
                return 0;
        }
        catch(exception& e) {
@@ -143,7 +94,7 @@ int ChimeraCheckRDP::getChimeras() {
        }
 }
 //***************************************************************************************************************
-vector<sim> ChimeraCheckRDP::findIS(int query) {
+vector<sim> ChimeraCheckRDP::findIS() {
        try {
                
                
@@ -154,13 +105,11 @@ vector<sim> ChimeraCheckRDP::findIS(int query) {
                vector< map<int, int> > subjectKmerInfo;
                
                vector<sim>  isValues;
-               string queryName = querySeqs[query]->getName();
-               string seq = querySeqs[query]->getUnaligned();
-               
-               mothurOut("Finding IS values for sequence " + toString(query+1)); mothurOutEndLine();
+               string queryName = querySeq->getName();
+               string seq = querySeq->getUnaligned();
                
                queryKmerInfo = kmer->getKmerCounts(seq);
-               subjectKmerInfo = kmer->getKmerCounts(closest[query].getUnaligned());
+               subjectKmerInfo = kmer->getKmerCounts(closest.getUnaligned());
                
                //find total kmers you have in common with closest[query] by looking at the last entry in the vector of maps for each
                int nTotal = calcKmers(queryKmerInfo[(queryKmerInfo.size()-1)], subjectKmerInfo[(subjectKmerInfo.size()-1)]);
@@ -238,7 +187,7 @@ vector<sim> ChimeraCheckRDP::findIS(int query) {
                        
                        delete left;
                        delete right;
-               }       
+               }
                
                return isValues;
        
@@ -309,36 +258,10 @@ int ChimeraCheckRDP::calcKmers(map<int, int> query, map<int, int> subject) {
 }
 
 //***************************************************************************************************************
-void ChimeraCheckRDP::getCutoff() {
-       try{
-               
-               vector<float> temp;
-               
-               //store all is scores for all windows
-               for (int i = 0; i < IS.size(); i++) {
-                       for (int j = 0; j < IS[i].size(); j++) {
-                               temp.push_back(IS[i][j].score);
-                       }
-               }
-               
-               //sort them
-               sort(temp.begin(), temp.end());
-               
-               //get 95%
-               chimeraCutoff = temp[int(temp.size() * 0.95)];
-
-       }
-       catch(exception& e) {
-               errorOut(e, "ChimeraCheckRDP", "getCutoff");
-               exit(1);
-       }
-}
-
-//***************************************************************************************************************
-void ChimeraCheckRDP::makeSVGpic(vector<sim> info, int query) {
+void ChimeraCheckRDP::makeSVGpic(vector<sim> info) {
        try{
                
-               string file = outputDir + querySeqs[query]->getName() + ".chimeracheck.svg";
+               string file = outputDir + querySeq->getName() + ".chimeracheck.svg";
                ofstream outsvg;
                openOutputFile(file, outsvg);
                
@@ -346,7 +269,7 @@ void ChimeraCheckRDP::makeSVGpic(vector<sim> info, int query) {
                
                outsvg << "<svg xmlns:svg=\"http://www.w3.org/2000/svg\" xmlns=\"http://www.w3.org/2000/svg\" width=\"100%\" height=\"100%\" viewBox=\"0 0 700 " + toString(width) + "\">\n";
                outsvg << "<g>\n";
-               outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString((width / 2) - 150) + "\" y=\"25\">Plotted IS values for " + querySeqs[query]->getName() + "</text>\n";
+               outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString((width / 2) - 150) + "\" y=\"25\">Plotted IS values for " + querySeq->getName() + "</text>\n";
                
                outsvg <<  "<line x1=\"75\" y1=\"600\" x2=\"" + toString((info.size()*5) + 75) + "\" y2=\"600\" stroke=\"black\" stroke-width=\"2\"/>\n";  
                outsvg <<  "<line x1=\"75\" y1=\"600\" x2=\"75\" y2=\"125\" stroke=\"black\" stroke-width=\"2\"/>\n";
@@ -392,96 +315,5 @@ void ChimeraCheckRDP::makeSVGpic(vector<sim> info, int query) {
        }
 }
 //***************************************************************************************************************
-void ChimeraCheckRDP::createProcessesIS() {
-       try {
-       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
-               int process = 0;
-               vector<int> processIDS;
-               
-               //loop through and create all the processes you want
-               while (process != processors) {
-                       int pid = fork();
-                       
-                       if (pid > 0) {
-                               processIDS.push_back(pid);  
-                               process++;
-                       }else if (pid == 0){
-                                                       
-                               for (int i = lines[process]->start; i < lines[process]->end; i++) {
-                                       IS[i] = findIS(i);  
-                               }                               
-                               
-                               //write out data to file so parent can read it
-                               ofstream out;
-                               string s = toString(getpid()) + ".temp";
-                               openOutputFile(s, out);
-                               
-                               //output pairs
-                               for (int i = lines[process]->start; i < lines[process]->end; i++) {
-                                       out << IS[i].size() << endl;
-                                       for (int j = 0; j < IS[i].size(); j++) {
-                                               out << IS[i][j].leftParent << '\t'<< IS[i][j].rightParent << '\t' << IS[i][j].midpoint << '\t' << IS[i][j].score << endl;
-                                       }
-                               }
-
-                               out.close();
-                               
-                               exit(0);
-                       }else { mothurOut("unable to spawn the necessary processes."); mothurOutEndLine(); exit(0); }
-               }
-               
-               //force parent to wait until all the processes are done
-               for (int i=0;i<processors;i++) { 
-                       int temp = processIDS[i];
-                       wait(&temp);
-               }
-       
-               //get data created by processes
-               for (int i=0;i<processors;i++) { 
-                       ifstream in;
-                       string s = toString(processIDS[i]) + ".temp";
-                       openInputFile(s, in);
-               
-                       //get pairs
-                       for (int k = lines[i]->start; k < lines[i]->end; k++) {
-                       
-                               int size;
-                               in >> size; gobble(in);
-                               
-                               string left, right;
-                               int mid;
-                               float score;
-                               
-                               IS[k].clear();
-                               
-                               for (int j = 0; j < size; j++) {
-                                       in >> left >> right >> mid >> score;  gobble(in);
-                                       
-                                       sim temp;
-                                       temp.leftParent = left;
-                                       temp.rightParent = right;
-                                       temp.midpoint = mid;
-                                       temp.score = score;
-                                       
-                                       IS[k].push_back(temp);
-                               }
-                       }
-                       
-                       in.close();
-                       remove(s.c_str());
-               }
-#else
-                       for (int i = 0; i < querySeqs.size(); i++) {
-                               IS[i] = findIS(i);
-                       }
-#endif         
-       }
-       catch(exception& e) {
-               errorOut(e, "ChimeraCheckRDP", "createProcessesIS");
-               exit(1);
-       }
-}
-
-//***************************************************************************************************************
 
 
index 6193d6d64d47a3dcec90d47844fe62d16ccd6fb7..e12e7d3d9d6cf3a08e44d453e7ea5afd2a34f06b 100644 (file)
 class ChimeraCheckRDP : public Chimera {
        
        public:
-               ChimeraCheckRDP(string, string, string);        
+               ChimeraCheckRDP(string, string);        
                ~ChimeraCheckRDP();
                
-               int getChimeras();
+               int getChimeras(Sequence*);
                void print(ostream&);
-               
-               void setCons(string){};
-               void setQuantiles(string q) {};
-               
+               void doPrep();
                
        private:
                
-               vector<linePair*> lines;
-               vector<Sequence*> querySeqs;
+               
+               Sequence* querySeq;
                AlignmentDB* templateDB;
                Kmer* kmer;
-               
-               vector< vector<sim> > IS;  //IS[0] is the vector of IS values for each window for querySeqs[0]
-               float chimeraCutoff;
-               
-               
-               //map<string, vector< map<int, int> > >:: iterator it;
-               //map<int, int>::iterator it2;
-               
-               vector<Sequence> closest;               //closest[0] is the closest overall seq to querySeqs[0].
-               
-               string fastafile, templateFile;
+               Sequence closest;               //closest is the closest overall seq to query
+
+               vector<sim>  IS;  //IS is the vector of IS values for each window for query
+               string fastafile;
                map<string, string> names;
                
-               vector<sim> findIS(int);
+               vector<sim> findIS();
                int calcKmers(map<int, int>, map<int, int>);
-               void getCutoff();
-               void makeSVGpic(vector<sim>, int);
+               void makeSVGpic(vector<sim>);
                void readName(string);
-                               
-               void createProcessesIS();
-               
 };
-
 /***********************************************************/
 
 #endif
diff --git a/chimerarealigner.cpp b/chimerarealigner.cpp
new file mode 100644 (file)
index 0000000..57c3641
--- /dev/null
@@ -0,0 +1,108 @@
+/*
+ *  chimerarealigner.cpp
+ *  Mothur
+ *
+ *  Created by westcott on 2/12/10.
+ *  Copyright 2010 Schloss Lab. All rights reserved.
+ *
+ */
+
+#include "chimerarealigner.h"
+#include "needlemanoverlap.hpp"
+#include "nast.hpp"
+
+//***************************************************************************************************************
+ChimeraReAligner::ChimeraReAligner(vector<Sequence*> t, int m, int mm) : match(m), misMatch(mm) {  templateSeqs = t;  }
+//***************************************************************************************************************
+ChimeraReAligner::~ChimeraReAligner() {}       
+//***************************************************************************************************************
+void ChimeraReAligner::reAlign(Sequence* query, vector<results> parents) {
+       try {
+               if (parents.size() != 0) {
+                       vector<Sequence*> queryParts;
+                       vector<Sequence*> parentParts;  //queryParts[0] relates to parentParts[0]
+                       
+                       string qAligned = query->getAligned();
+                       string newQuery = "";
+                       
+                       //sort parents by region start
+                       sort(parents.begin(), parents.end(), compareRegionStart);
+
+                       //make sure you don't cutoff beginning of query 
+                       if (parents[0].nastRegionStart > 0) {  newQuery += qAligned.substr(0, parents[0].nastRegionStart+1);  }
+                       int longest = 0;
+                       //take query and break apart into pieces using breakpoints given by results of parents
+                       for (int i = 0; i < parents.size(); i++) {
+                               int length = parents[i].nastRegionEnd - parents[i].nastRegionStart+1;
+                               string q = qAligned.substr(parents[i].nastRegionStart, length);
+                               Sequence* queryFrag = new Sequence(query->getName(), q);
+                               
+                               queryParts.push_back(queryFrag);
+                               
+                               Sequence* parent = getSequence(parents[i].parent);
+                               string p = parent->getAligned();
+                               p = p.substr(parents[i].nastRegionStart, length);
+                               parent->setAligned(p);
+                               
+                               parentParts.push_back(parent);
+                               
+                               if (q.length() > longest)       { longest = q.length(); }
+                               if (p.length() > longest)       { longest = p.length(); }
+                       }
+                                                                                       
+                       //align each peice to correct parent from results
+                       for (int i = 0; i < queryParts.size(); i++) {
+                               alignment = new NeedlemanOverlap(-2.0, match, misMatch, longest+1); //default gapopen, match, mismatch, longestbase
+                               Nast nast(alignment, queryParts[i], parentParts[i]);
+                               delete alignment;
+                       }
+                                                                                               
+                       //recombine pieces to form new query sequence
+                       for (int i = 0; i < queryParts.size(); i++) {
+                               newQuery += queryParts[i]->getAligned();
+                       }
+                       
+                       //make sure you don't cutoff end of query 
+                       if (parents[parents.size()-1].nastRegionEnd < qAligned.length()) {  newQuery += qAligned.substr(parents[parents.size()-1].nastRegionEnd-1);  }
+                       
+                       //set query to new aligned string
+                       query->setAligned(newQuery);
+                       
+                       //free memory
+                       for (int i = 0; i < queryParts.size(); i++) { delete queryParts[i];  }
+                       for (int i = 0; i < parentParts.size(); i++) { delete parentParts[i];  }
+                       
+               } //else leave query alone, you have bigger problems...
+               
+       }
+       catch(exception& e) {
+               errorOut(e, "ChimeraReAligner", "reAlign");
+               exit(1);
+       }
+}
+//***************************************************************************************************************
+Sequence* ChimeraReAligner::getSequence(string name) {
+       try{
+               Sequence* temp;
+               
+               //look through templateSeqs til you find it
+               int spot = -1;
+               for (int i = 0; i < templateSeqs.size(); i++) {
+                       if (name == templateSeqs[i]->getName()) {  
+                               spot = i;
+                               break;
+                       }
+               }
+               
+               if(spot == -1) { mothurOut("Error: Could not find sequence."); mothurOutEndLine(); return NULL; }
+               
+               temp = new Sequence(templateSeqs[spot]->getName(), templateSeqs[spot]->getAligned());
+               
+               return temp;
+       }
+       catch(exception& e) {
+               errorOut(e, "ChimeraReAligner", "getSequence");
+               exit(1);
+       }
+}
+//***************************************************************************************************************
diff --git a/chimerarealigner.h b/chimerarealigner.h
new file mode 100644 (file)
index 0000000..2b53841
--- /dev/null
@@ -0,0 +1,37 @@
+#ifndef CHIMERAREALIGNER_H
+#define CHIMERAREALIGNER_H
+
+/*
+ *  chimerarealigner.h
+ *  Mothur
+ *
+ *  Created by westcott on 2/12/10.
+ *  Copyright 2010 Schloss Lab. All rights reserved.
+ *
+ */
+
+#include "chimera.h"
+#include "alignment.hpp"
+
+/***********************************************************/
+
+class ChimeraReAligner  {
+       
+       public:
+               ChimeraReAligner(vector<Sequence*>, int, int);   
+               ~ChimeraReAligner();
+               
+               void reAlign(Sequence*, vector<results>);
+                               
+       private:
+               Sequence* querySeq;
+               Alignment* alignment;
+               vector<Sequence*> templateSeqs;
+               int match, misMatch;
+               
+               Sequence* getSequence(string);  //find sequence from name
+};
+/***********************************************************/
+
+#endif
+
index b06d3693259c7f2361e428cb965dbd99301ad7d8..e1eebf400c052f7f7926e0029f6090ae0e93ede3 100644 (file)
@@ -26,7 +26,8 @@ ChimeraSeqsCommand::ChimeraSeqsCommand(string option){
                
                else {
                        //valid paramters for this command
-                       string Array[] =  {"fasta", "filter", "correction", "processors", "method", "window", "increment", "template", "conservation", "quantile", "mask", "numwanted", "ksize", "svg", "name", "match","mismatch", "divergence", "minsim", "parents", "iters","outputdir","inputdir" };
+                       string Array[] =  {"fasta", "filter", "correction", "processors", "method", "window", "increment", "template", "conservation", "quantile", "mask", 
+                       "numwanted", "ksize", "svg", "name", "match","mismatch", "divergence", "minsim","mincov","minbs", "minsnp","parents", "iters","outputdir","inputdir" };
                        vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
                        
                        OptionParser parser(option);
@@ -148,7 +149,7 @@ ChimeraSeqsCommand::ChimeraSeqsCommand(string option){
                        svg = isTrue(temp);
                        
                        temp = validParameter.validFile(parameters, "window", false);   
-                       if ((temp == "not found") && (method == "chimeraslayer")) { temp = "100"; }                     
+                       if ((temp == "not found") && (method == "chimeraslayer")) { temp = "50"; }                      
                        else if (temp == "not found") { temp = "0"; }
                        convert(temp, window);
                        
@@ -158,25 +159,37 @@ ChimeraSeqsCommand::ChimeraSeqsCommand(string option){
                        temp = validParameter.validFile(parameters, "mismatch", false);                 if (temp == "not found") { temp = "-4"; }
                        convert(temp, mismatch);
                        
-                       temp = validParameter.validFile(parameters, "divergence", false);               if (temp == "not found") { temp = "1.0"; }
+                       temp = validParameter.validFile(parameters, "divergence", false);               if (temp == "not found") { temp = "1.007"; }
                        convert(temp, divR);
                        
                        temp = validParameter.validFile(parameters, "minsim", false);                   if (temp == "not found") { temp = "90"; }
                        convert(temp, minSimilarity);
                        
-                       temp = validParameter.validFile(parameters, "parents", false);                  if (temp == "not found") { temp = "5"; }
+                       temp = validParameter.validFile(parameters, "mincov", false);                   if (temp == "not found") { temp = "70"; }
+                       convert(temp, minCoverage);
+                       
+                       temp = validParameter.validFile(parameters, "minbs", false);                    if (temp == "not found") { temp = "90"; }
+                       convert(temp, minBS);
+                       
+                       temp = validParameter.validFile(parameters, "minsnp", false);                   if (temp == "not found") { temp = "10"; }
+                       convert(temp, minSNP);
+
+                       temp = validParameter.validFile(parameters, "parents", false);                  if (temp == "not found") { temp = "3"; }
                        convert(temp, parents); 
                        
-                       temp = validParameter.validFile(parameters, "iters", false);                    if (temp == "not found") { temp = "1000"; }
+                       temp = validParameter.validFile(parameters, "iters", false);    
+                       if ((temp == "not found") && (method == "chimeraslayer")) { temp = "100"; }             
+                       else if (temp == "not found") { temp = "1000"; }
                        convert(temp, iters); 
                         
                        temp = validParameter.validFile(parameters, "increment", false);                
-                       if ((temp == "not found") && ((method == "chimeracheck") || (method == "chimeraslayer"))) { temp = "10"; }
+                       if ((temp == "not found") && (method == "chimeracheck")) { temp = "10"; }
+                       else if ((temp == "not found") && (method == "chimeraslayer")) { temp = "5"; }
                        else if (temp == "not found") { temp = "25"; }
                        convert(temp, increment);
                        
                        temp = validParameter.validFile(parameters, "numwanted", false);
-                       if ((temp == "not found") && (method == "chimeraslayer")) { temp = "10"; }              
+                       if ((temp == "not found") && (method == "chimeraslayer")) { temp = "15"; }              
                        else if (temp == "not found") { temp = "20"; }
                        convert(temp, numwanted);
 
@@ -216,7 +229,11 @@ void ChimeraSeqsCommand::help(){
                mothurOut("The ksize parameter allows you to input kmersize. \n");
                mothurOut("The svg parameter allows you to specify whether or not you would like a svg file outputted for each query sequence.\n");
                mothurOut("The name parameter allows you to enter a file containing names of sequences you would like .svg files for.\n");
-               //mothurOut("The iters parameter allows you to specify the number of bootstrap iters to do with the chimeraslayer method.\n");
+               mothurOut("The iters parameter allows you to specify the number of bootstrap iters to do with the chimeraslayer method.\n");
+               mothurOut("The minsim parameter allows you .... \n");
+               mothurOut("The mincov parameter allows you to specify minimum coverage by closest matches found in template. Default is 70, meaning 70%. \n");
+               mothurOut("The minbs parameter allows you to specify minimum bootstrap support for calling a sequence chimeric. Default is 90, meaning 90%. \n");
+               mothurOut("The minsnp parameter allows you to specify percent of SNPs to sample on each side of breakpoint for computing bootstrap support (default: 10) \n");
                mothurOut("NOT ALL PARAMETERS ARE USED BY ALL METHODS. Please look below for method specifics.\n\n");
                mothurOut("Details for each method: \n"); 
                mothurOut("\tpintail: \n"); 
@@ -252,22 +269,20 @@ int ChimeraSeqsCommand::execute(){
                
                if (abort == true) { return 0; }
                
+               int start = time(NULL); 
+               
                if (method == "bellerophon")                    {               chimera = new Bellerophon(fastafile, outputDir);                        }
-               else if (method == "pintail")                   {               chimera = new Pintail(fastafile, templatefile, outputDir);      }
-               else if (method == "ccode")                             {               chimera = new Ccode(fastafile, templatefile, outputDir);                        }
-               else if (method == "chimeracheck")              {               chimera = new ChimeraCheckRDP(fastafile, templatefile, outputDir);      }
-               else if (method == "chimeraslayer")             {               chimera = new ChimeraSlayer(fastafile, templatefile);           }
+               else if (method == "pintail")                   {               chimera = new Pintail(fastafile, outputDir);                            }
+               else if (method == "ccode")                             {               chimera = new Ccode(fastafile, outputDir);                                      }
+               else if (method == "chimeracheck")              {               chimera = new ChimeraCheckRDP(fastafile, outputDir);            }
+               else if (method == "chimeraslayer")             {               chimera = new ChimeraSlayer("blast");                                           }
                else { mothurOut("Not a valid method."); mothurOutEndLine(); return 0;          }
                
                //set user options
                if (maskfile == "default") { mothurOut("I am using the default 236627 EU009184.1 Shigella dysenteriae str. FBD013."); mothurOutEndLine();  }
                
-               //saves time to avoid generating it
                chimera->setCons(consfile);     
-               
-               //saves time to avoid generating it
                chimera->setQuantiles(quanfile);                                
-               
                chimera->setMask(maskfile);
                chimera->setFilter(filter);
                chimera->setCorrection(correction);
@@ -283,25 +298,126 @@ int ChimeraSeqsCommand::execute(){
                chimera->setDivR(divR);
                chimera->setParents(parents);
                chimera->setMinSim(minSimilarity);
+               chimera->setMinCoverage(minCoverage);
+               chimera->setMinBS(minBS);
+               chimera->setMinSNP(minSNP);
                chimera->setIters(iters);
+               chimera->setTemplateFile(templatefile);
+
                
-                               
-               //find chimeras
-               int error = chimera->getChimeras();
                
-               //there was a problem
-               if (error == 1) {  return 0;  }
+               vector<Sequence*> templateSeqs;
+               if ((method != "bellerophon") && (method != "chimeracheck")) {   
+                       templateSeqs = chimera->readSeqs(templatefile);   
+                       if (chimera->getUnaligned()) { 
+                               mothurOut("Your sequences need to be aligned when you use the chimeraslayer method."); mothurOutEndLine(); 
+                               //free memory
+                               for (int i = 0; i < templateSeqs.size(); i++)           {  delete templateSeqs[i];              }
+                               return 0; 
+                       }
+                       
+                       //set options
+                       chimera->setTemplateSeqs(templateSeqs);
 
+               }else if (method == "bellerophon") {//run bellerophon separately since you need to read entire fastafile to run it
+                       chimera->getChimeras();
+                       
+                       string outputFName = outputDir + getRootName(getSimpleName(fastafile)) + method + maskfile + ".chimeras";
+                       ofstream out;
+                       openOutputFile(outputFName, out);
+                       
+                       chimera->print(out);
+                       out.close();
+                       return 0;
+               }
+               
+               //some methods need to do prep work before processing the chimeras
+               chimera->doPrep(); 
+               
+               ofstream outHeader;
+               string tempHeader = outputDir + getRootName(getSimpleName(fastafile)) + method + maskfile + ".chimeras.tempHeader";
+               openOutputFile(tempHeader, outHeader);
+               
+               chimera->printHeader(outHeader);
+               outHeader.close();
+               
                string outputFileName = outputDir + getRootName(getSimpleName(fastafile)) + method + maskfile + ".chimeras";
-               ofstream out;
-               openOutputFile(outputFileName, out);
+
+               //break up file
+               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+                       if(processors == 1){
+                               ifstream inFASTA;
+                               openInputFile(fastafile, inFASTA);
+                               numSeqs=count(istreambuf_iterator<char>(inFASTA),istreambuf_iterator<char>(), '>');
+                               inFASTA.close();
+                               
+                               lines.push_back(new linePair(0, numSeqs));
+                               
+                               driver(lines[0], outputFileName, fastafile);
+                               
+                       }else{
+                               vector<int> positions;
+                               processIDS.resize(0);
+                               
+                               ifstream inFASTA;
+                               openInputFile(fastafile, inFASTA);
+                               
+                               string input;
+                               while(!inFASTA.eof()){
+                                       input = getline(inFASTA);
+                                       if (input.length() != 0) {
+                                               if(input[0] == '>'){    long int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1);  }
+                                       }
+                               }
+                               inFASTA.close();
+                               
+                               numSeqs = positions.size();
+                               
+                               int numSeqsPerProcessor = numSeqs / processors;
+                               
+                               for (int i = 0; i < processors; i++) {
+                                       long int startPos = positions[ i * numSeqsPerProcessor ];
+                                       if(i == processors - 1){
+                                               numSeqsPerProcessor = numSeqs - i * numSeqsPerProcessor;
+                                       }
+                                       lines.push_back(new linePair(startPos, numSeqsPerProcessor));
+                               }
+                               
+                               
+                               createProcesses(outputFileName, fastafile); 
+                               
+                               rename((outputFileName + toString(processIDS[0]) + ".temp").c_str(), outputFileName.c_str());
+                                                               
+                               //append alignment and report files
+                               for(int i=1;i<processors;i++){
+                                       appendOutputFiles((outputFileName + toString(processIDS[i]) + ".temp"), outputFileName);
+                                       remove((outputFileName + toString(processIDS[i]) + ".temp").c_str());
+                               }
+                       }
+
+               #else
+                       ifstream inFASTA;
+                       openInputFile(candidateFileNames[s], inFASTA);
+                       numSeqs=count(istreambuf_iterator<char>(inFASTA),istreambuf_iterator<char>(), '>');
+                       inFASTA.close();
+                       lines.push_back(new linePair(0, numSeqs));
+                       
+                       driver(lines[0], outputFileName, fastafile);
+               #endif
                
-               //print results
-               chimera->print(out);
+               //mothurOut("Output File Names: ");
+               //if ((filter) && (method == "bellerophon")) { mothurOut(
+               //if (outputDir == "") { fastafile = getRootName(fastafile) + "filter.fasta"; }
+               //      else                             { fastafile = outputDir + getRootName(getSimpleName(fastafile)) + "filter.fasta"; }
                
-               out.close();
+               appendOutputFiles(tempHeader, outputFileName);
+               remove(tempHeader.c_str());
+
+               for (int i = 0; i < templateSeqs.size(); i++)           {   delete templateSeqs[i];     }
+               
+               if (method == "chimeracheck") { mothurOutEndLine(); mothurOut("This method does not determine if a sequence is chimeric, but allows you to make that determination based on the IS values."); mothurOutEndLine();  }
                
-               delete chimera;
+               mothurOutEndLine(); mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences.");       mothurOutEndLine();
                
                return 0;
                
@@ -310,6 +426,107 @@ int ChimeraSeqsCommand::execute(){
                errorOut(e, "ChimeraSeqsCommand", "execute");
                exit(1);
        }
+}//**********************************************************************************************************************
+
+int ChimeraSeqsCommand::driver(linePair* line, string outputFName, string filename){
+       try {
+               ofstream out;
+               openOutputFile(outputFName, out);
+               
+               ifstream inFASTA;
+               openInputFile(filename, inFASTA);
+
+               inFASTA.seekg(line->start);
+               
+               for(int i=0;i<line->numSeqs;i++){
+               
+                       Sequence* candidateSeq = new Sequence(inFASTA);  gobble(inFASTA);
+                               
+                       if (candidateSeq->getName() != "") { //incase there is a commented sequence at the end of a file
+                                       
+                               //find chimeras
+                               chimera->getChimeras(candidateSeq);
+               
+                               //print results
+                               chimera->print(out);
+                       }
+                       delete candidateSeq;
+                       
+                       //report progress
+                       if((i+1) % 100 == 0){   mothurOut("Processing sequence: " + toString(i+1)); mothurOutEndLine();         }
+               }
+               //report progress
+               if((line->numSeqs) % 100 != 0){ mothurOut("Processing sequence: " + toString(line->numSeqs)); mothurOutEndLine();               }
+               
+               out.close();
+               inFASTA.close();
+                               
+               return 1;
+       }
+       catch(exception& e) {
+               errorOut(e, "ChimeraSeqsCommand", "driver");
+               exit(1);
+       }
 }
+
 /**************************************************************************************************/
 
+void ChimeraSeqsCommand::createProcesses(string outputFileName, string filename) {
+       try {
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+               int process = 0;
+               //              processIDS.resize(0);
+               
+               //loop through and create all the processes you want
+               while (process != processors) {
+                       int pid = fork();
+                       
+                       if (pid > 0) {
+                               processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
+                               process++;
+                       }else if (pid == 0){
+                               driver(lines[process], outputFileName + toString(getpid()) + ".temp", filename);
+                               exit(0);
+                       }else { mothurOut("unable to spawn the necessary processes."); mothurOutEndLine(); exit(0); }
+               }
+               
+               //force parent to wait until all the processes are done
+               for (int i=0;i<processors;i++) { 
+                       int temp = processIDS[i];
+                       wait(&temp);
+               }
+#endif         
+       }
+       catch(exception& e) {
+               errorOut(e, "ChimeraSeqsCommand", "createProcesses");
+               exit(1);
+       }
+}
+
+/**************************************************************************************************/
+
+void ChimeraSeqsCommand::appendOutputFiles(string temp, string filename) {
+       try{
+               
+               ofstream output;
+               ifstream input;
+               
+               openOutputFileAppend(temp, output);
+               openInputFile(filename, input);
+               
+               while(char c = input.get()){
+                       if(input.eof())         {       break;                  }
+                       else                            {       output << c;    }
+               }
+               
+               input.close();
+               output.close();
+       }
+       catch(exception& e) {
+               errorOut(e, "ChimeraSeqsCommand", "appendOuputFiles");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+
+
index 46e2bc88300c7a3e89f8fb985100d6edd50129a1..bac02852744952477c97c0ea7dc284f56ff589bc 100644 (file)
@@ -26,11 +26,23 @@ public:
        
                
 private:
+
+       struct linePair {
+               int start;
+               int numSeqs;
+               linePair(long int i, int j) : start(i), numSeqs(j) {}
+       };
+       vector<int> processIDS;   //processid
+       vector<linePair*> lines;
        
+       int driver(linePair*, string, string);
+       void createProcesses(string, string);
+       void appendOutputFiles(string, string); 
+
        bool abort;
        string method, fastafile, templatefile, consfile, quanfile, maskfile, namefile, outputDir;
        bool filter, correction, svg, printAll;
-       int processors, midpoint, averageLeft, averageRight, window, iters, increment, numwanted, ksize, match, mismatch, parents, minSimilarity;
+       int processors, midpoint, averageLeft, averageRight, window, iters, increment, numwanted, ksize, match, mismatch, parents, minSimilarity, minCoverage, minBS, minSNP, numSeqs;
        float divR;
        Chimera* chimera;
        
index 5d3d6706ede49b38c5385d29c1d127b1488c4000..b2c0a0561a0f6baf035f83fc4113d3a0f16ef643 100644 (file)
  */
 
 #include "chimeraslayer.h"
+#include "chimerarealigner.h"
 
 //***************************************************************************************************************
-ChimeraSlayer::ChimeraSlayer(string filename, string temp) {  fastafile = filename;  templateFile = temp;  }
+ChimeraSlayer::ChimeraSlayer(string mode) : searchMethod(mode) {       decalc = new DeCalculator();      }
 //***************************************************************************************************************
-
-ChimeraSlayer::~ChimeraSlayer() {
-       try {
-               for (int i = 0; i < querySeqs.size(); i++)                      {  delete querySeqs[i];                 }
-               for (int i = 0; i < templateSeqs.size(); i++)           {  delete templateSeqs[i];              }
-       }
-       catch(exception& e) {
-               errorOut(e, "ChimeraSlayer", "~ChimeraSlayer");
-               exit(1);
-       }
-}      
+ChimeraSlayer::~ChimeraSlayer() {      delete decalc;   }
+//***************************************************************************************************************
+void ChimeraSlayer::printHeader(ostream& out) {
+       mothurOutEndLine();
+       mothurOut("Only reporting sequence supported by 90% of bootstrapped results.");
+       mothurOutEndLine();
+}
 //***************************************************************************************************************
 void ChimeraSlayer::print(ostream& out) {
        try {
-               mothurOutEndLine();
-               mothurOut("Only reporting sequence supported by 90% of bootstrapped results.");
-               mothurOutEndLine();
-               
-               for (int i = 0; i < querySeqs.size(); i++) {
-               
-                       if (chimeraFlags[i] == "yes") { 
-//                             cout << i << endl;
-                               if ((chimeraResults[i][0].bsa >= 90.0) || (chimeraResults[i][0].bsb >= 90.0)) {
-                                       mothurOut(querySeqs[i]->getName() + "\tyes"); mothurOutEndLine();
-                                       out << querySeqs[i]->getName() << "\tyes" << endl;
-                               }else {
-                                       out << querySeqs[i]->getName() << "\tyes" << endl;
-                                       //mothurOut(querySeqs[i]->getName() + "\tno"); mothurOutEndLine();
+               if (chimeraFlags == "yes") {
+                       string chimeraFlag = "no";
+                       if(  (chimeraResults[0].bsa >= minBS && chimeraResults[0].divr_qla_qrb >= divR)
+                          ||
+                          (chimeraResults[0].bsb >= minBS && chimeraResults[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
+                       
+                       
+                       if (chimeraFlag == "yes") {     
+                               if ((chimeraResults[0].bsa >= minBS) || (chimeraResults[0].bsb >= minBS)) {
+                                       mothurOut(querySeq->getName() + "\tyes"); mothurOutEndLine();
                                }
-
-                               printBlock(chimeraResults[i][0], out, i);
-                               
-                               out << endl;
-                       }else{
-                               out << querySeqs[i]->getName() << "\tno" << endl;
-                               //mothurOut(querySeqs[i]->getName() + "\tno"); mothurOutEndLine();
                        }
-               }
-                               
+                       out << querySeq->getName() << "\tyes" << endl;
+                       printBlock(chimeraResults[0], out);
+                       out << endl;
+               }else {  out << querySeq->getName() << "\tno" << endl;  }
+               
        }
        catch(exception& e) {
                errorOut(e, "ChimeraSlayer", "print");
                exit(1);
        }
 }
-
 //***************************************************************************************************************
-int ChimeraSlayer::getChimeras() {
+int ChimeraSlayer::getChimeras(Sequence* query) {
        try {
+               chimeraFlags = "no";
+               querySeq = query;
                
-               //read in query sequences and subject sequences
-               mothurOut("Reading sequences and template file... "); cout.flush();
-               querySeqs = readSeqs(fastafile);
-               templateSeqs = readSeqs(templateFile);
-               mothurOut("Done."); mothurOutEndLine();
-               
-               int numSeqs = querySeqs.size();
-               
-               if (unaligned) { mothurOut("Your sequences need to be aligned when you use the chimeraslayer method."); mothurOutEndLine(); return 1;  }
-               
-               chimeraResults.resize(numSeqs);
-               chimeraFlags.resize(numSeqs, "no");
-               spotMap.resize(numSeqs);
-               
-               //break up file if needed
-               int linesPerProcess = numSeqs / processors ;
-               
-               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
-                       //find breakup of sequences for all times we will Parallelize
-                       if (processors == 1) {   lines.push_back(new linePair(0, numSeqs));  }
-                       else {
-                               //fill line pairs
-                               for (int i = 0; i < (processors-1); i++) {                      
-                                       lines.push_back(new linePair((i*linesPerProcess), ((i*linesPerProcess) + linesPerProcess)));
-                               }
-                               //this is necessary to get remainder of processors / numSeqs so you don't miss any lines at the end
-                               int i = processors - 1;
-                               lines.push_back(new linePair((i*linesPerProcess), numSeqs));
-                       }
-               #else
-                       lines.push_back(new linePair(0, numSeqs));
-               #endif
-               
-               if (seqMask != "") {    decalc = new DeCalculator();    } //to use below
-               
-               //initialize spotMap
-               for (int j = 0; j < numSeqs; j++) {
-                       for (int i = 0; i < querySeqs[0]->getAligned().length(); i++) {
-                               spotMap[j][i] = i;
-                       }
+               for (int i = 0; i < query->getAligned().length(); i++) {
+                       spotMap[i] = i;
                }
                
                //referenceSeqs, numWanted, matchScore, misMatchPenalty, divR, minSimilarity
-               maligner = new Maligner(templateSeqs, numWanted, match, misMatch, 1.01, minSim);
-               slayer = new Slayer(window, increment, minSim, divR, iters);
-               
-               for (int i = 0; i < querySeqs.size(); i++) {
+               maligner = new Maligner(templateSeqs, numWanted, match, misMatch, divR, minSim, minCov, searchMethod);
+               slayer = new Slayer(window, increment, minSim, divR, iters, minSNP);
                
-                       string chimeraFlag = maligner->getResults(querySeqs[i]);
-                       //float percentIdentical = maligner->getPercentID();
-                       vector<results> Results = maligner->getOutput();
+               string chimeraFlag = maligner->getResults(query, decalc);
+               vector<results> Results = maligner->getOutput();
+
+               //realign query to parents to improve slayers detection rate
+               //ChimeraReAligner realigner(templateSeqs, match, misMatch);
+               //realigner.reAlign(query, Results);
                        
-                       cout << "Processing sequence: " << i+1 << endl;
+                       //if (chimeraFlag == "yes") {
                        
-                       //for (int j = 0; j < Results.size(); j++) {
-                               //cout << "regionStart = " << Results[j].regionStart << "\tRegionEnd = " << Results[j].regionEnd << "\tName = " << Results[j].parent << "\tPerQP = " << Results[j].queryToParent << "\tLocalPerQP = " << Results[j].queryToParentLocal << "\tdivR = " << Results[j].divR << endl;
-                       //}
+               //get sequence that were given from maligner results
+               vector<SeqDist> seqs;
+               map<string, float> removeDups;
+               map<string, float>::iterator itDup;
+               for (int j = 0; j < Results.size(); j++) {
+                       float dist = (Results[j].regionEnd - Results[j].regionStart + 1) * Results[j].queryToParentLocal;
+                       //only add if you are not a duplicate
+                       itDup = removeDups.find(Results[j].parent);
+                       if (itDup == removeDups.end()) { //this is not duplicate
+                               removeDups[Results[j].parent] = dist;
+                       }else if (dist > itDup->second) { //is this a stronger number for this parent
+                               removeDups[Results[j].parent] = dist;
+                       }
+               }
+               
+               for (itDup = removeDups.begin(); itDup != removeDups.end(); itDup++) {
+                       Sequence* seq = getSequence(itDup->first); //makes copy so you can filter and mask and not effect template
                        
-                       //if (chimeraFlag == "yes") {
+                       SeqDist member;
+                       member.seq = seq;
+                       member.dist = itDup->second;
                        
-                               //get sequence that were given from maligner results
-                               vector<SeqDist> seqs;
-                               map<string, float> removeDups;
-                               map<string, float>::iterator itDup;
-                               for (int j = 0; j < Results.size(); j++) {
-                                       float dist = (Results[j].regionEnd - Results[j].regionStart + 1) * Results[j].queryToParentLocal;
-                                       //only add if you are not a duplicate
-                                       itDup = removeDups.find(Results[j].parent);
-                                       if (itDup == removeDups.end()) { //this is not duplicate
-                                               removeDups[Results[j].parent] = dist;
-                                       }else if (dist > itDup->second) { //is this a stronger number for this parent
-                                               removeDups[Results[j].parent] = dist;
-                                       }
-                               }
-                               
-                               for (itDup = removeDups.begin(); itDup != removeDups.end(); itDup++) {
-                                       Sequence* seq = getSequence(itDup->first); //makes copy so you can filter and mask and not effect template
-                                       
-                                       SeqDist member;
-                                       member.seq = seq;
-                                       member.dist = itDup->second;
-                                       
-                                       seqs.push_back(member);
-                               }
-                               
-                               //limit number of parents to explore - default 5
-                               if (Results.size() > parents) {
-                                       //sort by distance
-                                       sort(seqs.begin(), seqs.end(), compareSeqDist);
-                                       //prioritize larger more similiar sequence fragments
-                                       reverse(seqs.begin(), seqs.end());
-                                       
-                                       for (int k = seqs.size()-1; k > (parents-1); k--)  {  
-                                               delete seqs[k].seq;
-                                               seqs.pop_back();        
-                                       }
-                               }
+                       seqs.push_back(member);
+               }
+               
+               //limit number of parents to explore - default 3
+               if (Results.size() > parents) {
+                       //sort by distance
+                       sort(seqs.begin(), seqs.end(), compareSeqDist);
+                       //prioritize larger more similiar sequence fragments
+                       reverse(seqs.begin(), seqs.end());
+                       
+                       for (int k = seqs.size()-1; k > (parents-1); k--)  {  
+                               delete seqs[k].seq;
+                               seqs.pop_back();        
+                       }
+               }
                
-                               //put seqs into vector to send to slayer
-                               vector<Sequence*> seqsForSlayer;
-                               for (int k = 0; k < seqs.size(); k++) {  seqsForSlayer.push_back(seqs[k].seq);  }
+               //put seqs into vector to send to slayer
+               vector<Sequence*> seqsForSlayer;
+               for (int k = 0; k < seqs.size(); k++) {  seqsForSlayer.push_back(seqs[k].seq);  }
+               //cout << i+1 << "num parents = " << seqsForSlayer.size() << '\t' << chimeraFlag << endl;
 //ofstream out;
 //string name = querySeqs[i]->getName();
 //cout << name << endl;
@@ -172,50 +120,42 @@ int ChimeraSlayer::getChimeras() {
 //cout << name << endl;
 //name = name.substr(0, name.find_last_of("|"));
 //cout << name << endl;
-//string filename = name + ".seqsforslayer";
+//string filename = toString(i+1) + ".seqsforslayer";
 //openOutputFile(filename, out);       
-//for (int u = 0; u < seqsForSlayer.size(); u++) { seqsForSlayer[u]->printSequence(out);       }
+//cout << querySeqs[i]->getName() << endl;
+//for (int u = 0; u < seqsForSlayer.size(); u++) { cout << seqsForSlayer[u]->getName() << '\t'; seqsForSlayer[u]->printSequence(out);  }
+//cout << endl;
 //out.close();
-//filename = name + ".fasta";
+//filename = toString(i+1) + ".fasta";
 //openOutputFile(filename, out);       
-//q->printSequence(out);
+//querySeqs[i]->printSequence(out);
 //out.close();
 
-                       
-                               //mask then send to slayer...
-                               if (seqMask != "") {
-                                       decalc->setMask(seqMask);
-
-                                       //mask querys
-                                       decalc->runMask(querySeqs[i]);
-                                       
-                                       //mask parents
-                                       for (int k = 0; k < seqsForSlayer.size(); k++) {
-                                               decalc->runMask(seqsForSlayer[k]);
-                                       }
-                                       
-                                       for (int i = 0; i < numSeqs; i++) {
-                                               spotMap[i] = decalc->getMaskMap();
-                                       }
 
-                               }
-                               
-                               //send to slayer
-                               chimeraFlags[i] = slayer->getResults(querySeqs[i], seqsForSlayer);
-                               chimeraResults[i] = slayer->getOutput();
-                       
-                               //free memory
-                               for (int k = 0; k < seqs.size(); k++) {  delete seqs[k].seq;   }
-                       //}
                        
-               }       
-               //free memory
-               for (int i = 0; i < lines.size(); i++)                                  {       delete lines[i];        }
-               
+               //mask then send to slayer...
                if (seqMask != "") {
-                       delete decalc; 
+                       decalc->setMask(seqMask);
+                       
+                       //mask querys
+                       decalc->runMask(query);
+                       
+                       //mask parents
+                       for (int k = 0; k < seqsForSlayer.size(); k++) {
+                               decalc->runMask(seqsForSlayer[k]);
+                       }
+                       
+                       spotMap = decalc->getMaskMap();
                }
                
+               //send to slayer
+               chimeraFlags = slayer->getResults(query, seqsForSlayer);
+               chimeraResults = slayer->getOutput();
+       
+               //free memory
+               for (int k = 0; k < seqs.size(); k++) {  delete seqs[k].seq;   }
+               //}
+                       
                return 0;
        }
        catch(exception& e) {
@@ -224,37 +164,12 @@ int ChimeraSlayer::getChimeras() {
        }
 }
 //***************************************************************************************************************
-Sequence* ChimeraSlayer::getSequence(string name) {
-       try{
-               Sequence* temp;
-               
-               //look through templateSeqs til you find it
-               int spot = -1;
-               for (int i = 0; i < templateSeqs.size(); i++) {
-                       if (name == templateSeqs[i]->getName()) {  
-                               spot = i;
-                               break;
-                       }
-               }
-               
-               if(spot == -1) { mothurOut("Error: Could not find sequence in chimeraSlayer."); mothurOutEndLine(); return NULL; }
-               
-               temp = new Sequence(templateSeqs[spot]->getName(), templateSeqs[spot]->getAligned());
-               
-               return temp;
-       }
-       catch(exception& e) {
-               errorOut(e, "ChimeraSlayer", "getSequence");
-               exit(1);
-       }
-}
-//***************************************************************************************************************
-void ChimeraSlayer::printBlock(data_struct data, ostream& out, int i){
+void ChimeraSlayer::printBlock(data_struct data, ostream& out){
        try {
                
                out << "parentA: " << data.parentA.getName() << "  parentB: " << data.parentB.getName()  << endl;
-               out << "Left Window: " << spotMap[i][data.winLStart] << " " << spotMap[i][data.winLEnd] << endl;
-               out << "Right Window: " << spotMap[i][data.winRStart] << " " << spotMap[i][data.winREnd] << endl;
+               out << "Left Window: " << spotMap[data.winLStart] << " " << spotMap[data.winLEnd] << endl;
+               out << "Right Window: " << spotMap[data.winRStart] << " " << spotMap[data.winREnd] << endl;
                
                out << "Divergence of Query to Leftside ParentA and Rightside ParentB: " << data.divr_qla_qrb << '\t' << "Bootstrap: " << data.bsa << endl;
                out << "Divergence of Query to Rightside ParentA and Leftside ParentB: " << data.divr_qlb_qra << '\t' << "Bootstrap: " << data.bsb << endl;
@@ -263,6 +178,8 @@ void ChimeraSlayer::printBlock(data_struct data, ostream& out, int i){
                out << "Similarity of query to parentA: " << data.qa << endl;
                out << "Similarity of query to parentB: " << data.qb << endl;
                
+               out << "Percent_ID QLA_QRB: " << data.qla_qrb << endl;
+               out << "Percent_ID QLB_QRA: " << data.qlb_qra << endl;
                //out << "Per_id(QL,A): " << data.qla << endl;
                //out << "Per_id(QL,B): " << data.qlb << endl;
                //out << "Per_id(QR,A): " << data.qra << endl;
index 5343ec2a58c8a043ae23ac7a58dbeae56744c1b6..4095b86a4b0855e31b05bbfd775504801cc4ed1f 100644 (file)
 class ChimeraSlayer : public Chimera {
        
        public:
-               ChimeraSlayer(string, string);  
+               ChimeraSlayer(string);  
                ~ChimeraSlayer();
                
-               int getChimeras();
+               int getChimeras(Sequence*);
                void print(ostream&);
-               
-               void setCons(string){};
-               void setQuantiles(string q) {};
-               
+               void printHeader(ostream&);
                
        private:
+               Sequence* querySeq;
                DeCalculator* decalc;
                Maligner* maligner;
                Slayer* slayer;
-               vector<linePair*> lines;
-               vector<Sequence*> querySeqs;
-               vector<Sequence*> templateSeqs;
-               vector< map<int, int> > spotMap;
+               map<int, int>  spotMap;
                
-               vector< vector<data_struct> > chimeraResults;
-               vector<string> chimeraFlags;
-                               
-               string fastafile, templateFile;
-               
-               Sequence* getSequence(string);  //find sequence from name
-               void printBlock(data_struct, ostream&, int i);
+               vector<data_struct>  chimeraResults;
+               string chimeraFlags, searchMethod;
+               string fastafile;
+       
+               void printBlock(data_struct, ostream&);
 };
 
 /************************************************************************/
index c787d07e066ae08db298efb849966dcf0782113d..964847478ae6e9e42f7ebb7c8ef35b5f452ff3f5 100644 (file)
@@ -137,6 +137,7 @@ CommandFactory::CommandFactory(){
        commands["pcoa"]                                = "pcoa";
        commands["otu.hierarchy"]               = "otu.hierarchy";
        commands["set.dir"]                             = "set.dir";
+       commands["merge.files"]                 = "merge.files";
 }
 /***********************************************************/
 
index f52cdd58be86d31be87951df68d13983dc9bc921..79050777acd7d695754d7a382741538d35409b94 100644 (file)
@@ -48,6 +48,7 @@ public:
        virtual void generateDB() = 0; 
        virtual void addSequence(Sequence) = 0;  //add sequence to search engine
        virtual vector<int> findClosestSequences(Sequence*, int) = 0;  // returns indexes of n closest sequences to query
+       virtual map<int, float> findClosest(Sequence*, int){ return results; }  // returns of n closest sequences to query and their search scores
        virtual float getSearchScore();
        virtual int getLongestBase(); 
        virtual void readKmerDB(ifstream&){};
@@ -59,6 +60,7 @@ public:
 protected:
        int numSeqs, longest;
        float searchScore;
+       map<int, float> results;
 };
 /**************************************************************************************************/
 #endif
index 177636ccd1214e5ea713fcaeeb388ceb0c715b33..911efe268b6cd71817c28c7a8c008f850ff63e3e 100644 (file)
@@ -105,7 +105,7 @@ vector<int>  DeCalculator::findWindows(Sequence* query, int front, int back, int
                if (size == 0) {  if (cutoff > 1200) {  size = 300; }
                                                        else{  size = (cutoff / 4); }  } 
                else if (size > (cutoff / 4)) { 
-                               mothurOut("You have selected to large a window size for sequence " + query->getName() + ".  I will choose an appropriate window size."); mothurOutEndLine();
+                               mothurOut("You have selected too large a window size for sequence " + query->getName() + ".  I will choose an appropriate window size."); mothurOutEndLine();
                                size = (cutoff / 4); 
                }
        
@@ -679,6 +679,7 @@ float DeCalculator::getCoef(vector<float> obs, vector<float> qav) {
        }
 }
 //***************************************************************************************************************
+//gets closest matches to each end, since chimeras will most likely have different parents on each end
 vector<Sequence*> DeCalculator::findClosest(Sequence* querySeq, vector<Sequence*> db, int numWanted) {
        try {
                
@@ -687,14 +688,29 @@ vector<Sequence*> DeCalculator::findClosest(Sequence* querySeq, vector<Sequence*
                
                Dist* distcalculator = new eachGapDist();
                
-               Sequence query = *(querySeq);
+               string queryAligned = querySeq->getAligned();
+               string leftQuery = queryAligned.substr(0, (queryAligned.length() / 3)); //first 1/3 of the sequence
+               string rightQuery = queryAligned.substr(((queryAligned.length() / 3)*2)); //last 1/3 of the sequence
                
+               Sequence queryLeft(querySeq->getName(), leftQuery);
+               Sequence queryRight(querySeq->getName(), rightQuery);
+                               
                for(int j = 0; j < db.size(); j++){
                        
-                       Sequence temp = *(db[j]);
+                       string dbAligned = db[j]->getAligned();
+                       string leftDB = dbAligned.substr(0, (dbAligned.length() / 3)); //first 1/3 of the sequence
+                       string rightDB = dbAligned.substr(((dbAligned.length() / 3)*2)); //last 1/3 of the sequence
+                       
+                       Sequence dbLeft(db[j]->getName(), leftDB);
+                       Sequence dbRight(db[j]->getName(), rightDB);
+                       
+                       distcalculator->calcDist(queryLeft, dbLeft);
+                       float distLeft = distcalculator->getDist();
+                       
+                       distcalculator->calcDist(queryRight, dbRight);
+                       float distRight = distcalculator->getDist();
                        
-                       distcalculator->calcDist(query, temp);
-                       float dist = distcalculator->getDist();
+                       float dist = min(distLeft, distRight); //keep smallest distance
                        
                        SeqDist subject;
                        subject.seq = db[j];
@@ -720,7 +736,7 @@ vector<Sequence*> DeCalculator::findClosest(Sequence* querySeq, vector<Sequence*
        }
 }
 /***************************************************************************************************************/
-void DeCalculator::trimSeqs(Sequence* query, vector<Sequence*> topMatches) {
+map<int, int> DeCalculator::trimSeqs(Sequence* query, vector<Sequence*> topMatches) {
        try {
                
                int frontPos = 0;  //should contain first position in all seqs that is not a gap character
@@ -809,6 +825,13 @@ void DeCalculator::trimSeqs(Sequence* query, vector<Sequence*> topMatches) {
                        topMatches[i]->setAligned(newAligned);
                }
                
+               map<int, int> trimmedPos;
+               
+               for (int i = 0; i < newAligned.length(); i++) {
+                       trimmedPos[i] = i+frontPos;
+               }
+               
+               return trimmedPos;
        }
        catch(exception& e) {
                errorOut(e, "DeCalculator", "trimSequences");
index 5f0a82025c557a75c3adc4715526fa17bd5fe5a5..011993652618dc38294ade4638ad0657238b9377 100644 (file)
--- a/decalc.h
+++ b/decalc.h
@@ -45,7 +45,7 @@ class DeCalculator {
                void setAlignmentLength(int l) {  alignLength = l;  }
                void runMask(Sequence*);
                void trimSeqs(Sequence*, Sequence*, map<int, int>&);
-               void trimSeqs(Sequence*, vector<Sequence*>);
+               map<int, int> trimSeqs(Sequence*, vector<Sequence*>);
                void removeObviousOutliers(vector< vector<quanMember> >&, int);
                vector<float> calcFreq(vector<Sequence*>, string);
                vector<int> findWindows(Sequence*, int, int, int&, int);
index 593c2c3128d08cfe8abddfa81a1e384a1d17aa62..18e1a595c06343a521d9bcd6458b95db4d863db7 100644 (file)
@@ -86,19 +86,27 @@ string Engine::getCommand()  {
                        #ifdef USE_READLINE
                                char* nextCommand = NULL;
                                nextCommand = readline("mothur > ");
-                               if(nextCommand != NULL) {  add_history(nextCommand);  }         
+                               
+                               if(nextCommand != NULL) {  add_history(nextCommand);  } 
+                               else{ //^D causes null string and we want it to quit mothur
+                                       nextCommand = "quit"; 
+                                       cout << nextCommand << endl;
+                               }       
+                               
                                mothurOutJustToLog("mothur > " + toString(nextCommand));
                                return nextCommand;
                        #else
                                string nextCommand = "";
                                mothurOut("mothur > ");
                                getline(cin, nextCommand);
+                               mothurOutJustToLog("mothur > " + toString(nextCommand));
                                return nextCommand;
                        #endif
                #else
                        string nextCommand = "";
                        mothurOut("mothur > ");
                        getline(cin, nextCommand);
+                       mothurOutJustToLog("mothur > " + toString(nextCommand));
                        return nextCommand;
                #endif
                
index c2871694e2feaa9219ed5d31575e57a84a3022dc..8f8d1ad5da1addc5dac09f68c6dec5a5b99c1568 100644 (file)
@@ -123,6 +123,7 @@ void GlobalData::newRead() {
                        labels.clear(); Groups.clear();
                        allLines = 1;
                        runParse = true;
+                       names.clear();
        }
        catch(exception& e) {
                errorOut(e, "GlobalData", "newRead");
index 26ac563032bd93c8f46810d84b1c1a1ceb16c231..de6004d7e387d0e241dfbc205695d359dbcf6dda 100644 (file)
@@ -8,12 +8,14 @@
  */
 
 #include "maligner.h"
+#include "database.hpp"
+#include "blastdb.hpp"
 
 /***********************************************************************/
-Maligner::Maligner(vector<Sequence*> temp, int num, int match, int misMatch, float div, int minCov) :
-               db(temp), numWanted(num), matchScore(match), misMatchPenalty(misMatch), minDivR(div), minCoverage(minCov) {}
+Maligner::Maligner(vector<Sequence*> temp, int num, int match, int misMatch, float div, int ms, int minCov, string mode) :
+               db(temp), numWanted(num), matchScore(match), misMatchPenalty(misMatch), minDivR(div), minSimilarity(ms), minCoverage(minCov), searchMethod(mode) {}
 /***********************************************************************/
-string Maligner::getResults(Sequence* q) {
+string Maligner::getResults(Sequence* q, DeCalculator* decalc) {
        try {
                
                outputResults.clear();
@@ -22,14 +24,36 @@ string Maligner::getResults(Sequence* q) {
                query = new Sequence(q->getName(), q->getAligned());
                
                string chimera;
-       
-               decalc = new DeCalculator();
                
-               //find closest seqs to query in template - returns copies of seqs so trim does not destroy - remember to deallocate
-               refSeqs = decalc->findClosest(query, db, numWanted);
-       
-               refSeqs = minCoverageFilter(refSeqs);
+               if (searchMethod != "blast") {
+                       //find closest seqs to query in template - returns copies of seqs so trim does not destroy - remember to deallocate
+                       refSeqs = decalc->findClosest(query, db, numWanted);
+               }else{
+                       refSeqs = getBlastSeqs(query, numWanted);
+               }
                
+//ofstream out;
+//string name = toString(numi+1); 
+//cout << name << endl;
+//name = name.substr(name.find_first_of("|")+1);
+//cout << name << endl;
+//name = name.substr(name.find_first_of("|")+1);
+//cout << name << endl;
+//name = name.substr(0, name.find_last_of("|"));
+//cout << name << endl;
+//string filename = name + ".seqsformaligner";
+//openOutputFile(filename, out);       
+//for (int u = 0; u < refSeqs.size(); u++) {  refSeqs[u]->printSequence(out);  }
+//out.close();
+//filename = name + ".fasta";
+//openOutputFile(filename, out);       
+//query->printSequence(out);
+//out.close();
+
+//for (int i = 0; i < refSeqs.size(); i++)  { cout << refSeqs[i]->getName() << endl;  }        
+//cout << "before = " << refSeqs.size() << endl;
+               refSeqs = minCoverageFilter(refSeqs);
+//cout << "after = " << refSeqs.size() << endl;                
                if (refSeqs.size() < 2)  { 
                        for (int i = 0; i < refSeqs.size(); i++) {  delete refSeqs[i];  }
                        percentIdenticalQueryChimera = 0.0;
@@ -37,9 +61,30 @@ string Maligner::getResults(Sequence* q) {
                }
                
                int chimeraPenalty = computeChimeraPenalty();
-       
+//cout << chimeraPenalty << endl;              
+               //fills outputResults
+               chimera = chimeraMaligner(chimeraPenalty, decalc);
+               
+                               
+               //free memory
+               delete query;
+               for (int i = 0; i < refSeqs.size(); i++) {  delete refSeqs[i];  }
+               
+               return chimera;
+       }
+       catch(exception& e) {
+               errorOut(e, "Maligner", "getResults");
+               exit(1);
+       }
+}
+/***********************************************************************/
+string Maligner::chimeraMaligner(int chimeraPenalty, DeCalculator* decalc) {
+       try {
+               
+               string chimera;
+               
                //trims seqs to first non gap char in all seqs and last non gap char in all seqs
-               decalc->trimSeqs(query, refSeqs);
+               spotMap = decalc->trimSeqs(query, refSeqs);
                
                vector<Sequence*> temp = refSeqs;
                temp.push_back(query);
@@ -67,8 +112,6 @@ string Maligner::getResults(Sequence* q) {
                
                percentIdenticalQueryChimera = computePercentID(queryInRange, chimeraSeq);
                
-               delete decalc;
-               
                //save output results
                for (int i = 0; i < trace.size(); i++) {
                        int regionStart = trace[i].col;
@@ -78,6 +121,8 @@ string Maligner::getResults(Sequence* q) {
                        results temp;
                        
                        temp.parent = refSeqs[seqIndex]->getName();
+                       temp.nastRegionStart = spotMap[regionStart];
+                       temp.nastRegionEnd = spotMap[regionEnd];
                        temp.regionStart = regionStart;
                        temp.regionEnd = regionEnd;
                        
@@ -97,15 +142,11 @@ string Maligner::getResults(Sequence* q) {
                
                        outputResults.push_back(temp);
                }
-               
-               //free memory
-               delete query;
-               for (int i = 0; i < refSeqs.size(); i++) {  delete refSeqs[i];  }
-               
+
                return chimera;
        }
        catch(exception& e) {
-               errorOut(e, "Maligner", "getResults");
+               errorOut(e, "Maligner", "chimeraMaligner");
                exit(1);
        }
 }
@@ -193,21 +234,27 @@ void Maligner::verticalFilter(vector<Sequence*> seqs) {
                        if(gaps[i] == seqs.size())      {       filterString[i] = '0';  numColRemoved++;  }
                }
                
+               map<int, int> newMap;
                //for each sequence
                for (int i = 0; i < seqs.size(); i++) {
                
                        string seqAligned = seqs[i]->getAligned();
                        string newAligned = "";
+                       int count = 0;
                        
                        for (int j = 0; j < seqAligned.length(); j++) {
                                //if this spot is not a gap
-                               if (filterString[j] == '1') { newAligned += seqAligned[j]; }
+                               if (filterString[j] == '1') { 
+                                       newAligned += seqAligned[j]; 
+                                       newMap[count] = spotMap[j];
+                                       count++;
+                               }
                        }
                        
                        seqs[i]->setAligned(newAligned);
                }
 
-       
+               spotMap = newMap;
        }
        catch(exception& e) {
                errorOut(e, "Maligner", "verticalFilter");
@@ -452,4 +499,78 @@ float Maligner::computePercentID(string queryAlign, string chimera) {
        }
 }
 //***************************************************************************************************************
+vector<Sequence*> Maligner::getBlastSeqs(Sequence* q, int num) {
+       try {
+cout << q->getName() << endl;  
+               //generate blastdb
+               Database* database = new BlastDB(-2.0, -1.0, matchScore, misMatchPenalty);
+               for (int i = 0; i < db.size(); i++) {   database->addSequence(*db[i]);  }
+               database->generateDB();
+               database->setNumSeqs(db.size());
+               
+               //get parts of query
+               string queryAligned = q->getAligned();
+               string leftQuery = queryAligned.substr(0, (queryAligned.length() / 3)); //first 1/3 of the sequence
+               string rightQuery = queryAligned.substr(((queryAligned.length() / 3)*2)); //last 1/3 of the sequence
+               
+               Sequence* queryLeft = new Sequence(q->getName(), leftQuery);
+               Sequence* queryRight = new Sequence(q->getName(), rightQuery);
+               
+               map<int, float> tempIndexesRight = database->findClosest(queryRight, num);
+               map<int, float> tempIndexesLeft = database->findClosest(queryLeft, num);
+               
+               //merge results
+               vector<rank> mergedResults;
+               
+               map<int, float>::iterator it;
+               map<int, float>::iterator it2;
+               
+               //add in right guys merging common finds
+               for (it = tempIndexesRight.begin(); it != tempIndexesRight.end(); it++) {
+                       it2 = tempIndexesLeft.find(it->first);
+                       
+                       if (it2 == tempIndexesLeft.end()) {  //result only present in right
+                               rank temp(it->first, it->second);
+                               mergedResults.push_back(temp); 
+
+                       }else { //result present in both save best score
+                               float bestscore;
+                               if (it->second > it2->second)   {  bestscore = it->second;  }
+                               else                                                    {  bestscore = it2->second; }
+                               
+                               rank temp(it->first, bestscore);
+                               mergedResults.push_back(temp);
+                               
+                               tempIndexesLeft.erase(it2);
+                       }
+               }
+               
+               //add in unique left guys
+               for (it = tempIndexesLeft.begin(); it != tempIndexesLeft.end(); it++) {
+                       rank temp(it->first, it->second);
+                       mergedResults.push_back(temp); 
+               }
+               
+               sort(mergedResults.begin(), mergedResults.end(), compareMembers);
+               
+               vector<Sequence*> refResults;
+               for (int i = 0; i < numWanted; i++) {
+                       Sequence* temp = new Sequence(db[mergedResults[i].num]->getName(), db[mergedResults[i].num]->getAligned());
+                       refResults.push_back(temp);
+cout << db[mergedResults[i].num]->getName() << endl;
+               }
+               
+               delete queryRight;
+               delete queryLeft;
+               delete database;
+               
+               return refResults;
+       }
+       catch(exception& e) {
+               errorOut(e, "Maligner", "getBlastSeqs");
+               exit(1);
+       }
+}
+
+//***************************************************************************************************************
 
index 6277e7300a02fbe815468e223298f5f969813a51..abe18a3cb1ad6c6b65d41077b8dd1fb56d802f96 100644 (file)
  */
  
 #include "decalc.h"
+#include "chimera.h"
 
 /***********************************************************************/
 //This class was modeled after the chimeraMaligner written by the Broad Institute
-/***********************************************************************/
-struct score_struct {
-       int prev;
-       int score;
-       int row;
-       int col;
-};
-/***********************************************************************/
-struct trace_struct {
-       int col;
-       int oldCol;
-       int row;
-};
-/***********************************************************************/
-struct results {
-       int regionStart;
-       int regionEnd;
-       string parent;
-       float queryToParent;
-       float queryToParentLocal;
-       float divR;
-};
-
 /**********************************************************************/
 class Maligner {
 
        public:
                
-               Maligner(vector<Sequence*>, int, int, int, float, int);
+               Maligner(vector<Sequence*>, int, int, int, float, int, int, string);
                ~Maligner() {};
                
-               string getResults(Sequence*);
+               string getResults(Sequence*, DeCalculator*);
                float getPercentID() {  return percentIdenticalQueryChimera;    }
                vector<results> getOutput()  {  return outputResults;                   }
                
                                
        private:
-               DeCalculator* decalc;
                Sequence* query;
                vector<Sequence*> refSeqs;
                vector<Sequence*> db;
-               int numWanted, matchScore, misMatchPenalty, minCoverage;
+               int numWanted, matchScore, misMatchPenalty, minCoverage, minSimilarity;
+               string searchMethod;
                float minDivR, percentIdenticalQueryChimera;
                vector<results> outputResults;
+               map<int, int> spotMap;
                
                vector<Sequence*> minCoverageFilter(vector<Sequence*>);  //removes top matches that do not have minimum coverage with query.
                int computeChimeraPenalty();
@@ -68,6 +47,8 @@ class Maligner {
                vector<trace_struct> mapTraceRegionsToAlignment(vector<score_struct>, vector<Sequence*>);
                string constructChimericSeq(vector<trace_struct>, vector<Sequence*>);
                float computePercentID(string, string);
+               string chimeraMaligner(int, DeCalculator*);
+               vector<Sequence*> getBlastSeqs(Sequence*, int);
                
 };
 
index d5973394e5a097e879bf1526689d137266a417c1..0a8c0d8cda62808664a83f46c44cebb770e14f4a 100644 (file)
@@ -96,7 +96,11 @@ int MergeFileCommand::execute(){
                        
                        openInputFile(fileNames[i], inputFile);
                        
-                       while(!inputFile.eof()){        c = inputFile.get(); outputFile << c;   }
+                       while(!inputFile.eof()){        
+                               c = inputFile.get(); 
+                               //-1 is eof char
+                               if (int(c) != -1) { outputFile << c; }   
+                       }
                        
                        inputFile.close();
                }
index b71057a24550aaad2b07bc1d7794a6710191409e..8203c5716f02122ed77feef18f31428c23148552 100644 (file)
@@ -41,7 +41,11 @@ int main(int argc, char *argv[]){
                        mothurOutJustToLog("Windows version");
                        mothurOutEndLine(); mothurOutEndLine();
                #endif          
-
+               
+               #ifdef USE_READLINE
+                       mothurOutJustToLog("Using ReadLine");
+                       mothurOutEndLine(); mothurOutEndLine();
+               #endif
                
                //header
                mothurOut("mothur v.1.8");
index 8190152b8be3c15ee0a3559b93d846174d6c992d..5f8a2fe75b41d57e345d50cb63606921c866ff67 100644 (file)
--- a/mothur.h
+++ b/mothur.h
@@ -831,6 +831,21 @@ inline bool inUsersGroups(string groupname, vector<string> Groups) {
                exit(1);
        }
 }
+/**************************************************************************************************/
+//returns true if any of the strings in first vector are in second vector
+inline bool inUsersGroups(vector<string> groupnames, vector<string> Groups) {
+       try {
+               
+               for (int i = 0; i < groupnames.size(); i++) {
+                       if (inUsersGroups(groupnames[i], Groups)) { return true; }
+               }
+               return false;
+       }
+       catch(exception& e) {
+               errorOut(e, "mothur", "inUsersGroups");
+               exit(1);
+       }
+}
 
 /**************************************************************************************************/
 
index 6a4ae3db2f679ee4ccadf2b0ff3270f5da4f15aa..c91c9fab537172b3e02808b700af881affffc368 100644 (file)
--- a/nast.cpp
+++ b/nast.cpp
@@ -26,7 +26,6 @@ Nast::Nast(Alignment* method, Sequence* cand, Sequence* temp) : alignment(method
                maxInsertLength = 0;
                pairwiseAlignSeqs();    //      This is part A in Fig. 2 of DeSantis et al.
                regapSequences();               //      This is parts B-F in Fig. 2 of DeSantis et al.
-
        }
        catch(exception& e) {
                errorOut(e, "Nast", "Nast");
@@ -41,7 +40,7 @@ void Nast::pairwiseAlignSeqs(){       //      Here we call one of the pairwise alignment me
        try {
 
                alignment->align(candidateSeq->getUnaligned(), templateSeq->getUnaligned());
-       
+
                string candAln = alignment->getSeqAAln();
                string tempAln = alignment->getSeqBAln();
        
index bc87211e0bde9a4581119a91592683e34262d006..c39a638cf11375644a82e4d56c3c5659c56b35ee 100644 (file)
@@ -18,7 +18,7 @@ OtuHierarchyCommand::OtuHierarchyCommand(string option){
                
                else {
                        //valid paramters for this command
-                       string Array[] =  {"list","label","outputdir","inputdir"};
+                       string Array[] =  {"list","label","output","outputdir","inputdir"};
                        vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
                        
                        OptionParser parser(option);
@@ -63,7 +63,11 @@ OtuHierarchyCommand::OtuHierarchyCommand(string option){
                        else { 
                                splitAtDash(label, labels);
                                if (labels.size() != 2) { mothurOut("You must provide 2 labels."); mothurOutEndLine(); abort = true; }
-                       }                               
+                       }       
+                       
+                       output = validParameter.validFile(parameters, "output", false);                 if (output == "not found") { output = "name"; }
+                       
+                       if ((output != "name") && (output != "number")) { mothurOut("output options are name and number. I will use name."); mothurOutEndLine(); output = "name"; }
                }
                
        }
@@ -77,7 +81,8 @@ OtuHierarchyCommand::OtuHierarchyCommand(string option){
 void OtuHierarchyCommand::help(){
        try {
                mothurOut("The otu.hierarchy command is used to see how otus relate at two distances. \n");
-               mothurOut("The otu.hierarchy command parameters are list and label.  Both parameters are required. \n");
+               mothurOut("The otu.hierarchy command parameters are list, label and output.  list and label parameters are required. \n");
+               mothurOut("The output parameter allows you to output the names of the sequence in the OTUs or the OTU numbers. Options are name and number, default is name. \n");
                mothurOut("The otu.hierarchy command should be in the following format: \n");
                mothurOut("otu.hierarchy(list=yourListFile, label=yourLabels).\n");
                mothurOut("Example otu.hierarchy(list=amazon.fn.list, label=0.01-0.03).\n");
@@ -140,7 +145,8 @@ int OtuHierarchyCommand::execute(){
                        string names = lists[1].get(i);
                        
                        //output column 1
-                       out << names << '\t';
+                       if (output == "name")   {   out << names << '\t';       }
+                       else                                    {       out << i << '\t';               }
                        
                        map<int, int> bins; //bin numbers in little that are in this bin in big
                        map<int, int>::iterator it;
@@ -157,7 +163,8 @@ int OtuHierarchyCommand::execute(){
                        
                        string col2 = "";
                        for (it = bins.begin(); it != bins.end(); it++) {
-                               col2 += lists[0].get(it->first) + "\t";
+                               if (output == "name")   {   col2 += lists[0].get(it->first) + "\t";     }
+                               else                                    {       col2 += toString(it->first) + "\t";             }
                        }
                        
                        //output column 2
index c6749ef3841800971f82f5a01c54d76de713b096..1cd427f1a7af8442d4747e65662bbf431ccdcbbd 100644 (file)
@@ -25,7 +25,7 @@ public:
 private:
        bool abort;
        set<string> labels; //holds labels to be used
-       string label, listFile, outputDir;
+       string label, listFile, outputDir, output;
        
        vector<ListVector> getListVectors();
                
index 6adfdf91ebeaf30847bee05d90f2c9b22d3c5946..74e1ce47a80e60ec4c27e685f53e6dbfedfb6da9 100644 (file)
@@ -18,106 +18,39 @@ inline bool compareQuanMembers(quanMember left, quanMember right){
 } 
 //***************************************************************************************************************
 
-Pintail::Pintail(string filename, string temp, string o) {  fastafile = filename;  templateFile = temp; outputDir = o; }
+Pintail::Pintail(string filename, string o) {  
+       fastafile = filename;  outputDir = o; 
+       distcalculator = new eachGapDist();
+       decalc = new DeCalculator();
+}
 //***************************************************************************************************************
 
 Pintail::~Pintail() {
        try {
-               for (int i = 0; i < querySeqs.size(); i++)              {  delete querySeqs[i];         }
-               for (int i = 0; i < templateSeqs.size(); i++)   {  delete templateSeqs[i];      }
-               for (int i = 0; i < bestfit.size(); i++)                {  delete bestfit[i];           }  
-       }
-       catch(exception& e) {
-               errorOut(e, "Pintail", "~Pintail");
-               exit(1);
-       }
-}      
-//***************************************************************************************************************
-void Pintail::print(ostream& out) {
-       try {
-               
-               mothurOutEndLine();
                
-               for (int i = 0; i < querySeqs.size(); i++) {
-                       
-                       int index = ceil(deviation[i]);
-                                               
-                       //is your DE value higher than the 95%
-                       string chimera;
-                       if (quantiles[index][4] == 0.0) {
-                               chimera = "Your template does not include sequences that provide quantile values at distance " + toString(index);
-                       }else {
-                               if (DE[i] > quantiles[index][4])                {       chimera = "Yes";        }
-                               else                                                                    {       chimera = "No";         }
-                       }
-                       
-                       out << querySeqs[i]->getName() << '\t' << "div: " << deviation[i] << "\tstDev: " << DE[i] << "\tchimera flag: " << chimera << endl;
-                       if (chimera == "Yes") {
-                               mothurOut(querySeqs[i]->getName() + "\tdiv: " + toString(deviation[i]) + "\tstDev: " + toString(DE[i]) + "\tchimera flag: " + chimera); mothurOutEndLine();
-                       }
-                       out << "Observed\t";
-                       
-                       for (int j = 0; j < obsDistance[i].size(); j++) {  out << obsDistance[i][j] << '\t';  }
-                       out << endl;
-                       
-                       out << "Expected\t";
-                       
-                       for (int m = 0; m < expectedDistance[i].size(); m++) {  out << expectedDistance[i][m] << '\t';  }
-                       out << endl;
-                       
-               }
+               delete distcalculator;
+               delete decalc; 
        }
        catch(exception& e) {
-               errorOut(e, "Pintail", "print");
+               errorOut(e, "Pintail", "~Pintail");
                exit(1);
        }
 }
-
 //***************************************************************************************************************
-int Pintail::getChimeras() {
+void Pintail::doPrep() {
        try {
-               
-               //read in query sequences and subject sequences
-               mothurOut("Reading sequences and template file... "); cout.flush();
-               querySeqs = readSeqs(fastafile);
-               templateSeqs = readSeqs(templateFile);
-               mothurOut("Done."); mothurOutEndLine();
-               
-               int numSeqs = querySeqs.size();
-               
-               if (unaligned) { mothurOut("Your sequences need to be aligned when you use the pintail method."); mothurOutEndLine(); return 1;  }
-               
-               obsDistance.resize(numSeqs);
-               expectedDistance.resize(numSeqs);
-               seqCoef.resize(numSeqs);
-               DE.resize(numSeqs);
-               Qav.resize(numSeqs);
-               bestfit.resize(numSeqs);
-               deviation.resize(numSeqs);
-               trimmed.resize(numSeqs);
-               windowSizes.resize(numSeqs, window);
+       
                windowSizesTemplate.resize(templateSeqs.size(), window);
-               windowsForeachQuery.resize(numSeqs);
-               h.resize(numSeqs);
                quantiles.resize(100);  //one for every percent mismatch
                quantilesMembers.resize(100);  //one for every percent mismatch
                
-               //break up file if needed
-               int linesPerProcess = numSeqs / processors ;
+               //if the user does not enter a mask then you want to keep all the spots in the alignment
+               if (seqMask.length() == 0)      {       decalc->setAlignmentLength(templateSeqs[0]->getAligned().length());     }
+               else                                            {       decalc->setAlignmentLength(seqMask.length());                                           }
+               
+               decalc->setMask(seqMask);
                
                #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
-                       //find breakup of sequences for all times we will Parallelize
-                       if (processors == 1) {   lines.push_back(new linePair(0, numSeqs));  }
-                       else {
-                               //fill line pairs
-                               for (int i = 0; i < (processors-1); i++) {                      
-                                       lines.push_back(new linePair((i*linesPerProcess), ((i*linesPerProcess) + linesPerProcess)));
-                               }
-                               //this is necessary to get remainder of processors / numSeqs so you don't miss any lines at the end
-                               int i = processors - 1;
-                               lines.push_back(new linePair((i*linesPerProcess), numSeqs));
-                       }
-                       
                        //find breakup of templatefile for quantiles
                        if (processors == 1) {   templateLines.push_back(new linePair(0, templateSeqs.size()));  }
                        else { 
@@ -128,149 +61,38 @@ int Pintail::getChimeras() {
                                }
                        }
                #else
-                       lines.push_back(new linePair(0, numSeqs));
                        templateLines.push_back(new linePair(0, templateSeqs.size()));
                #endif
-               
-               distcalculator = new eachGapDist();
-               decalc = new DeCalculator();
-               
-               //if the user does enter a mask then you want to keep all the spots in the alignment
-               if (seqMask.length() == 0)      {       decalc->setAlignmentLength(querySeqs[0]->getAligned().length());        }
-               else                                            {       decalc->setAlignmentLength(seqMask.length());                                           }
-               
-               decalc->setMask(seqMask);
-               
-               //find pairs
-               if (processors == 1) { 
-                       mothurOut("Finding closest sequence in template to each sequence... "); cout.flush();
-                       bestfit = findPairs(lines[0]->start, lines[0]->end);
-                       mothurOut("Done."); mothurOutEndLine();
-               }else {         createProcessesPairs();         }
-               
-//string o = "closestmatch.eachgap.fasta";
-//ofstream out7;
-//openOutputFile(o, out7);
 
-//for (int i = 0; i < bestfit.size(); i++) {
-       //out7 << ">" << querySeqs[i]->getName() << "-"<< bestfit[i]->getName() << endl;
-       //out7 << bestfit[i]->getAligned() << endl;
-//}            
-//out7.close();        
-               //find P
+               
                mothurOut("Getting conservation... "); cout.flush();
                if (consfile == "") { 
                        mothurOut("Calculating probability of conservation for your template sequences.  This can take a while...  I will output the frequency of the highest base in each position to a .freq file so that you can input them using the conservation parameter next time you run this command.  Providing the .freq file will improve speed.    "); cout.flush();
-                       probabilityProfile = decalc->calcFreq(templateSeqs, outputDir + getSimpleName(templateFile)); 
+                       probabilityProfile = decalc->calcFreq(templateSeqs, outputDir + getSimpleName(templateFileName)); 
                        mothurOut("Done."); mothurOutEndLine();
-               }else                           {   probabilityProfile = readFreq();                      }
-
-               //make P into Q
-               for (int i = 0; i < probabilityProfile.size(); i++)  {  probabilityProfile[i] = 1 - probabilityProfile[i];  }  //cout << i << '\t' << probabilityProfile[i] << endl;
-               mothurOut("Done."); mothurOutEndLine();
-               
-               //mask sequences if the user wants to 
-               if (seqMask != "") {
-                       //mask querys
-                       for (int i = 0; i < querySeqs.size(); i++) {
-                               decalc->runMask(querySeqs[i]);
-                       }
-               
-                       //mask templates
-                       for (int i = 0; i < templateSeqs.size(); i++) {
-                               decalc->runMask(templateSeqs[i]);
-                       }
-                       
-                       for (int i = 0; i < bestfit.size(); i++) { 
-                               decalc->runMask(bestfit[i]);
-                       }
-
-               }
-               
-               if (filter) {
-                       vector<Sequence*> temp = templateSeqs;
-                       for (int i = 0; i < querySeqs.size(); i++) { temp.push_back(querySeqs[i]);  }
-                       
-                       createFilter(temp);
-                       
-                       runFilter(querySeqs);
-                       runFilter(templateSeqs);
-                       runFilter(bestfit);
-               }
-                                                                               
-                                                                                                                                               
-               if (processors == 1) { 
-       
-                       for (int j = 0; j < bestfit.size(); j++) { 
-                               decalc->trimSeqs(querySeqs[j], bestfit[j], trimmed[j]);  
-                       }
-                       
-                       mothurOut("Finding window breaks... "); cout.flush();
-                       for (int i = lines[0]->start; i < lines[0]->end; i++) {
-                               it = trimmed[i].begin();
-                               vector<int> win = decalc->findWindows(querySeqs[i], it->first, it->second, windowSizes[i], increment);
-                               windowsForeachQuery[i] = win;
-                       }
-                       mothurOut("Done."); mothurOutEndLine();
-               
-               }else {         createProcessesSpots();         }
-               
-               if (processors == 1) { 
-                                               
-                       mothurOut("Calculating observed distance... "); cout.flush();
-                       for (int i = lines[0]->start; i < lines[0]->end; i++) {
-                               vector<float> obsi = decalc->calcObserved(querySeqs[i], bestfit[i], windowsForeachQuery[i], windowSizes[i]);
+               }else                           {   probabilityProfile = readFreq();    mothurOut("Done.");               }
+               mothurOutEndLine();
        
-                               obsDistance[i] = obsi;
-                       }
-                       mothurOut("Done."); mothurOutEndLine();
-                       
-                       
-                       mothurOut("Finding variability... "); cout.flush();
-                       for (int i = lines[0]->start; i < lines[0]->end; i++) {
-                               vector<float> q = decalc->findQav(windowsForeachQuery[i], windowSizes[i], probabilityProfile);
-
-                               Qav[i] = q;
-                       }
-                       mothurOut("Done."); mothurOutEndLine();
-                       
-                       
-                       mothurOut("Calculating alpha... "); cout.flush();
-                       for (int i = lines[0]->start; i < lines[0]->end; i++) {
-                               float alpha = decalc->getCoef(obsDistance[i], Qav[i]);
-                               seqCoef[i] = alpha;
-                       }
-                       mothurOut("Done."); mothurOutEndLine();
-               
-               
-                       mothurOut("Calculating expected distance... "); cout.flush();
-                       for (int i = lines[0]->start; i < lines[0]->end; i++) {
-                               vector<float> exp = decalc->calcExpected(Qav[i], seqCoef[i]);
-                               expectedDistance[i] = exp;
-                       }
-                       mothurOut("Done."); mothurOutEndLine();
-                       
-                       
-                       mothurOut("Finding deviation... "); cout.flush();
-                       for (int i = lines[0]->start; i < lines[0]->end; i++) {
-                               float de = decalc->calcDE(obsDistance[i], expectedDistance[i]);
-                               DE[i] = de;
-                       
-                               it = trimmed[i].begin();
-                               float dist = decalc->calcDist(querySeqs[i], bestfit[i], it->first, it->second); 
-                               deviation[i] = dist;
-                       }
-                       mothurOut("Done."); mothurOutEndLine();
-                       
-               } 
-               else {          createProcesses();              }
-               
                //quantiles are used to determine whether the de values found indicate a chimera
                //if you have to calculate them, its time intensive because you are finding the de and deviation values for each 
                //combination of sequences in the template
                if (quanfile != "") {  quantiles = readQuantiles();  }
                else {
                        
+                       //if user has not provided the quantiles and wants the mask we need to mask, but then delete and reread or we will mess up the best match process in getChimeras
+                       if (seqMask != "") {
+                               //mask templates
+                               for (int i = 0; i < templateSeqs.size(); i++) {
+                                       decalc->runMask(templateSeqs[i]);
+                               }
+                       }
+                       
+                       if (filter) {
+                               createFilter(templateSeqs);
+                               for (int i = 0; i < templateSeqs.size(); i++) {  runFilter(templateSeqs[i]);  }
+                       }
+
+                       
                        mothurOut("Calculating quantiles for your template.  This can take a while...  I will output the quantiles to a .quan file that you can input them using the quantiles parameter next time you run this command.  Providing the .quan file will dramatically improve speed.    "); cout.flush();
                        if (processors == 1) { 
                                quantilesMembers = decalc->getQuantiles(templateSeqs, windowSizesTemplate, window, probabilityProfile, increment, 0, templateSeqs.size());
@@ -281,58 +103,16 @@ int Pintail::getChimeras() {
                        string noOutliers, outliers;
                        
                        if ((!filter) && (seqMask == "")) {
-                               noOutliers = outputDir + getRootName(getSimpleName(templateFile)) + "pintail.quan";
+                               noOutliers = outputDir + getRootName(getSimpleName(templateFileName)) + "pintail.quan";
                        }else if ((filter) && (seqMask == "")) { 
-                               noOutliers = outputDir + getRootName(getSimpleName(templateFile)) + "pintail.filtered.quan";
+                               noOutliers = outputDir + getRootName(getSimpleName(templateFileName)) + "pintail.filtered.quan";
                        }else if ((!filter) && (seqMask != "")) { 
-                               noOutliers = outputDir + getRootName(getSimpleName(templateFile)) + "pintail.masked.quan";
+                               noOutliers = outputDir + getRootName(getSimpleName(templateFileName)) + "pintail.masked.quan";
                        }else if ((filter) && (seqMask != "")) { 
-                               noOutliers = outputDir + getRootName(getSimpleName(templateFile)) + "pintail.filtered.masked.quan";
+                               noOutliers = outputDir + getRootName(getSimpleName(templateFileName)) + "pintail.filtered.masked.quan";
                        }
 
-                       //outliers = getRootName(templateFile) + "pintail.quanYESOUTLIERS";
-                       
-                       /*openOutputFile(outliers, out4);
-                       
-                       //adjust quantiles
-                       for (int i = 0; i < quantilesMembers.size(); i++) {
-                               vector<float> temp;
-                               
-                               if (quantilesMembers[i].size() == 0) {
-                                       //in case this is not a distance found in your template files
-                                       for (int g = 0; g < 6; g++) {
-                                               temp.push_back(0.0);
-                                       }
-                               }else{
-                                       
-                                       sort(quantilesMembers[i].begin(), quantilesMembers[i].end(), compareQuanMembers);
-                                       
-                                       //save 10%
-                                       temp.push_back(quantilesMembers[i][int(quantilesMembers[i].size() * 0.10)].score);
-                                       //save 25%
-                                       temp.push_back(quantilesMembers[i][int(quantilesMembers[i].size() * 0.25)].score);
-                                       //save 50%
-                                       temp.push_back(quantilesMembers[i][int(quantilesMembers[i].size() * 0.5)].score);
-                                       //save 75%
-                                       temp.push_back(quantilesMembers[i][int(quantilesMembers[i].size() * 0.75)].score);
-                                       //save 95%
-                                       temp.push_back(quantilesMembers[i][int(quantilesMembers[i].size() * 0.95)].score);
-                                       //save 99%
-                                       temp.push_back(quantilesMembers[i][int(quantilesMembers[i].size() * 0.99)].score);
-                                       
-                               }
-                               
-                               //output quan value
-                               out4 << i+1 << '\t';                            
-                               for (int u = 0; u < temp.size(); u++) {   out4 << temp[u] << '\t'; }
-                               out4 << endl;
-                               
-                               quantiles[i] = temp;
-                               
-                       }
-                       
-                       out4.close();*/
-                       
+                                               
                        decalc->removeObviousOutliers(quantilesMembers, templateSeqs.size());
                        
                        openOutputFile(noOutliers, out5);
@@ -376,15 +156,112 @@ int Pintail::getChimeras() {
 
                        mothurOut("Done."); mothurOutEndLine();
                        
-               }
+                       //if mask reread template
+                       if ((seqMask != "") || (filter)) {
+                               for (int i = 0; i < templateSeqs.size(); i++)           {  delete templateSeqs[i];              }
+                               templateSeqs = readSeqs(templateFileName);
+                       }
                        
+               }
+               
                //free memory
-               for (int i = 0; i < lines.size(); i++)                                  {       delete lines[i];                                }
-               for (int i = 0; i < templateLines.size(); i++)                  {       delete templateLines[i];                }
-                       
-               delete distcalculator;
-               delete decalc;
+               for (int i = 0; i < templateLines.size(); i++) { delete templateLines[i];  }
+               
+       }
+       catch(exception& e) {
+               errorOut(e, "Pintail", "doPrep");
+               exit(1);
+       }
+}
+//***************************************************************************************************************
+void Pintail::print(ostream& out) {
+       try {
+               int index = ceil(deviation);
+               
+               //is your DE value higher than the 95%
+               string chimera;
+               if (index != 0) {  //if index is 0 then its an exact match to a template seq
+                       if (quantiles[index][4] == 0.0) {
+                               chimera = "Your template does not include sequences that provide quantile values at distance " + toString(index);
+                       }else {
+                               if (DE > quantiles[index][4])           {       chimera = "Yes";        }
+                               else                                                            {       chimera = "No";         }
+                       }
+               }else{ chimera = "No";          }
+               
+               out << querySeq->getName() << '\t' << "div: " << deviation << "\tstDev: " << DE << "\tchimera flag: " << chimera << endl;
+               if (chimera == "Yes") {
+                       mothurOut(querySeq->getName() + "\tdiv: " + toString(deviation) + "\tstDev: " + toString(DE) + "\tchimera flag: " + chimera); mothurOutEndLine();
+               }
+               out << "Observed\t";
+               
+               for (int j = 0; j < obsDistance.size(); j++) {  out << obsDistance[j] << '\t';  }
+               out << endl;
+               
+               out << "Expected\t";
+               
+               for (int m = 0; m < expectedDistance.size(); m++) {  out << expectedDistance[m] << '\t';  }
+               out << endl;
+               
+               
+       }
+       catch(exception& e) {
+               errorOut(e, "Pintail", "print");
+               exit(1);
+       }
+}
+
+//***************************************************************************************************************
+int Pintail::getChimeras(Sequence* query) {
+       try {
+               querySeq = query;
+               trimmed.clear();
+               windowSizes = window;
+                               
+               //find pairs
+               bestfit = findPairs(query);
+                                                       
+               //make P into Q
+               for (int i = 0; i < probabilityProfile.size(); i++)  {  probabilityProfile[i] = 1 - probabilityProfile[i];  }  //cout << i << '\t' << probabilityProfile[i] << endl;
+       
+               //mask sequences if the user wants to 
+               if (seqMask != "") {
+                       decalc->runMask(query);
+                       decalc->runMask(bestfit);
+               }
+               
+               if (filter) {
+                       runFilter(query);
+                       runFilter(bestfit);
+               }
+               
+               //trim seq
+               decalc->trimSeqs(query, bestfit, trimmed);  
+               
+               //find windows
+               it = trimmed.begin();
+               windowsForeachQuery = decalc->findWindows(query, it->first, it->second, windowSizes, increment);
+
+               //find observed distance
+               obsDistance = decalc->calcObserved(query, bestfit, windowsForeachQuery, windowSizes);
+                               
+               Qav = decalc->findQav(windowsForeachQuery, windowSizes, probabilityProfile);
+
+               //find alpha                    
+               seqCoef = decalc->getCoef(obsDistance, Qav);
+               
+               //calculating expected distance
+               expectedDistance = decalc->calcExpected(Qav, seqCoef);
+               
+               //finding de
+               DE = decalc->calcDE(obsDistance, expectedDistance);
+               
+               //find distance between query and closest match
+               it = trimmed.begin();
+               deviation = decalc->calcDist(query, bestfit, it->first, it->second); 
                
+               delete bestfit;
+                                                                       
                return 0;
        }
        catch(exception& e) {
@@ -437,16 +314,13 @@ vector<float> Pintail::readFreq() {
 
 //***************************************************************************************************************
 //calculate the distances from each query sequence to all sequences in the template to find the closest sequence
-vector<Sequence*> Pintail::findPairs(int start, int end) {
+Sequence* Pintail::findPairs(Sequence* q) {
        try {
                
-               vector<Sequence*> seqsMatches;  
+               Sequence* seqsMatches;  
                
-               for(int i = start; i < end; i++){
-                       
-                       vector<Sequence*> copy = decalc->findClosest(querySeqs[i], templateSeqs, 1);
-                       seqsMatches.push_back(copy[0]);
-               }
+               vector<Sequence*> copy = decalc->findClosest(q, templateSeqs, 1);
+               seqsMatches = copy[0];
                
                return seqsMatches;
        
@@ -456,458 +330,7 @@ vector<Sequence*> Pintail::findPairs(int start, int end) {
                exit(1);
        }
 }
-
-/**************************************************************************************************/
-
-void Pintail::createProcessesSpots() {
-       try {
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
-               int process = 0;
-               vector<int> processIDS;
-               
-               //loop through and create all the processes you want
-               while (process != processors) {
-                       int pid = fork();
-                       
-                       if (pid > 0) {
-                               processIDS.push_back(pid);  
-                               process++;
-                       }else if (pid == 0){
-                               
-                               for (int j = lines[process]->start; j < lines[process]->end; j++) {
-                               
-                                       //chops off beginning and end of sequences so they both start and end with a base
-                                       map<int, int> trim;
-
-                                       decalc->trimSeqs(querySeqs[j], bestfit[j], trim); 
-                                       trimmed[j] = trim;
-                                       
-                               }
-
-                               mothurOut("Finding window breaks for sequences " + toString(lines[process]->start) + " to " + toString(lines[process]->end)); mothurOutEndLine();
-                               for (int i = lines[process]->start; i < lines[process]->end; i++) {
-                                       it = trimmed[i].begin();
-                                       windowsForeachQuery[i] = decalc->findWindows(querySeqs[i], it->first, it->second, windowSizes[i], increment);
-                               }
-                               mothurOut("Done finding window breaks for sequences " + toString(lines[process]->start) + " to " + toString(lines[process]->end)); mothurOutEndLine();
-                               
-                               //write out data to file so parent can read it
-                               ofstream out;
-                               string s = toString(getpid()) + ".temp";
-                               openOutputFile(s, out);
-                               
-                               //output windowsForeachQuery
-                               for (int i = lines[process]->start; i < lines[process]->end; i++) {
-                                       out << windowsForeachQuery[i].size() << '\t';
-                                       for (int j = 0; j < windowsForeachQuery[i].size(); j++) {
-                                               out << windowsForeachQuery[i][j] << '\t';
-                                       }
-                                       out << endl;
-                               }
-                       
-                               //output windowSizes
-                               for (int i = lines[process]->start; i < lines[process]->end; i++) {
-                                       out << windowSizes[i] << '\t';
-                               }
-                               out << endl;
-                               
-                               //output trimmed values
-                               for (int i = lines[process]->start; i < lines[process]->end; i++) {
-                                       it = trimmed[i].begin();                                        
-                                       out << it->first << '\t' << it->second << endl;
-                               }
-                               out.close();
-                               
-                               exit(0);
-                       }else { mothurOut("unable to spawn the necessary processes."); mothurOutEndLine(); exit(0); }
-               }
-               
-               //force parent to wait until all the processes are done
-               for (int i=0;i<processors;i++) { 
-                       int temp = processIDS[i];
-                       wait(&temp);
-               }
-               
-               //get data created by processes
-               for (int i=0;i<processors;i++) { 
-                       ifstream in;
-                       string s = toString(processIDS[i]) + ".temp";
-                       openInputFile(s, in);
-                       
-                       int size = lines[i]->end - lines[i]->start;
-                                       
-                       int count = lines[i]->start;
-                       for (int m = 0; m < size; m++) {
-                               int num;
-                               in >> num;
-                       
-                               vector<int> win;  int w;
-                               for (int j = 0; j < num; j++) {
-                                       in >> w;
-                                       win.push_back(w);
-                               }
-                       
-                               windowsForeachQuery[count] = win;
-                               count++;
-                               gobble(in);
-                       }
-               
-                       gobble(in);
-                       count = lines[i]->start;
-                       for (int m = 0; m < size; m++) {
-                               int num;
-                               in >> num;
-                               
-                               windowSizes[count] = num;
-                               count++;
-                       }
-                       
-                       gobble(in);
-                       
-                       count = lines[i]->start;
-                       for (int m = 0; m < size; m++) {
-                               int front, back;
-                               in >> front >> back;
-                       
-                               map<int, int> t;
-                               
-                               t[front] = back;
-                               
-                               trimmed[count] = t;
-                               count++;
-                               
-                               gobble(in);
-                       }
-
-                       
-                       in.close();
-                       remove(s.c_str());
-               }
-                       
-       
-#else
-               for (int j = 0; j < bestfit.size(); j++) {
-                       //chops off beginning and end of sequences so they both start and end with a base
-                       decalc->trimSeqs(querySeqs[j], bestfit[j], trimmed[j]);  
-               }
-
-               for (int i = lines[0]->start; i < lines[0]->end; i++) {
-                               it = trimmed[i].begin();
-                               vector<int> win = decalc->findWindows(querySeqs[i], it->first, it->second, windowSizes[i], increment);
-                               windowsForeachQuery[i] = win;
-               }
-
-#endif         
-       }
-       catch(exception& e) {
-               errorOut(e, "Pintail", "createProcessesSpots");
-               exit(1);
-       }
-}
 /**************************************************************************************************/
-
-void Pintail::createProcessesPairs() {
-       try {
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
-               int process = 0;
-               vector<int> processIDS;
-               
-               //loop through and create all the processes you want
-               while (process != processors) {
-                       int pid = fork();
-                       
-                       if (pid > 0) {
-                               processIDS.push_back(pid);  
-                               process++;
-                       }else if (pid == 0){
-                               
-                               mothurOut("Finding pairs for sequences " + toString(lines[process]->start) + " to " + toString(lines[process]->end)); mothurOutEndLine();
-                               bestfit = findPairs(lines[process]->start, lines[process]->end);
-                               mothurOut("Done finding pairs for sequences " +  toString(lines[process]->start) + " to " + toString(lines[process]->end)); mothurOutEndLine();
-                               
-                               //write out data to file so parent can read it
-                               ofstream out;
-                               string s = toString(getpid()) + ".temp";
-                               openOutputFile(s, out);
-                               
-                               //output range and size
-                               out << bestfit.size() << endl;
-                               
-                               //output pairs
-                               for (int i = 0; i < bestfit.size(); i++) {
-                                       out << ">" << bestfit[i]->getName() << endl << bestfit[i]->getAligned() << endl;
-                               }
-                               out.close();
-                               
-                               exit(0);
-                       }else { mothurOut("unable to spawn the necessary processes."); mothurOutEndLine(); exit(0); }
-               }
-               
-               //force parent to wait until all the processes are done
-               for (int i=0;i<processors;i++) { 
-                       int temp = processIDS[i];
-                       wait(&temp);
-               }
-               
-               //get data created by processes
-               for (int i=0;i<processors;i++) { 
-                       ifstream in;
-                       string s = toString(processIDS[i]) + ".temp";
-                       openInputFile(s, in);
-                       
-                       int size;
-                       in >> size;  gobble(in);
-                               
-                       //get pairs
-                       int count = lines[i]->start;
-                       for (int m = 0; m < size; m++) {
-                               Sequence* temp = new Sequence(in);
-                               bestfit[count] = temp;
-                       
-                               count++;
-                               gobble(in);
-                       }
-                       
-                       in.close();
-                       remove(s.c_str());
-               }
-                       
-       
-#else
-               bestfit = findPairs(lines[0]->start, lines[0]->end);
-#endif         
-       }
-       catch(exception& e) {
-               errorOut(e, "Pintail", "createProcessesPairs");
-               exit(1);
-       }
-}
-/**************************************************************************************************/
-
-void Pintail::createProcesses() {
-       try {
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
-               int process = 0;
-               vector<int> processIDS;
-               
-               //loop through and create all the processes you want
-               while (process != processors) {
-                       int pid = fork();
-                       
-                       if (pid > 0) {
-                               processIDS.push_back(pid);  
-                               process++;
-                       }else if (pid == 0){
-                               
-                               mothurOut("Calculating observed, expected and de values for sequences " + toString(lines[process]->start) + " to " + toString(lines[process]->end)); mothurOutEndLine();
-                               for (int i = lines[process]->start; i < lines[process]->end; i++) {
-                                       
-                                       vector<float> obsi = decalc->calcObserved(querySeqs[i], bestfit[i], windowsForeachQuery[i], windowSizes[i]);
-                                       obsDistance[i] = obsi;
-                               
-                                       //calc Qav
-                                       vector<float> q = decalc->findQav(windowsForeachQuery[i], windowSizes[i], probabilityProfile);
-                                       
-                                       //get alpha
-                                       float alpha = decalc->getCoef(obsDistance[i], q);
-                                       
-                                       //find expected
-                                       vector<float> exp = decalc->calcExpected(q, alpha);
-                                       expectedDistance[i] = exp;
-                                       
-                                       //get de and deviation
-                                       float dei = decalc->calcDE(obsi, exp);
-                                       DE[i] = dei;
-                                       
-                                       it = trimmed[i].begin();
-                                       float dist = decalc->calcDist(querySeqs[i], bestfit[i], it->first, it->second); 
-                                       deviation[i] = dist;
-                               }
-                               mothurOut("Done calculating observed, expected and de values for sequences " + toString(lines[process]->start) + " to " + toString(lines[process]->end)); mothurOutEndLine();
-                               
-                               //write out data to file so parent can read it
-                               ofstream out;
-                               string s = toString(getpid()) + ".temp";
-                               openOutputFile(s, out);
-                               
-                               int size = lines[process]->end - lines[process]->start;
-                               out << size << endl;
-                                                               
-                               //output observed distances
-                               for (int i = lines[process]->start; i < lines[process]->end; i++) {
-                                       out << obsDistance[i].size() << '\t';
-                                       for (int j = 0; j < obsDistance[i].size(); j++) {
-                                               out << obsDistance[i][j] << '\t';
-                                       }
-                                       out << endl;
-                               }
-                               
-                               
-                               //output expected distances
-                               for (int i = lines[process]->start; i < lines[process]->end; i++) {
-                                       out << expectedDistance[i].size() << '\t';
-                                       for (int j = 0; j < expectedDistance[i].size(); j++) {
-                                               out << expectedDistance[i][j] << '\t';
-                                       }
-                                       out << endl;
-                               }
-
-                       
-                               //output de values
-                               for (int i = lines[process]->start; i < lines[process]->end; i++) {
-                                       out << DE[i] << '\t';
-                               }
-                               out << endl;    
-                               
-                               //output de values
-                               for (int i = lines[process]->start; i < lines[process]->end; i++) {
-                                       out << deviation[i] << '\t';
-                               }
-                               out << endl;
-                               
-                               out.close();
-
-                               exit(0);
-                       }else { mothurOut("unable to spawn the necessary processes."); mothurOutEndLine(); exit(0); }
-               }
-               
-               //force parent to wait until all the processes are done
-               for (int i=0;i<processors;i++) { 
-                       int temp = processIDS[i];
-                       wait(&temp);
-               }
-               
-               //get data created by processes
-               for (int i=0;i<processors;i++) { 
-                       ifstream in;
-                       string s = toString(processIDS[i]) + ".temp";
-                       openInputFile(s, in);
-                       
-                       int size;
-                       in >> size;  gobble(in);
-                       
-                       //get observed distances
-                       int count = lines[i]->start;
-                       for (int m = 0; m < size; m++) {
-                               int num;
-                               in >> num;
-                       
-                               vector<float> obs;  float w;
-                               for (int j = 0; j < num; j++) {
-                                       in >> w;
-                                       obs.push_back(w);
-                               }
-                       
-                               obsDistance[count] = obs;
-                               count++;
-                               gobble(in);
-                       }
-                       
-                       gobble(in);
-                       
-                       //get expected distances
-                       count = lines[i]->start;
-                       for (int m = 0; m < size; m++) {
-                               int num;
-                               in >> num;
-                       
-                               vector<float> exp;  float w;
-                               for (int j = 0; j < num; j++) {
-                                       in >> w;
-                                       exp.push_back(w);
-                               }
-                       
-                               expectedDistance[count] = exp;
-                               count++;
-                               gobble(in);
-                       }
-
-                       gobble(in);
-                       
-                       count = lines[i]->start;
-                       for (int m = 0; m < size; m++) {
-                               float num;
-                               in >> num;
-                               
-                               DE[count] = num;
-                               count++;
-                       }
-                       
-                       gobble(in);
-                       
-                       count = lines[i]->start;
-                       for (int m = 0; m < size; m++) {
-                               float num;
-                               in >> num;
-                               
-                               deviation[count] = num;
-                               count++;
-                       }
-
-                       in.close();
-                       remove(s.c_str());
-               }
-
-                               
-#else
-                       mothurOut("Calculating observed distance... "); cout.flush();
-                       for (int i = lines[0]->start; i < lines[0]->end; i++) {
-                               vector<float> obsi = decalc->calcObserved(querySeqs[i], bestfit[i], windowsForeachQuery[i], windowSizes[i]);
-                               obsDistance[i] = obsi;
-                       }
-                       mothurOut("Done."); mothurOutEndLine();
-                       
-                       
-                       
-                       mothurOut("Finding variability... "); cout.flush();
-                       for (int i = lines[0]->start; i < lines[0]->end; i++) {
-                               vector<float> q = decalc->findQav(windowsForeachQuery[i], windowSizes[i], probabilityProfile);
-                               Qav[i] = q;
-                       }
-                       mothurOut("Done."); mothurOutEndLine();
-                       
-                       
-                       
-                       mothurOut("Calculating alpha... "); cout.flush();
-                       for (int i = lines[0]->start; i < lines[0]->end; i++) {
-                               float alpha = decalc->getCoef(obsDistance[i], Qav[i]);
-                               seqCoef.push_back(alpha);
-                       }
-                       mothurOut("Done."); mothurOutEndLine();
-               
-               
-               
-                       mothurOut("Calculating expected distance... "); cout.flush();
-                       for (int i = lines[0]->start; i < lines[0]->end; i++) {
-                               vector<float> exp = decalc->calcExpected(Qav[i], seqCoef[i]);
-                               expectedDistance[i] = exp;
-                       }
-                       mothurOut("Done."); mothurOutEndLine();
-                       
-                       
-                       
-                       mothurOut("Finding deviation... "); cout.flush();
-                       for (int i = lines[0]->start; i < lines[0]->end; i++) {
-                               float de = decalc->calcDE(obsDistance[i], expectedDistance[i]);
-                               DE[i] = de;
-                               
-                               it = trimmed[i].begin();
-                               float dist = decalc->calcDist(querySeqs[i], bestfit[i], it->first, it->second); 
-                               deviation[i] = dist;
-                       }
-                       mothurOut("Done."); mothurOutEndLine();
-
-#endif         
-       }
-       catch(exception& e) {
-               errorOut(e, "Pintail", "createProcesses");
-               exit(1);
-       }
-}
-
-
-/**************************************************************************************************/
-
 void Pintail::createProcessesQuan() {
        try {
 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
index 26154b2403176e14691fd86248a9edc4ac03a085..ed52c3aa0d010c70bd99bb045ac3715a5d8cb94e 100644 (file)
--- a/pintail.h
+++ b/pintail.h
 class Pintail : public Chimera {
        
        public:
-               Pintail(string, string, string);        
+               Pintail(string, string);        
                ~Pintail();
                
-               int getChimeras();
+               int getChimeras(Sequence*);
                void print(ostream&);
                
                void setCons(string c)          { consfile = c;  }
@@ -39,44 +39,39 @@ class Pintail : public Chimera {
                Dist* distcalculator;
                DeCalculator* decalc;
                int iters;
-               string fastafile, templateFile, consfile;
+               string fastafile, consfile;
                
-               
-               vector<linePair*> lines;
                vector<linePair*> templateLines;
-               vector<Sequence*> querySeqs;
-               vector<Sequence*> templateSeqs;
-               
-               vector<Sequence*> bestfit;  //bestfit[0] matches queryseqs[0]...
+               Sequence*querySeq;
+                               
+               Sequence* bestfit;  //closest match to query in template
                
-               vector< vector<float> > obsDistance;  //obsDistance[0] is the vector of observed distances for queryseqs[0]... 
-               vector< vector<float> > expectedDistance;  //expectedDistance[0] is the vector of expected distances for queryseqs[0]... 
-               vector<float> deviation;  //deviation[0] is the percentage of mismatched pairs over the whole seq between querySeqs[0] and its best match.
-               vector< vector<int> > windowsForeachQuery;  // windowsForeachQuery[0] is a vector containing the starting spot in queryseqs[0] aligned sequence for each window.
+               vector<float>  obsDistance;  //obsDistance is the vector of observed distances for query 
+               vector<float>  expectedDistance;  //expectedDistance is the vector of expected distances for query
+               float deviation;  //deviation is the percentage of mismatched pairs over the whole seq between query and its best match.
+               vector<int>  windowsForeachQuery;  // windowsForeachQuery is a vector containing the starting spot in query aligned sequence for each window.
                                                                                //this is needed so you can move by bases and not just spots in the alignment
                                                                                
-               vector<int> windowSizes;                        //windowSizes[0] = window size of querySeqs[0]
+               int windowSizes;                        //windowSizes = window size of query
                vector<int> windowSizesTemplate;    //windowSizesTemplate[0] = window size of templateSeqs[0]
                
-               vector< map<int, int> > trimmed;    //trimmed[0] = start and stop of trimmed sequences for querySeqs[0]
+               map<int, int> trimmed;    //trimmed = start and stop of trimmed sequences for query
                map<int, int>::iterator it;
                
-               vector< vector<float> > Qav;    //Qav[0] is the vector of average variablility for queryseqs[0]... 
-               vector<float>  seqCoef;                         //seqCoef[0] is the coeff for queryseqs[0]...
-               vector<float> DE;                                       //DE[0] is the deviaation for queryseqs[0]...
+               vector<float>  Qav;     //Qav is the vector of average variablility for query
+               float  seqCoef;         //seqCoef is the coeff for query
+               float DE;                       //DE is the deviaation for query
                vector<float> probabilityProfile;
                vector< vector<float> > quantiles;  //quantiles[0] is the vector of deviations with ceiling score of 1, quantiles[1] is the vector of deviations with ceiling score of 2...
                vector< vector<quanMember> > quantilesMembers;  //quantiles[0] is the vector of deviations with ceiling score of 1, quantiles[1] is the vector of deviations with ceiling score of 2...
-               vector< set<int> > h;
+               set<int>  h;
                
                
                vector<float> readFreq();
-               vector<Sequence*> findPairs(int, int);
+               Sequence* findPairs(Sequence*);
                        
-               void createProcessesSpots();
-               void createProcessesPairs();
-               void createProcesses();
                void createProcessesQuan();
+               void doPrep();
                
 };
 
index 25fee86518bc6e2177b11824aa7989168c38772f..cdaa38701e0593817d5af7b415e308429fe2e6a1 100644 (file)
@@ -354,7 +354,9 @@ int ReadNewickTree::readNewickInt(istream& f, int& n, Tree* T) {
                                group = "xxx";
                        }
                        
-                       T->tree[n1].setGroup(group);
+                       vector<string> tempGroup; tempGroup.push_back(group);
+                       
+                       T->tree[n1].setGroup(tempGroup);
                        T->tree[n1].setChildren(-1,-1);
                
                        if(blen == 1){  
index 09f899b947bf9bb4b455de86479d34ffa709d46f..a11465d669a55de0c5555f616c36f51caef74ea2 100644 (file)
@@ -87,7 +87,7 @@ ReadTreeCommand::ReadTreeCommand(string option){
                        
                        namefile = validParameter.validFile(parameters, "name", true);
                        if (namefile == "not open") { abort = true; }
-                       else if (namefile == "not found") { treefile = ""; }
+                       else if (namefile == "not found") { namefile = ""; }
                        else { readNamesFile(); }       
                        
                        if (abort == false) {
@@ -162,6 +162,7 @@ int ReadTreeCommand::execute(){
                                        if (it == nameMap.end()) {
                                                mothurOut(treeMap->namesOfSeqs[i] + " is in your groupfile and not in your tree. It will be disregarded."); mothurOutEndLine();
                                                treeMap->removeSeq(treeMap->namesOfSeqs[i]);
+                                               i--; //need this because removeSeq removes name from namesOfSeqs
                                        }
                                }
                        }
index 8e9eb56c8e55bddd9405cc790759c45f2ed7dc12..66db22b6a36ae9a0f7f9dd383201cd3852a120b2 100644 (file)
@@ -10,8 +10,8 @@
 #include "slayer.h"
 
 /***********************************************************************/
-Slayer::Slayer(int win, int increment, int parentThreshold, float div, int i) :
-               windowSize(win), windowStep(increment), parentFragmentThreshold(parentThreshold), divRThreshold(div), iters(i){}
+Slayer::Slayer(int win, int increment, int parentThreshold, float div, int i, int snp) :
+               windowSize(win), windowStep(increment), parentFragmentThreshold(parentThreshold), divRThreshold(div), iters(i), percentSNPSample(snp){}
 /***********************************************************************/
 string Slayer::getResults(Sequence* query, vector<Sequence*> refSeqs) {
        try {
@@ -20,13 +20,14 @@ string Slayer::getResults(Sequence* query, vector<Sequence*> refSeqs) {
                for (int i = 0; i < refSeqs.size(); i++) {
                
                        for (int j = i+1; j < refSeqs.size(); j++) {
-                       
+       
                                //make copies of query and each parent because runBellerophon removes gaps and messes them up
                                Sequence* q = new Sequence(query->getName(), query->getAligned());
                                Sequence* leftParent = new Sequence(refSeqs[i]->getName(), refSeqs[i]->getAligned());
                                Sequence* rightParent = new Sequence(refSeqs[j]->getName(), refSeqs[j]->getAligned());
                                
-                               vector<data_struct> divs = runBellerophon(q, leftParent, rightParent);
+                               map<int, int> spots;  //map from spot in original sequence to spot in filtered sequence for query and both parents
+                               vector<data_struct> divs = runBellerophon(q, leftParent, rightParent, spots);
                                
                                vector<data_struct> selectedDivs;
                                for (int k = 0; k < divs.size(); k++) {
@@ -39,16 +40,17 @@ string Slayer::getResults(Sequence* query, vector<Sequence*> refSeqs) {
                                        
                                        //require at least 3 SNPs on each side of the break
                                        if ((numSNPSLeft >= 3) && (numSNPSRight >= 3)) {
+                                       
+                                               //removed in 12/09 version of chimeraSlayer
+                                               //int winSizeLeft = divs[k].winLEnd - divs[k].winLStart + 1;
+                                               //int winSizeRight = divs[k].winREnd - divs[k].winRStart + 1;
                                                
-                                               int winSizeLeft = divs[k].winLEnd - divs[k].winLStart + 1;
-                                               int winSizeRight = divs[k].winREnd - divs[k].winRStart + 1;
-                                               
-                                               float snpRateLeft = numSNPSLeft / (float) winSizeLeft;
-                                               float snpRateRight = numSNPSRight / (float) winSizeRight;
-                                               float logR = log(snpRateLeft / snpRateRight) / log(2.0);
+                                               //float snpRateLeft = numSNPSLeft / (float) winSizeLeft;
+                                               //float snpRateRight = numSNPSRight / (float) winSizeRight;
+                                               //float logR = log(snpRateLeft / snpRateRight) / log(2.0); 
                                                
                                                // do not accept excess snp ratio on either side of the break
-                                               if (abs(logR) < 1 ) {  
+                                               //if (abs(logR) < 1 ) {  
                                                        
                                                        float BS_A, BS_B;
                                                        bootstrapSNPS(snpsLeft, snpsRight, BS_A, BS_B);
@@ -59,9 +61,15 @@ string Slayer::getResults(Sequence* query, vector<Sequence*> refSeqs) {
                                                        divs[k].bsMax = max(BS_A, BS_B);
                                                
                                                        divs[k].chimeraMax = max(divs[k].qla_qrb, divs[k].qlb_qra);
+                                                       
+                                                       //so results reflect orignal alignment
+                                                       divs[k].winLStart = spots[divs[k].winLStart];
+                                                       divs[k].winLEnd = spots[divs[k].winLEnd];  
+                                                       divs[k].winRStart = spots[divs[k].winRStart]; 
+                                                       divs[k].winREnd = spots[divs[k].winREnd]; 
                                                
                                                        selectedDivs.push_back(divs[k]);
-                                               }
+                                               //}
                                        }
                                }
                                
@@ -74,7 +82,7 @@ string Slayer::getResults(Sequence* query, vector<Sequence*> refSeqs) {
                        }
                }
                
-               
+
                // compute bootstrap support
                if (all.size() > 0) {
                        //sort them
@@ -95,22 +103,31 @@ string Slayer::getResults(Sequence* query, vector<Sequence*> refSeqs) {
        }
 }
 /***********************************************************************/
-vector<data_struct> Slayer::runBellerophon(Sequence* q, Sequence* pA, Sequence* pB) {
+vector<data_struct> Slayer::runBellerophon(Sequence* q, Sequence* pA, Sequence* pB, map<int, int>& spots) {
        try{
                
                vector<data_struct> data;
-                               
+               
                //vertical filter
                vector<Sequence*> temp;
                temp.push_back(q); temp.push_back(pA); temp.push_back(pB);
-               map<int, int> spots = verticalFilter(temp);
+               
+               //maps spot in new alignment to spot in alignment before filter
+               spots = verticalFilter(temp);  //fills baseSpots
                
                //get these to avoid numerous function calls
                string query = q->getAligned();
                string parentA = pA->getAligned();
                string parentB = pB->getAligned();
                int length = query.length();
-               
+//cout << q->getName() << endl << q->getAligned() << endl << endl;     
+//cout << pA->getName() << endl << pA->getAligned() << endl << endl;           
+//cout << pB->getName() << endl << pB->getAligned() << endl << endl;   
+//cout << " length = " << length << endl;
+//cout << q->getName() << endl;
+//cout << pA->getName() << '\t';
+//cout << pB->getName() << endl;
+       
                //check window size
                if (length < (2*windowSize+windowStep)) { 
                        mothurOut("Your window size is too large for " + q->getName() + ". I will make the window size " + toString(length/4) + " which is 1/4 the filtered length."); mothurOutEndLine();      
@@ -143,9 +160,11 @@ vector<data_struct> Slayer::runBellerophon(Sequence* q, Sequence* pA, Sequence*
                        //float avgQA_QB = ((QA*leftLength) + (QB*rightLength)) / (float) length;
                
                        float divR_QLA_QRB = min((QLA_QRB/QA), (QLA_QRB/QB));
-               
                        float divR_QLB_QRA = min((QLB_QRA/QA), (QLB_QRA/QB));
+//cout << leftLength << '\t' << rightLength << '\t' << QLA << '\t' << QRB << '\t' << QLB << '\t' << QRA  << '\t' << LAB << '\t' << RAB << '\t' << AB << '\t' << QA << '\t' << QB << '\t' << QLA_QRB << '\t' <<  QLB_QRA <<    endl;                    
 
+//cout << divRThreshold << endl;
+//cout << breakpoint << '\t' << divR_QLA_QRB << '\t' << divR_QLB_QRA << endl;
                        //is one of them above the 
                        if (divR_QLA_QRB >= divRThreshold || divR_QLB_QRA >= divRThreshold) {
                                
@@ -167,10 +186,10 @@ vector<data_struct> Slayer::runBellerophon(Sequence* q, Sequence* pA, Sequence*
                                        member.rab = RAB; 
                                        member.qra = QRA; 
                                        member.qlb = QLB; 
-                                       member.winLStart = spots[0];
-                                       member.winLEnd = spots[breakpoint];  //so breakpoint reflects spot in alignment before filter
-                                       member.winRStart = spots[breakpoint+1]
-                                       member.winREnd = spots[length-1]
+                                       member.winLStart = 0;
+                                       member.winLEnd = breakpoint;  
+                                       member.winRStart = breakpoint+1
+                                       member.winREnd = length-1
                                        member.querySeq = *(q); 
                                        member.parentA = *(pA);
                                        member.parentB = *(pB);
@@ -198,7 +217,7 @@ vector<snps> Slayer::getSNPS(string parentA, string query, string parentB, int l
        try {
        
                vector<snps> data;
-
+//cout << left << '\t' << right << endl;
                for (int i = left; i <= right; i++) {
                        
                        char A = parentA[i];
@@ -206,12 +225,31 @@ vector<snps> Slayer::getSNPS(string parentA, string query, string parentB, int l
                        char B = parentB[i];
                        
                        if ((A != Q) || (B != Q)) {
-                               snps member;
-                               member.queryChar = Q;
-                               member.parentAChar = A;
-                               member.parentBChar = B;
+//cout << "not equal " << Q << '\t' << A << '\t' << B << endl;
+                       
+                               //ensure not neighboring a gap. change to 12/09 release of chimeraSlayer - not sure what this adds, but it eliminates alot of SNPS
+                               if (
+                                       //did query loose a base here during filter??
+                                       ( i == 0 || abs (baseSpots[0][i] - baseSpots[0][i-1]) == 1) &&
+                                       ( i == query.length() || abs (baseSpots[0][i] - baseSpots[0][i+1]) == 1)
+                                       &&
+                                       //did parentA loose a base here during filter??
+                                       ( i == 0 || abs (baseSpots[1][i] - baseSpots[1][i-1]) == 1) &&
+                                       ( i == parentA.length() || abs (baseSpots[1][i] - baseSpots[1][i+1]) == 1) 
+                                       &&
+                                       //did parentB loose a base here during filter??
+                                       ( i == 0 || abs (baseSpots[2][i] - baseSpots[2][i-1]) == 1) &&
+                                       ( i == parentB.length() || abs (baseSpots[2][i] - baseSpots[2][i+1]) == 1)
+                                       ) 
+                               { 
                                
-                               data.push_back(member);
+                                       snps member;
+                                       member.queryChar = Q;
+                                       member.parentAChar = A;
+                                       member.parentBChar = B;
+//cout << "not neighboring a gap " << Q << '\t' << A << '\t' << B << '\t' << baseSpots[0][i] << '\t' << baseSpots[0][i+1] << '\t' << baseSpots[0][i-1] << '\t' << baseSpots[1][i] << '\t' << baseSpots[1][i+1] << '\t' << baseSpots[1][i-1] << '\t' << baseSpots[2][i] << '\t' << baseSpots[2][i+1] << '\t' << baseSpots[2][i-1] << endl;                              
+                                       data.push_back(member);
+                               }
                        }
                }
                
@@ -232,9 +270,9 @@ void Slayer::bootstrapSNPS(vector<snps> left, vector<snps> right, float& BSA, fl
                int count_A = 0; // sceneario QLA,QRB supported
                int count_B = 0; // sceneario QLB,QRA supported
        
-               int numLeft = max(1, int(left.size()/10 +0.5));
-               int numRight = max(1, int(right.size()/10 + 0.5));
-               
+               int numLeft = max(1, int(left.size() * (percentSNPSample/(float)100) + 0.5));
+               int numRight = max(1, int(right.size() * (percentSNPSample/(float)100) + 0.5));
+
                for (int i = 0; i < iters; i++) {
                        //random sampling with replacement.
                
@@ -278,11 +316,29 @@ void Slayer::bootstrapSNPS(vector<snps> left, vector<snps> right, float& BSA, fl
                        if ((QLB > QLA) && (QRA > QRB)) {
                                count_B++;
                        }
-               
+                       
+//cout << "selected left snp: \n";
+//for (int j = 0; j < selectedLeft.size(); j++) {  cout << selectedLeft[j].parentAChar;  } 
+//cout << endl;
+//for (int j = 0; j < selectedLeft.size(); j++) {  cout << selectedLeft[j].queryChar;  }
+//cout << endl;
+//for (int j = 0; j < selectedLeft.size(); j++) {  cout << selectedLeft[j].parentBChar;  }
+//cout << endl;
+//cout << "selected right snp: \n";
+//for (int j = 0; j < selectedRight.size(); j++) {  cout << selectedRight[j].parentAChar;  } 
+//cout << endl;
+//for (int i = 0; i < selectedRight.size(); i++) {  cout << selectedRight[i].queryChar;  }
+//cout << endl;
+//for (int i = 0; i < selectedRight.size(); i++) {  cout << selectedRight[i].parentBChar;  }
+//cout << endl;                
                }
 
+
+
+
                BSA = ((float) count_A / (float) iters) * 100;
                BSB = ((float) count_B / (float) iters) * 100;
+//cout << "bsa = " << BSA << " bsb = " << BSB << endl;
        
        }
        catch(exception& e) {
@@ -359,7 +415,7 @@ float Slayer::computePercentID(string queryFrag, string parent, int left, int ri
        try {
                int total = 0;
                int matches = 0;
-       
+
                for (int i = left; i <= right; i++) {
                        total++;
                        if (queryFrag[i] == parent[i]) {
@@ -380,6 +436,10 @@ float Slayer::computePercentID(string queryFrag, string parent, int left, int ri
 //remove columns that contain any gaps
 map<int, int> Slayer::verticalFilter(vector<Sequence*> seqs) {
        try {
+               //find baseSpots
+               baseSpots.clear(); 
+               baseSpots.resize(3);  //query, parentA, parentB
+       
                vector<int> gaps;       gaps.resize(seqs[0]->getAligned().length(), 0);
                
                string filterString = (string(seqs[0]->getAligned().length(), '1'));
@@ -391,11 +451,11 @@ map<int, int> Slayer::verticalFilter(vector<Sequence*> seqs) {
                        
                        for (int j = 0; j < seqAligned.length(); j++) {
                                //if this spot is a gap
-                               if ((seqAligned[j] == '-') || (seqAligned[j] == '.') || (toupper(seqAligned[j]) == 'N'))        {       gaps[j]++;      }
+                               if ((seqAligned[j] == '-') || (seqAligned[j] == '.') || (toupper(seqAligned[j]) == 'N'))        {   gaps[j]++;  }
                        }
                }
                
-               //zero out spot where all sequences have blanks
+               //zero out spot where any sequences have blanks
                int numColRemoved = 0;
                int count = 0;
                map<int, int> maskMap; maskMap.clear();
@@ -407,16 +467,25 @@ map<int, int> Slayer::verticalFilter(vector<Sequence*> seqs) {
                                count++;
                        }
                }
-               
+
                //for each sequence
                for (int i = 0; i < seqs.size(); i++) {
                
                        string seqAligned = seqs[i]->getAligned();
                        string newAligned = "";
                        
+                       int baseCount = 0;
+                       int count = 0;
                        for (int j = 0; j < seqAligned.length(); j++) {
+                               //are you a base
+                               if ((seqAligned[j] != '-') && (seqAligned[j] != '.') && (toupper(seqAligned[j]) != 'N'))        { baseCount++; }
+                       
                                //if this spot is not a gap
-                               if (filterString[j] == '1') { newAligned += seqAligned[j]; }
+                               if (filterString[j] == '1') { 
+                                       newAligned += seqAligned[j]; 
+                                       baseSpots[i][count] = baseCount;
+                                       count++;
+                               }
                        }
                        
                        seqs[i]->setAligned(newAligned);
index dc13e6daa3f1e4ce9e2340da2cf201c9881a3083..099cb208f1e69ee9f943ea81ee395aba9bbc4efc 100644 (file)
--- a/slayer.h
+++ b/slayer.h
@@ -64,7 +64,7 @@ class Slayer {
 
        public:
                
-               Slayer(int, int, int, float, int);
+               Slayer(int, int, int, float, int, int);
                ~Slayer() {};
                
                string getResults(Sequence*, vector<Sequence*>);
@@ -73,14 +73,15 @@ class Slayer {
                                
        private:
                
-               int windowSize, windowStep, parentFragmentThreshold, iters;
+               int windowSize, windowStep, parentFragmentThreshold, iters, percentSNPSample;
                float divRThreshold; 
                vector<data_struct>  outputResults;
+               vector< map<int, int> > baseSpots;
                
                map<int, int> verticalFilter(vector<Sequence*>);
                float computePercentID(string, string, int, int);
                
-               vector<data_struct> runBellerophon(Sequence*, Sequence*, Sequence*);
+               vector<data_struct> runBellerophon(Sequence*, Sequence*, Sequence*, map<int, int>&);
                vector<snps> getSNPS(string, string, string, int, int);
                void bootstrapSNPS(vector<snps>, vector<snps>, float&, float&);
                float snpQA(vector<snps>);
index 52b7322cb1064546fd0f446f63fd61ef61d868fe..731eb64d93f95b485f8607308e3e88345138813c 100644 (file)
--- a/tree.cpp
+++ b/tree.cpp
@@ -27,7 +27,8 @@ Tree::Tree() {
                        //initialize leaf nodes
                        if (i <= (numLeaves-1)) {
                                tree[i].setName(globaldata->Treenames[i]);
-                               tree[i].setGroup(globaldata->gTreemap->getGroup(globaldata->Treenames[i]));
+                               vector<string> tempGroups; tempGroups.push_back(globaldata->gTreemap->getGroup(globaldata->Treenames[i]));
+                               tree[i].setGroup(tempGroups);
                                //set pcount and pGroup for groupname to 1.
                                tree[i].pcount[globaldata->gTreemap->getGroup(globaldata->Treenames[i])] = 1;
                                tree[i].pGroups[globaldata->gTreemap->getGroup(globaldata->Treenames[i])] = 1;
@@ -37,7 +38,8 @@ Tree::Tree() {
                        //intialize non leaf nodes
                        }else if (i > (numLeaves-1)) {
                                tree[i].setName("");
-                               tree[i].setGroup("");
+                               vector<string> tempGroups;
+                               tree[i].setGroup(tempGroups);
                        }
                }
        }
@@ -119,6 +121,14 @@ void Tree::addNamesToCounts() {
                                        }
                                }//end if
                                
+                               //update groups to reflect all the groups this node represents
+                               vector<string> nodeGroups;
+                               map<string, int>::iterator itGroups;
+                               for (itGroups = tree[i].pcount.begin(); itGroups != tree[i].pcount.end(); itGroups++) {
+                                       nodeGroups.push_back(itGroups->first);
+                               }
+                               tree[i].setGroup(nodeGroups);
+                               
                        }//end else
                }//end for
                                
@@ -375,7 +385,7 @@ void Tree::randomLabels(vector<string> g) {
                                tree[z].pGroups = (tree[i].pGroups);
                                tree[i].pGroups = (lib_hold);
                                
-                               string zgroup = tree[z].getGroup();
+                               vector<string> zgroup = tree[z].getGroup();
                                tree[z].setGroup(tree[i].getGroup());
                                tree[i].setGroup(zgroup);
                                
@@ -394,7 +404,7 @@ void Tree::randomLabels(vector<string> g) {
                exit(1);
        }
 }
-/**************************************************************************************************/
+/**************************************************************************************************
 
 void Tree::randomLabels(string groupA, string groupB) {
        try {
@@ -447,7 +457,9 @@ void Tree::assembleRandomUnifracTree(vector<string> g) {
 }
 /*************************************************************************************************/
 void Tree::assembleRandomUnifracTree(string groupA, string groupB) {
-       randomLabels(groupA, groupB);
+
+       vector<string> temp; temp.push_back(groupA); temp.push_back(groupB);
+       randomLabels(temp);
        assembleTree();
 }
 
@@ -584,7 +596,9 @@ void Tree::printBranch(int node, ostream& out, string mode) {
                                }
                        }
                }else { //you are a leaf
-                       out << tree[node].getGroup(); 
+                       string leafGroup = globaldata->gTreemap->getGroup(tree[node].getName());
+                       
+                       out << leafGroup; 
                        if (mode == "branch") {
                                //if there is a branch length then print it
                                if (tree[node].getBranchLength() != -1) {
diff --git a/tree.h b/tree.h
index a076cef8eb30e3cda0efd8d74fcb680ae007cb24..37028be57e90f3d5d38eeb3080b36a8538e8bf8e 100644 (file)
--- a/tree.h
+++ b/tree.h
@@ -55,7 +55,7 @@ private:
        void randomTopology();
        void randomBlengths();
        void randomLabels(vector<string>);
-       void randomLabels(string, string);
+       //void randomLabels(string, string);
        void printBranch(int, ostream&, string);  //recursively print out tree
        void parseTreeFile();   //parses through tree file to find names of nodes and number of them
                                                        //this is required in case user has sequences in the names file that are
index e472570895414a4ec535e10c5f02bee10dfea982..cd046f9dced6bbbe33c0d611d603217e28884d7c 100644 (file)
@@ -49,7 +49,7 @@ void TreeMap::removeSeq(string seqName) {
        //erase name from namesOfSeqs
        for (int i = 0; i < namesOfSeqs.size(); i++) {
                if (namesOfSeqs[i] == seqName)  {
-                       namesOfSeqs.erase (namesOfSeqs.begin()+i);
+                       namesOfSeqs.erase(namesOfSeqs.begin()+i);
                        break;
                }
        }
@@ -61,8 +61,6 @@ void TreeMap::removeSeq(string seqName) {
        //remove seq from treemap
        it = treemap.find(seqName);
        treemap.erase(it);
-       
-
 }
 /************************************************************/
 
index 54cdae4e502250229a6e70e5ce937a631a843b1e..648707ab246725ce6f36e912d9b8509024e362b7 100644 (file)
@@ -25,7 +25,7 @@ Node::Node() {
 /****************************************************************/
 void Node::setName(string Name) {  name = Name; }
 /****************************************************************/
-void Node::setGroup(string groups)  { group =groups; }
+void Node::setGroup(vector<string> groups)  { group =groups; }
 /****************************************************************/
 void Node::setBranchLength(float l) { branchLength = l; }
 /****************************************************************/
@@ -41,7 +41,7 @@ void Node::setChildren(int lc, int rc) { lchild = lc; rchild = rc; }  //leftchild
 /****************************************************************/
 string Node::getName() { return name; }
 /****************************************************************/
-string Node::getGroup() { return group; }
+vector<string> Node::getGroup() { return group; }
 /****************************************************************/
 float Node::getBranchLength() { return branchLength; }
 /****************************************************************/
@@ -60,12 +60,16 @@ int Node::getIndex() { return vectorIndex; }
 //to be used by printTree in the Tree class to print the leaf info                     
 void Node::printNode() {
        try{
-               mothurOut(toString(parent) + " " + toString(lchild) + " " + toString(rchild) + " " + group);
+               mothurOut(toString(parent) + " " + toString(lchild) + " " + toString(rchild) + " ");
+               
+               for (int i = 0; i < group.size(); i++) {  mothurOut( group[i] + " "); }
+               
                //there is a branch length
                if (branchLength != -1) { 
                        mothurOut(" " + toString(branchLength)); 
                }
                mothurOut(" |");
+               
                map<string, int>::iterator it;
                for(it=pGroups.begin();it!=pGroups.end();it++){
                        mothurOut(" " + it->first + ":" + toString(it->second));
index d6163a2b1275f9816a576659019d178005042e6d..c25806b7445cb90cc97ae45ab33d917a3981257e 100644 (file)
@@ -21,7 +21,7 @@ class Node  {
                ~Node() { pGroups.clear(); pcount.clear(); };
                
                void setName(string);
-               void setGroup(string);  
+               void setGroup(vector<string>);  
                void setBranchLength(float);
                void setLabel(float);
                void setParent(int);
@@ -30,7 +30,7 @@ class Node  {
                void setLengthToLeaves(float);
                
                string getName();
-               string getGroup();  
+               vector<string> getGroup();  
                float getBranchLength();
                float getLengthToLeaves();
                float getLabel();
@@ -52,7 +52,7 @@ class Node  {
                        
        private:
                string                  name;
-               string                  group;
+               vector<string>  group; 
                float                   branchLength, length2leaf, label;
                int                             parent;
                int                             lchild;
index c39e6629df1c2749928833fe24af65d48e853d64..e6ed0eb83e7a71d2fd34e4b0fc7627c1368b5d6e 100644 (file)
@@ -48,14 +48,8 @@ EstOutput Unweighted::getValues(Tree* t) {
                                //groups in this combo
                                groups.push_back(globaldata->Groups[a]); groups.push_back(globaldata->Groups[l]);
                
-                               for(int i=t->getNumLeaves();i<t->getNumNodes();i++){
-               
-                                       int lc = t->tree[i].getLChild();  //lc = vector index of left child
-                                       int rc = t->tree[i].getRChild();  //rc = vector index of right child
-                       
-                                       /**********************************************************************/
-                                       //This section adds in all lengths that are non leaf
-                       
+                               for(int i=0;i<t->getNumNodes();i++){
+       
                                        copyIpcount = t->tree[i].pcount;
                                        for (it = copyIpcount.begin(); it != copyIpcount.end();) {
                                                if (inUsersGroups(it->first, groups) != true) { 
@@ -73,30 +67,10 @@ EstOutput Unweighted::getValues(Tree* t) {
                                                totalBL += abs(t->tree[i].getBranchLength()); 
                                        }
                        
-                                       /**********************************************************************/
-                                       //This section adds in all lengths that are leaf
-                       
-                                       //if i's chidren are leaves
-                                       if (t->tree[rc].getRChild() == -1) {
-                                               //if rc is a valid group and rc has a BL
-                                               if ((inUsersGroups(t->tree[rc].getGroup(), groups) == true) && (t->tree[rc].getBranchLength() != -1)) {
-                                                       UniqueBL += abs(t->tree[rc].getBranchLength());
-                                                       totalBL += abs(t->tree[rc].getBranchLength()); 
-                                               }
-                                       }
-                       
-                                       if (t->tree[lc].getLChild() == -1) {
-                                               //if lc is a valid group and lc has a BL
-                                               if ((inUsersGroups(t->tree[lc].getGroup(), groups) == true) && (t->tree[lc].getBranchLength() != -1)) {
-                                                       UniqueBL += abs(t->tree[lc].getBranchLength());
-                                                       totalBL += abs(t->tree[lc].getBranchLength()); 
-                                               }
-                                       }
-                       
-                                       /**********************************************************************/
                                }
                
                                UW = (UniqueBL / totalBL);  
+               //cout << globaldata->Groups[a] << globaldata->Groups[l] << '\t' << UniqueBL << '\t' << totalBL << endl;
        
                                if (isnan(UW) || isinf(UW)) { UW = 0; }
        
@@ -126,14 +100,8 @@ EstOutput Unweighted::getValues(Tree* t) {
                        UW = 0.00;              //Unweighted Value = UniqueBL / totalBL;
                        copyIpcount.clear();
                                
-                       for(int i=t->getNumLeaves();i<t->getNumNodes();i++){
-               
-                               int lc = t->tree[i].getLChild();  //lc = vector index of left child
-                               int rc = t->tree[i].getRChild();  //rc = vector index of right child
-                       
-                               /**********************************************************************/
-                               //This section adds in all lengths that are non leaf
-                       
+                       for(int i=0;i<t->getNumNodes();i++){
+                               
                                copyIpcount = t->tree[i].pcount;
                                for (it = copyIpcount.begin(); it != copyIpcount.end();) {
                                        if (inUsersGroups(it->first, groups) != true) { 
@@ -151,27 +119,6 @@ EstOutput Unweighted::getValues(Tree* t) {
                                        totalBL += abs(t->tree[i].getBranchLength()); 
                                }
                        
-                               /**********************************************************************/
-                               //This section adds in all lengths that are leaf
-                       
-                               //if i's chidren are leaves
-                               if (t->tree[rc].getRChild() == -1) {
-                                       //if rc is a valid group and rc has a BL
-                                       if ((inUsersGroups(t->tree[rc].getGroup(), groups) == true) && (t->tree[rc].getBranchLength() != -1)) {
-                                               UniqueBL += abs(t->tree[rc].getBranchLength());
-                                               totalBL += abs(t->tree[rc].getBranchLength()); 
-                                       }
-                               }
-                       
-                               if (t->tree[lc].getLChild() == -1) {
-                                       //if lc is a valid group and lc has a BL
-                                       if ((inUsersGroups(t->tree[lc].getGroup(), groups) == true) && (t->tree[lc].getBranchLength() != -1)) {
-                                               UniqueBL += abs(t->tree[lc].getBranchLength());
-                                               totalBL += abs(t->tree[lc].getBranchLength()); 
-                                       }
-                               }
-                       
-                               /**********************************************************************/
                        }
                
                        UW = (UniqueBL / totalBL);  
@@ -236,12 +183,9 @@ EstOutput Unweighted::getValues(Tree* t, string groupA, string groupB) {
                                copyTree->assembleRandomUnifracTree(groups[0], groups[1]);
                                
                                //copyTree->createNewickFile("random"+groupA+toString(count));
-               
-                               for(int i=copyTree->getNumLeaves();i<copyTree->getNumNodes();i++){
-               
-                                       int lc = copyTree->tree[i].getLChild();  //lc = vector index of left child
-                                       int rc = copyTree->tree[i].getRChild();  //rc = vector index of right child
-                       
+                               
+                               for(int i=0;i<copyTree->getNumNodes();i++){
+                                               
                                        /**********************************************************************/
                                        //This section adds in all lengths that are non leaf
                                        copyIpcount = copyTree->tree[i].pcount;
@@ -261,27 +205,6 @@ EstOutput Unweighted::getValues(Tree* t, string groupA, string groupB) {
                                                totalBL += abs(copyTree->tree[i].getBranchLength()); 
                                        }
                        
-                                       /**********************************************************************/
-                                       //This section adds in all lengths that are leaf
-                       
-                                       //if i's chidren are leaves
-                                       if (copyTree->tree[rc].getRChild() == -1) {
-                                               //if rc is a valid group and rc has a BL
-                                               if ((inUsersGroups(copyTree->tree[rc].getGroup(), groups) == true) && (copyTree->tree[rc].getBranchLength() != -1)) {
-                                                       UniqueBL += abs(copyTree->tree[rc].getBranchLength());
-                                                       totalBL += abs(copyTree->tree[rc].getBranchLength()); 
-                                               }
-                                       }
-                       
-                                       if (copyTree->tree[lc].getLChild() == -1) {
-                                               //if lc is a valid group and lc has a BL
-                                               if ((inUsersGroups(copyTree->tree[lc].getGroup(), groups) == true) && (copyTree->tree[lc].getBranchLength() != -1)) {
-                                                       UniqueBL += abs(copyTree->tree[lc].getBranchLength());
-                                                       totalBL += abs(copyTree->tree[lc].getBranchLength()); 
-                                               }
-                                       }
-                       
-                                       /**********************************************************************/
                                }
                
                                UW = (UniqueBL / totalBL);  
@@ -320,13 +243,7 @@ EstOutput Unweighted::getValues(Tree* t, string groupA, string groupB) {
                        //swap labels in all the groups you want to compare
                        copyTree->assembleRandomUnifracTree(groups);
 
-                       for(int i=copyTree->getNumLeaves();i<copyTree->getNumNodes();i++){
-               
-                               int lc = copyTree->tree[i].getLChild();  //lc = vector index of left child
-                               int rc = copyTree->tree[i].getRChild();  //rc = vector index of right child
-                       
-                               /**********************************************************************/
-                               //This section adds in all lengths that are non leaf
+                       for(int i=0;i<copyTree->getNumNodes();i++){
                        
                                copyIpcount = copyTree->tree[i].pcount;
                                for (it = copyIpcount.begin(); it != copyIpcount.end();) {
@@ -345,27 +262,6 @@ EstOutput Unweighted::getValues(Tree* t, string groupA, string groupB) {
                                        totalBL += abs(copyTree->tree[i].getBranchLength()); 
                                }
                        
-                               /**********************************************************************/
-                               //This section adds in all lengths that are leaf
-                       
-                               //if i's chidren are leaves
-                               if (copyTree->tree[rc].getRChild() == -1) {
-                                       //if rc is a valid group and rc has a BL
-                                       if ((inUsersGroups(copyTree->tree[rc].getGroup(), groups) == true) && (copyTree->tree[rc].getBranchLength() != -1)) {
-                                               UniqueBL += abs(copyTree->tree[rc].getBranchLength());
-                                               totalBL += abs(copyTree->tree[rc].getBranchLength()); 
-                                       }
-                               }
-                       
-                               if (copyTree->tree[lc].getLChild() == -1) {
-                                       //if lc is a valid group and lc has a BL
-                                       if ((inUsersGroups(copyTree->tree[lc].getGroup(), groups) == true) && (copyTree->tree[lc].getBranchLength() != -1)) {
-                                               UniqueBL += abs(copyTree->tree[lc].getBranchLength());
-                                               totalBL += abs(copyTree->tree[lc].getBranchLength()); 
-                                       }
-                               }
-                       
-                               /**********************************************************************/
                        }
                
                        UW = (UniqueBL / totalBL);  
index d1bc40ea72b096e6ae8ffce9f5488dec31a85000..8a312297cc677ffb78aec597bf8128efbf500a0f 100644 (file)
@@ -26,6 +26,8 @@ EstOutput Weighted::getValues(Tree* t) {
                                //initialize weighted scores
                                WScore[globaldata->Groups[i]+globaldata->Groups[l]] = 0.0;
                                
+                               vector<string> groups; groups.push_back(globaldata->Groups[i]); groups.push_back(globaldata->Groups[l]);
+                               
                                D.push_back(0.0000); //initialize a spot in D for each combination
                                
                                /********************************************************/
@@ -52,9 +54,25 @@ EstOutput Weighted::getValues(Tree* t) {
                                        //is this sum from a sequence which is in one of the users groups
                                        if (inUsersGroups(t->tree[v].getGroup(), globaldata->Groups) == true) {
                                                //is this sum from a sequence which is in this groupCombo
-                                               if ((t->tree[v].getGroup() == globaldata->Groups[i]) || (t->tree[v].getGroup() == globaldata->Groups[l])) {
-                                                       sum /= (double)tmap->seqsPerGroup[t->tree[v].getGroup()];
-                                                       D[count] += sum; 
+                                               if (inUsersGroups(t->tree[v].getGroup(), groups)) {
+                                                       int numSeqsInGroupI, numSeqsInGroupL;
+                                                       
+                                                       map<string, int>::iterator it;
+                                                       it = t->tree[v].pcount.find(groups[0]);
+                                                       if (it != t->tree[v].pcount.end()) { //this leaf node contains seqs from group i
+                                                               numSeqsInGroupI = it->second;
+                                                       }else{ numSeqsInGroupI = 0; }
+                                                       
+                                                       it = t->tree[v].pcount.find(groups[1]);
+                                                       if (it != t->tree[v].pcount.end()) { //this leaf node contains seqs from group l
+                                                               numSeqsInGroupL = it->second;
+                                                       }else{ numSeqsInGroupL = 0; }
+                                                       
+                                                       double weightedSum = ((numSeqsInGroupI * sum) / (double)tmap->seqsPerGroup[groups[0]]) + ((numSeqsInGroupL * sum) / (double)tmap->seqsPerGroup[groups[1]]);
+
+                                                       //sum /= (double)tmap->seqsPerGroup[t->tree[v].getGroup()];
+                                                       
+                                                       D[count] += weightedSum; 
                                                }
                                        }
                                }
@@ -125,6 +143,7 @@ EstOutput Weighted::getValues(Tree* t, string groupA, string groupB) {
                WScore[(groupA+groupB)] = 0.0;
                float D = 0.0;
                
+               vector<string> groups; groups.push_back(groupA); groups.push_back(groupB);
                                                
                /********************************************************/
                //calculate a D value for the group combo
@@ -147,9 +166,25 @@ EstOutput Weighted::getValues(Tree* t, string groupA, string groupB) {
                                sum += abs(t->tree[index].getBranchLength());
                        }
                                                
-                       if ((t->tree[v].getGroup() == groupA) || (t->tree[v].getGroup() == groupB)) {
-                               sum /= (double)tmap->seqsPerGroup[t->tree[v].getGroup()];
-                               D += sum; 
+                       if (inUsersGroups(t->tree[v].getGroup(), groups)) {
+                               int numSeqsInGroupI, numSeqsInGroupL;
+                                                       
+                               map<string, int>::iterator it;
+                               it = t->tree[v].pcount.find(groups[0]);
+                               if (it != t->tree[v].pcount.end()) { //this leaf node contains seqs from group i
+                                       numSeqsInGroupI = it->second;
+                               }else{ numSeqsInGroupI = 0; }
+                               
+                               it = t->tree[v].pcount.find(groups[1]);
+                               if (it != t->tree[v].pcount.end()) { //this leaf node contains seqs from group l
+                                       numSeqsInGroupL = it->second;
+                               }else{ numSeqsInGroupL = 0; }
+                               
+                               double weightedSum = ((numSeqsInGroupI * sum) / (double)tmap->seqsPerGroup[groups[0]]) + ((numSeqsInGroupL * sum) / (double)tmap->seqsPerGroup[groups[1]]);
+                               
+                               //sum /= (double)tmap->seqsPerGroup[t->tree[v].getGroup()];
+                               
+                               D += weightedSum; 
                        }
                }
                /********************************************************/