working on chimeras

author westcott <westcott>

Fri, 12 Feb 2010 19:50:54 +0000 (19:50 +0000)

committer westcott <westcott>

Fri, 12 Feb 2010 19:50:54 +0000 (19:50 +0000)
author westcott <westcott>
Fri, 12 Feb 2010 19:50:54 +0000 (19:50 +0000)
committer westcott <westcott>
Fri, 12 Feb 2010 19:50:54 +0000 (19:50 +0000)
diff --git a/Mothur.xcodeproj/project.pbxproj b/Mothur.xcodeproj/project.pbxproj

index 21dbf8dfbb074de24a0b70e523bd95e56d414695..51a592b21cdd2b1e18b1f7357b1b34993276c5c2 100644 (file)
--- a/Mothur.xcodeproj/project.pbxproj
+++ b/Mothur.xcodeproj/project.pbxproj
@@ -188,6 +188,7 @@
                 A7E0AAB610D2886D00CF407B /* mgclustercommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E0AAB510D2886D00CF407B /* mgclustercommand.cpp */; };
                 A7E4A783106913F900688F62 /* getsharedotucommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E4A782106913F900688F62 /* getsharedotucommand.cpp */; };
                 A7E4A824106A9AD700688F62 /* maligner.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E4A823106A9AD700688F62 /* maligner.cpp */; };
+               A7E8A22F1125939F0011D39C /* chimerarealigner.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E8A22E1125939F0011D39C /* chimerarealigner.cpp */; };
                 A7F5759710CEBC0600E20E47 /* libreadline.a in Frameworks */ = {isa = PBXBuildFile; fileRef = A7F5759610CEBC0600E20E47 /* libreadline.a */; };
                 EB1216880F619B83004A865F /* bergerparker.cpp in Sources */ = {isa = PBXBuildFile; fileRef = EB1216870F619B83004A865F /* bergerparker.cpp */; };
                 EB1216E50F61ACFB004A865F /* bstick.cpp in Sources */ = {isa = PBXBuildFile; fileRef = EB1216E40F61ACFB004A865F /* bstick.cpp */; };
@@ -593,6 +594,8 @@
                 A7E4A782106913F900688F62 /* getsharedotucommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = getsharedotucommand.cpp; sourceTree = SOURCE_ROOT; };
                 A7E4A822106A9AD700688F62 /* maligner.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = maligner.h; sourceTree = SOURCE_ROOT; };
                 A7E4A823106A9AD700688F62 /* maligner.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = maligner.cpp; sourceTree = SOURCE_ROOT; };
+               A7E8A22D1125939F0011D39C /* chimerarealigner.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = chimerarealigner.h; sourceTree = SOURCE_ROOT; };
+               A7E8A22E1125939F0011D39C /* chimerarealigner.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = chimerarealigner.cpp; sourceTree = SOURCE_ROOT; };
                 A7F5759610CEBC0600E20E47 /* libreadline.a */ = {isa = PBXFileReference; lastKnownFileType = archive.ar; name = libreadline.a; path = "../readline-6.0/libreadline.a"; sourceTree = SOURCE_ROOT; };
                 C6859E8B029090EE04C91782 /* Mothur.1 */ = {isa = PBXFileReference; lastKnownFileType = text.man; path = Mothur.1; sourceTree = "<group>"; };
                 EB1216860F619B83004A865F /* bergerparker.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = bergerparker.h; sourceTree = SOURCE_ROOT; };
@@ -733,6 +736,8 @@
                                 A75B887B104C16860083C454 /* ccode.cpp */,
                                 A7283FF61056CAE100D0CC69 /* chimeracheckrdp.h */,
                                 A7283FF71056CAE100D0CC69 /* chimeracheckrdp.cpp */,
+                               A7E8A22D1125939F0011D39C /* chimerarealigner.h */,
+                               A7E8A22E1125939F0011D39C /* chimerarealigner.cpp */,
                                 A7B04491106CC3E60046FC83 /* chimeraslayer.h */,
                                 A7B04492106CC3E60046FC83 /* chimeraslayer.cpp */,
                                 372A55531017922B00C5194B /* decalc.h */,
@@ -1337,6 +1342,7 @@
                                 A704E8A31106045D00870157 /* otuhierarchycommand.cpp in Sources */,
                                 A70B00C8110885EB002F453A /* setdircommand.cpp in Sources */,
                                 A794201111107897003AECCD /* distancedb.cpp in Sources */,
+                               A7E8A22F1125939F0011D39C /* chimerarealigner.cpp in Sources */,
                         );
                         runOnlyForDeploymentPostprocessing = 0;
                 };
diff --git a/aligncommand.cpp b/aligncommand.cpp

index 7782d1eac5354261c6727e240a33ae2d89984224..708a85472dc53fecec1f022ce86512a82d620c30 100644 (file)
--- a/aligncommand.cpp
+++ b/aligncommand.cpp
@@ -218,6 +218,7 @@ int AlignCommand::execute(){
                         string alignFileName = outputDir + getRootName(getSimpleName(candidateFileNames[s])) + "align";
                         string reportFileName = outputDir + getRootName(getSimpleName(candidateFileNames[s])) + "align.report";
                         string accnosFileName = outputDir + getRootName(getSimpleName(candidateFileNames[s])) + "flip.accnos";
+                       bool hasAccnos = true;
                         
                         int numFastaSeqs = 0;
                         for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
@@ -307,7 +308,7 @@ int AlignCommand::execute(){
                                                 mothurOut(" If you set the flip parameter to true mothur will try aligning the reverse compliment as well."); 
                                         }else{  mothurOut(" If the reverse compliment proved to be better it was reported.");  }
                                         mothurOutEndLine();
-                               }
+                               }else{ hasAccnos = false;  }
                         }
  #else
                         ifstream inFASTA;
@@ -320,7 +321,7 @@ int AlignCommand::execute(){
                         driver(lines[0], alignFileName, reportFileName, accnosFileName, candidateFileNames[s]);
                         
                         //delete accnos file if its blank else report to user
-                       if (isBlank(accnosFileName)) {  remove(accnosFileName.c_str());  }
+                       if (isBlank(accnosFileName)) {  remove(accnosFileName.c_str());  hasAccnos = false; }
                         else { 
                                 mothurOut("Some of you sequences generated alignments that eliminated too many bases, a list is provided in " + accnosFileName + ".");
                                 if (!flip) {
@@ -331,7 +332,10 @@ int AlignCommand::execute(){
                         
  #endif
                         
-                       
+                       mothurOut("Output File Names: " + alignFileName + ", " + reportFileName);
+                       if (hasAccnos)  {       mothurOut(", " + accnosFileName + ".");         }
+                       else                    {       mothurOut(".");                                                         }
+                       mothurOutEndLine();
                         
                         mothurOut("It took " + toString(time(NULL) - start) + " secs to align " + toString(numFastaSeqs) + " sequences.");
                         mothurOutEndLine();
diff --git a/bellerophon.cpp b/bellerophon.cpp

index 7c64d4703a4bd3700e0757052c392639f16aa7eb..afb88d2ea29e7b59e971fb6cee9c95f8ffd6e475 100644 (file)
--- a/bellerophon.cpp
+++ b/bellerophon.cpp
@@ -230,7 +230,7 @@ int Bellerophon::getChimeras() {
                 sort(pref.begin(), pref.end(), comparePref);
                 
                 return 0;
-
+               
         }
         catch(exception& e) {
                 errorOut(e, "Bellerophon", "getChimeras");
diff --git a/blastdb.cpp b/blastdb.cpp

index 6b45a48ce379168e82300209a7668c3e07e4d0ab..50a0401b2cb8780765ba56a3ba2c025380c4e212 100644 (file)
--- a/blastdb.cpp
+++ b/blastdb.cpp
@@ -81,6 +81,55 @@ vector<int> BlastDB::findClosestSequences(Sequence* seq, int n) {
  
  }
  /**************************************************************************************************/
+//assumes you have added all the template sequences using the addSequence function and run generateDB.
+map<int, float> BlastDB::findClosest(Sequence* seq, int n) {
+       try{
+               map<int, float> topMatches;
+               
+               ofstream queryFile;
+               openOutputFile(queryFileName, queryFile);
+               queryFile << '>' << seq->getName() << endl;
+               queryFile << seq->getUnaligned() << endl;
+               queryFile.close();
+                               
+               //      the goal here is to quickly survey the database to find the closest match.  To do this we are using the default
+               //      wordsize used in megablast.  I'm sure we're sacrificing accuracy for speed, but anyother way would take way too
+               //      long.  With this setting, it seems comparable in speed to the suffix tree approach.
+       
+               string blastCommand = path + "blast/bin/blastall -p blastn -d " + dbFileName + " -m 8 -W 28 -b " + toString(n) + " -v " + toString(n);
+               blastCommand += (" -i " + queryFileName + " -o " + blastFileName);
+               system(blastCommand.c_str());
+               
+               ifstream m8FileHandle;
+               openInputFile(blastFileName, m8FileHandle);
+       
+               string dummy;
+               int templateAccession;
+               gobble(m8FileHandle);
+//string name = seq->getName();
+//ofstream out;
+//openOutputFileAppend(name, out);     
+               while(!m8FileHandle.eof()){
+                       m8FileHandle >> dummy >> templateAccession >> searchScore;
+//out << dummy << '\t' <<  templateAccession   << '\t' << searchScore << endl;
+                       //get rest of junk in line
+                       while (!m8FileHandle.eof())     {       char c = m8FileHandle.get(); 
+                       //out << c; 
+                       if (c == 10 || c == 13){        break;  }       } 
+                       
+                       gobble(m8FileHandle);
+                       topMatches[templateAccession] = searchScore;
+               }
+               m8FileHandle.close();
+//out.close();         
+               return topMatches;
+       }
+       catch(exception& e) {
+               errorOut(e, "BlastDB", "findClosest");
+               exit(1);
+       }
+}
+/**************************************************************************************************/
  void BlastDB::addSequence(Sequence seq) {
         try {
         
@@ -103,7 +152,7 @@ void BlastDB::addSequence(Sequence seq) {
  void BlastDB::generateDB() {
         try {
         
-               mothurOut("Generating the temporary BLAST database...\t");      cout.flush();
+               //mothurOut("Generating the temporary BLAST database...\t");    cout.flush();
                 
                 path = globaldata->argv;
                 path = path.substr(0, (path.find_last_of('m')));
@@ -111,7 +160,7 @@ void BlastDB::generateDB() {
                 string formatdbCommand = path + "blast/bin/formatdb -p F -o T -i " + dbFileName;        //      format the database, -o option gives us the ability
                 system(formatdbCommand.c_str());                                                                //      to get the right sequence names, i think. -p F
                                                                                                                                         //      option tells formatdb that seqs are DNA, not prot
-               mothurOut("DONE."); mothurOutEndLine(); mothurOutEndLine(); cout.flush();
+               //mothurOut("DONE."); mothurOutEndLine();       mothurOutEndLine(); cout.flush();
         }
         catch(exception& e) {
                 errorOut(e, "BlastDB", "generateDB");
diff --git a/blastdb.hpp b/blastdb.hpp

index c718bb43fb29562b755cdd44e37fa893c7b57f25..fea55c8256e561af5477356b3c5bc545b30d6960 100644 (file)
--- a/blastdb.hpp
+++ b/blastdb.hpp
@@ -23,6 +23,7 @@ public:
         void generateDB();
         void addSequence(Sequence);
         vector<int> findClosestSequences(Sequence*, int);
+       map<int, float> findClosest(Sequence*, int); //template index -> searchscore
  
  private:
         string dbFileName;
diff --git a/ccode.cpp b/ccode.cpp

index c474d7a0ad7cb54729f809cf9921ed234f48d5db..bfcd44d8239a30f8ad18db73f0c06268dc0e7474 100644 (file)
--- a/ccode.cpp
+++ b/ccode.cpp
@@ -13,292 +13,216 @@
  
  
  //***************************************************************************************************************
-Ccode::Ccode(string filename, string temp, string o) {  fastafile = filename;  templateFile = temp;  outputDir = o; }
+Ccode::Ccode(string filename, string o) {  
+       fastafile = filename;  outputDir = o; 
+       distCalc = new eachGapDist();
+       decalc = new DeCalculator();
+       
+       mapInfo = outputDir + getRootName(getSimpleName(fastafile)) + "mapinfo";
+       ofstream out2;
+       openOutputFile(mapInfo, out2);
+               
+       out2 << "Place in masked, filtered and trimmed sequence\tPlace in original alignment" << endl;
+       out2.close();
+}
  //***************************************************************************************************************
-
  Ccode::~Ccode() {
-       try {
-               for (int i = 0; i < querySeqs.size(); i++)              {  delete querySeqs[i];         }
-               for (int i = 0; i < templateSeqs.size(); i++)   {  delete templateSeqs[i];      }
-       }
-       catch(exception& e) {
-               errorOut(e, "Ccode", "~Ccode");
-               exit(1);
-       }
+       delete distCalc;
+       delete decalc;
  }      
  //***************************************************************************************************************
+void Ccode::printHeader(ostream& out) {
+       out << "For full window mapping info refer to " << mapInfo << endl << endl;
+}
+//***************************************************************************************************************
  void Ccode::print(ostream& out) {
         try {
                 
                 mothurOutEndLine();
                 
-               string mapInfo = outputDir + getRootName(getSimpleName(fastafile)) + "mapinfo";
                 ofstream out2;
-               openOutputFile(mapInfo, out2);
-               
-               out2 << "Place in masked, filtered and trimmed sequence\tPlace in original alignment" << endl;
+               openOutputFileAppend(mapInfo, out2);
                 
-               for (int j = 0; j < querySeqs.size(); j++) {
-                       out2 << querySeqs[j]->getName() << endl;
-                       for (it = spotMap[j].begin(); it!= spotMap[j].end(); it++) {
-                               out2 << it->first << '\t' << it->second << endl;
-                       }
+               out2 << querySeq->getName() << endl;
+               for (it = spotMap.begin(); it!= spotMap.end(); it++) {
+                       out2 << it->first << '\t' << it->second << endl;
                 }
                 out2.close();
                 
                 out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
                 
-               out << "For full window mapping info refer to " << mapInfo << endl << endl;
-               
-               for (int i = 0; i < querySeqs.size(); i++) {
-               
-                       out << querySeqs[i]->getName() << endl << endl << "Reference sequences used and distance to query:" << endl;
+               out << querySeq->getName() << endl << endl << "Reference sequences used and distance to query:" << endl;
                         
-                       for (int j = 0; j < closest[i].size(); j++) {
-                               out << setprecision(3) << closest[i][j].seq->getName() << '\t' << closest[i][j].dist << endl;
-                       }
-                       out << endl << endl;
+               for (int j = 0; j < closest.size(); j++) {
+                       out << setprecision(3) << closest[j].seq->getName() << '\t' << closest[j].dist << endl;
+               }
+               out << endl << endl;
                 
-                       //for each window
-                       //window mapping info.
-                       out << "Mapping information: ";
-                       //you mask and did not filter
-                       if ((seqMask != "") && (!filter)) { out << "mask and trim."; }
+               //for each window
+               //window mapping info.
+               out << "Mapping information: ";
+               //you mask and did not filter
+               if ((seqMask != "") && (!filter)) { out << "mask and trim."; }
                                 
-                       //you filtered and did not mask
-                       if ((seqMask == "") && (filter)) { out << "filter and trim."; }
+               //you filtered and did not mask
+               if ((seqMask == "") && (filter)) { out << "filter and trim."; }
                                 
-                       //you masked and filtered
-                       if ((seqMask != "") && (filter)) { out << "mask, filter and trim."; }
+               //you masked and filtered
+               if ((seqMask != "") && (filter)) { out << "mask, filter and trim."; }
                         
-                       out << endl << "Window\tStartPos\tEndPos" << endl;
-                       it = trim[i].begin();
+               out << endl << "Window\tStartPos\tEndPos" << endl;
+               it = trim.begin();
                         
-                       for (int k = 0; k < windows[i].size()-1; k++) {
-                               out << k+1 << '\t' << spotMap[i][windows[i][k]-it->first] << '\t' << spotMap[i][windows[i][k]-it->first+windowSizes[i]] << endl;
-                       }
+               for (int k = 0; k < windows.size()-1; k++) {
+                       out << k+1 << '\t' << spotMap[windows[k]-it->first] << '\t' << spotMap[windows[k]-it->first+windowSizes] << endl;
+               }
                         
-                       out << windows[i].size() << '\t' << spotMap[i][windows[i][windows[i].size()-1]-it->first] << '\t' << spotMap[i][it->second-it->first-1] << endl;
-                       out << endl;
+               out << windows.size() << '\t' << spotMap[windows[windows.size()-1]-it->first] << '\t' << spotMap[it->second-it->first-1] << endl;
+               out << endl;
                         
-                       out << "Window\tAvgQ\t(sdQ)\tAvgR\t(sdR)\tRatio\tAnova" << endl;
-                       for (int k = 0; k < windows[i].size(); k++) {
-                               float ds = averageQuery[i][k] / averageRef[i][k]; 
-                               out << k+1 << '\t' << averageQuery[i][k] << '\t' << sdQuery[i][k] << '\t' << averageRef[i][k] << '\t'<< sdRef[i][k] << '\t' << ds << '\t' << anova[i][k] << endl;
-                       }
-                       out << endl;
+               out << "Window\tAvgQ\t(sdQ)\tAvgR\t(sdR)\tRatio\tAnova" << endl;
+               for (int k = 0; k < windows.size(); k++) {
+                       float ds = averageQuery[k] / averageRef[k]; 
+                       out << k+1 << '\t' << averageQuery[k] << '\t' << sdQuery[k] << '\t' << averageRef[k] << '\t'<< sdRef[k] << '\t' << ds << '\t' << anova[k] << endl;
+               }
+               out << endl;
                         
-                       //varRef
-                       //varQuery
-                       /* F test for differences among variances.
-                       * varQuery is expected to be higher or similar than varRef */
-                       //float fs = varQuery[query][i] / varRef[query][i];     /* F-Snedecor, test for differences of variances */
+               //varRef
+               //varQuery
+               /* F test for differences among variances.
+               * varQuery is expected to be higher or similar than varRef */
+               //float fs = varQuery[query] / varRef[query];   /* F-Snedecor, test for differences of variances */
                         
-                       bool results = false;   
+               bool results = false;   
                                                 
-                       //confidence limit, t - Student, anova
-                       out << "Window\tConfidenceLimit\tt-Student\tAnova" << endl;
+               //confidence limit, t - Student, anova
+               out << "Window\tConfidenceLimit\tt-Student\tAnova" << endl;
                         
-                       for (int k = 0; k < windows[i].size(); k++) {
-                               string temp = "";
-                               if (isChimericConfidence[i][k]) {  temp += "*\t"; }
-                               else { temp += "\t"; }
+               for (int k = 0; k < windows.size(); k++) {
+                       string temp = "";
+                       if (isChimericConfidence[k]) {  temp += "*\t"; }
+                       else { temp += "\t"; }
                                 
-                               if (isChimericTStudent[i][k]) {  temp += "*\t"; }
-                               else { temp += "\t"; }
+                       if (isChimericTStudent[k]) {  temp += "*\t"; }
+                       else { temp += "\t"; }
                                 
-                               if (isChimericANOVA[i][k]) {  temp += "*\t"; }
-                               else { temp += "\t"; }
+                       if (isChimericANOVA[k]) {  temp += "*\t"; }
+                       else { temp += "\t"; }
                         
-                               out << k+1 << '\t' << temp << endl;
+                       out << k+1 << '\t' << temp << endl;
                                 
-                               if (temp == "*\t*\t*\t") {  results = true;  }
-                       }
-                       out << endl;    
+                       if (temp == "*\t*\t*\t") {  results = true;  }
+               }
+               out << endl;    
                         
-                       if (results) {
-                               mothurOut(querySeqs[i]->getName() + " was found have at least one chimeric window."); mothurOutEndLine();
-                       }
+               if (results) {
+                       mothurOut(querySeq->getName() + " was found have at least one chimeric window."); mothurOutEndLine();
                 }
+               
         }
         catch(exception& e) {
                 errorOut(e, "Ccode", "print");
                 exit(1);
         }
  }
-
  //***************************************************************************************************************
-int Ccode::getChimeras() {
+int Ccode::getChimeras(Sequence* query) {
         try {
-               
-               //read in query sequences and subject sequences
-               mothurOut("Reading sequences and template file... "); cout.flush();
-               querySeqs = readSeqs(fastafile);
-               templateSeqs = readSeqs(templateFile);
-               mothurOut("Done."); mothurOutEndLine();
-               
-               int numSeqs = querySeqs.size();
-               
-               if (unaligned) { mothurOut("Your sequences need to be aligned when you use the bellerophon ccode."); mothurOutEndLine(); return 1;  }
-               
-               closest.resize(numSeqs);
-               
-               refCombo.resize(numSeqs, 0);
-               sumRef.resize(numSeqs); 
-               varRef.resize(numSeqs); 
-               varQuery.resize(numSeqs); 
-               sdRef.resize(numSeqs); 
-               sdQuery.resize(numSeqs);     
-               sumQuery.resize(numSeqs);
-               sumSquaredRef.resize(numSeqs); 
-               sumSquaredQuery.resize(numSeqs); 
-               averageRef.resize(numSeqs);
-               averageQuery.resize(numSeqs);
-               anova.resize(numSeqs);
-               isChimericConfidence.resize(numSeqs);
-               isChimericTStudent.resize(numSeqs);
-               isChimericANOVA.resize(numSeqs);
-               trim.resize(numSeqs);
-               spotMap.resize(numSeqs);
-               windowSizes.resize(numSeqs, window);
-               windows.resize(numSeqs);
-               
-               //break up file if needed
-               int linesPerProcess = numSeqs / processors ;
-               
-               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
-                       //find breakup of sequences for all times we will Parallelize
-                       if (processors == 1) {   lines.push_back(new linePair(0, numSeqs));  }
-                       else {
-                               //fill line pairs
-                               for (int i = 0; i < (processors-1); i++) {                      
-                                       lines.push_back(new linePair((i*linesPerProcess), ((i*linesPerProcess) + linesPerProcess)));
-                               }
-                               //this is necessary to get remainder of processors / numSeqs so you don't miss any lines at the end
-                               int i = processors - 1;
-                               lines.push_back(new linePair((i*linesPerProcess), numSeqs));
-                       }
-                       
-               #else
-                       lines.push_back(new linePair(0, numSeqs));
-               #endif
         
-               distCalc = new eachGapDist();
-               decalc = new DeCalculator();
-               
-               //find closest
-               if (processors == 1) { 
-                       mothurOut("Finding top matches for sequences... "); cout.flush();
-                       closest = findClosest(lines[0]->start, lines[0]->end, numWanted);
-                       mothurOut("Done."); mothurOutEndLine();
-               }else {         createProcessesClosest();               }
+               closest.clear();
+               refCombo = 0;
+               sumRef.clear(); 
+               varRef.clear(); 
+               varQuery.clear(); 
+               sdRef.clear(); 
+               sdQuery.clear();     
+               sumQuery.clear();
+               sumSquaredRef.clear(); 
+               sumSquaredQuery.clear(); 
+               averageRef.clear();
+               averageQuery.clear();
+               anova.clear();
+               isChimericConfidence.clear();
+               isChimericTStudent.clear();
+               isChimericANOVA.clear();
+               trim.clear();
+               spotMap.clear();
+               windowSizes = window;
+               windows.clear();
  
-               //initialize spotMap
-               for (int j = 0; j < numSeqs; j++) {
-                       for (int i = 0; i < querySeqs[0]->getAligned().length(); i++) {
-                               spotMap[j][i] = i;
-                       }
-               }
+       
+               querySeq = query;
+               
+               //find closest matches to query
+               closest = findClosest(query, numWanted);
                 
+               //initialize spotMap
+               for (int i = 0; i < query->getAligned().length(); i++) {        spotMap[i] = i;         }
+       
                 //mask sequences if the user wants to 
                 if (seqMask != "") {
                         decalc->setMask(seqMask);
                         
-                       //mask querys
-                       for (int i = 0; i < querySeqs.size(); i++) {
-                               decalc->runMask(querySeqs[i]);
-                       }
-               
-                       //mask templates
-                       for (int i = 0; i < templateSeqs.size(); i++) {
-                               decalc->runMask(templateSeqs[i]);
-                       }
+                       decalc->runMask(query);
                         
-                       for (int i = 0; i < numSeqs; i++) {
-                               spotMap[i] = decalc->getMaskMap();
-                       }
+                       //mask closest
+                       for (int i = 0; i < closest.size(); i++) {      decalc->runMask(closest[i].seq);        }
+                       
+                       spotMap = decalc->getMaskMap();
                 }
                 
                 if (filter) {
-                       vector<Sequence*> temp = templateSeqs;
-                       for (int i = 0; i < querySeqs.size(); i++) { temp.push_back(querySeqs[i]);  }
+                       vector<Sequence*> temp;
+                       for (int i = 0; i < closest.size(); i++) { temp.push_back(closest[i].seq);  }
+                       temp.push_back(query);  
                         
                         createFilter(temp);
                 
-                       runFilter(querySeqs);
-                       runFilter(templateSeqs);
+                       for (int i = 0; i < temp.size(); i++) { runFilter(temp[i]);  }
                         
                         //update spotMap
                         map<int, int> newMap;
                         int spot = 0;
-                       int j = 0;
                         
                         for (int i = 0; i < filterString.length(); i++) {
                                 if (filterString[i] == '1') {
                                         //add to newMap
-                                       newMap[spot] = spotMap[j][i];
+                                       newMap[spot] = spotMap[i];
                                         spot++;  
                                 }
                         }
-                       
-                       for (int i = 0; i < numSeqs; i++) {
-                               spotMap[i] = newMap;
-                       }
+                       spotMap = newMap;
                 }
  
                 //trim sequences - this follows ccodes remove_extra_gaps 
-               for (int i = 0; i < querySeqs.size(); i++) {
-                       trimSequences(i);
-               }
+               trimSequences(query);
+               
                 
                 //windows are equivalent to words - ccode paper recommends windows are between 5% and 20% on alignment length().  
                 //Our default will be 10% and we will warn if user tries to use a window above or below these recommendations
-               for (int i = 0; i < querySeqs.size(); i++) {
-                       windows[i] = findWindows(i);  
-               }
+               windows = findWindows();  
+               
  
                 //remove sequences that are more than 20% different and less than 0.5% different - may want to allow user to specify this later 
-               if (processors == 1) { 
-                       for (int i = 0; i < closest.size(); i++) {
-                               removeBadReferenceSeqs(closest[i], i);
-                       }
-               }else {         createProcessesRemoveBad();             }
-
+               removeBadReferenceSeqs(closest);
                 
-               if (processors == 1) { 
-                       //find the averages for each querys references
-                       for (int i = 0; i < numSeqs; i++) {
-                               getAverageRef(closest[i], i);  //fills sumRef[i], averageRef[i], sumSquaredRef[i] and refCombo[i].
-                       }
-                       
-                       //find the averages for the query 
-                       for (int i = 0; i < numSeqs; i++) {
-                               getAverageQuery(closest[i], i);  //fills sumQuery[i], averageQuery[i], sumSquaredQuery[i].
-                       }
-               }else {         createProcessesAverages();              }
                 
-               if (processors == 1) { 
-                       //find the averages for each querys references 
-                       for (int i = 0; i < numSeqs; i++) {
-                               findVarianceRef(i);  //fills varRef[i] and sdRef[i] also sets minimum error rate to 0.001 to avoid divide by 0.
-                       }
-                       
-                       //find the averages for the query 
-                       for (int i = 0; i < numSeqs; i++) {
-                               findVarianceQuery(i);  //fills varQuery[i] and sdQuery[i] also sets minimum error rate to 0.001 to avoid divide by 0.
-                       }
-               }else {         createProcessesVariances();             }
+               //find the averages for each querys references
+               getAverageRef(closest);  //fills sumRef, averageRef, sumSquaredRef and refCombo.
+               getAverageQuery(closest, query);  //fills sumQuery, averageQuery, sumSquaredQuery.
+                                       
                 
-               if (processors == 1) { 
-                       for (int i = 0; i < numSeqs; i++) {
-                               determineChimeras(i);  //fills anova, isChimericConfidence[i], isChimericTStudent[i] and isChimericANOVA[i]. 
-                       }
-               }else {         createProcessesDetermine();             }
+               //find the averages for each querys references 
+               findVarianceRef();  //fills varRef and sdRef also sets minimum error rate to 0.001 to avoid divide by 0.
+                       
+                       
+               //find the averages for the query 
+               findVarianceQuery();  //fills varQuery and sdQuery also sets minimum error rate to 0.001 to avoid divide by 0.
+                                       
+               determineChimeras();  //fills anova, isChimericConfidence, isChimericTStudent and isChimericANOVA. 
                 
                 //free memory
-               for (int i = 0; i < lines.size(); i++)                                  {       delete lines[i];                                }
-               delete distCalc;
-               delete decalc;
+               for (int i = 0; i < closest.size(); i++) {  delete closest[i].seq;  }
                 
                 return 0;
         }
@@ -309,17 +233,17 @@ int Ccode::getChimeras() {
  }
  /***************************************************************************************************************/
  //ccode algo says it does this to "Removes the initial and final gaps to avoid biases due to incomplete sequences."
-void Ccode::trimSequences(int query) {
+void Ccode::trimSequences(Sequence* query) {
         try {
                 
                 int frontPos = 0;  //should contain first position in all seqs that is not a gap character
-               int rearPos = querySeqs[query]->getAligned().length();
+               int rearPos = query->getAligned().length();
                 
                 //********find first position in closest seqs that is a non gap character***********//
                 //find first position all query seqs that is a non gap character
-               for (int i = 0; i < closest[query].size(); i++) {
+               for (int i = 0; i < closest.size(); i++) {
                         
-                       string aligned = closest[query][i].seq->getAligned();
+                       string aligned = closest[i].seq->getAligned();
                         int pos = 0;
                         
                         //find first spot in this seq
@@ -335,7 +259,7 @@ void Ccode::trimSequences(int query) {
                 }
                 
                 //find first position all querySeq[query] that is a non gap character
-               string aligned = querySeqs[query]->getAligned();
+               string aligned = query->getAligned();
                 int pos = 0;
                         
                 //find first spot in this seq
@@ -351,9 +275,9 @@ void Ccode::trimSequences(int query) {
                 
                 
                 //********find last position in closest seqs that is a non gap character***********//
-               for (int i = 0; i < closest[query].size(); i++) {
+               for (int i = 0; i < closest.size(); i++) {
                         
-                       string aligned = closest[query][i].seq->getAligned();
+                       string aligned = closest[i].seq->getAligned();
                         int pos = aligned.length();
                         
                         //find first spot in this seq
@@ -369,7 +293,7 @@ void Ccode::trimSequences(int query) {
                 }
                 
                 //find last position all querySeqs[query] that is a non gap character
-               aligned = querySeqs[query]->getAligned();
+               aligned = query->getAligned();
                 pos = aligned.length();
                 
                 //find first spot in this seq
@@ -391,7 +315,7 @@ void Ccode::trimSequences(int query) {
                 tempTrim[frontPos] = rearPos;
                 
                 //save trimmed locations
-               trim[query] = tempTrim;
+               trim = tempTrim;
                                                 
                 //update spotMask
                 map<int, int> newMap;
@@ -399,56 +323,45 @@ void Ccode::trimSequences(int query) {
                 
                 for (int i = frontPos; i < rearPos; i++) {
                         //add to newMap
-//cout << query << '\t' << i << '\t' << spotMap[query][i] << endl;
-                       newMap[spot] = spotMap[query][i];
+                       newMap[spot] = spotMap[i];
                         spot++;  
                 }
-               
-               //for (it = newMap.begin(); it!=newMap.end(); it++) {
-                       //cout << query << '\t' << it->first << '\t' << it->second << endl;
-               //}
-               
-               spotMap[query] = newMap;
-
-               
+               spotMap = newMap;
         }
         catch(exception& e) {
                 errorOut(e, "Ccode", "trimSequences");
                 exit(1);
         }
-
  }
  /***************************************************************************************************************/
-vector<int> Ccode::findWindows(int query) {
+vector<int> Ccode::findWindows() {
         try {
                 
                 vector<int> win; 
-               it = trim[query].begin();
+               it = trim.begin();
                 
                 int length = it->second - it->first;
         
                 //default is wanted = 10% of total length
-               if (windowSizes[query] > length) { 
+               if (windowSizes > length) { 
                         mothurOut("You have slected a window larger than your sequence length after all filters, masks and trims have been done. I will use the default 10% of sequence length.");
-                       windowSizes[query] = length / 10;
-               }else if (windowSizes[query] == 0) { windowSizes[query] = length / 10;  }
-               else if (windowSizes[query] > (length * 0.20)) {
+                       windowSizes = length / 10;
+               }else if (windowSizes == 0) { windowSizes = length / 10;  }
+               else if (windowSizes > (length * 0.20)) {
                         mothurOut("You have selected a window that is larger than 20% of your sequence length.  This is not recommended, but I will continue anyway."); mothurOutEndLine();
-               }else if (windowSizes[query] < (length * 0.05)) {
+               }else if (windowSizes < (length * 0.05)) {
                         mothurOut("You have selected a window that is smaller than 5% of your sequence length.  This is not recommended, but I will continue anyway."); mothurOutEndLine();
                 }
                 
                 //save starting points of each window
-               for (int m = it->first;  m < (it->second-windowSizes[query]); m+=windowSizes[query]) {  win.push_back(m);  }
+               for (int m = it->first;  m < (it->second-windowSizes); m+=windowSizes) {  win.push_back(m);  }
                 
                 //save last window
                 if (win[win.size()-1] < (it->first+length)) {
-                       win.push_back(win[win.size()-1]+windowSizes[query]); // ex. string length is 115, window is 25, without this you would get 0, 25, 50, 75
+                       win.push_back(win[win.size()-1]+windowSizes); // ex. string length is 115, window is 25, without this you would get 0, 25, 50, 75
                 }                                                                                                                                                                                                       //with this you would get 1,25,50,75,100
                 
-
                 return win;
-       
         }
         catch(exception& e) {
                 errorOut(e, "Ccode", "findWindows");
@@ -512,7 +425,7 @@ int Ccode::getDiff(string seqA, string seqB) {
  }
  //***************************************************************************************************************
  //tried to make this look most like ccode original implementation
-void Ccode::removeBadReferenceSeqs(vector<SeqDist>& seqs, int query) {
+void Ccode::removeBadReferenceSeqs(vector<SeqDist>& seqs) {
         try {
                 
                 vector< vector<int> > numDiffBases;
@@ -520,7 +433,7 @@ void Ccode::removeBadReferenceSeqs(vector<SeqDist>& seqs, int query) {
                 //initialize to 0
                 for (int i = 0; i < numDiffBases.size(); i++) { numDiffBases[i].resize(seqs.size(),0); }
                 
-               it = trim[query].begin();
+               it = trim.begin();
                 int length = it->second - it->first;
                 
                 //calc differences from each sequence to everyother seq in the set
@@ -576,9 +489,8 @@ void Ccode::removeBadReferenceSeqs(vector<SeqDist>& seqs, int query) {
                         seqs = goodSeqs;
                         
                 }else { //warn, but dont remove any
-                       mothurOut(querySeqs[query]->getName() + " does not have an adaquate number of reference sequences that are within 20% and 0.5% similarity.  I will continue, but please check."); mothurOutEndLine();  
+                       mothurOut(querySeq->getName() + " does not have an adaquate number of reference sequences that are within 20% and 0.5% similarity.  I will continue, but please check."); mothurOutEndLine();  
                 }
-                       
  
         }
         catch(exception& e) {
@@ -587,46 +499,33 @@ void Ccode::removeBadReferenceSeqs(vector<SeqDist>& seqs, int query) {
         }
  }
  //***************************************************************************************************************
-vector< vector<SeqDist> > Ccode::findClosest(int start, int end, int numWanted) {
+//makes copy of templateseq for filter
+vector<SeqDist>  Ccode::findClosest(Sequence* q, int numWanted) {
         try{
         
-               vector< vector<SeqDist> > topMatches;  topMatches.resize(querySeqs.size());
-       
-               float smallestOverall, smallestLeft, smallestRight;
-               smallestOverall = 1000;  smallestLeft = 1000;  smallestRight = 1000;
+               vector<SeqDist>  topMatches;  
                 
-               //for each sequence in querySeqs - find top matches to use as reference
-               for(int j = start; j < end; j++){
-                       
-                       Sequence query = *(querySeqs[j]);
+               Sequence query = *(q);
                         
-                       vector<SeqDist> distances;
+               //calc distance to each sequence in template seqs
+               for (int i = 0; i < templateSeqs.size(); i++) {
                         
-                       //calc distance to each sequence in template seqs
-                       for (int i = 0; i < templateSeqs.size(); i++) {
-                       
-                               Sequence ref = *(templateSeqs[i]); 
+                       Sequence ref = *(templateSeqs[i]); 
                                         
-                               //find overall dist
-                               distCalc->calcDist(query, ref);
-                               float dist = distCalc->getDist();       
+                       //find overall dist
+                       distCalc->calcDist(query, ref);
+                       float dist = distCalc->getDist();       
                                 
-                               //save distance
-                               SeqDist temp;
-                               temp.seq = templateSeqs[i];
-                               temp.dist = dist;
+                       //save distance
+                       SeqDist temp;
+                       temp.seq = new Sequence(templateSeqs[i]->getName(), templateSeqs[i]->getAligned());
+                       temp.dist = dist;
  
-                               distances.push_back(temp);
-                       }
-                       
-                       sort(distances.begin(), distances.end(), compareSeqDist);
-                       
-                       //save the number of top matches wanted
-                       for (int h = 0; h < numWanted; h++) {
-                               topMatches[j].push_back(distances[h]);
-                       }
+                       topMatches.push_back(temp);
                 }
                         
+               sort(topMatches.begin(), topMatches.end(), compareSeqDist);
+                       
                 return topMatches;
  
         }
@@ -637,21 +536,21 @@ vector< vector<SeqDist> > Ccode::findClosest(int start, int end, int numWanted)
  }
  /**************************************************************************************************/
  //find the distances from each reference sequence to every other reference sequence for each window for this query
-void Ccode::getAverageRef(vector<SeqDist> ref, int query) {
+void Ccode::getAverageRef(vector<SeqDist> ref) {
         try {
                 
-               vector< vector< vector<int> > > diffs;  //diffs[0][1][2] is the number of differences between ref seq 0 and ref seq 1 at window 2.
+               vector< vector< vector<int> > >  diffs;  //diffs[0][1][2] is the number of differences between ref seq 0 and ref seq 1 at window 2.
                 
                 //initialize diffs vector
                 diffs.resize(ref.size());
                 for (int i = 0; i < diffs.size(); i++) {  
                         diffs[i].resize(ref.size());
                         for (int j = 0; j < diffs[i].size(); j++) {
-                               diffs[i][j].resize(windows[query].size(), 0);
+                               diffs[i][j].resize(windows.size(), 0);
                         }
                 }
                 
-               it = trim[query].begin();
+               it = trim.begin();
                                 
                 //find the distances from each reference sequence to every other reference sequence for each window for this query              
                 for (int i = 0; i < ref.size(); i++) {
@@ -663,23 +562,23 @@ void Ccode::getAverageRef(vector<SeqDist> ref, int query) {
                         
                                 string refJ = ref[j].seq->getAligned();
                         
-                               for (int k = 0; k < windows[query].size(); k++) {
+                               for (int k = 0; k < windows.size(); k++) {
                                         
                                         string refIWindowk, refJWindowk;
                                         
-                                       if (k < windows[query].size()-1) {
+                                       if (k < windows.size()-1) {
                                                 //get window strings
-                                               refIWindowk = refI.substr(windows[query][k], windowSizes[query]);
-                                               refJWindowk = refJ.substr(windows[query][k], windowSizes[query]);
+                                               refIWindowk = refI.substr(windows[k], windowSizes);
+                                               refJWindowk = refJ.substr(windows[k], windowSizes);
                                         }else { //last window may be smaller than rest - see findwindows
                                                 //get window strings
-                                               refIWindowk = refI.substr(windows[query][k], (it->second-windows[query][k]));
-                                               refJWindowk = refJ.substr(windows[query][k], (it->second-windows[query][k]));
+                                               refIWindowk = refI.substr(windows[k], (it->second-windows[k]));
+                                               refJWindowk = refJ.substr(windows[k], (it->second-windows[k]));
                                         }
                                         
                                         //find differences
                                         int diff = getDiff(refIWindowk, refJWindowk);
-//cout <<  i << '\t' << j << '\t' << k << '\t' << diff << endl;
+
                                         //save differences in [i][j][k] and [j][i][k] since they are the same
                                         diffs[i][j][k] = diff;
                                         diffs[j][i][k] = diff;
@@ -691,20 +590,20 @@ void Ccode::getAverageRef(vector<SeqDist> ref, int query) {
                 }//i
                 
                 //initialize sumRef for this query 
-               sumRef[query].resize(windows[query].size(), 0);
-               sumSquaredRef[query].resize(windows[query].size(), 0);
-               averageRef[query].resize(windows[query].size(), 0);
+               sumRef.resize(windows.size(), 0);
+               sumSquaredRef.resize(windows.size(), 0);
+               averageRef.resize(windows.size(), 0);
                 
                 //find the sum of the differences for hte reference sequences
                 for (int i = 0; i < diffs.size(); i++) {
                         for (int j = 0; j < i; j++) {
                         
                                 //increment this querys reference sequences combos
-                               refCombo[query]++;
+                               refCombo++;
                                 
                                 for (int k = 0; k < diffs[i][j].size(); k++) {
-                                       sumRef[query][k] += diffs[i][j][k];
-                                       sumSquaredRef[query][k] += (diffs[i][j][k]*diffs[i][j][k]);
+                                       sumRef[k] += diffs[i][j][k];
+                                       sumSquaredRef[k] += (diffs[i][j][k]*diffs[i][j][k]);
                                 }//k
                                 
                         }//j
@@ -712,8 +611,8 @@ void Ccode::getAverageRef(vector<SeqDist> ref, int query) {
  
                 
                 //find the average of the differences for the references for each window
-               for (int i = 0; i < windows[query].size(); i++) {
-                       averageRef[query][i] = sumRef[query][i] / (float) refCombo[query];
+               for (int i = 0; i < windows.size(); i++) {
+                       averageRef[i] = sumRef[i] / (float) refCombo;
                 }
                 
         }
@@ -723,7 +622,7 @@ void Ccode::getAverageRef(vector<SeqDist> ref, int query) {
         }
  }
  /**************************************************************************************************/
-void Ccode::getAverageQuery (vector<SeqDist> ref, int query) {
+void Ccode::getAverageQuery (vector<SeqDist> ref, Sequence* query) {
         try {
         
                 vector< vector<int> >  diffs;  //diffs[1][2] is the number of differences between querySeqs[query] and ref seq 1 at window 2.
@@ -731,35 +630,35 @@ void Ccode::getAverageQuery (vector<SeqDist> ref, int query) {
                 //initialize diffs vector
                 diffs.resize(ref.size());
                 for (int j = 0; j < diffs.size(); j++) {
-                       diffs[j].resize(windows[query].size(), 0);
+                       diffs[j].resize(windows.size(), 0);
                 }
                 
-               it = trim[query].begin();
+               it = trim.begin();
                                                         
-               string refQuery = querySeqs[query]->getAligned();
+               string refQuery = query->getAligned();
                          
                 //j<i, so you don't find distances from i to j and then j to i.
                 for (int j = 0; j < ref.size(); j++) {
                          
                          string refJ = ref[j].seq->getAligned();
                          
-                        for (int k = 0; k < windows[query].size(); k++) {
+                        for (int k = 0; k < windows.size(); k++) {
                                         
                                         string QueryWindowk, refJWindowk;
                                         
-                                       if (k < windows[query].size()-1) {
+                                       if (k < windows.size()-1) {
                                                 //get window strings
-                                               QueryWindowk = refQuery.substr(windows[query][k], windowSizes[query]);
-                                               refJWindowk = refJ.substr(windows[query][k], windowSizes[query]);                                       
+                                               QueryWindowk = refQuery.substr(windows[k], windowSizes);
+                                               refJWindowk = refJ.substr(windows[k], windowSizes);                                     
                                         }else { //last window may be smaller than rest - see findwindows
                                                 //get window strings
-                                               QueryWindowk = refQuery.substr(windows[query][k], (it->second-windows[query][k]));
-                                               refJWindowk = refJ.substr(windows[query][k], (it->second-windows[query][k]));
+                                               QueryWindowk = refQuery.substr(windows[k], (it->second-windows[k]));
+                                               refJWindowk = refJ.substr(windows[k], (it->second-windows[k]));
                                         }
                                         
                                         //find differences
                                         int diff = getDiff(QueryWindowk, refJWindowk);
-//cout  << j << '\t' << k << '\t' << diff << endl;                                             
+                                       
                                         //save differences 
                                         diffs[j][k] = diff;
                          
@@ -768,25 +667,23 @@ void Ccode::getAverageQuery (vector<SeqDist> ref, int query) {
                          
                 
                 //initialize sumRef for this query 
-               sumQuery[query].resize(windows[query].size(), 0);
-               sumSquaredQuery[query].resize(windows[query].size(), 0);
-               averageQuery[query].resize(windows[query].size(), 0);
+               sumQuery.resize(windows.size(), 0);
+               sumSquaredQuery.resize(windows.size(), 0);
+               averageQuery.resize(windows.size(), 0);
                 
                 //find the sum of the differences 
                 for (int j = 0; j < diffs.size(); j++) {
                         for (int k = 0; k < diffs[j].size(); k++) {
-                               sumQuery[query][k] += diffs[j][k];
-                               sumSquaredQuery[query][k] += (diffs[j][k]*diffs[j][k]);
+                               sumQuery[k] += diffs[j][k];
+                               sumSquaredQuery[k] += (diffs[j][k]*diffs[j][k]);
                         }//k
                 }//j
                 
                 
                 //find the average of the differences for the references for each window
-               for (int i = 0; i < windows[query].size(); i++) {
-                       averageQuery[query][i] = sumQuery[query][i] / (float) ref.size();
+               for (int i = 0; i < windows.size(); i++) {
+                       averageQuery[i] = sumQuery[i] / (float) ref.size();
                 }
-
-       
         }
         catch(exception& e) {
                 errorOut(e, "Ccode", "getAverageQuery");
@@ -794,23 +691,23 @@ void Ccode::getAverageQuery (vector<SeqDist> ref, int query) {
         }
  }
  /**************************************************************************************************/
-void Ccode::findVarianceRef (int query) {
+void Ccode::findVarianceRef() {
         try {
                 
-               varRef[query].resize(windows[query].size(), 0);
-               sdRef[query].resize(windows[query].size(), 0);
+               varRef.resize(windows.size(), 0);
+               sdRef.resize(windows.size(), 0);
                 
                 //for each window
-               for (int i = 0; i < windows[query].size(); i++) {
-                       varRef[query][i] = (sumSquaredRef[query][i] - ((sumRef[query][i]*sumRef[query][i])/(float)refCombo[query])) / (float)(refCombo[query]-1);
-                       sdRef[query][i] = sqrt(varRef[query][i]);
+               for (int i = 0; i < windows.size(); i++) {
+                       varRef[i] = (sumSquaredRef[i] - ((sumRef[i]*sumRef[i])/(float)refCombo)) / (float)(refCombo-1);
+                       sdRef[i] = sqrt(varRef[i]);
                         
                         //set minimum error rate to 0.001 - to avoid potential divide by zero - not sure if this is necessary but it follows ccode implementation
-                       if (averageRef[query][i] < 0.001)                       {       averageRef[query][i] = 0.001;           }
-                       if (sumRef[query][i] < 0.001)                           {       sumRef[query][i] = 0.001;                       }
-                       if (varRef[query][i] < 0.001)                           {       varRef[query][i] = 0.001;                       }
-                       if (sumSquaredRef[query][i] < 0.001)            {       sumSquaredRef[query][i] = 0.001;        }
-                       if (sdRef[query][i] < 0.001)                            {       sdRef[query][i] = 0.001;                        }
+                       if (averageRef[i] < 0.001)                      {       averageRef[i] = 0.001;          }
+                       if (sumRef[i] < 0.001)                          {       sumRef[i] = 0.001;                      }
+                       if (varRef[i] < 0.001)                          {       varRef[i] = 0.001;                      }
+                       if (sumSquaredRef[i] < 0.001)           {       sumSquaredRef[i] = 0.001;       }
+                       if (sdRef[i] < 0.001)                           {       sdRef[i] = 0.001;                       }
                                 
                 }
         }
@@ -820,22 +717,22 @@ void Ccode::findVarianceRef (int query) {
         }
  }
  /**************************************************************************************************/
-void Ccode::findVarianceQuery (int query) {
+void Ccode::findVarianceQuery() {
         try {
-               varQuery[query].resize(windows[query].size(), 0);
-               sdQuery[query].resize(windows[query].size(), 0);
+               varQuery.resize(windows.size(), 0);
+               sdQuery.resize(windows.size(), 0);
                 
                 //for each window
-               for (int i = 0; i < windows[query].size(); i++) {
-                       varQuery[query][i] = (sumSquaredQuery[query][i] - ((sumQuery[query][i]*sumQuery[query][i])/(float) closest[query].size())) / (float) (closest[query].size()-1);
-                       sdQuery[query][i] = sqrt(varQuery[query][i]);
+               for (int i = 0; i < windows.size(); i++) {
+                       varQuery[i] = (sumSquaredQuery[i] - ((sumQuery[i]*sumQuery[i])/(float) closest.size())) / (float) (closest.size()-1);
+                       sdQuery[i] = sqrt(varQuery[i]);
                         
                         //set minimum error rate to 0.001 - to avoid potential divide by zero - not sure if this is necessary but it follows ccode implementation
-                       if (averageQuery[query][i] < 0.001)                     {       averageQuery[query][i] = 0.001;         }
-                       if (sumQuery[query][i] < 0.001)                         {       sumQuery[query][i] = 0.001;                     }
-                       if (varQuery[query][i] < 0.001)                         {       varQuery[query][i] = 0.001;                     }
-                       if (sumSquaredQuery[query][i] < 0.001)          {       sumSquaredQuery[query][i] = 0.001;      }
-                       if (sdQuery[query][i] < 0.001)                          {       sdQuery[query][i] = 0.001;                      }
+                       if (averageQuery[i] < 0.001)                    {       averageQuery[i] = 0.001;                }
+                       if (sumQuery[i] < 0.001)                                {       sumQuery[i] = 0.001;                    }
+                       if (varQuery[i] < 0.001)                                {       varQuery[i] = 0.001;                    }
+                       if (sumSquaredQuery[i] < 0.001)         {       sumSquaredQuery[i] = 0.001;     }
+                       if (sdQuery[i] < 0.001)                         {       sdQuery[i] = 0.001;                     }
                 }
  
         }
@@ -845,44 +742,44 @@ void Ccode::findVarianceQuery (int query) {
         }
  }
  /**************************************************************************************************/
-void Ccode::determineChimeras (int query) {
+void Ccode::determineChimeras() {
         try {
                 
-               isChimericConfidence[query].resize(windows[query].size(), false);
-               isChimericTStudent[query].resize(windows[query].size(), false);
-               isChimericANOVA[query].resize(windows[query].size(), false);
-               anova[query].resize(windows[query].size());
+               isChimericConfidence.resize(windows.size(), false);
+               isChimericTStudent.resize(windows.size(), false);
+               isChimericANOVA.resize(windows.size(), false);
+               anova.resize(windows.size());
  
                 
                 //for each window
-               for (int i = 0; i < windows[query].size(); i++) {
+               for (int i = 0; i < windows.size(); i++) {
                 
                         //get confidence limits
-                       float t = getT(closest[query].size()-1);  //how many seqs you are comparing to this querySeq
-                       float dsUpper = (averageQuery[query][i] + (t * sdQuery[query][i])) / averageRef[query][i]; 
-                       float dsLower = (averageQuery[query][i] - (t * sdQuery[query][i])) / averageRef[query][i]; 
-//cout << t << '\t' << "ds upper = " << dsUpper << " dsLower = " << dsLower << endl;                   
-                       if ((dsUpper > 1.0) && (dsLower > 1.0) && (averageQuery[query][i] > averageRef[query][i])) {  /* range does not include 1 */
-                                       isChimericConfidence[query][i] = true;   /* significantly higher at P<0.05 */ 
-//cout << i << " made it here" << endl;
+                       float t = getT(closest.size()-1);  //how many seqs you are comparing to this querySeq
+                       float dsUpper = (averageQuery[i] + (t * sdQuery[i])) / averageRef[i]; 
+                       float dsLower = (averageQuery[i] - (t * sdQuery[i])) / averageRef[i]; 
+               
+                       if ((dsUpper > 1.0) && (dsLower > 1.0) && (averageQuery[i] > averageRef[i])) {  /* range does not include 1 */
+                                       isChimericConfidence[i] = true;   /* significantly higher at P<0.05 */ 
+
                         }
                         
                         //student t test
-                       int degreeOfFreedom = refCombo[query] + closest[query].size() - 2;
-                       float denomForT = (((refCombo[query]-1) * varQuery[query][i] + (closest[query].size() - 1) * varRef[query][i]) / (float) degreeOfFreedom) * ((refCombo[query] + closest[query].size()) / (float) (refCombo[query] * closest[query].size()));    /* denominator, without sqrt(), for ts calculations */
+                       int degreeOfFreedom = refCombo + closest.size() - 2;
+                       float denomForT = (((refCombo-1) * varQuery[i] + (closest.size() - 1) * varRef[i]) / (float) degreeOfFreedom) * ((refCombo + closest.size()) / (float) (refCombo * closest.size()));    /* denominator, without sqrt(), for ts calculations */
                                 
-                       float ts = fabs((averageQuery[query][i] - averageRef[query][i]) / (sqrt(denomForT)));  /* value of ts for t-student test */     
+                       float ts = fabs((averageQuery[i] - averageRef[i]) / (sqrt(denomForT)));  /* value of ts for t-student test */   
                         t = getT(degreeOfFreedom);
-//cout << i << '\t' << t << '\t' << ts << endl;                        
-                       if ((ts >= t) && (averageQuery[query][i] > averageRef[query][i])) {   
-                               isChimericTStudent[query][i] = true;   /* significantly higher at P<0.05 */ 
+       
+                       if ((ts >= t) && (averageQuery[i] > averageRef[i])) {   
+                               isChimericTStudent[i] = true;   /* significantly higher at P<0.05 */ 
                         }
                 
                         //anova test
-                       float value1 = sumQuery[query][i] + sumRef[query][i];
-                       float value2 = sumSquaredQuery[query][i] + sumSquaredRef[query][i];
-                       float value3 = ((sumQuery[query][i]*sumQuery[query][i]) / (float) (closest[query].size())) + ((sumRef[query][i] * sumRef[query][i]) / (float) refCombo[query]);
-                       float value4 = (value1 * value1) / ( (float) (closest[query].size() + refCombo[query]) );
+                       float value1 = sumQuery[i] + sumRef[i];
+                       float value2 = sumSquaredQuery[i] + sumSquaredRef[i];
+                       float value3 = ((sumQuery[i]*sumQuery[i]) / (float) (closest.size())) + ((sumRef[i] * sumRef[i]) / (float) refCombo);
+                       float value4 = (value1 * value1) / ( (float) (closest.size() + refCombo) );
                         float value5 = value2 - value4;
                         float value6 = value3 - value4;
                         float value7 = value5 - value6;
@@ -891,13 +788,13 @@ void Ccode::determineChimeras (int query) {
                         
                         float f = getF(degreeOfFreedom);
                         
-                        if ((anovaValue >= f) && (averageQuery[query][i] > averageRef[query][i]))  {
-                      isChimericANOVA[query][i] = true;   /* significant P<0.05 */
+                        if ((anovaValue >= f) && (averageQuery[i] > averageRef[i]))  {
+                      isChimericANOVA[i] = true;   /* significant P<0.05 */
                 }
                         
                         if (isnan(anovaValue) || isinf(anovaValue)) { anovaValue = 0.0; }
                         
-                       anova[query][i] = anovaValue;
+                       anova[i] = anovaValue;
                 }
                 
         }
@@ -1009,597 +906,6 @@ float Ccode::getF(int numseq) {
                 exit(1);
         }
  }
-
-/**************************************************************************************************/
-void Ccode::createProcessesClosest() {
-       try {
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
-               int process = 0;
-               vector<int> processIDS;
-               
-               //loop through and create all the processes you want
-               while (process != processors) {
-                       int pid = fork();
-                       
-                       if (pid > 0) {
-                               processIDS.push_back(pid);  
-                               process++;
-                       }else if (pid == 0){
-                               
-                               mothurOut("Finding top matches for sequences " + toString(lines[process]->start) + " to " + toString(lines[process]->end)); mothurOutEndLine();
-                               closest = findClosest(lines[process]->start, lines[process]->end, numWanted);
-                               mothurOut("Done finding top matches for sequences " +  toString(lines[process]->start) + " to " + toString(lines[process]->end)); mothurOutEndLine();
-                               
-                               //write out data to file so parent can read it
-                               ofstream out;
-                               string s = toString(getpid()) + ".temp";
-                               openOutputFile(s, out);
-                                                       
-                               //output pairs
-                               for (int i = lines[process]->start; i < lines[process]->end; i++) {
-                                        for (int j = 0; j < closest[i].size(); j++) {
-                                               closest[i][j].seq->printSequence(out);
-                                        }
-                               }
-                               out << ">" << endl; //to stop sequence read
-                               
-                               for (int i = lines[process]->start; i < lines[process]->end; i++) {
-                                        for (int j = 0; j < closest[i].size(); j++) {
-                                               out << closest[i][j].dist << '\t';
-                                        }
-                                        out << endl;
-                               }
-                               out.close();
-                               exit(0);
-                       }else { mothurOut("unable to spawn the necessary processes."); mothurOutEndLine(); exit(0); }
-               }
-               
-               //force parent to wait until all the processes are done
-               for (int i=0;i<processors;i++) { 
-                       int temp = processIDS[i];
-                       wait(&temp);
-               }
-               
-               //get data created by processes
-               for (int i=0;i<processors;i++) { 
-                       ifstream in;
-                       string s = toString(processIDS[i]) + ".temp";
-                       openInputFile(s, in);
-                       
-                       vector< vector<Sequence*> > tempClosest;  tempClosest.resize(querySeqs.size());
-                       //get pairs
-                       for (int k = lines[i]->start; k < lines[i]->end; k++) {
-                               vector<Sequence*> tempVector;
-       
-                               for (int j = 0; j < numWanted; j++) {
-                               
-                                       Sequence* temp = new Sequence(in);
-                                       gobble(in);
-                                               
-                                       tempVector.push_back(temp);
-                               }
-                               
-                               tempClosest[k] = tempVector;
-                       }
-                       
-                       string junk;
-                       in >> junk;  gobble(in);  // to get ">"
-               
-                       vector< vector<float> > dists; dists.resize(querySeqs.size());
-                       
-                       for (int k = lines[i]->start; k < lines[i]->end; k++) {
-                               dists[k].resize(numWanted);
-                               for (int j = 0; j < numWanted; j++) {
-                                       in >> dists[k][j];
-                               }
-                               gobble(in);
-                       
-                       } 
-
-                       for (int k = lines[i]->start; k < lines[i]->end; k++) {
-                               closest[k].resize(numWanted);
-                               for (int j = 0; j < closest[k].size(); j++) {
-                                       closest[k][j].seq = tempClosest[k][j];
-                                       closest[k][j].dist = dists[k][j]; 
-                               }
-                       } 
-
-                       in.close();
-                       remove(s.c_str());
-               }
-                       
-       
-#else
-               closest = findClosest(lines[0]->start, lines[0]->end, numWanted);
-#endif 
-
-       }
-       catch(exception& e) {
-               errorOut(e, "Ccode", "createProcessesClosest");
-               exit(1);
-       }
-}
-
-//***************************************************************************************************************
-void Ccode::createProcessesRemoveBad() {
-       try {
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
-               int process = 0;
-               vector<int> processIDS;
-               
-               //loop through and create all the processes you want
-               while (process != processors) {
-                       int pid = fork();
-                       
-                       if (pid > 0) {
-                               processIDS.push_back(pid);  
-                               process++;
-                       }else if (pid == 0){
-                                                       
-                               for (int i = lines[process]->start; i < lines[process]->end; i++) {
-                                       removeBadReferenceSeqs(closest[i], i);
-                               }
-                               
-                               //write out data to file so parent can read it
-                               ofstream out;
-                               string s = toString(getpid()) + ".temp";
-                               openOutputFile(s, out);
-                               
-                               //output pairs
-                               for (int i = lines[process]->start; i < lines[process]->end; i++) {
-                                       out << closest[i].size() << endl;
-                                        for (int j = 0; j < closest[i].size(); j++) {
-                                               closest[i][j].seq->printSequence(out);
-                                        }
-                                        out << ">" << endl; //to stop sequence read
-                               }
-                               
-                               for (int i = lines[process]->start; i < lines[process]->end; i++) {
-                                        for (int j = 0; j < closest[i].size(); j++) {
-                                               out << closest[i][j].dist << '\t';
-                                        }
-                                        out << endl;
-                               }
-
-                               out.close();
-                               
-                               exit(0);
-                       }else { mothurOut("unable to spawn the necessary processes."); mothurOutEndLine(); exit(0); }
-               }
-               
-               //force parent to wait until all the processes are done
-               for (int i=0;i<processors;i++) { 
-                       int temp = processIDS[i];
-                       wait(&temp);
-               }
-       
-               //get data created by processes
-               for (int i=0;i<processors;i++) { 
-                       ifstream in;
-                       string s = toString(processIDS[i]) + ".temp";
-                       openInputFile(s, in);
-               
-                       vector< vector<Sequence*> > tempClosest;  tempClosest.resize(querySeqs.size());
-                       vector<int> sizes; 
-                       //get pairs
-                       for (int k = lines[i]->start; k < lines[i]->end; k++) {
-               
-                               int num;
-                               in >> num; 
-                               sizes.push_back(num);
-                               gobble(in);
-                               
-                               vector<Sequence*> tempVector;
-                               
-                               for (int j = 0; j < num; j++) {
-                               
-                                       Sequence* temp = new Sequence(in);
-                                       gobble(in);
-                                               
-                                       tempVector.push_back(temp);
-                               }
-                               string junk;
-                               in >> junk;  gobble(in);  // to get ">"
-
-                               tempClosest[k] = tempVector;
-                       }
-                       
-                       vector< vector<float> > dists; dists.resize(querySeqs.size());
-                       int count = 0;
-                       for (int k = lines[i]->start; k < lines[i]->end; k++) {
-                               dists[k].resize(sizes[count]);
-                               for (int j = 0; j < sizes[count]; j++) {
-                                       in >> dists[k][j];
-                               }
-                               gobble(in);
-                               count++;
-                       } 
-                       
-                       count = 0;
-                       for (int k = lines[i]->start; k < lines[i]->end; k++) {
-                               for (int j = 0; j < sizes[count]; j++) {
-                                       closest[k][j].seq = tempClosest[k][j];
-                                       closest[k][j].dist = dists[k][j]; 
-                               }
-                               count++;
-                       } 
-
-                       in.close();
-                       remove(s.c_str());
-               }
-#else
-               for (int i = 0; i < closest.size(); i++) {
-                       removeBadReferenceSeqs(closest[i], i);
-               }
-#endif         
-       
-       }
-       catch(exception& e) {
-               errorOut(e, "Ccode", "createProcessesRemoveBad");
-               exit(1);
-       }
-}
-//***************************************************************************************************************
-void Ccode::createProcessesAverages() {
-       try {
-       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
-               int process = 0;
-               vector<int> processIDS;
-               
-               //loop through and create all the processes you want
-               while (process != processors) {
-                       int pid = fork();
-                       
-                       if (pid > 0) {
-                               processIDS.push_back(pid);  
-                               process++;
-                       }else if (pid == 0){
-                                                       
-                               //find the averages for each querys references 
-                               for (int i = lines[process]->start; i < lines[process]->end; i++) {
-                                       getAverageRef(closest[i], i);  //fills sumRef[i], averageRef[i], sumSquaredRef[i] and refCombo[i].
-                               }
-                               
-                               //find the averages for the query 
-                               for (int i = lines[process]->start; i < lines[process]->end; i++) {
-                                       getAverageQuery(closest[i], i);  //fills sumQuery[i], averageQuery[i], sumSquaredQuery[i].
-                               }
-                               
-                               
-                               //write out data to file so parent can read it
-                               ofstream out;
-                               string s = toString(getpid()) + ".temp";
-                               openOutputFile(s, out);
-                               
-                               //output pairs
-                               for (int i = lines[process]->start; i < lines[process]->end; i++) {
-                                       for (int j = 0; j < windows[i].size(); j++) {
-                                               out << sumRef[i][j] << '\t';
-                                       }
-                                       out << endl;
-                                       for (int j = 0; j < windows[i].size(); j++) {
-                                               out << averageRef[i][j] << '\t';
-                                       }
-                                       out << endl;
-                                       for (int j = 0; j < windows[i].size(); j++) {
-                                               out << sumSquaredRef[i][j] << '\t';
-                                       }
-                                       out << endl;
-                                       for (int j = 0; j < windows[i].size(); j++) {
-                                               out << sumQuery[i][j] << '\t';
-                                       }
-                                       out << endl;
-                                       for (int j = 0; j < windows[i].size(); j++) {
-                                               out << averageQuery[i][j] << '\t';
-                                       }
-                                       out << endl;
-                                       for (int j = 0; j < windows[i].size(); j++) {
-                                               out << sumSquaredQuery[i][j] << '\t';
-                                       }
-                                       out << endl;
-                                       out << refCombo[i] << endl;
-                               }
-
-                               out.close();
-                               
-                               exit(0);
-                       }else { mothurOut("unable to spawn the necessary processes."); mothurOutEndLine(); exit(0); }
-               }
-               
-               //force parent to wait until all the processes are done
-               for (int i=0;i<processors;i++) { 
-                       int temp = processIDS[i];
-                       wait(&temp);
-               }
-       
-               //get data created by processes
-               for (int i=0;i<processors;i++) { 
-                       ifstream in;
-                       string s = toString(processIDS[i]) + ".temp";
-                       openInputFile(s, in);
-               
-                       //get pairs
-                       for (int k = lines[i]->start; k < lines[i]->end; k++) {
-                               sumRef[k].resize(windows[k].size());
-                               averageRef[k].resize(windows[k].size());
-                               sumSquaredRef[k].resize(windows[k].size());
-                               averageQuery[k].resize(windows[k].size());
-                               sumQuery[k].resize(windows[k].size());
-                               sumSquaredQuery[k].resize(windows[k].size());
-                               
-                               for (int j = 0; j < windows[k].size(); j++) {
-                                       in >> sumRef[k][j];
-                               }
-                               gobble(in);
-                               for (int j = 0; j < windows[k].size(); j++) {
-                                       in >> averageRef[k][j];
-                               }
-                               gobble(in);
-                               for (int j = 0; j < windows[k].size(); j++) {
-                                       in >> sumSquaredRef[k][j];
-                               }
-                               gobble(in);
-                               for (int j = 0; j < windows[k].size(); j++) {
-                                       in >> sumQuery[k][j];
-                               }
-                               gobble(in);
-                               for (int j = 0; j < windows[k].size(); j++) {
-                                       in >> averageQuery[k][j];
-                               }
-                               gobble(in);
-                               for (int j = 0; j < windows[k].size(); j++) {
-                                       in >> sumSquaredQuery[k][j];
-                               }
-                               gobble(in);
-                               in >> refCombo[k];
-                               gobble(in);
-                       }
-                       
-                       in.close();
-                       remove(s.c_str());
-               }
-#else
-               //find the averages for each querys references 
-               for (int i = 0; i < querySeqs.size(); i++) {
-                       getAverageRef(closest[i], i);  //fills sumRef[i], averageRef[i], sumSquaredRef[i] and refCombo[i].
-               }
-       
-               //find the averages for the query 
-               for (int i = 0; i < querySeqs.size(); i++) {
-                       getAverageQuery(closest[i], i);  //fills sumQuery[i], averageQuery[i], sumSquaredQuery[i].
-               }
-
-#endif         
-       
-       }
-       catch(exception& e) {
-               errorOut(e, "Ccode", "createProcessesAverages");
-               exit(1);
-       }
-}
-//***************************************************************************************************************
-void Ccode::createProcessesVariances() {
-       try {
-       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
-               int process = 0;
-               vector<int> processIDS;
-               
-               //loop through and create all the processes you want
-               while (process != processors) {
-                       int pid = fork();
-                       
-                       if (pid > 0) {
-                               processIDS.push_back(pid);  
-                               process++;
-                       }else if (pid == 0){
-                                                       
-                               //find the averages for each querys references 
-                               for (int i = lines[process]->start; i < lines[process]->end; i++) {
-                                       findVarianceRef(i);  //fills varRef[i] and sdRef[i] also sets minimum error rate to 0.001 to avoid divide by 0.
-                               }
-                               
-                               //find the averages for the query 
-                               for (int i = lines[process]->start; i < lines[process]->end; i++) {
-                                       findVarianceQuery(i);  //fills varQuery[i] and sdQuery[i] also sets minimum error rate to 0.001 to avoid divide by 0.
-                               }
-                               
-                               
-                               //write out data to file so parent can read it
-                               ofstream out;
-                               string s = toString(getpid()) + ".temp";
-                               openOutputFile(s, out);
-                               
-                               //output pairs
-                               for (int i = lines[process]->start; i < lines[process]->end; i++) {
-                                       for (int j = 0; j < windows[i].size(); j++) {
-                                               out << varRef[i][j] << '\t';
-                                       }
-                                       out << endl;
-                                       for (int j = 0; j < windows[i].size(); j++) {
-                                               out << sdRef[i][j] << '\t';
-                                       }
-                                       out << endl;
-                                       for (int j = 0; j < windows[i].size(); j++) {
-                                               out << varQuery[i][j] << '\t';
-                                       }
-                                       out << endl;
-                                       for (int j = 0; j < windows[i].size(); j++) {
-                                               out << sdQuery[i][j] << '\t';
-                                       }
-                                       out << endl;
-                               }
-
-                               out.close();
-                               
-                               exit(0);
-                       }else { mothurOut("unable to spawn the necessary processes."); mothurOutEndLine(); exit(0); }
-               }
-               
-               //force parent to wait until all the processes are done
-               for (int i=0;i<processors;i++) { 
-                       int temp = processIDS[i];
-                       wait(&temp);
-               }
-       
-               //get data created by processes
-               for (int i=0;i<processors;i++) { 
-                       ifstream in;
-                       string s = toString(processIDS[i]) + ".temp";
-                       openInputFile(s, in);
-               
-                       //get pairs
-                       for (int k = lines[i]->start; k < lines[i]->end; k++) {
-                               varRef[k].resize(windows[k].size());
-                               sdRef[k].resize(windows[k].size());
-                               varQuery[k].resize(windows[k].size());
-                               sdQuery[k].resize(windows[k].size());
-                                                               
-                               for (int j = 0; j < windows[k].size(); j++) {
-                                       in >> varRef[k][j];
-                               }
-                               gobble(in);
-                               for (int j = 0; j < windows[k].size(); j++) {
-                                       in >> sdRef[k][j];
-                               }
-                               gobble(in);
-                               for (int j = 0; j < windows[k].size(); j++) {
-                                       in >> varQuery[k][j];
-                               }
-                               gobble(in);
-                               for (int j = 0; j < windows[k].size(); j++) {
-                                       in >> sdQuery[k][j];
-                               }
-                               gobble(in);
-                       }
-                       
-                       in.close();
-                       remove(s.c_str());
-               }
-#else
-                       //find the averages for each querys references 
-                       for (int i = 0; i < querySeqs.size(); i++) {
-                               findVarianceRef(i);  //fills varRef[i] and sdRef[i] also sets minimum error rate to 0.001 to avoid divide by 0.
-                       }
-                       
-                       //find the averages for the query 
-                       for (int i = 0; i < querySeqs.size(); i++) {
-                               findVarianceQuery(i);  //fills v arQuery[i] and sdQuery[i] also sets minimum error rate to 0.001 to avoid divide by 0.
-                       }
-#endif         
-       }
-       catch(exception& e) {
-               errorOut(e, "Ccode", "createProcessesVariances");
-               exit(1);
-       }
-}
-//***************************************************************************************************************
-void Ccode::createProcessesDetermine() {
-       try {
-       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
-               int process = 0;
-               vector<int> processIDS;
-               
-               //loop through and create all the processes you want
-               while (process != processors) {
-                       int pid = fork();
-                       
-                       if (pid > 0) {
-                               processIDS.push_back(pid);  
-                               process++;
-                       }else if (pid == 0){
-                                                       
-                               //find the averages for each querys references 
-                               for (int i = lines[process]->start; i < lines[process]->end; i++) {
-                                       determineChimeras(i);  //fills anova, isChimericConfidence[i], isChimericTStudent[i] and isChimericANOVA[i]. 
-                               }
-
-                               //write out data to file so parent can read it
-                               ofstream out;
-                               string s = toString(getpid()) + ".temp";
-                               openOutputFile(s, out);
-                               
-                               //output pairs
-                               for (int i = lines[process]->start; i < lines[process]->end; i++) {
-                                       for (int j = 0; j < windows[i].size(); j++) {
-                                               out << anova[i][j] << '\t';
-                                       }
-                                       out << endl;
-                                       for (int j = 0; j < windows[i].size(); j++) {
-                                               out << isChimericConfidence[i][j] << '\t';
-                                       }
-                                       out << endl;
-                                       for (int j = 0; j < windows[i].size(); j++) {
-                                               out << isChimericTStudent[i][j] << '\t';
-                                       }
-                                       out << endl;
-                                       for (int j = 0; j < windows[i].size(); j++) {
-                                               out << isChimericANOVA[i][j] << '\t';
-                                       }
-                                       out << endl;
-                               }
-
-                               out.close();
-                               
-                               exit(0);
-                       }else { mothurOut("unable to spawn the necessary processes."); mothurOutEndLine(); exit(0); }
-               }
-               
-               //force parent to wait until all the processes are done
-               for (int i=0;i<processors;i++) { 
-                       int temp = processIDS[i];
-                       wait(&temp);
-               }
-       
-               //get data created by processes
-               for (int i=0;i<processors;i++) { 
-                       ifstream in;
-                       string s = toString(processIDS[i]) + ".temp";
-                       openInputFile(s, in);
-               
-                       //get pairs
-                       for (int k = lines[i]->start; k < lines[i]->end; k++) {
-                               anova[k].resize(windows[k].size());
-                               isChimericConfidence[k].resize(windows[k].size(), false);
-                               isChimericTStudent[k].resize(windows[k].size(), false);
-                               isChimericANOVA[k].resize(windows[k].size(), false);
-                               int tempBool;
-                                                               
-                               for (int j = 0; j < windows[k].size(); j++) {
-                                       in >> anova[k][j];
-                               }
-                               gobble(in);
-                               for (int j = 0; j < windows[k].size(); j++) {
-                                       in >> tempBool;
-                                       if (tempBool == 1) {  isChimericConfidence[k][j] = true;  }
-                               }
-                               gobble(in);
-                               for (int j = 0; j < windows[k].size(); j++) {
-                                       in >> tempBool;
-                                       if (tempBool == 1) {  isChimericTStudent[k][j] = true;  }
-                               }
-                               gobble(in);
-                               for (int j = 0; j < windows[k].size(); j++) {
-                                       in >> tempBool;
-                                       if (tempBool == 1) {  isChimericANOVA[k][j] = true;  }
-                               }
-                               gobble(in);
-                       }
-                       
-                       in.close();
-                       remove(s.c_str());
-               }
-       #else
-                       for (int i = 0; i < querySeqs.size(); i++) {
-                               determineChimeras(i);  //fills anova, isChimericConfidence[i], isChimericTStudent[i] and isChimericANOVA[i].
-                       }
-       #endif          
-
-       }
-       catch(exception& e) {
-               errorOut(e, "Ccode", "createProcessesDetermine");
-               exit(1);
-       }
-}
  //***************************************************************************************************************
  
  
diff --git a/ccode.h b/ccode.h

index 52592f9c8e19ef4ef80e26b62199e3c9daf489d8..c60b1302f9802af48e3d34f847ab67e8d425cf52 100644 (file)
--- a/ccode.h
+++ b/ccode.h
@@ -24,70 +24,57 @@
  class Ccode : public Chimera {
         
         public:
-               Ccode(string, string, string);  
+               Ccode(string, string);  
                 ~Ccode();
                 
-               int getChimeras();
+               int getChimeras(Sequence* query);
                 void print(ostream&);
-               
-               void setCons(string c)          {}
-               void setQuantiles(string q) {}
-               
-               
+               void printHeader(ostream&);             
         private:
         
                 Dist* distCalc;
                 DeCalculator* decalc;
                 int iters;
-               string fastafile, templateFile;
+               string fastafile, mapInfo;
                 
+               Sequence* querySeq;
                 
-               vector<linePair*> lines;
-               vector<Sequence*> querySeqs;
-               vector<Sequence*> templateSeqs;
-               vector< map<int, int> > spotMap;
+               map<int, int> spotMap;
                 map<int, int>::iterator it;
                 
-               vector< vector<int> > windows; //windows[0] is the vector of window breaks for querySeqs[0]
-               vector<int> windowSizes;  //windowSizes[0] is the size of the windows for querySeqs[0]
-               vector< map<int, int> > trim;  //trim[0] is the map containing the starting and ending positions for querySeqs[0] set of seqs
-               vector< vector<SeqDist> > closest;  //closest[0] is a vector of sequence at are closest to queryseqs[0]...
-               vector< vector<float> > averageRef;  //averageRef[0] is the average distance at each window for the references for querySeqs[0]
-               vector< vector<float> > averageQuery;  //averageQuery[0] is the average distance at each winow for the query for querySeqs[0]
-               vector< vector<float> >  sumRef;  //sumRef[0] is the sum of distances at each window for the references for querySeqs[0]
-               vector< vector<float> >  sumSquaredRef;  //sumSquaredRef[0] is the sum of squared distances at each window for the references for querySeqs[0]
-               vector< vector<float> > sumQuery;  //sumQuery[0] is the sum of distances at each window for the comparison of query to references for querySeqs[0]
-               vector< vector<float> >  sumSquaredQuery;  //sumSquaredQuery[0] is the sum of squared distances at each window for the comparison of query to references for querySeqs[0]
-               vector< vector<float> > varRef;  //varRef[0] is the variance among references seqs at each window for querySeqs[0]
-               vector< vector<float> > varQuery;  //varQuery[0] is the variance among references and querySeqs[0] at each window
-               vector< vector<float> > sdRef;  //sdRef[0] is the standard deviation of references seqs at each window for querySeqs[0]
-               vector< vector<float> > sdQuery;  //sdQuery[0] is the standard deviation of references and querySeqs[0] at each window
-               vector< vector<float> > anova;  //anova[0] is the vector of anova scores for each window for querySeqs[0]
-               vector<int> refCombo;  //refCombo[0] is the number of reference sequences combinations for querySeqs[0]
-               vector< vector<bool> > isChimericConfidence;  //isChimericConfidence[0] indicates whether querySeqs[0] is chimeric at a given window according to the confidence limits
-               vector< vector<bool> > isChimericTStudent;  //isChimericConfidence[0] indicates whether querySeqs[0] is chimeric at a given window according to the confidence limits
-               vector< vector<bool> > isChimericANOVA;  //isChimericConfidence[0] indicates whether querySeqs[0] is chimeric at a given window according to the confidence limits
+               vector<int>  windows; //windows is the vector of window breaks for query
+               int windowSizes;  //windowSizes is the size of the windows for query
+               map<int, int> trim;  //trim is the map containing the starting and ending positions for query
+               vector<SeqDist>  closest;  //closest is a vector of sequence at are closest to query
+               vector<float>  averageRef;  //averageRef is the average distance at each window for the references for query
+               vector<float>  averageQuery;  //averageQuery is the average distance at each winow for the query for query
+               vector<float>   sumRef;  //sumRef is the sum of distances at each window for the references for query
+               vector<float>   sumSquaredRef;  //sumSquaredRef is the sum of squared distances at each window for the references for query
+               vector<float> sumQuery;  //sumQuery is the sum of distances at each window for the comparison of query to references for query
+               vector<float>  sumSquaredQuery;  //sumSquaredQuery is the sum of squared distances at each window for the comparison of query to references for query
+               vector<float> varRef;  //varRef is the variance among references seqs at each window for query
+               vector<float> varQuery;  //varQuery is the variance among references and query at each window
+               vector<float> sdRef;  //sdRef is the standard deviation of references seqs at each window for query
+               vector<float> sdQuery;  //sdQuery is the standard deviation of references and query at each window
+               vector<float> anova;  //anova is the vector of anova scores for each window for query
+               int refCombo;  //refCombo is the number of reference sequences combinations for query
+               vector<bool>  isChimericConfidence;  //isChimericConfidence indicates whether query is chimeric at a given window according to the confidence limits
+               vector<bool>  isChimericTStudent;  //isChimericConfidence indicates whether query is chimeric at a given window according to the confidence limits
+               vector<bool>  isChimericANOVA;  //isChimericConfidence indicates whether query is chimeric at a given window according to the confidence limits
                 
-               vector< vector<SeqDist> > findClosest(int, int, int); 
-               void removeBadReferenceSeqs(vector<SeqDist>&, int);  //removes sequences from closest that are to different of too similar to eachother. 
-               void trimSequences(int);
-               vector<int> findWindows(int);
-               void getAverageRef(vector<SeqDist>, int);               //fills sumRef[i], averageRef[i], sumSquaredRef[i] and refCombo[i].
-               void getAverageQuery (vector<SeqDist>, int);    //fills sumQuery[i], averageQuery[i], sumSquaredQuery[i].
-               void findVarianceRef (int);                                             //fills varRef[i] and sdRef[i] also sets minimum error rate to 0.001 to avoid divide by 0.
-               void findVarianceQuery (int);                                   //fills varQuery[i] and sdQuery[i]
-               void determineChimeras (int);                                   //fills anova, isChimericConfidence[i], isChimericTStudent[i] and isChimericANOVA[i].
+               vector<SeqDist>  findClosest(Sequence*, int); 
+               void removeBadReferenceSeqs(vector<SeqDist>&);  //removes sequences from closest that are to different of too similar to eachother. 
+               void trimSequences(Sequence*);
+               vector<int> findWindows();
+               void getAverageRef(vector<SeqDist>);            //fills sumRef, averageRef, sumSquaredRef and refCombo.
+               void getAverageQuery (vector<SeqDist>, Sequence*);      //fills sumQuery, averageQuery, sumSquaredQuery.
+               void findVarianceRef ();                                                //fills varRef and sdRef also sets minimum error rate to 0.001 to avoid divide by 0.
+               void findVarianceQuery ();                                      //fills varQuery and sdQuery
+               void determineChimeras ();                                      //fills anova, isChimericConfidence, isChimericTStudent and isChimericANOVA.
                 
                 int getDiff(string, string);  //return number of mismatched bases, a gap to base is not counted as a mismatch
                 float getT(int); 
                 float getF(int); 
-               
-               void createProcessesClosest();
-               void createProcessesRemoveBad();
-               void createProcessesAverages();
-               void createProcessesVariances();
-               void createProcessesDetermine();
-                               
  };
  
  /***********************************************************/
diff --git a/chimera.cpp b/chimera.cpp

index 4ae4991c8d78ebe7a90920972175554dd83b6d36..bf16de4e34b795694d1a894ea1005f32353be110 100644 (file)
--- a/chimera.cpp
+++ b/chimera.cpp
@@ -13,7 +13,7 @@
  //this is a vertical soft filter
  void Chimera::createFilter(vector<Sequence*> seqs) {
         try {
-               
+               filterString = "";
                 int threshold = int (0.5 * seqs.size());
  //cout << "threshhold = " << threshold << endl;
                 
@@ -60,33 +60,29 @@ void Chimera::createFilter(vector<Sequence*> seqs) {
         }
  }
  //***************************************************************************************************************
-void Chimera::runFilter(vector<Sequence*> seqs) {
+void Chimera::runFilter(Sequence* seq) {
         try {
                 
-               //for each sequence
-               for (int i = 0; i < seqs.size(); i++) {
-               
-                       string seqAligned = seqs[i]->getAligned();
-                       string newAligned = "";
+               string seqAligned = seq->getAligned();
+               string newAligned = "";
                         
-                       for (int j = 0; j < seqAligned.length(); j++) {
-                               //if this spot is a gap
-                               if (filterString[j] == '1') { newAligned += seqAligned[j]; }
-                       }
-                       
-                       seqs[i]->setAligned(newAligned);
+               for (int j = 0; j < seqAligned.length(); j++) {
+                       //if this spot is a gap
+                       if (filterString[j] == '1') { newAligned += seqAligned[j]; }
                 }
-               
+                       
+               seq->setAligned(newAligned);
         }
         catch(exception& e) {
                 errorOut(e, "Chimera", "runFilter");
                 exit(1);
         }
  }
-
  //***************************************************************************************************************
  vector<Sequence*> Chimera::readSeqs(string file) {
         try {
+       
+               mothurOut("Reading sequences... "); cout.flush();
                 ifstream in;
                 openInputFile(file, in);
                 vector<Sequence*> container;
@@ -108,6 +104,8 @@ vector<Sequence*> Chimera::readSeqs(string file) {
                 }
                 
                 in.close();
+               mothurOut("Done."); mothurOutEndLine();
+               
                 return container;
         }
         catch(exception& e) {
@@ -187,6 +185,31 @@ vector< vector<float> > Chimera::readQuantiles() {
         }
  }
  //***************************************************************************************************************
+Sequence* Chimera::getSequence(string name) {
+       try{
+               Sequence* temp;
+               
+               //look through templateSeqs til you find it
+               int spot = -1;
+               for (int i = 0; i < templateSeqs.size(); i++) {
+                       if (name == templateSeqs[i]->getName()) {  
+                               spot = i;
+                               break;
+                       }
+               }
+               
+               if(spot == -1) { mothurOut("Error: Could not find sequence."); mothurOutEndLine(); return NULL; }
+               
+               temp = new Sequence(templateSeqs[spot]->getName(), templateSeqs[spot]->getAligned());
+               
+               return temp;
+       }
+       catch(exception& e) {
+               errorOut(e, "Chimera", "getSequence");
+               exit(1);
+       }
+}
+//***************************************************************************************************************
  
  
  
diff --git a/chimera.h b/chimera.h

index 8b3b0eb713beb96cc391f18a733029298508145a..a968a02fa61cb06d7bef2d92386db5d55cf020f3 100644 (file)
--- a/chimera.h
+++ b/chimera.h
@@ -14,7 +14,7 @@
  #include "mothur.h"
  #include "sequence.hpp"
  
-
+/***********************************************************************/
  struct Preference {
                 string name;
                 vector<string> leftParent; //keep the name of closest left associated with the two scores
@@ -25,12 +25,52 @@ struct Preference {
                 int midpoint;
  
  };
+/***********************************************************************/
+struct score_struct {
+       int prev;
+       int score;
+       int row;
+       int col;
+};
+/***********************************************************************/
+struct trace_struct {
+       int col;
+       int oldCol;
+       int row;
+};
+/***********************************************************************/
+struct results {
+       int regionStart;
+       int regionEnd;
+       int nastRegionStart;
+       int nastRegionEnd;
+       string parent;
+       float queryToParent;
+       float queryToParentLocal;
+       float divR;
+};
+/***********************************************************************/
+struct rank {
+               int num;
+               float score;
+               rank(int n, float s) : num(n), score(s) {}
+};
+/***********************************************************************/
  
  struct SeqDist {
         Sequence* seq;
         float dist;
  };
-
+//********************************************************************************************************************
+//sorts highest to lowest
+inline bool compareMembers(rank left, rank right){
+       return (left.score > right.score);      
+} 
+//********************************************************************************************************************
+//sorts lowest to highest
+inline bool compareRegionStart(results left, results right){
+       return (left.nastRegionStart < right.nastRegionStart);  
+} 
  //********************************************************************************************************************
  //sorts lowest to highest
  inline bool compareSeqDist(SeqDist left, SeqDist right){
@@ -59,8 +99,8 @@ class Chimera {
         public:
         
                 Chimera(){};
+               Chimera(string);
                 Chimera(string, string);
-               Chimera(string, string, string);
                 virtual ~Chimera(){};
                 virtual void setFilter(bool f)                  {       filter = f;                     }
                 virtual void setCorrection(bool c)              {       correction = c;         }
@@ -76,30 +116,37 @@ class Chimera {
                 virtual void setDivR(float d)                   {       divR = d;                       }
                 virtual void setParents(int p)                  {       parents = p;            }
                 virtual void setMinSim(int s)                   {       minSim = s;                     }
+               virtual void setMinCoverage(int c)              {       minCov = c;                     }
+               virtual void setMinBS(int b)                    {       minBS = b;                      }
+               virtual void setMinSNP(int s)                   {       minSNP = s;                     }
                 virtual void setIters(int i)                    {       iters = i;                      }
-
+               virtual void setTemplateSeqs(vector<Sequence*> t)       {       templateSeqs = t;       }
+               virtual bool getUnaligned()                             {       return unaligned;                       }
+               virtual void setTemplateFile(string t)  {   templateFileName = t;       }
                 
                 virtual void setCons(string){};
                 virtual void setQuantiles(string){};
+               virtual void doPrep(){};
                 virtual vector<Sequence*> readSeqs(string);
                 virtual vector< vector<float> > readQuantiles();
                 virtual void setMask(string);
-               virtual void runFilter(vector<Sequence*>);
+               virtual void runFilter(Sequence*);
                 virtual void createFilter(vector<Sequence*>);
                 
+               virtual void printHeader(ostream&){};
+               virtual int getChimeras(Sequence*){ return 0; }
+               virtual int getChimeras(){ return 0; }
+               virtual void print(ostream&){}; 
                 
-               //pure functions
-               virtual int getChimeras() = 0;  
-               virtual void print(ostream&) = 0;       
                 
         protected:
                 
+               vector<Sequence*> templateSeqs;
                 bool filter, correction, svg, unaligned;
-               int processors, window, increment, numWanted, kmerSize, match, misMatch, minSim, parents, iters;
+               int processors, window, increment, numWanted, kmerSize, match, misMatch, minSim, minCov, minBS, minSNP, parents, iters;
                 float divR;
-               string seqMask, quanfile, filterString, name, outputDir;
-                       
-
+               string seqMask, quanfile, filterString, name, outputDir, templateFileName;
+               Sequence* getSequence(string);  //find sequence from name       
  };
  
  /***********************************************************************/
diff --git a/chimeracheckrdp.cpp b/chimeracheckrdp.cpp

index 04c143fecc50207a682f033567372959bcb831be..39ed4870fd1f6668839b3abf6299b537dc4c3ebf 100644 (file)
--- a/chimeracheckrdp.cpp
+++ b/chimeracheckrdp.cpp
@@ -10,12 +10,11 @@
  #include "chimeracheckrdp.h"
                 
  //***************************************************************************************************************
-ChimeraCheckRDP::ChimeraCheckRDP(string filename, string temp, string o) {  fastafile = filename;  templateFile = temp; outputDir = o;  }
+ChimeraCheckRDP::ChimeraCheckRDP(string filename,  string o) {  fastafile = filename;  outputDir = o;  }
  //***************************************************************************************************************
  
  ChimeraCheckRDP::~ChimeraCheckRDP() {
         try {
-               for (int i = 0; i < querySeqs.size(); i++)              {  delete querySeqs[i];         }
                 delete templateDB;
                 delete kmer;
         }
@@ -28,113 +27,65 @@ ChimeraCheckRDP::~ChimeraCheckRDP() {
  void ChimeraCheckRDP::print(ostream& out) {
         try {
                 
-               mothurOutEndLine();
-               
-               //vector<bool> isChimeric;  isChimeric.resize(querySeqs.size(), false);
+               mothurOut("Processing: " + querySeq->getName()); mothurOutEndLine();
                 
-               for (int i = 0; i < querySeqs.size(); i++) {
+               out << querySeq->getName() << endl;
+               out << "IS scores: " << '\t';
                         
-                               out << querySeqs[i]->getName() << endl;
-                               out << "IS scores: " << '\t';
-                               
-                               //int lastChimericWindowFound = 0;
-                               
-                               for (int k = 0; k < IS[i].size(); k++) {
-                                       out << IS[i][k].score << '\t'; 
-                                       //if (IS[i][k].score > chimeraCutoff) {  isChimeric[i] = true;   lastChimericWindowFound = k;           }                       
-                               }
-                               out << endl;
-                               //if (isChimeric[i]) { 
-                                       //mothurOut(querySeqs[i]->getName() + "\tIS: " + toString(IS[i][lastChimericWindowFound].score) + "\tbreakpoint: " + toString(IS[i][lastChimericWindowFound].midpoint) + "\tleft parent: " + IS[i][lastChimericWindowFound].leftParent + "\tright parent: " + IS[i][lastChimericWindowFound].rightParent); mothurOutEndLine();
-                                       //out << endl << "chimera: YES" << endl;
-                               //}else{
-                                       //out << endl << "chimera: NO" << endl;
-                               //}
+               for (int k = 0; k < IS.size(); k++) {
+                       out << IS[k].score << '\t'; 
+               }
+               out << endl;
+               
+               if (svg) {
+                       if (name != "") { //if user has specific names
+                               map<string, string>::iterator it = names.find(querySeq->getName());
                                 
-                               if (svg) {
-                                       
-                                       if (name != "") { //if user has specific names
-                                               map<string, string>::iterator it = names.find(querySeqs[i]->getName());
-                                       
-                                               if (it != names.end()) { //user wants pic of this
-                                                       makeSVGpic(IS[i], i);  //zeros out negative results
-                                               }
-                                       }else{//output them all
-                                               makeSVGpic(IS[i], i);  //zeros out negative results
-                                       }
+                               if (it != names.end()) { //user wants pic of this
+                                       makeSVGpic(IS);  //zeros out negative results
                                 }
+                       }else{//output them all
+                               makeSVGpic(IS);  //zeros out negative results
+                       }
                 }
-               
-               mothurOut("This method does not determine if a sequence is chimeric, but allows you to make that determination based on the IS values."); mothurOutEndLine();
         }
         catch(exception& e) {
                 errorOut(e, "ChimeraCheckRDP", "print");
                 exit(1);
         }
  }
-
  //***************************************************************************************************************
-int ChimeraCheckRDP::getChimeras() {
+void ChimeraCheckRDP::doPrep() {
         try {
-               
-               //read in query sequences and subject sequences
-               mothurOutEndLine();
-               mothurOut("Reading query sequences... "); cout.flush();
-               querySeqs = readSeqs(fastafile);
-               mothurOut("Done."); 
-               //templateSeqs = readSeqs(templateFile);
-               templateDB = new AlignmentDB(templateFile, "kmer", kmerSize, 0.0,0.0,0.0,0.0);
+               templateDB = new AlignmentDB(templateFileName, "kmer", kmerSize, 0.0,0.0,0.0,0.0);
                 mothurOutEndLine();
                 
-               int numSeqs = querySeqs.size();
-               
-               IS.resize(numSeqs);
-               closest.resize(numSeqs);
-               
-               //break up file if needed
-               int linesPerProcess = numSeqs / processors ;
-               
-               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
-                       //find breakup of sequences for all times we will Parallelize
-                       if (processors == 1) {   lines.push_back(new linePair(0, numSeqs));  }
-                       else {
-                               //fill line pairs
-                               for (int i = 0; i < (processors-1); i++) {                      
-                                       lines.push_back(new linePair((i*linesPerProcess), ((i*linesPerProcess) + linesPerProcess)));
-                               }
-                               //this is necessary to get remainder of processors / numSeqs so you don't miss any lines at the end
-                               int i = processors - 1;
-                               lines.push_back(new linePair((i*linesPerProcess), numSeqs));
-                       }
-                       
-               #else
-                       lines.push_back(new linePair(0, numSeqs));
-               #endif
-               
                 kmer = new Kmer(kmerSize);
                 
                 if (name != "") { 
                         readName(name);  //fills name map with names of seqs the user wants to have .svg for.  
                 }
+       }
+       catch(exception& e) {
+               errorOut(e, "ChimeraCheckRDP", "doPrep");
+               exit(1);
+       }
+}
+//***************************************************************************************************************
+int ChimeraCheckRDP::getChimeras(Sequence* query) {
+       try {
                 
-               //find closest seq to each querySeq
-               for (int i = 0; i < querySeqs.size(); i++) {
-                       closest[i] = templateDB->findClosestSequence(querySeqs[i]);  
-               }
-
-               //for each query find IS value  
-               if (processors == 1) {
-                       for (int i = 0; i < querySeqs.size(); i++) {
-                               IS[i] = findIS(i); 
-                       }
-               }else {         createProcessesIS();    }
-               
+               IS.clear();
+                               
+               querySeq = query;
+                       
+               closest = templateDB->findClosestSequence(query);  
+       
+               IS = findIS(); 
+                                       
                 //determine chimera report cutoff - window score above 95%
                 //getCutoff();  - not very acurate predictor
                 
-               //free memory
-               for (int i = 0; i < lines.size(); i++)                                  {       delete lines[i];        }
-       
                 return 0;
         }
         catch(exception& e) {
@@ -143,7 +94,7 @@ int ChimeraCheckRDP::getChimeras() {
         }
  }
  //***************************************************************************************************************
-vector<sim> ChimeraCheckRDP::findIS(int query) {
+vector<sim> ChimeraCheckRDP::findIS() {
         try {
                 
                 
@@ -154,13 +105,11 @@ vector<sim> ChimeraCheckRDP::findIS(int query) {
                 vector< map<int, int> > subjectKmerInfo;
                 
                 vector<sim>  isValues;
-               string queryName = querySeqs[query]->getName();
-               string seq = querySeqs[query]->getUnaligned();
-               
-               mothurOut("Finding IS values for sequence " + toString(query+1)); mothurOutEndLine();
+               string queryName = querySeq->getName();
+               string seq = querySeq->getUnaligned();
                 
                 queryKmerInfo = kmer->getKmerCounts(seq);
-               subjectKmerInfo = kmer->getKmerCounts(closest[query].getUnaligned());
+               subjectKmerInfo = kmer->getKmerCounts(closest.getUnaligned());
                 
                 //find total kmers you have in common with closest[query] by looking at the last entry in the vector of maps for each
                 int nTotal = calcKmers(queryKmerInfo[(queryKmerInfo.size()-1)], subjectKmerInfo[(subjectKmerInfo.size()-1)]);
@@ -238,7 +187,7 @@ vector<sim> ChimeraCheckRDP::findIS(int query) {
                         
                         delete left;
                         delete right;
-               }       
+               }
                 
                 return isValues;
         
@@ -309,36 +258,10 @@ int ChimeraCheckRDP::calcKmers(map<int, int> query, map<int, int> subject) {
  }
  
  //***************************************************************************************************************
-void ChimeraCheckRDP::getCutoff() {
-       try{
-               
-               vector<float> temp;
-               
-               //store all is scores for all windows
-               for (int i = 0; i < IS.size(); i++) {
-                       for (int j = 0; j < IS[i].size(); j++) {
-                               temp.push_back(IS[i][j].score);
-                       }
-               }
-               
-               //sort them
-               sort(temp.begin(), temp.end());
-               
-               //get 95%
-               chimeraCutoff = temp[int(temp.size() * 0.95)];
-
-       }
-       catch(exception& e) {
-               errorOut(e, "ChimeraCheckRDP", "getCutoff");
-               exit(1);
-       }
-}
-
-//***************************************************************************************************************
-void ChimeraCheckRDP::makeSVGpic(vector<sim> info, int query) {
+void ChimeraCheckRDP::makeSVGpic(vector<sim> info) {
         try{
                 
-               string file = outputDir + querySeqs[query]->getName() + ".chimeracheck.svg";
+               string file = outputDir + querySeq->getName() + ".chimeracheck.svg";
                 ofstream outsvg;
                 openOutputFile(file, outsvg);
                 
@@ -346,7 +269,7 @@ void ChimeraCheckRDP::makeSVGpic(vector<sim> info, int query) {
                 
                 outsvg << "<svg xmlns:svg=\"http://www.w3.org/2000/svg\" xmlns=\"http://www.w3.org/2000/svg\" width=\"100%\" height=\"100%\" viewBox=\"0 0 700 " + toString(width) + "\">\n";
                 outsvg << "<g>\n";
-               outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString((width / 2) - 150) + "\" y=\"25\">Plotted IS values for " + querySeqs[query]->getName() + "</text>\n";
+               outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString((width / 2) - 150) + "\" y=\"25\">Plotted IS values for " + querySeq->getName() + "</text>\n";
                 
                 outsvg <<  "<line x1=\"75\" y1=\"600\" x2=\"" + toString((info.size()*5) + 75) + "\" y2=\"600\" stroke=\"black\" stroke-width=\"2\"/>\n";  
                 outsvg <<  "<line x1=\"75\" y1=\"600\" x2=\"75\" y2=\"125\" stroke=\"black\" stroke-width=\"2\"/>\n";
@@ -392,96 +315,5 @@ void ChimeraCheckRDP::makeSVGpic(vector<sim> info, int query) {
         }
  }
  //***************************************************************************************************************
-void ChimeraCheckRDP::createProcessesIS() {
-       try {
-       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
-               int process = 0;
-               vector<int> processIDS;
-               
-               //loop through and create all the processes you want
-               while (process != processors) {
-                       int pid = fork();
-                       
-                       if (pid > 0) {
-                               processIDS.push_back(pid);  
-                               process++;
-                       }else if (pid == 0){
-                                                       
-                               for (int i = lines[process]->start; i < lines[process]->end; i++) {
-                                       IS[i] = findIS(i);  
-                               }                               
-                               
-                               //write out data to file so parent can read it
-                               ofstream out;
-                               string s = toString(getpid()) + ".temp";
-                               openOutputFile(s, out);
-                               
-                               //output pairs
-                               for (int i = lines[process]->start; i < lines[process]->end; i++) {
-                                       out << IS[i].size() << endl;
-                                       for (int j = 0; j < IS[i].size(); j++) {
-                                               out << IS[i][j].leftParent << '\t'<< IS[i][j].rightParent << '\t' << IS[i][j].midpoint << '\t' << IS[i][j].score << endl;
-                                       }
-                               }
-
-                               out.close();
-                               
-                               exit(0);
-                       }else { mothurOut("unable to spawn the necessary processes."); mothurOutEndLine(); exit(0); }
-               }
-               
-               //force parent to wait until all the processes are done
-               for (int i=0;i<processors;i++) { 
-                       int temp = processIDS[i];
-                       wait(&temp);
-               }
-       
-               //get data created by processes
-               for (int i=0;i<processors;i++) { 
-                       ifstream in;
-                       string s = toString(processIDS[i]) + ".temp";
-                       openInputFile(s, in);
-               
-                       //get pairs
-                       for (int k = lines[i]->start; k < lines[i]->end; k++) {
-                       
-                               int size;
-                               in >> size; gobble(in);
-                               
-                               string left, right;
-                               int mid;
-                               float score;
-                               
-                               IS[k].clear();
-                               
-                               for (int j = 0; j < size; j++) {
-                                       in >> left >> right >> mid >> score;  gobble(in);
-                                       
-                                       sim temp;
-                                       temp.leftParent = left;
-                                       temp.rightParent = right;
-                                       temp.midpoint = mid;
-                                       temp.score = score;
-                                       
-                                       IS[k].push_back(temp);
-                               }
-                       }
-                       
-                       in.close();
-                       remove(s.c_str());
-               }
-#else
-                       for (int i = 0; i < querySeqs.size(); i++) {
-                               IS[i] = findIS(i);
-                       }
-#endif         
-       }
-       catch(exception& e) {
-               errorOut(e, "ChimeraCheckRDP", "createProcessesIS");
-               exit(1);
-       }
-}
-
-//***************************************************************************************************************
  
  
diff --git a/chimeracheckrdp.h b/chimeracheckrdp.h

index 6193d6d64d47a3dcec90d47844fe62d16ccd6fb7..e12e7d3d9d6cf3a08e44d453e7ea5afd2a34f06b 100644 (file)
--- a/chimeracheckrdp.h
+++ b/chimeracheckrdp.h
@@ -25,45 +25,30 @@
  class ChimeraCheckRDP : public Chimera {
         
         public:
-               ChimeraCheckRDP(string, string, string);        
+               ChimeraCheckRDP(string, string);        
                 ~ChimeraCheckRDP();
                 
-               int getChimeras();
+               int getChimeras(Sequence*);
                 void print(ostream&);
-               
-               void setCons(string){};
-               void setQuantiles(string q) {};
-               
+               void doPrep();
                 
         private:
                 
-               vector<linePair*> lines;
-               vector<Sequence*> querySeqs;
+               
+               Sequence* querySeq;
                 AlignmentDB* templateDB;
                 Kmer* kmer;
-               
-               vector< vector<sim> > IS;  //IS[0] is the vector of IS values for each window for querySeqs[0]
-               float chimeraCutoff;
-               
-               
-               //map<string, vector< map<int, int> > >:: iterator it;
-               //map<int, int>::iterator it2;
-               
-               vector<Sequence> closest;               //closest[0] is the closest overall seq to querySeqs[0].
-               
-               string fastafile, templateFile;
+               Sequence closest;               //closest is the closest overall seq to query
+
+               vector<sim>  IS;  //IS is the vector of IS values for each window for query
+               string fastafile;
                 map<string, string> names;
                 
-               vector<sim> findIS(int);
+               vector<sim> findIS();
                 int calcKmers(map<int, int>, map<int, int>);
-               void getCutoff();
-               void makeSVGpic(vector<sim>, int);
+               void makeSVGpic(vector<sim>);
                 void readName(string);
-                               
-               void createProcessesIS();
-               
  };
-
  /***********************************************************/
  
  #endif
diff --git a/chimerarealigner.cpp b/chimerarealigner.cpp

new file mode 100644 (file)

index 0000000..57c3641
--- /dev/null
+++ b/chimerarealigner.cpp
@@ -0,0 +1,108 @@
+/*
+ *  chimerarealigner.cpp
+ *  Mothur
+ *
+ *  Created by westcott on 2/12/10.
+ *  Copyright 2010 Schloss Lab. All rights reserved.
+ *
+ */
+
+#include "chimerarealigner.h"
+#include "needlemanoverlap.hpp"
+#include "nast.hpp"
+
+//***************************************************************************************************************
+ChimeraReAligner::ChimeraReAligner(vector<Sequence*> t, int m, int mm) : match(m), misMatch(mm) {  templateSeqs = t;  }
+//***************************************************************************************************************
+ChimeraReAligner::~ChimeraReAligner() {}       
+//***************************************************************************************************************
+void ChimeraReAligner::reAlign(Sequence* query, vector<results> parents) {
+       try {
+               if (parents.size() != 0) {
+                       vector<Sequence*> queryParts;
+                       vector<Sequence*> parentParts;  //queryParts[0] relates to parentParts[0]
+                       
+                       string qAligned = query->getAligned();
+                       string newQuery = "";
+                       
+                       //sort parents by region start
+                       sort(parents.begin(), parents.end(), compareRegionStart);
+
+                       //make sure you don't cutoff beginning of query 
+                       if (parents[0].nastRegionStart > 0) {  newQuery += qAligned.substr(0, parents[0].nastRegionStart+1);  }
+                       int longest = 0;
+                       //take query and break apart into pieces using breakpoints given by results of parents
+                       for (int i = 0; i < parents.size(); i++) {
+                               int length = parents[i].nastRegionEnd - parents[i].nastRegionStart+1;
+                               string q = qAligned.substr(parents[i].nastRegionStart, length);
+                               Sequence* queryFrag = new Sequence(query->getName(), q);
+                               
+                               queryParts.push_back(queryFrag);
+                               
+                               Sequence* parent = getSequence(parents[i].parent);
+                               string p = parent->getAligned();
+                               p = p.substr(parents[i].nastRegionStart, length);
+                               parent->setAligned(p);
+                               
+                               parentParts.push_back(parent);
+                               
+                               if (q.length() > longest)       { longest = q.length(); }
+                               if (p.length() > longest)       { longest = p.length(); }
+                       }
+                                                                                       
+                       //align each peice to correct parent from results
+                       for (int i = 0; i < queryParts.size(); i++) {
+                               alignment = new NeedlemanOverlap(-2.0, match, misMatch, longest+1); //default gapopen, match, mismatch, longestbase
+                               Nast nast(alignment, queryParts[i], parentParts[i]);
+                               delete alignment;
+                       }
+                                                                                               
+                       //recombine pieces to form new query sequence
+                       for (int i = 0; i < queryParts.size(); i++) {
+                               newQuery += queryParts[i]->getAligned();
+                       }
+                       
+                       //make sure you don't cutoff end of query 
+                       if (parents[parents.size()-1].nastRegionEnd < qAligned.length()) {  newQuery += qAligned.substr(parents[parents.size()-1].nastRegionEnd-1);  }
+                       
+                       //set query to new aligned string
+                       query->setAligned(newQuery);
+                       
+                       //free memory
+                       for (int i = 0; i < queryParts.size(); i++) { delete queryParts[i];  }
+                       for (int i = 0; i < parentParts.size(); i++) { delete parentParts[i];  }
+                       
+               } //else leave query alone, you have bigger problems...
+               
+       }
+       catch(exception& e) {
+               errorOut(e, "ChimeraReAligner", "reAlign");
+               exit(1);
+       }
+}
+//***************************************************************************************************************
+Sequence* ChimeraReAligner::getSequence(string name) {
+       try{
+               Sequence* temp;
+               
+               //look through templateSeqs til you find it
+               int spot = -1;
+               for (int i = 0; i < templateSeqs.size(); i++) {
+                       if (name == templateSeqs[i]->getName()) {  
+                               spot = i;
+                               break;
+                       }
+               }
+               
+               if(spot == -1) { mothurOut("Error: Could not find sequence."); mothurOutEndLine(); return NULL; }
+               
+               temp = new Sequence(templateSeqs[spot]->getName(), templateSeqs[spot]->getAligned());
+               
+               return temp;
+       }
+       catch(exception& e) {
+               errorOut(e, "ChimeraReAligner", "getSequence");
+               exit(1);
+       }
+}
+//***************************************************************************************************************
diff --git a/chimerarealigner.h b/chimerarealigner.h

new file mode 100644 (file)

index 0000000..2b53841
--- /dev/null
+++ b/chimerarealigner.h
@@ -0,0 +1,37 @@
+#ifndef CHIMERAREALIGNER_H
+#define CHIMERAREALIGNER_H
+
+/*
+ *  chimerarealigner.h
+ *  Mothur
+ *
+ *  Created by westcott on 2/12/10.
+ *  Copyright 2010 Schloss Lab. All rights reserved.
+ *
+ */
+
+#include "chimera.h"
+#include "alignment.hpp"
+
+/***********************************************************/
+
+class ChimeraReAligner  {
+       
+       public:
+               ChimeraReAligner(vector<Sequence*>, int, int);   
+               ~ChimeraReAligner();
+               
+               void reAlign(Sequence*, vector<results>);
+                               
+       private:
+               Sequence* querySeq;
+               Alignment* alignment;
+               vector<Sequence*> templateSeqs;
+               int match, misMatch;
+               
+               Sequence* getSequence(string);  //find sequence from name
+};
+/***********************************************************/
+
+#endif
+
diff --git a/chimeraseqscommand.cpp b/chimeraseqscommand.cpp

index b06d3693259c7f2361e428cb965dbd99301ad7d8..e1eebf400c052f7f7926e0029f6090ae0e93ede3 100644 (file)
--- a/chimeraseqscommand.cpp
+++ b/chimeraseqscommand.cpp
@@ -26,7 +26,8 @@ ChimeraSeqsCommand::ChimeraSeqsCommand(string option){
                 
                 else {
                         //valid paramters for this command
-                       string Array[] =  {"fasta", "filter", "correction", "processors", "method", "window", "increment", "template", "conservation", "quantile", "mask", "numwanted", "ksize", "svg", "name", "match","mismatch", "divergence", "minsim", "parents", "iters","outputdir","inputdir" };
+                       string Array[] =  {"fasta", "filter", "correction", "processors", "method", "window", "increment", "template", "conservation", "quantile", "mask", 
+                       "numwanted", "ksize", "svg", "name", "match","mismatch", "divergence", "minsim","mincov","minbs", "minsnp","parents", "iters","outputdir","inputdir" };
                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
                         
                         OptionParser parser(option);
@@ -148,7 +149,7 @@ ChimeraSeqsCommand::ChimeraSeqsCommand(string option){
                         svg = isTrue(temp);
                         
                         temp = validParameter.validFile(parameters, "window", false);   
-                       if ((temp == "not found") && (method == "chimeraslayer")) { temp = "100"; }                     
+                       if ((temp == "not found") && (method == "chimeraslayer")) { temp = "50"; }                      
                         else if (temp == "not found") { temp = "0"; }
                         convert(temp, window);
                         
@@ -158,25 +159,37 @@ ChimeraSeqsCommand::ChimeraSeqsCommand(string option){
                         temp = validParameter.validFile(parameters, "mismatch", false);                 if (temp == "not found") { temp = "-4"; }
                         convert(temp, mismatch);
                         
-                       temp = validParameter.validFile(parameters, "divergence", false);               if (temp == "not found") { temp = "1.0"; }
+                       temp = validParameter.validFile(parameters, "divergence", false);               if (temp == "not found") { temp = "1.007"; }
                         convert(temp, divR);
                         
                         temp = validParameter.validFile(parameters, "minsim", false);                   if (temp == "not found") { temp = "90"; }
                         convert(temp, minSimilarity);
                         
-                       temp = validParameter.validFile(parameters, "parents", false);                  if (temp == "not found") { temp = "5"; }
+                       temp = validParameter.validFile(parameters, "mincov", false);                   if (temp == "not found") { temp = "70"; }
+                       convert(temp, minCoverage);
+                       
+                       temp = validParameter.validFile(parameters, "minbs", false);                    if (temp == "not found") { temp = "90"; }
+                       convert(temp, minBS);
+                       
+                       temp = validParameter.validFile(parameters, "minsnp", false);                   if (temp == "not found") { temp = "10"; }
+                       convert(temp, minSNP);
+
+                       temp = validParameter.validFile(parameters, "parents", false);                  if (temp == "not found") { temp = "3"; }
                         convert(temp, parents); 
                         
-                       temp = validParameter.validFile(parameters, "iters", false);                    if (temp == "not found") { temp = "1000"; }
+                       temp = validParameter.validFile(parameters, "iters", false);    
+                       if ((temp == "not found") && (method == "chimeraslayer")) { temp = "100"; }             
+                       else if (temp == "not found") { temp = "1000"; }
                         convert(temp, iters); 
                          
                         temp = validParameter.validFile(parameters, "increment", false);                
-                       if ((temp == "not found") && ((method == "chimeracheck") || (method == "chimeraslayer"))) { temp = "10"; }
+                       if ((temp == "not found") && (method == "chimeracheck")) { temp = "10"; }
+                       else if ((temp == "not found") && (method == "chimeraslayer")) { temp = "5"; }
                         else if (temp == "not found") { temp = "25"; }
                         convert(temp, increment);
                         
                         temp = validParameter.validFile(parameters, "numwanted", false);
-                       if ((temp == "not found") && (method == "chimeraslayer")) { temp = "10"; }              
+                       if ((temp == "not found") && (method == "chimeraslayer")) { temp = "15"; }              
                         else if (temp == "not found") { temp = "20"; }
                         convert(temp, numwanted);
  
@@ -216,7 +229,11 @@ void ChimeraSeqsCommand::help(){
                 mothurOut("The ksize parameter allows you to input kmersize. \n");
                 mothurOut("The svg parameter allows you to specify whether or not you would like a svg file outputted for each query sequence.\n");
                 mothurOut("The name parameter allows you to enter a file containing names of sequences you would like .svg files for.\n");
-               //mothurOut("The iters parameter allows you to specify the number of bootstrap iters to do with the chimeraslayer method.\n");
+               mothurOut("The iters parameter allows you to specify the number of bootstrap iters to do with the chimeraslayer method.\n");
+               mothurOut("The minsim parameter allows you .... \n");
+               mothurOut("The mincov parameter allows you to specify minimum coverage by closest matches found in template. Default is 70, meaning 70%. \n");
+               mothurOut("The minbs parameter allows you to specify minimum bootstrap support for calling a sequence chimeric. Default is 90, meaning 90%. \n");
+               mothurOut("The minsnp parameter allows you to specify percent of SNPs to sample on each side of breakpoint for computing bootstrap support (default: 10) \n");
                 mothurOut("NOT ALL PARAMETERS ARE USED BY ALL METHODS. Please look below for method specifics.\n\n");
                 mothurOut("Details for each method: \n"); 
                 mothurOut("\tpintail: \n"); 
@@ -252,22 +269,20 @@ int ChimeraSeqsCommand::execute(){
                 
                 if (abort == true) { return 0; }
                 
+               int start = time(NULL); 
+               
                 if (method == "bellerophon")                    {               chimera = new Bellerophon(fastafile, outputDir);                        }
-               else if (method == "pintail")                   {               chimera = new Pintail(fastafile, templatefile, outputDir);      }
-               else if (method == "ccode")                             {               chimera = new Ccode(fastafile, templatefile, outputDir);                        }
-               else if (method == "chimeracheck")              {               chimera = new ChimeraCheckRDP(fastafile, templatefile, outputDir);      }
-               else if (method == "chimeraslayer")             {               chimera = new ChimeraSlayer(fastafile, templatefile);           }
+               else if (method == "pintail")                   {               chimera = new Pintail(fastafile, outputDir);                            }
+               else if (method == "ccode")                             {               chimera = new Ccode(fastafile, outputDir);                                      }
+               else if (method == "chimeracheck")              {               chimera = new ChimeraCheckRDP(fastafile, outputDir);            }
+               else if (method == "chimeraslayer")             {               chimera = new ChimeraSlayer("blast");                                           }
                 else { mothurOut("Not a valid method."); mothurOutEndLine(); return 0;          }
                 
                 //set user options
                 if (maskfile == "default") { mothurOut("I am using the default 236627 EU009184.1 Shigella dysenteriae str. FBD013."); mothurOutEndLine();  }
                 
-               //saves time to avoid generating it
                 chimera->setCons(consfile);     
-               
-               //saves time to avoid generating it
                 chimera->setQuantiles(quanfile);                                
-               
                 chimera->setMask(maskfile);
                 chimera->setFilter(filter);
                 chimera->setCorrection(correction);
@@ -283,25 +298,126 @@ int ChimeraSeqsCommand::execute(){
                 chimera->setDivR(divR);
                 chimera->setParents(parents);
                 chimera->setMinSim(minSimilarity);
+               chimera->setMinCoverage(minCoverage);
+               chimera->setMinBS(minBS);
+               chimera->setMinSNP(minSNP);
                 chimera->setIters(iters);
+               chimera->setTemplateFile(templatefile);
+
                 
-                               
-               //find chimeras
-               int error = chimera->getChimeras();
                 
-               //there was a problem
-               if (error == 1) {  return 0;  }
+               vector<Sequence*> templateSeqs;
+               if ((method != "bellerophon") && (method != "chimeracheck")) {   
+                       templateSeqs = chimera->readSeqs(templatefile);   
+                       if (chimera->getUnaligned()) { 
+                               mothurOut("Your sequences need to be aligned when you use the chimeraslayer method."); mothurOutEndLine(); 
+                               //free memory
+                               for (int i = 0; i < templateSeqs.size(); i++)           {  delete templateSeqs[i];              }
+                               return 0; 
+                       }
+                       
+                       //set options
+                       chimera->setTemplateSeqs(templateSeqs);
  
+               }else if (method == "bellerophon") {//run bellerophon separately since you need to read entire fastafile to run it
+                       chimera->getChimeras();
+                       
+                       string outputFName = outputDir + getRootName(getSimpleName(fastafile)) + method + maskfile + ".chimeras";
+                       ofstream out;
+                       openOutputFile(outputFName, out);
+                       
+                       chimera->print(out);
+                       out.close();
+                       return 0;
+               }
+               
+               //some methods need to do prep work before processing the chimeras
+               chimera->doPrep(); 
+               
+               ofstream outHeader;
+               string tempHeader = outputDir + getRootName(getSimpleName(fastafile)) + method + maskfile + ".chimeras.tempHeader";
+               openOutputFile(tempHeader, outHeader);
+               
+               chimera->printHeader(outHeader);
+               outHeader.close();
+               
                 string outputFileName = outputDir + getRootName(getSimpleName(fastafile)) + method + maskfile + ".chimeras";
-               ofstream out;
-               openOutputFile(outputFileName, out);
+
+               //break up file
+               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+                       if(processors == 1){
+                               ifstream inFASTA;
+                               openInputFile(fastafile, inFASTA);
+                               numSeqs=count(istreambuf_iterator<char>(inFASTA),istreambuf_iterator<char>(), '>');
+                               inFASTA.close();
+                               
+                               lines.push_back(new linePair(0, numSeqs));
+                               
+                               driver(lines[0], outputFileName, fastafile);
+                               
+                       }else{
+                               vector<int> positions;
+                               processIDS.resize(0);
+                               
+                               ifstream inFASTA;
+                               openInputFile(fastafile, inFASTA);
+                               
+                               string input;
+                               while(!inFASTA.eof()){
+                                       input = getline(inFASTA);
+                                       if (input.length() != 0) {
+                                               if(input[0] == '>'){    long int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1);  }
+                                       }
+                               }
+                               inFASTA.close();
+                               
+                               numSeqs = positions.size();
+                               
+                               int numSeqsPerProcessor = numSeqs / processors;
+                               
+                               for (int i = 0; i < processors; i++) {
+                                       long int startPos = positions[ i * numSeqsPerProcessor ];
+                                       if(i == processors - 1){
+                                               numSeqsPerProcessor = numSeqs - i * numSeqsPerProcessor;
+                                       }
+                                       lines.push_back(new linePair(startPos, numSeqsPerProcessor));
+                               }
+                               
+                               
+                               createProcesses(outputFileName, fastafile); 
+                               
+                               rename((outputFileName + toString(processIDS[0]) + ".temp").c_str(), outputFileName.c_str());
+                                                               
+                               //append alignment and report files
+                               for(int i=1;i<processors;i++){
+                                       appendOutputFiles((outputFileName + toString(processIDS[i]) + ".temp"), outputFileName);
+                                       remove((outputFileName + toString(processIDS[i]) + ".temp").c_str());
+                               }
+                       }
+
+               #else
+                       ifstream inFASTA;
+                       openInputFile(candidateFileNames[s], inFASTA);
+                       numSeqs=count(istreambuf_iterator<char>(inFASTA),istreambuf_iterator<char>(), '>');
+                       inFASTA.close();
+                       lines.push_back(new linePair(0, numSeqs));
+                       
+                       driver(lines[0], outputFileName, fastafile);
+               #endif
                 
-               //print results
-               chimera->print(out);
+               //mothurOut("Output File Names: ");
+               //if ((filter) && (method == "bellerophon")) { mothurOut(
+               //if (outputDir == "") { fastafile = getRootName(fastafile) + "filter.fasta"; }
+               //      else                             { fastafile = outputDir + getRootName(getSimpleName(fastafile)) + "filter.fasta"; }
                 
-               out.close();
+               appendOutputFiles(tempHeader, outputFileName);
+               remove(tempHeader.c_str());
+
+               for (int i = 0; i < templateSeqs.size(); i++)           {   delete templateSeqs[i];     }
+               
+               if (method == "chimeracheck") { mothurOutEndLine(); mothurOut("This method does not determine if a sequence is chimeric, but allows you to make that determination based on the IS values."); mothurOutEndLine();  }
                 
-               delete chimera;
+               mothurOutEndLine(); mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences.");       mothurOutEndLine();
                 
                 return 0;
                 
@@ -310,6 +426,107 @@ int ChimeraSeqsCommand::execute(){
                 errorOut(e, "ChimeraSeqsCommand", "execute");
                 exit(1);
         }
+}//**********************************************************************************************************************
+
+int ChimeraSeqsCommand::driver(linePair* line, string outputFName, string filename){
+       try {
+               ofstream out;
+               openOutputFile(outputFName, out);
+               
+               ifstream inFASTA;
+               openInputFile(filename, inFASTA);
+
+               inFASTA.seekg(line->start);
+               
+               for(int i=0;i<line->numSeqs;i++){
+               
+                       Sequence* candidateSeq = new Sequence(inFASTA);  gobble(inFASTA);
+                               
+                       if (candidateSeq->getName() != "") { //incase there is a commented sequence at the end of a file
+                                       
+                               //find chimeras
+                               chimera->getChimeras(candidateSeq);
+               
+                               //print results
+                               chimera->print(out);
+                       }
+                       delete candidateSeq;
+                       
+                       //report progress
+                       if((i+1) % 100 == 0){   mothurOut("Processing sequence: " + toString(i+1)); mothurOutEndLine();         }
+               }
+               //report progress
+               if((line->numSeqs) % 100 != 0){ mothurOut("Processing sequence: " + toString(line->numSeqs)); mothurOutEndLine();               }
+               
+               out.close();
+               inFASTA.close();
+                               
+               return 1;
+       }
+       catch(exception& e) {
+               errorOut(e, "ChimeraSeqsCommand", "driver");
+               exit(1);
+       }
  }
+
  /**************************************************************************************************/
  
+void ChimeraSeqsCommand::createProcesses(string outputFileName, string filename) {
+       try {
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+               int process = 0;
+               //              processIDS.resize(0);
+               
+               //loop through and create all the processes you want
+               while (process != processors) {
+                       int pid = fork();
+                       
+                       if (pid > 0) {
+                               processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
+                               process++;
+                       }else if (pid == 0){
+                               driver(lines[process], outputFileName + toString(getpid()) + ".temp", filename);
+                               exit(0);
+                       }else { mothurOut("unable to spawn the necessary processes."); mothurOutEndLine(); exit(0); }
+               }
+               
+               //force parent to wait until all the processes are done
+               for (int i=0;i<processors;i++) { 
+                       int temp = processIDS[i];
+                       wait(&temp);
+               }
+#endif         
+       }
+       catch(exception& e) {
+               errorOut(e, "ChimeraSeqsCommand", "createProcesses");
+               exit(1);
+       }
+}
+
+/**************************************************************************************************/
+
+void ChimeraSeqsCommand::appendOutputFiles(string temp, string filename) {
+       try{
+               
+               ofstream output;
+               ifstream input;
+               
+               openOutputFileAppend(temp, output);
+               openInputFile(filename, input);
+               
+               while(char c = input.get()){
+                       if(input.eof())         {       break;                  }
+                       else                            {       output << c;    }
+               }
+               
+               input.close();
+               output.close();
+       }
+       catch(exception& e) {
+               errorOut(e, "ChimeraSeqsCommand", "appendOuputFiles");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+
+
diff --git a/chimeraseqscommand.h b/chimeraseqscommand.h

index 46e2bc88300c7a3e89f8fb985100d6edd50129a1..bac02852744952477c97c0ea7dc284f56ff589bc 100644 (file)
--- a/chimeraseqscommand.h
+++ b/chimeraseqscommand.h
@@ -26,11 +26,23 @@ public:
         
                 
  private:
+
+       struct linePair {
+               int start;
+               int numSeqs;
+               linePair(long int i, int j) : start(i), numSeqs(j) {}
+       };
+       vector<int> processIDS;   //processid
+       vector<linePair*> lines;
         
+       int driver(linePair*, string, string);
+       void createProcesses(string, string);
+       void appendOutputFiles(string, string); 
+
         bool abort;
         string method, fastafile, templatefile, consfile, quanfile, maskfile, namefile, outputDir;
         bool filter, correction, svg, printAll;
-       int processors, midpoint, averageLeft, averageRight, window, iters, increment, numwanted, ksize, match, mismatch, parents, minSimilarity;
+       int processors, midpoint, averageLeft, averageRight, window, iters, increment, numwanted, ksize, match, mismatch, parents, minSimilarity, minCoverage, minBS, minSNP, numSeqs;
         float divR;
         Chimera* chimera;
         
diff --git a/chimeraslayer.cpp b/chimeraslayer.cpp

index 5d3d6706ede49b38c5385d29c1d127b1488c4000..b2c0a0561a0f6baf035f83fc4113d3a0f16ef643 100644 (file)
--- a/chimeraslayer.cpp
+++ b/chimeraslayer.cpp
@@ -8,161 +8,109 @@
   */
  
  #include "chimeraslayer.h"
+#include "chimerarealigner.h"
  
  //***************************************************************************************************************
-ChimeraSlayer::ChimeraSlayer(string filename, string temp) {  fastafile = filename;  templateFile = temp;  }
+ChimeraSlayer::ChimeraSlayer(string mode) : searchMethod(mode) {       decalc = new DeCalculator();      }
  //***************************************************************************************************************
-
-ChimeraSlayer::~ChimeraSlayer() {
-       try {
-               for (int i = 0; i < querySeqs.size(); i++)                      {  delete querySeqs[i];                 }
-               for (int i = 0; i < templateSeqs.size(); i++)           {  delete templateSeqs[i];              }
-       }
-       catch(exception& e) {
-               errorOut(e, "ChimeraSlayer", "~ChimeraSlayer");
-               exit(1);
-       }
-}      
+ChimeraSlayer::~ChimeraSlayer() {      delete decalc;   }
+//***************************************************************************************************************
+void ChimeraSlayer::printHeader(ostream& out) {
+       mothurOutEndLine();
+       mothurOut("Only reporting sequence supported by 90% of bootstrapped results.");
+       mothurOutEndLine();
+}
  //***************************************************************************************************************
  void ChimeraSlayer::print(ostream& out) {
         try {
-               mothurOutEndLine();
-               mothurOut("Only reporting sequence supported by 90% of bootstrapped results.");
-               mothurOutEndLine();
-               
-               for (int i = 0; i < querySeqs.size(); i++) {
-               
-                       if (chimeraFlags[i] == "yes") { 
-//                             cout << i << endl;
-                               if ((chimeraResults[i][0].bsa >= 90.0) || (chimeraResults[i][0].bsb >= 90.0)) {
-                                       mothurOut(querySeqs[i]->getName() + "\tyes"); mothurOutEndLine();
-                                       out << querySeqs[i]->getName() << "\tyes" << endl;
-                               }else {
-                                       out << querySeqs[i]->getName() << "\tyes" << endl;
-                                       //mothurOut(querySeqs[i]->getName() + "\tno"); mothurOutEndLine();
+               if (chimeraFlags == "yes") {
+                       string chimeraFlag = "no";
+                       if(  (chimeraResults[0].bsa >= minBS && chimeraResults[0].divr_qla_qrb >= divR)
+                          ||
+                          (chimeraResults[0].bsb >= minBS && chimeraResults[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
+                       
+                       
+                       if (chimeraFlag == "yes") {     
+                               if ((chimeraResults[0].bsa >= minBS) || (chimeraResults[0].bsb >= minBS)) {
+                                       mothurOut(querySeq->getName() + "\tyes"); mothurOutEndLine();
                                 }
-
-                               printBlock(chimeraResults[i][0], out, i);
-                               
-                               out << endl;
-                       }else{
-                               out << querySeqs[i]->getName() << "\tno" << endl;
-                               //mothurOut(querySeqs[i]->getName() + "\tno"); mothurOutEndLine();
                         }
-               }
-                               
+                       out << querySeq->getName() << "\tyes" << endl;
+                       printBlock(chimeraResults[0], out);
+                       out << endl;
+               }else {  out << querySeq->getName() << "\tno" << endl;  }
+               
         }
         catch(exception& e) {
                 errorOut(e, "ChimeraSlayer", "print");
                 exit(1);
         }
  }
-
  //***************************************************************************************************************
-int ChimeraSlayer::getChimeras() {
+int ChimeraSlayer::getChimeras(Sequence* query) {
         try {
+               chimeraFlags = "no";
+               querySeq = query;
                 
-               //read in query sequences and subject sequences
-               mothurOut("Reading sequences and template file... "); cout.flush();
-               querySeqs = readSeqs(fastafile);
-               templateSeqs = readSeqs(templateFile);
-               mothurOut("Done."); mothurOutEndLine();
-               
-               int numSeqs = querySeqs.size();
-               
-               if (unaligned) { mothurOut("Your sequences need to be aligned when you use the chimeraslayer method."); mothurOutEndLine(); return 1;  }
-               
-               chimeraResults.resize(numSeqs);
-               chimeraFlags.resize(numSeqs, "no");
-               spotMap.resize(numSeqs);
-               
-               //break up file if needed
-               int linesPerProcess = numSeqs / processors ;
-               
-               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
-                       //find breakup of sequences for all times we will Parallelize
-                       if (processors == 1) {   lines.push_back(new linePair(0, numSeqs));  }
-                       else {
-                               //fill line pairs
-                               for (int i = 0; i < (processors-1); i++) {                      
-                                       lines.push_back(new linePair((i*linesPerProcess), ((i*linesPerProcess) + linesPerProcess)));
-                               }
-                               //this is necessary to get remainder of processors / numSeqs so you don't miss any lines at the end
-                               int i = processors - 1;
-                               lines.push_back(new linePair((i*linesPerProcess), numSeqs));
-                       }
-               #else
-                       lines.push_back(new linePair(0, numSeqs));
-               #endif
-               
-               if (seqMask != "") {    decalc = new DeCalculator();    } //to use below
-               
-               //initialize spotMap
-               for (int j = 0; j < numSeqs; j++) {
-                       for (int i = 0; i < querySeqs[0]->getAligned().length(); i++) {
-                               spotMap[j][i] = i;
-                       }
+               for (int i = 0; i < query->getAligned().length(); i++) {
+                       spotMap[i] = i;
                 }
                 
                 //referenceSeqs, numWanted, matchScore, misMatchPenalty, divR, minSimilarity
-               maligner = new Maligner(templateSeqs, numWanted, match, misMatch, 1.01, minSim);
-               slayer = new Slayer(window, increment, minSim, divR, iters);
-               
-               for (int i = 0; i < querySeqs.size(); i++) {
+               maligner = new Maligner(templateSeqs, numWanted, match, misMatch, divR, minSim, minCov, searchMethod);
+               slayer = new Slayer(window, increment, minSim, divR, iters, minSNP);
                 
-                       string chimeraFlag = maligner->getResults(querySeqs[i]);
-                       //float percentIdentical = maligner->getPercentID();
-                       vector<results> Results = maligner->getOutput();
+               string chimeraFlag = maligner->getResults(query, decalc);
+               vector<results> Results = maligner->getOutput();
+
+               //realign query to parents to improve slayers detection rate
+               //ChimeraReAligner realigner(templateSeqs, match, misMatch);
+               //realigner.reAlign(query, Results);
                         
-                       cout << "Processing sequence: " << i+1 << endl;
+                       //if (chimeraFlag == "yes") {
                         
-                       //for (int j = 0; j < Results.size(); j++) {
-                               //cout << "regionStart = " << Results[j].regionStart << "\tRegionEnd = " << Results[j].regionEnd << "\tName = " << Results[j].parent << "\tPerQP = " << Results[j].queryToParent << "\tLocalPerQP = " << Results[j].queryToParentLocal << "\tdivR = " << Results[j].divR << endl;
-                       //}
+               //get sequence that were given from maligner results
+               vector<SeqDist> seqs;
+               map<string, float> removeDups;
+               map<string, float>::iterator itDup;
+               for (int j = 0; j < Results.size(); j++) {
+                       float dist = (Results[j].regionEnd - Results[j].regionStart + 1) * Results[j].queryToParentLocal;
+                       //only add if you are not a duplicate
+                       itDup = removeDups.find(Results[j].parent);
+                       if (itDup == removeDups.end()) { //this is not duplicate
+                               removeDups[Results[j].parent] = dist;
+                       }else if (dist > itDup->second) { //is this a stronger number for this parent
+                               removeDups[Results[j].parent] = dist;
+                       }
+               }
+               
+               for (itDup = removeDups.begin(); itDup != removeDups.end(); itDup++) {
+                       Sequence* seq = getSequence(itDup->first); //makes copy so you can filter and mask and not effect template
                         
-                       //if (chimeraFlag == "yes") {
+                       SeqDist member;
+                       member.seq = seq;
+                       member.dist = itDup->second;
                         
-                               //get sequence that were given from maligner results
-                               vector<SeqDist> seqs;
-                               map<string, float> removeDups;
-                               map<string, float>::iterator itDup;
-                               for (int j = 0; j < Results.size(); j++) {
-                                       float dist = (Results[j].regionEnd - Results[j].regionStart + 1) * Results[j].queryToParentLocal;
-                                       //only add if you are not a duplicate
-                                       itDup = removeDups.find(Results[j].parent);
-                                       if (itDup == removeDups.end()) { //this is not duplicate
-                                               removeDups[Results[j].parent] = dist;
-                                       }else if (dist > itDup->second) { //is this a stronger number for this parent
-                                               removeDups[Results[j].parent] = dist;
-                                       }
-                               }
-                               
-                               for (itDup = removeDups.begin(); itDup != removeDups.end(); itDup++) {
-                                       Sequence* seq = getSequence(itDup->first); //makes copy so you can filter and mask and not effect template
-                                       
-                                       SeqDist member;
-                                       member.seq = seq;
-                                       member.dist = itDup->second;
-                                       
-                                       seqs.push_back(member);
-                               }
-                               
-                               //limit number of parents to explore - default 5
-                               if (Results.size() > parents) {
-                                       //sort by distance
-                                       sort(seqs.begin(), seqs.end(), compareSeqDist);
-                                       //prioritize larger more similiar sequence fragments
-                                       reverse(seqs.begin(), seqs.end());
-                                       
-                                       for (int k = seqs.size()-1; k > (parents-1); k--)  {  
-                                               delete seqs[k].seq;
-                                               seqs.pop_back();        
-                                       }
-                               }
+                       seqs.push_back(member);
+               }
+               
+               //limit number of parents to explore - default 3
+               if (Results.size() > parents) {
+                       //sort by distance
+                       sort(seqs.begin(), seqs.end(), compareSeqDist);
+                       //prioritize larger more similiar sequence fragments
+                       reverse(seqs.begin(), seqs.end());
+                       
+                       for (int k = seqs.size()-1; k > (parents-1); k--)  {  
+                               delete seqs[k].seq;
+                               seqs.pop_back();        
+                       }
+               }
                 
-                               //put seqs into vector to send to slayer
-                               vector<Sequence*> seqsForSlayer;
-                               for (int k = 0; k < seqs.size(); k++) {  seqsForSlayer.push_back(seqs[k].seq);  }
+               //put seqs into vector to send to slayer
+               vector<Sequence*> seqsForSlayer;
+               for (int k = 0; k < seqs.size(); k++) {  seqsForSlayer.push_back(seqs[k].seq);  }
+               //cout << i+1 << "num parents = " << seqsForSlayer.size() << '\t' << chimeraFlag << endl;
  //ofstream out;
  //string name = querySeqs[i]->getName();
  //cout << name << endl;
@@ -172,50 +120,42 @@ int ChimeraSlayer::getChimeras() {
  //cout << name << endl;
  //name = name.substr(0, name.find_last_of("|"));
  //cout << name << endl;
-//string filename = name + ".seqsforslayer";
+//string filename = toString(i+1) + ".seqsforslayer";
  //openOutputFile(filename, out);       
-//for (int u = 0; u < seqsForSlayer.size(); u++) { seqsForSlayer[u]->printSequence(out);       }
+//cout << querySeqs[i]->getName() << endl;
+//for (int u = 0; u < seqsForSlayer.size(); u++) { cout << seqsForSlayer[u]->getName() << '\t'; seqsForSlayer[u]->printSequence(out);  }
+//cout << endl;
  //out.close();
-//filename = name + ".fasta";
+//filename = toString(i+1) + ".fasta";
  //openOutputFile(filename, out);       
-//q->printSequence(out);
+//querySeqs[i]->printSequence(out);
  //out.close();
  
-                       
-                               //mask then send to slayer...
-                               if (seqMask != "") {
-                                       decalc->setMask(seqMask);
-
-                                       //mask querys
-                                       decalc->runMask(querySeqs[i]);
-                                       
-                                       //mask parents
-                                       for (int k = 0; k < seqsForSlayer.size(); k++) {
-                                               decalc->runMask(seqsForSlayer[k]);
-                                       }
-                                       
-                                       for (int i = 0; i < numSeqs; i++) {
-                                               spotMap[i] = decalc->getMaskMap();
-                                       }
  
-                               }
-                               
-                               //send to slayer
-                               chimeraFlags[i] = slayer->getResults(querySeqs[i], seqsForSlayer);
-                               chimeraResults[i] = slayer->getOutput();
-                       
-                               //free memory
-                               for (int k = 0; k < seqs.size(); k++) {  delete seqs[k].seq;   }
-                       //}
                         
-               }       
-               //free memory
-               for (int i = 0; i < lines.size(); i++)                                  {       delete lines[i];        }
-               
+               //mask then send to slayer...
                 if (seqMask != "") {
-                       delete decalc; 
+                       decalc->setMask(seqMask);
+                       
+                       //mask querys
+                       decalc->runMask(query);
+                       
+                       //mask parents
+                       for (int k = 0; k < seqsForSlayer.size(); k++) {
+                               decalc->runMask(seqsForSlayer[k]);
+                       }
+                       
+                       spotMap = decalc->getMaskMap();
                 }
                 
+               //send to slayer
+               chimeraFlags = slayer->getResults(query, seqsForSlayer);
+               chimeraResults = slayer->getOutput();
+       
+               //free memory
+               for (int k = 0; k < seqs.size(); k++) {  delete seqs[k].seq;   }
+               //}
+                       
                 return 0;
         }
         catch(exception& e) {
@@ -224,37 +164,12 @@ int ChimeraSlayer::getChimeras() {
         }
  }
  //***************************************************************************************************************
-Sequence* ChimeraSlayer::getSequence(string name) {
-       try{
-               Sequence* temp;
-               
-               //look through templateSeqs til you find it
-               int spot = -1;
-               for (int i = 0; i < templateSeqs.size(); i++) {
-                       if (name == templateSeqs[i]->getName()) {  
-                               spot = i;
-                               break;
-                       }
-               }
-               
-               if(spot == -1) { mothurOut("Error: Could not find sequence in chimeraSlayer."); mothurOutEndLine(); return NULL; }
-               
-               temp = new Sequence(templateSeqs[spot]->getName(), templateSeqs[spot]->getAligned());
-               
-               return temp;
-       }
-       catch(exception& e) {
-               errorOut(e, "ChimeraSlayer", "getSequence");
-               exit(1);
-       }
-}
-//***************************************************************************************************************
-void ChimeraSlayer::printBlock(data_struct data, ostream& out, int i){
+void ChimeraSlayer::printBlock(data_struct data, ostream& out){
         try {
                 
                 out << "parentA: " << data.parentA.getName() << "  parentB: " << data.parentB.getName()  << endl;
-               out << "Left Window: " << spotMap[i][data.winLStart] << " " << spotMap[i][data.winLEnd] << endl;
-               out << "Right Window: " << spotMap[i][data.winRStart] << " " << spotMap[i][data.winREnd] << endl;
+               out << "Left Window: " << spotMap[data.winLStart] << " " << spotMap[data.winLEnd] << endl;
+               out << "Right Window: " << spotMap[data.winRStart] << " " << spotMap[data.winREnd] << endl;
                 
                 out << "Divergence of Query to Leftside ParentA and Rightside ParentB: " << data.divr_qla_qrb << '\t' << "Bootstrap: " << data.bsa << endl;
                 out << "Divergence of Query to Rightside ParentA and Leftside ParentB: " << data.divr_qlb_qra << '\t' << "Bootstrap: " << data.bsb << endl;
@@ -263,6 +178,8 @@ void ChimeraSlayer::printBlock(data_struct data, ostream& out, int i){
                 out << "Similarity of query to parentA: " << data.qa << endl;
                 out << "Similarity of query to parentB: " << data.qb << endl;
                 
+               out << "Percent_ID QLA_QRB: " << data.qla_qrb << endl;
+               out << "Percent_ID QLB_QRA: " << data.qlb_qra << endl;
                 //out << "Per_id(QL,A): " << data.qla << endl;
                 //out << "Per_id(QL,B): " << data.qlb << endl;
                 //out << "Per_id(QR,A): " << data.qra << endl;
diff --git a/chimeraslayer.h b/chimeraslayer.h

index 5343ec2a58c8a043ae23ac7a58dbeae56744c1b6..4095b86a4b0855e31b05bbfd775504801cc4ed1f 100644 (file)
--- a/chimeraslayer.h
+++ b/chimeraslayer.h
@@ -23,32 +23,25 @@
  class ChimeraSlayer : public Chimera {
         
         public:
-               ChimeraSlayer(string, string);  
+               ChimeraSlayer(string);  
                 ~ChimeraSlayer();
                 
-               int getChimeras();
+               int getChimeras(Sequence*);
                 void print(ostream&);
-               
-               void setCons(string){};
-               void setQuantiles(string q) {};
-               
+               void printHeader(ostream&);
                 
         private:
+               Sequence* querySeq;
                 DeCalculator* decalc;
                 Maligner* maligner;
                 Slayer* slayer;
-               vector<linePair*> lines;
-               vector<Sequence*> querySeqs;
-               vector<Sequence*> templateSeqs;
-               vector< map<int, int> > spotMap;
+               map<int, int>  spotMap;
                 
-               vector< vector<data_struct> > chimeraResults;
-               vector<string> chimeraFlags;
-                               
-               string fastafile, templateFile;
-               
-               Sequence* getSequence(string);  //find sequence from name
-               void printBlock(data_struct, ostream&, int i);
+               vector<data_struct>  chimeraResults;
+               string chimeraFlags, searchMethod;
+               string fastafile;
+       
+               void printBlock(data_struct, ostream&);
  };
  
  /************************************************************************/
diff --git a/commandfactory.cpp b/commandfactory.cpp

index c787d07e066ae08db298efb849966dcf0782113d..964847478ae6e9e42f7ebb7c8ef35b5f452ff3f5 100644 (file)
--- a/commandfactory.cpp
+++ b/commandfactory.cpp
@@ -137,6 +137,7 @@ CommandFactory::CommandFactory(){
         commands["pcoa"]                                = "pcoa";
         commands["otu.hierarchy"]               = "otu.hierarchy";
         commands["set.dir"]                             = "set.dir";
+       commands["merge.files"]                 = "merge.files";
  }
  /***********************************************************/
  
diff --git a/database.hpp b/database.hpp

index f52cdd58be86d31be87951df68d13983dc9bc921..79050777acd7d695754d7a382741538d35409b94 100644 (file)
--- a/database.hpp
+++ b/database.hpp
@@ -48,6 +48,7 @@ public:
         virtual void generateDB() = 0; 
         virtual void addSequence(Sequence) = 0;  //add sequence to search engine
         virtual vector<int> findClosestSequences(Sequence*, int) = 0;  // returns indexes of n closest sequences to query
+       virtual map<int, float> findClosest(Sequence*, int){ return results; }  // returns of n closest sequences to query and their search scores
         virtual float getSearchScore();
         virtual int getLongestBase(); 
         virtual void readKmerDB(ifstream&){};
@@ -59,6 +60,7 @@ public:
  protected:
         int numSeqs, longest;
         float searchScore;
+       map<int, float> results;
  };
  /**************************************************************************************************/
  #endif
diff --git a/decalc.cpp b/decalc.cpp

index 177636ccd1214e5ea713fcaeeb388ceb0c715b33..911efe268b6cd71817c28c7a8c008f850ff63e3e 100644 (file)
--- a/decalc.cpp
+++ b/decalc.cpp
@@ -105,7 +105,7 @@ vector<int>  DeCalculator::findWindows(Sequence* query, int front, int back, int
                 if (size == 0) {  if (cutoff > 1200) {  size = 300; }
                                                         else{  size = (cutoff / 4); }  } 
                 else if (size > (cutoff / 4)) { 
-                               mothurOut("You have selected to large a window size for sequence " + query->getName() + ".  I will choose an appropriate window size."); mothurOutEndLine();
+                               mothurOut("You have selected too large a window size for sequence " + query->getName() + ".  I will choose an appropriate window size."); mothurOutEndLine();
                                 size = (cutoff / 4); 
                 }
         
@@ -679,6 +679,7 @@ float DeCalculator::getCoef(vector<float> obs, vector<float> qav) {
         }
  }
  //***************************************************************************************************************
+//gets closest matches to each end, since chimeras will most likely have different parents on each end
  vector<Sequence*> DeCalculator::findClosest(Sequence* querySeq, vector<Sequence*> db, int numWanted) {
         try {
                 
@@ -687,14 +688,29 @@ vector<Sequence*> DeCalculator::findClosest(Sequence* querySeq, vector<Sequence*
                 
                 Dist* distcalculator = new eachGapDist();
                 
-               Sequence query = *(querySeq);
+               string queryAligned = querySeq->getAligned();
+               string leftQuery = queryAligned.substr(0, (queryAligned.length() / 3)); //first 1/3 of the sequence
+               string rightQuery = queryAligned.substr(((queryAligned.length() / 3)*2)); //last 1/3 of the sequence
                 
+               Sequence queryLeft(querySeq->getName(), leftQuery);
+               Sequence queryRight(querySeq->getName(), rightQuery);
+                               
                 for(int j = 0; j < db.size(); j++){
                         
-                       Sequence temp = *(db[j]);
+                       string dbAligned = db[j]->getAligned();
+                       string leftDB = dbAligned.substr(0, (dbAligned.length() / 3)); //first 1/3 of the sequence
+                       string rightDB = dbAligned.substr(((dbAligned.length() / 3)*2)); //last 1/3 of the sequence
+                       
+                       Sequence dbLeft(db[j]->getName(), leftDB);
+                       Sequence dbRight(db[j]->getName(), rightDB);
+                       
+                       distcalculator->calcDist(queryLeft, dbLeft);
+                       float distLeft = distcalculator->getDist();
+                       
+                       distcalculator->calcDist(queryRight, dbRight);
+                       float distRight = distcalculator->getDist();
                         
-                       distcalculator->calcDist(query, temp);
-                       float dist = distcalculator->getDist();
+                       float dist = min(distLeft, distRight); //keep smallest distance
                         
                         SeqDist subject;
                         subject.seq = db[j];
@@ -720,7 +736,7 @@ vector<Sequence*> DeCalculator::findClosest(Sequence* querySeq, vector<Sequence*
         }
  }
  /***************************************************************************************************************/
-void DeCalculator::trimSeqs(Sequence* query, vector<Sequence*> topMatches) {
+map<int, int> DeCalculator::trimSeqs(Sequence* query, vector<Sequence*> topMatches) {
         try {
                 
                 int frontPos = 0;  //should contain first position in all seqs that is not a gap character
@@ -809,6 +825,13 @@ void DeCalculator::trimSeqs(Sequence* query, vector<Sequence*> topMatches) {
                         topMatches[i]->setAligned(newAligned);
                 }
                 
+               map<int, int> trimmedPos;
+               
+               for (int i = 0; i < newAligned.length(); i++) {
+                       trimmedPos[i] = i+frontPos;
+               }
+               
+               return trimmedPos;
         }
         catch(exception& e) {
                 errorOut(e, "DeCalculator", "trimSequences");
diff --git a/decalc.h b/decalc.h

index 5f0a82025c557a75c3adc4715526fa17bd5fe5a5..011993652618dc38294ade4638ad0657238b9377 100644 (file)
--- a/decalc.h
+++ b/decalc.h
@@ -45,7 +45,7 @@ class DeCalculator {
                 void setAlignmentLength(int l) {  alignLength = l;  }
                 void runMask(Sequence*);
                 void trimSeqs(Sequence*, Sequence*, map<int, int>&);
-               void trimSeqs(Sequence*, vector<Sequence*>);
+               map<int, int> trimSeqs(Sequence*, vector<Sequence*>);
                 void removeObviousOutliers(vector< vector<quanMember> >&, int);
                 vector<float> calcFreq(vector<Sequence*>, string);
                 vector<int> findWindows(Sequence*, int, int, int&, int);
diff --git a/engine.cpp b/engine.cpp

index 593c2c3128d08cfe8abddfa81a1e384a1d17aa62..18e1a595c06343a521d9bcd6458b95db4d863db7 100644 (file)
--- a/engine.cpp
+++ b/engine.cpp
@@ -86,19 +86,27 @@ string Engine::getCommand()  {
                         #ifdef USE_READLINE
                                 char* nextCommand = NULL;
                                 nextCommand = readline("mothur > ");
-                               if(nextCommand != NULL) {  add_history(nextCommand);  }         
+                               
+                               if(nextCommand != NULL) {  add_history(nextCommand);  } 
+                               else{ //^D causes null string and we want it to quit mothur
+                                       nextCommand = "quit"; 
+                                       cout << nextCommand << endl;
+                               }       
+                               
                                 mothurOutJustToLog("mothur > " + toString(nextCommand));
                                 return nextCommand;
                         #else
                                 string nextCommand = "";
                                 mothurOut("mothur > ");
                                 getline(cin, nextCommand);
+                               mothurOutJustToLog("mothur > " + toString(nextCommand));
                                 return nextCommand;
                         #endif
                 #else
                         string nextCommand = "";
                         mothurOut("mothur > ");
                         getline(cin, nextCommand);
+                       mothurOutJustToLog("mothur > " + toString(nextCommand));
                         return nextCommand;
                 #endif
                 
diff --git a/globaldata.cpp b/globaldata.cpp

index c2871694e2feaa9219ed5d31575e57a84a3022dc..8f8d1ad5da1addc5dac09f68c6dec5a5b99c1568 100644 (file)
--- a/globaldata.cpp
+++ b/globaldata.cpp
@@ -123,6 +123,7 @@ void GlobalData::newRead() {
                         labels.clear(); Groups.clear();
                         allLines = 1;
                         runParse = true;
+                       names.clear();
         }
         catch(exception& e) {
                 errorOut(e, "GlobalData", "newRead");
diff --git a/maligner.cpp b/maligner.cpp

index 26ac563032bd93c8f46810d84b1c1a1ceb16c231..de6004d7e387d0e241dfbc205695d359dbcf6dda 100644 (file)
--- a/maligner.cpp
+++ b/maligner.cpp
@@ -8,12 +8,14 @@
   */
  
  #include "maligner.h"
+#include "database.hpp"
+#include "blastdb.hpp"
  
  /***********************************************************************/
-Maligner::Maligner(vector<Sequence*> temp, int num, int match, int misMatch, float div, int minCov) :
-               db(temp), numWanted(num), matchScore(match), misMatchPenalty(misMatch), minDivR(div), minCoverage(minCov) {}
+Maligner::Maligner(vector<Sequence*> temp, int num, int match, int misMatch, float div, int ms, int minCov, string mode) :
+               db(temp), numWanted(num), matchScore(match), misMatchPenalty(misMatch), minDivR(div), minSimilarity(ms), minCoverage(minCov), searchMethod(mode) {}
  /***********************************************************************/
-string Maligner::getResults(Sequence* q) {
+string Maligner::getResults(Sequence* q, DeCalculator* decalc) {
         try {
                 
                 outputResults.clear();
@@ -22,14 +24,36 @@ string Maligner::getResults(Sequence* q) {
                 query = new Sequence(q->getName(), q->getAligned());
                 
                 string chimera;
-       
-               decalc = new DeCalculator();
                 
-               //find closest seqs to query in template - returns copies of seqs so trim does not destroy - remember to deallocate
-               refSeqs = decalc->findClosest(query, db, numWanted);
-       
-               refSeqs = minCoverageFilter(refSeqs);
+               if (searchMethod != "blast") {
+                       //find closest seqs to query in template - returns copies of seqs so trim does not destroy - remember to deallocate
+                       refSeqs = decalc->findClosest(query, db, numWanted);
+               }else{
+                       refSeqs = getBlastSeqs(query, numWanted);
+               }
                 
+//ofstream out;
+//string name = toString(numi+1); 
+//cout << name << endl;
+//name = name.substr(name.find_first_of("|")+1);
+//cout << name << endl;
+//name = name.substr(name.find_first_of("|")+1);
+//cout << name << endl;
+//name = name.substr(0, name.find_last_of("|"));
+//cout << name << endl;
+//string filename = name + ".seqsformaligner";
+//openOutputFile(filename, out);       
+//for (int u = 0; u < refSeqs.size(); u++) {  refSeqs[u]->printSequence(out);  }
+//out.close();
+//filename = name + ".fasta";
+//openOutputFile(filename, out);       
+//query->printSequence(out);
+//out.close();
+
+//for (int i = 0; i < refSeqs.size(); i++)  { cout << refSeqs[i]->getName() << endl;  }        
+//cout << "before = " << refSeqs.size() << endl;
+               refSeqs = minCoverageFilter(refSeqs);
+//cout << "after = " << refSeqs.size() << endl;                
                 if (refSeqs.size() < 2)  { 
                         for (int i = 0; i < refSeqs.size(); i++) {  delete refSeqs[i];  }
                         percentIdenticalQueryChimera = 0.0;
@@ -37,9 +61,30 @@ string Maligner::getResults(Sequence* q) {
                 }
                 
                 int chimeraPenalty = computeChimeraPenalty();
-       
+//cout << chimeraPenalty << endl;              
+               //fills outputResults
+               chimera = chimeraMaligner(chimeraPenalty, decalc);
+               
+                               
+               //free memory
+               delete query;
+               for (int i = 0; i < refSeqs.size(); i++) {  delete refSeqs[i];  }
+               
+               return chimera;
+       }
+       catch(exception& e) {
+               errorOut(e, "Maligner", "getResults");
+               exit(1);
+       }
+}
+/***********************************************************************/
+string Maligner::chimeraMaligner(int chimeraPenalty, DeCalculator* decalc) {
+       try {
+               
+               string chimera;
+               
                 //trims seqs to first non gap char in all seqs and last non gap char in all seqs
-               decalc->trimSeqs(query, refSeqs);
+               spotMap = decalc->trimSeqs(query, refSeqs);
                 
                 vector<Sequence*> temp = refSeqs;
                 temp.push_back(query);
@@ -67,8 +112,6 @@ string Maligner::getResults(Sequence* q) {
                 
                 percentIdenticalQueryChimera = computePercentID(queryInRange, chimeraSeq);
                 
-               delete decalc;
-               
                 //save output results
                 for (int i = 0; i < trace.size(); i++) {
                         int regionStart = trace[i].col;
@@ -78,6 +121,8 @@ string Maligner::getResults(Sequence* q) {
                         results temp;
                         
                         temp.parent = refSeqs[seqIndex]->getName();
+                       temp.nastRegionStart = spotMap[regionStart];
+                       temp.nastRegionEnd = spotMap[regionEnd];
                         temp.regionStart = regionStart;
                         temp.regionEnd = regionEnd;
                         
@@ -97,15 +142,11 @@ string Maligner::getResults(Sequence* q) {
                 
                         outputResults.push_back(temp);
                 }
-               
-               //free memory
-               delete query;
-               for (int i = 0; i < refSeqs.size(); i++) {  delete refSeqs[i];  }
-               
+
                 return chimera;
         }
         catch(exception& e) {
-               errorOut(e, "Maligner", "getResults");
+               errorOut(e, "Maligner", "chimeraMaligner");
                 exit(1);
         }
  }
@@ -193,21 +234,27 @@ void Maligner::verticalFilter(vector<Sequence*> seqs) {
                         if(gaps[i] == seqs.size())      {       filterString[i] = '0';  numColRemoved++;  }
                 }
                 
+               map<int, int> newMap;
                 //for each sequence
                 for (int i = 0; i < seqs.size(); i++) {
                 
                         string seqAligned = seqs[i]->getAligned();
                         string newAligned = "";
+                       int count = 0;
                         
                         for (int j = 0; j < seqAligned.length(); j++) {
                                 //if this spot is not a gap
-                               if (filterString[j] == '1') { newAligned += seqAligned[j]; }
+                               if (filterString[j] == '1') { 
+                                       newAligned += seqAligned[j]; 
+                                       newMap[count] = spotMap[j];
+                                       count++;
+                               }
                         }
                         
                         seqs[i]->setAligned(newAligned);
                 }
  
-       
+               spotMap = newMap;
         }
         catch(exception& e) {
                 errorOut(e, "Maligner", "verticalFilter");
@@ -452,4 +499,78 @@ float Maligner::computePercentID(string queryAlign, string chimera) {
         }
  }
  //***************************************************************************************************************
+vector<Sequence*> Maligner::getBlastSeqs(Sequence* q, int num) {
+       try {
+cout << q->getName() << endl;  
+               //generate blastdb
+               Database* database = new BlastDB(-2.0, -1.0, matchScore, misMatchPenalty);
+               for (int i = 0; i < db.size(); i++) {   database->addSequence(*db[i]);  }
+               database->generateDB();
+               database->setNumSeqs(db.size());
+               
+               //get parts of query
+               string queryAligned = q->getAligned();
+               string leftQuery = queryAligned.substr(0, (queryAligned.length() / 3)); //first 1/3 of the sequence
+               string rightQuery = queryAligned.substr(((queryAligned.length() / 3)*2)); //last 1/3 of the sequence
+               
+               Sequence* queryLeft = new Sequence(q->getName(), leftQuery);
+               Sequence* queryRight = new Sequence(q->getName(), rightQuery);
+               
+               map<int, float> tempIndexesRight = database->findClosest(queryRight, num);
+               map<int, float> tempIndexesLeft = database->findClosest(queryLeft, num);
+               
+               //merge results
+               vector<rank> mergedResults;
+               
+               map<int, float>::iterator it;
+               map<int, float>::iterator it2;
+               
+               //add in right guys merging common finds
+               for (it = tempIndexesRight.begin(); it != tempIndexesRight.end(); it++) {
+                       it2 = tempIndexesLeft.find(it->first);
+                       
+                       if (it2 == tempIndexesLeft.end()) {  //result only present in right
+                               rank temp(it->first, it->second);
+                               mergedResults.push_back(temp); 
+
+                       }else { //result present in both save best score
+                               float bestscore;
+                               if (it->second > it2->second)   {  bestscore = it->second;  }
+                               else                                                    {  bestscore = it2->second; }
+                               
+                               rank temp(it->first, bestscore);
+                               mergedResults.push_back(temp);
+                               
+                               tempIndexesLeft.erase(it2);
+                       }
+               }
+               
+               //add in unique left guys
+               for (it = tempIndexesLeft.begin(); it != tempIndexesLeft.end(); it++) {
+                       rank temp(it->first, it->second);
+                       mergedResults.push_back(temp); 
+               }
+               
+               sort(mergedResults.begin(), mergedResults.end(), compareMembers);
+               
+               vector<Sequence*> refResults;
+               for (int i = 0; i < numWanted; i++) {
+                       Sequence* temp = new Sequence(db[mergedResults[i].num]->getName(), db[mergedResults[i].num]->getAligned());
+                       refResults.push_back(temp);
+cout << db[mergedResults[i].num]->getName() << endl;
+               }
+               
+               delete queryRight;
+               delete queryLeft;
+               delete database;
+               
+               return refResults;
+       }
+       catch(exception& e) {
+               errorOut(e, "Maligner", "getBlastSeqs");
+               exit(1);
+       }
+}
+
+//***************************************************************************************************************
  
diff --git a/maligner.h b/maligner.h

index 6277e7300a02fbe815468e223298f5f969813a51..abe18a3cb1ad6c6b65d41077b8dd1fb56d802f96 100644 (file)
--- a/maligner.h
+++ b/maligner.h
@@ -10,53 +10,32 @@
   */
   
  #include "decalc.h"
+#include "chimera.h"
  
  /***********************************************************************/
  //This class was modeled after the chimeraMaligner written by the Broad Institute
-/***********************************************************************/
-struct score_struct {
-       int prev;
-       int score;
-       int row;
-       int col;
-};
-/***********************************************************************/
-struct trace_struct {
-       int col;
-       int oldCol;
-       int row;
-};
-/***********************************************************************/
-struct results {
-       int regionStart;
-       int regionEnd;
-       string parent;
-       float queryToParent;
-       float queryToParentLocal;
-       float divR;
-};
-
  /**********************************************************************/
  class Maligner {
  
         public:
                 
-               Maligner(vector<Sequence*>, int, int, int, float, int);
+               Maligner(vector<Sequence*>, int, int, int, float, int, int, string);
                 ~Maligner() {};
                 
-               string getResults(Sequence*);
+               string getResults(Sequence*, DeCalculator*);
                 float getPercentID() {  return percentIdenticalQueryChimera;    }
                 vector<results> getOutput()  {  return outputResults;                   }
                 
                                 
         private:
-               DeCalculator* decalc;
                 Sequence* query;
                 vector<Sequence*> refSeqs;
                 vector<Sequence*> db;
-               int numWanted, matchScore, misMatchPenalty, minCoverage;
+               int numWanted, matchScore, misMatchPenalty, minCoverage, minSimilarity;
+               string searchMethod;
                 float minDivR, percentIdenticalQueryChimera;
                 vector<results> outputResults;
+               map<int, int> spotMap;
                 
                 vector<Sequence*> minCoverageFilter(vector<Sequence*>);  //removes top matches that do not have minimum coverage with query.
                 int computeChimeraPenalty();
@@ -68,6 +47,8 @@ class Maligner {
                 vector<trace_struct> mapTraceRegionsToAlignment(vector<score_struct>, vector<Sequence*>);
                 string constructChimericSeq(vector<trace_struct>, vector<Sequence*>);
                 float computePercentID(string, string);
+               string chimeraMaligner(int, DeCalculator*);
+               vector<Sequence*> getBlastSeqs(Sequence*, int);
                 
  };
  
diff --git a/mergefilecommand.cpp b/mergefilecommand.cpp

index d5973394e5a097e879bf1526689d137266a417c1..0a8c0d8cda62808664a83f46c44cebb770e14f4a 100644 (file)
--- a/mergefilecommand.cpp
+++ b/mergefilecommand.cpp
@@ -96,7 +96,11 @@ int MergeFileCommand::execute(){
                         
                         openInputFile(fileNames[i], inputFile);
                         
-                       while(!inputFile.eof()){        c = inputFile.get(); outputFile << c;   }
+                       while(!inputFile.eof()){        
+                               c = inputFile.get(); 
+                               //-1 is eof char
+                               if (int(c) != -1) { outputFile << c; }   
+                       }
                         
                         inputFile.close();
                 }
diff --git a/mothur.cpp b/mothur.cpp

index b71057a24550aaad2b07bc1d7794a6710191409e..8203c5716f02122ed77feef18f31428c23148552 100644 (file)
--- a/mothur.cpp
+++ b/mothur.cpp
@@ -41,7 +41,11 @@ int main(int argc, char *argv[]){
                         mothurOutJustToLog("Windows version");
                         mothurOutEndLine(); mothurOutEndLine();
                 #endif          
-
+               
+               #ifdef USE_READLINE
+                       mothurOutJustToLog("Using ReadLine");
+                       mothurOutEndLine(); mothurOutEndLine();
+               #endif
                 
                 //header
                 mothurOut("mothur v.1.8");
diff --git a/mothur.h b/mothur.h

index 8190152b8be3c15ee0a3559b93d846174d6c992d..5f8a2fe75b41d57e345d50cb63606921c866ff67 100644 (file)
--- a/mothur.h
+++ b/mothur.h
@@ -831,6 +831,21 @@ inline bool inUsersGroups(string groupname, vector<string> Groups) {
                 exit(1);
         }
  }
+/**************************************************************************************************/
+//returns true if any of the strings in first vector are in second vector
+inline bool inUsersGroups(vector<string> groupnames, vector<string> Groups) {
+       try {
+               
+               for (int i = 0; i < groupnames.size(); i++) {
+                       if (inUsersGroups(groupnames[i], Groups)) { return true; }
+               }
+               return false;
+       }
+       catch(exception& e) {
+               errorOut(e, "mothur", "inUsersGroups");
+               exit(1);
+       }
+}
  
  /**************************************************************************************************/
  
diff --git a/nast.cpp b/nast.cpp

index 6a4ae3db2f679ee4ccadf2b0ff3270f5da4f15aa..c91c9fab537172b3e02808b700af881affffc368 100644 (file)
--- a/nast.cpp
+++ b/nast.cpp
@@ -26,7 +26,6 @@ Nast::Nast(Alignment* method, Sequence* cand, Sequence* temp) : alignment(method
                 maxInsertLength = 0;
                 pairwiseAlignSeqs();    //      This is part A in Fig. 2 of DeSantis et al.
                 regapSequences();               //      This is parts B-F in Fig. 2 of DeSantis et al.
-
         }
         catch(exception& e) {
                 errorOut(e, "Nast", "Nast");
@@ -41,7 +40,7 @@ void Nast::pairwiseAlignSeqs(){       //      Here we call one of the pairwise alignment me
         try {
  
                 alignment->align(candidateSeq->getUnaligned(), templateSeq->getUnaligned());
-       
+
                 string candAln = alignment->getSeqAAln();
                 string tempAln = alignment->getSeqBAln();
         
diff --git a/otuhierarchycommand.cpp b/otuhierarchycommand.cpp

index bc87211e0bde9a4581119a91592683e34262d006..c39a638cf11375644a82e4d56c3c5659c56b35ee 100644 (file)
--- a/otuhierarchycommand.cpp
+++ b/otuhierarchycommand.cpp
@@ -18,7 +18,7 @@ OtuHierarchyCommand::OtuHierarchyCommand(string option){
                 
                 else {
                         //valid paramters for this command
-                       string Array[] =  {"list","label","outputdir","inputdir"};
+                       string Array[] =  {"list","label","output","outputdir","inputdir"};
                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
                         
                         OptionParser parser(option);
@@ -63,7 +63,11 @@ OtuHierarchyCommand::OtuHierarchyCommand(string option){
                         else { 
                                 splitAtDash(label, labels);
                                 if (labels.size() != 2) { mothurOut("You must provide 2 labels."); mothurOutEndLine(); abort = true; }
-                       }                               
+                       }       
+                       
+                       output = validParameter.validFile(parameters, "output", false);                 if (output == "not found") { output = "name"; }
+                       
+                       if ((output != "name") && (output != "number")) { mothurOut("output options are name and number. I will use name."); mothurOutEndLine(); output = "name"; }
                 }
                 
         }
@@ -77,7 +81,8 @@ OtuHierarchyCommand::OtuHierarchyCommand(string option){
  void OtuHierarchyCommand::help(){
         try {
                 mothurOut("The otu.hierarchy command is used to see how otus relate at two distances. \n");
-               mothurOut("The otu.hierarchy command parameters are list and label.  Both parameters are required. \n");
+               mothurOut("The otu.hierarchy command parameters are list, label and output.  list and label parameters are required. \n");
+               mothurOut("The output parameter allows you to output the names of the sequence in the OTUs or the OTU numbers. Options are name and number, default is name. \n");
                 mothurOut("The otu.hierarchy command should be in the following format: \n");
                 mothurOut("otu.hierarchy(list=yourListFile, label=yourLabels).\n");
                 mothurOut("Example otu.hierarchy(list=amazon.fn.list, label=0.01-0.03).\n");
@@ -140,7 +145,8 @@ int OtuHierarchyCommand::execute(){
                         string names = lists[1].get(i);
                         
                         //output column 1
-                       out << names << '\t';
+                       if (output == "name")   {   out << names << '\t';       }
+                       else                                    {       out << i << '\t';               }
                         
                         map<int, int> bins; //bin numbers in little that are in this bin in big
                         map<int, int>::iterator it;
@@ -157,7 +163,8 @@ int OtuHierarchyCommand::execute(){
                         
                         string col2 = "";
                         for (it = bins.begin(); it != bins.end(); it++) {
-                               col2 += lists[0].get(it->first) + "\t";
+                               if (output == "name")   {   col2 += lists[0].get(it->first) + "\t";     }
+                               else                                    {       col2 += toString(it->first) + "\t";             }
                         }
                         
                         //output column 2
diff --git a/otuhierarchycommand.h b/otuhierarchycommand.h

index c6749ef3841800971f82f5a01c54d76de713b096..1cd427f1a7af8442d4747e65662bbf431ccdcbbd 100644 (file)
--- a/otuhierarchycommand.h
+++ b/otuhierarchycommand.h
@@ -25,7 +25,7 @@ public:
  private:
         bool abort;
         set<string> labels; //holds labels to be used
-       string label, listFile, outputDir;
+       string label, listFile, outputDir, output;
         
         vector<ListVector> getListVectors();
                 
diff --git a/pintail.cpp b/pintail.cpp

index 6adfdf91ebeaf30847bee05d90f2c9b22d3c5946..74e1ce47a80e60ec4c27e685f53e6dbfedfb6da9 100644 (file)
--- a/pintail.cpp
+++ b/pintail.cpp
@@ -18,106 +18,39 @@ inline bool compareQuanMembers(quanMember left, quanMember right){
  } 
  //***************************************************************************************************************
  
-Pintail::Pintail(string filename, string temp, string o) {  fastafile = filename;  templateFile = temp; outputDir = o; }
+Pintail::Pintail(string filename, string o) {  
+       fastafile = filename;  outputDir = o; 
+       distcalculator = new eachGapDist();
+       decalc = new DeCalculator();
+}
  //***************************************************************************************************************
  
  Pintail::~Pintail() {
         try {
-               for (int i = 0; i < querySeqs.size(); i++)              {  delete querySeqs[i];         }
-               for (int i = 0; i < templateSeqs.size(); i++)   {  delete templateSeqs[i];      }
-               for (int i = 0; i < bestfit.size(); i++)                {  delete bestfit[i];           }  
-       }
-       catch(exception& e) {
-               errorOut(e, "Pintail", "~Pintail");
-               exit(1);
-       }
-}      
-//***************************************************************************************************************
-void Pintail::print(ostream& out) {
-       try {
-               
-               mothurOutEndLine();
                 
-               for (int i = 0; i < querySeqs.size(); i++) {
-                       
-                       int index = ceil(deviation[i]);
-                                               
-                       //is your DE value higher than the 95%
-                       string chimera;
-                       if (quantiles[index][4] == 0.0) {
-                               chimera = "Your template does not include sequences that provide quantile values at distance " + toString(index);
-                       }else {
-                               if (DE[i] > quantiles[index][4])                {       chimera = "Yes";        }
-                               else                                                                    {       chimera = "No";         }
-                       }
-                       
-                       out << querySeqs[i]->getName() << '\t' << "div: " << deviation[i] << "\tstDev: " << DE[i] << "\tchimera flag: " << chimera << endl;
-                       if (chimera == "Yes") {
-                               mothurOut(querySeqs[i]->getName() + "\tdiv: " + toString(deviation[i]) + "\tstDev: " + toString(DE[i]) + "\tchimera flag: " + chimera); mothurOutEndLine();
-                       }
-                       out << "Observed\t";
-                       
-                       for (int j = 0; j < obsDistance[i].size(); j++) {  out << obsDistance[i][j] << '\t';  }
-                       out << endl;
-                       
-                       out << "Expected\t";
-                       
-                       for (int m = 0; m < expectedDistance[i].size(); m++) {  out << expectedDistance[i][m] << '\t';  }
-                       out << endl;
-                       
-               }
+               delete distcalculator;
+               delete decalc; 
         }
         catch(exception& e) {
-               errorOut(e, "Pintail", "print");
+               errorOut(e, "Pintail", "~Pintail");
                 exit(1);
         }
  }
-
  //***************************************************************************************************************
-int Pintail::getChimeras() {
+void Pintail::doPrep() {
         try {
-               
-               //read in query sequences and subject sequences
-               mothurOut("Reading sequences and template file... "); cout.flush();
-               querySeqs = readSeqs(fastafile);
-               templateSeqs = readSeqs(templateFile);
-               mothurOut("Done."); mothurOutEndLine();
-               
-               int numSeqs = querySeqs.size();
-               
-               if (unaligned) { mothurOut("Your sequences need to be aligned when you use the pintail method."); mothurOutEndLine(); return 1;  }
-               
-               obsDistance.resize(numSeqs);
-               expectedDistance.resize(numSeqs);
-               seqCoef.resize(numSeqs);
-               DE.resize(numSeqs);
-               Qav.resize(numSeqs);
-               bestfit.resize(numSeqs);
-               deviation.resize(numSeqs);
-               trimmed.resize(numSeqs);
-               windowSizes.resize(numSeqs, window);
+       
                 windowSizesTemplate.resize(templateSeqs.size(), window);
-               windowsForeachQuery.resize(numSeqs);
-               h.resize(numSeqs);
                 quantiles.resize(100);  //one for every percent mismatch
                 quantilesMembers.resize(100);  //one for every percent mismatch
                 
-               //break up file if needed
-               int linesPerProcess = numSeqs / processors ;
+               //if the user does not enter a mask then you want to keep all the spots in the alignment
+               if (seqMask.length() == 0)      {       decalc->setAlignmentLength(templateSeqs[0]->getAligned().length());     }
+               else                                            {       decalc->setAlignmentLength(seqMask.length());                                           }
+               
+               decalc->setMask(seqMask);
                 
                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
-                       //find breakup of sequences for all times we will Parallelize
-                       if (processors == 1) {   lines.push_back(new linePair(0, numSeqs));  }
-                       else {
-                               //fill line pairs
-                               for (int i = 0; i < (processors-1); i++) {                      
-                                       lines.push_back(new linePair((i*linesPerProcess), ((i*linesPerProcess) + linesPerProcess)));
-                               }
-                               //this is necessary to get remainder of processors / numSeqs so you don't miss any lines at the end
-                               int i = processors - 1;
-                               lines.push_back(new linePair((i*linesPerProcess), numSeqs));
-                       }
-                       
                         //find breakup of templatefile for quantiles
                         if (processors == 1) {   templateLines.push_back(new linePair(0, templateSeqs.size()));  }
                         else { 
@@ -128,149 +61,38 @@ int Pintail::getChimeras() {
                                 }
                         }
                 #else
-                       lines.push_back(new linePair(0, numSeqs));
                         templateLines.push_back(new linePair(0, templateSeqs.size()));
                 #endif
-               
-               distcalculator = new eachGapDist();
-               decalc = new DeCalculator();
-               
-               //if the user does enter a mask then you want to keep all the spots in the alignment
-               if (seqMask.length() == 0)      {       decalc->setAlignmentLength(querySeqs[0]->getAligned().length());        }
-               else                                            {       decalc->setAlignmentLength(seqMask.length());                                           }
-               
-               decalc->setMask(seqMask);
-               
-               //find pairs
-               if (processors == 1) { 
-                       mothurOut("Finding closest sequence in template to each sequence... "); cout.flush();
-                       bestfit = findPairs(lines[0]->start, lines[0]->end);
-                       mothurOut("Done."); mothurOutEndLine();
-               }else {         createProcessesPairs();         }
-               
-//string o = "closestmatch.eachgap.fasta";
-//ofstream out7;
-//openOutputFile(o, out7);
  
-//for (int i = 0; i < bestfit.size(); i++) {
-       //out7 << ">" << querySeqs[i]->getName() << "-"<< bestfit[i]->getName() << endl;
-       //out7 << bestfit[i]->getAligned() << endl;
-//}            
-//out7.close();        
-               //find P
+               
                 mothurOut("Getting conservation... "); cout.flush();
                 if (consfile == "") { 
                         mothurOut("Calculating probability of conservation for your template sequences.  This can take a while...  I will output the frequency of the highest base in each position to a .freq file so that you can input them using the conservation parameter next time you run this command.  Providing the .freq file will improve speed.    "); cout.flush();
-                       probabilityProfile = decalc->calcFreq(templateSeqs, outputDir + getSimpleName(templateFile)); 
+                       probabilityProfile = decalc->calcFreq(templateSeqs, outputDir + getSimpleName(templateFileName)); 
                         mothurOut("Done."); mothurOutEndLine();
-               }else                           {   probabilityProfile = readFreq();                      }
-
-               //make P into Q
-               for (int i = 0; i < probabilityProfile.size(); i++)  {  probabilityProfile[i] = 1 - probabilityProfile[i];  }  //cout << i << '\t' << probabilityProfile[i] << endl;
-               mothurOut("Done."); mothurOutEndLine();
-               
-               //mask sequences if the user wants to 
-               if (seqMask != "") {
-                       //mask querys
-                       for (int i = 0; i < querySeqs.size(); i++) {
-                               decalc->runMask(querySeqs[i]);
-                       }
-               
-                       //mask templates
-                       for (int i = 0; i < templateSeqs.size(); i++) {
-                               decalc->runMask(templateSeqs[i]);
-                       }
-                       
-                       for (int i = 0; i < bestfit.size(); i++) { 
-                               decalc->runMask(bestfit[i]);
-                       }
-
-               }
-               
-               if (filter) {
-                       vector<Sequence*> temp = templateSeqs;
-                       for (int i = 0; i < querySeqs.size(); i++) { temp.push_back(querySeqs[i]);  }
-                       
-                       createFilter(temp);
-                       
-                       runFilter(querySeqs);
-                       runFilter(templateSeqs);
-                       runFilter(bestfit);
-               }
-                                                                               
-                                                                                                                                               
-               if (processors == 1) { 
-       
-                       for (int j = 0; j < bestfit.size(); j++) { 
-                               decalc->trimSeqs(querySeqs[j], bestfit[j], trimmed[j]);  
-                       }
-                       
-                       mothurOut("Finding window breaks... "); cout.flush();
-                       for (int i = lines[0]->start; i < lines[0]->end; i++) {
-                               it = trimmed[i].begin();
-                               vector<int> win = decalc->findWindows(querySeqs[i], it->first, it->second, windowSizes[i], increment);
-                               windowsForeachQuery[i] = win;
-                       }
-                       mothurOut("Done."); mothurOutEndLine();
-               
-               }else {         createProcessesSpots();         }
-               
-               if (processors == 1) { 
-                                               
-                       mothurOut("Calculating observed distance... "); cout.flush();
-                       for (int i = lines[0]->start; i < lines[0]->end; i++) {
-                               vector<float> obsi = decalc->calcObserved(querySeqs[i], bestfit[i], windowsForeachQuery[i], windowSizes[i]);
+               }else                           {   probabilityProfile = readFreq();    mothurOut("Done.");               }
+               mothurOutEndLine();
         
-                               obsDistance[i] = obsi;
-                       }
-                       mothurOut("Done."); mothurOutEndLine();
-                       
-                       
-                       mothurOut("Finding variability... "); cout.flush();
-                       for (int i = lines[0]->start; i < lines[0]->end; i++) {
-                               vector<float> q = decalc->findQav(windowsForeachQuery[i], windowSizes[i], probabilityProfile);
-
-                               Qav[i] = q;
-                       }
-                       mothurOut("Done."); mothurOutEndLine();
-                       
-                       
-                       mothurOut("Calculating alpha... "); cout.flush();
-                       for (int i = lines[0]->start; i < lines[0]->end; i++) {
-                               float alpha = decalc->getCoef(obsDistance[i], Qav[i]);
-                               seqCoef[i] = alpha;
-                       }
-                       mothurOut("Done."); mothurOutEndLine();
-               
-               
-                       mothurOut("Calculating expected distance... "); cout.flush();
-                       for (int i = lines[0]->start; i < lines[0]->end; i++) {
-                               vector<float> exp = decalc->calcExpected(Qav[i], seqCoef[i]);
-                               expectedDistance[i] = exp;
-                       }
-                       mothurOut("Done."); mothurOutEndLine();
-                       
-                       
-                       mothurOut("Finding deviation... "); cout.flush();
-                       for (int i = lines[0]->start; i < lines[0]->end; i++) {
-                               float de = decalc->calcDE(obsDistance[i], expectedDistance[i]);
-                               DE[i] = de;
-                       
-                               it = trimmed[i].begin();
-                               float dist = decalc->calcDist(querySeqs[i], bestfit[i], it->first, it->second); 
-                               deviation[i] = dist;
-                       }
-                       mothurOut("Done."); mothurOutEndLine();
-                       
-               } 
-               else {          createProcesses();              }
-               
                 //quantiles are used to determine whether the de values found indicate a chimera
                 //if you have to calculate them, its time intensive because you are finding the de and deviation values for each 
                 //combination of sequences in the template
                 if (quanfile != "") {  quantiles = readQuantiles();  }
                 else {
                         
+                       //if user has not provided the quantiles and wants the mask we need to mask, but then delete and reread or we will mess up the best match process in getChimeras
+                       if (seqMask != "") {
+                               //mask templates
+                               for (int i = 0; i < templateSeqs.size(); i++) {
+                                       decalc->runMask(templateSeqs[i]);
+                               }
+                       }
+                       
+                       if (filter) {
+                               createFilter(templateSeqs);
+                               for (int i = 0; i < templateSeqs.size(); i++) {  runFilter(templateSeqs[i]);  }
+                       }
+
+                       
                         mothurOut("Calculating quantiles for your template.  This can take a while...  I will output the quantiles to a .quan file that you can input them using the quantiles parameter next time you run this command.  Providing the .quan file will dramatically improve speed.    "); cout.flush();
                         if (processors == 1) { 
                                 quantilesMembers = decalc->getQuantiles(templateSeqs, windowSizesTemplate, window, probabilityProfile, increment, 0, templateSeqs.size());
@@ -281,58 +103,16 @@ int Pintail::getChimeras() {
                         string noOutliers, outliers;
                         
                         if ((!filter) && (seqMask == "")) {
-                               noOutliers = outputDir + getRootName(getSimpleName(templateFile)) + "pintail.quan";
+                               noOutliers = outputDir + getRootName(getSimpleName(templateFileName)) + "pintail.quan";
                         }else if ((filter) && (seqMask == "")) { 
-                               noOutliers = outputDir + getRootName(getSimpleName(templateFile)) + "pintail.filtered.quan";
+                               noOutliers = outputDir + getRootName(getSimpleName(templateFileName)) + "pintail.filtered.quan";
                         }else if ((!filter) && (seqMask != "")) { 
-                               noOutliers = outputDir + getRootName(getSimpleName(templateFile)) + "pintail.masked.quan";
+                               noOutliers = outputDir + getRootName(getSimpleName(templateFileName)) + "pintail.masked.quan";
                         }else if ((filter) && (seqMask != "")) { 
-                               noOutliers = outputDir + getRootName(getSimpleName(templateFile)) + "pintail.filtered.masked.quan";
+                               noOutliers = outputDir + getRootName(getSimpleName(templateFileName)) + "pintail.filtered.masked.quan";
                         }
  
-                       //outliers = getRootName(templateFile) + "pintail.quanYESOUTLIERS";
-                       
-                       /*openOutputFile(outliers, out4);
-                       
-                       //adjust quantiles
-                       for (int i = 0; i < quantilesMembers.size(); i++) {
-                               vector<float> temp;
-                               
-                               if (quantilesMembers[i].size() == 0) {
-                                       //in case this is not a distance found in your template files
-                                       for (int g = 0; g < 6; g++) {
-                                               temp.push_back(0.0);
-                                       }
-                               }else{
-                                       
-                                       sort(quantilesMembers[i].begin(), quantilesMembers[i].end(), compareQuanMembers);
-                                       
-                                       //save 10%
-                                       temp.push_back(quantilesMembers[i][int(quantilesMembers[i].size() * 0.10)].score);
-                                       //save 25%
-                                       temp.push_back(quantilesMembers[i][int(quantilesMembers[i].size() * 0.25)].score);
-                                       //save 50%
-                                       temp.push_back(quantilesMembers[i][int(quantilesMembers[i].size() * 0.5)].score);
-                                       //save 75%
-                                       temp.push_back(quantilesMembers[i][int(quantilesMembers[i].size() * 0.75)].score);
-                                       //save 95%
-                                       temp.push_back(quantilesMembers[i][int(quantilesMembers[i].size() * 0.95)].score);
-                                       //save 99%
-                                       temp.push_back(quantilesMembers[i][int(quantilesMembers[i].size() * 0.99)].score);
-                                       
-                               }
-                               
-                               //output quan value
-                               out4 << i+1 << '\t';                            
-                               for (int u = 0; u < temp.size(); u++) {   out4 << temp[u] << '\t'; }
-                               out4 << endl;
-                               
-                               quantiles[i] = temp;
-                               
-                       }
-                       
-                       out4.close();*/
-                       
+                                               
                         decalc->removeObviousOutliers(quantilesMembers, templateSeqs.size());
                         
                         openOutputFile(noOutliers, out5);
@@ -376,15 +156,112 @@ int Pintail::getChimeras() {
  
                         mothurOut("Done."); mothurOutEndLine();
                         
-               }
+                       //if mask reread template
+                       if ((seqMask != "") || (filter)) {
+                               for (int i = 0; i < templateSeqs.size(); i++)           {  delete templateSeqs[i];              }
+                               templateSeqs = readSeqs(templateFileName);
+                       }
                         
+               }
+               
                 //free memory
-               for (int i = 0; i < lines.size(); i++)                                  {       delete lines[i];                                }
-               for (int i = 0; i < templateLines.size(); i++)                  {       delete templateLines[i];                }
-                       
-               delete distcalculator;
-               delete decalc;
+               for (int i = 0; i < templateLines.size(); i++) { delete templateLines[i];  }
+               
+       }
+       catch(exception& e) {
+               errorOut(e, "Pintail", "doPrep");
+               exit(1);
+       }
+}
+//***************************************************************************************************************
+void Pintail::print(ostream& out) {
+       try {
+               int index = ceil(deviation);
+               
+               //is your DE value higher than the 95%
+               string chimera;
+               if (index != 0) {  //if index is 0 then its an exact match to a template seq
+                       if (quantiles[index][4] == 0.0) {
+                               chimera = "Your template does not include sequences that provide quantile values at distance " + toString(index);
+                       }else {
+                               if (DE > quantiles[index][4])           {       chimera = "Yes";        }
+                               else                                                            {       chimera = "No";         }
+                       }
+               }else{ chimera = "No";          }
+               
+               out << querySeq->getName() << '\t' << "div: " << deviation << "\tstDev: " << DE << "\tchimera flag: " << chimera << endl;
+               if (chimera == "Yes") {
+                       mothurOut(querySeq->getName() + "\tdiv: " + toString(deviation) + "\tstDev: " + toString(DE) + "\tchimera flag: " + chimera); mothurOutEndLine();
+               }
+               out << "Observed\t";
+               
+               for (int j = 0; j < obsDistance.size(); j++) {  out << obsDistance[j] << '\t';  }
+               out << endl;
+               
+               out << "Expected\t";
+               
+               for (int m = 0; m < expectedDistance.size(); m++) {  out << expectedDistance[m] << '\t';  }
+               out << endl;
+               
+               
+       }
+       catch(exception& e) {
+               errorOut(e, "Pintail", "print");
+               exit(1);
+       }
+}
+
+//***************************************************************************************************************
+int Pintail::getChimeras(Sequence* query) {
+       try {
+               querySeq = query;
+               trimmed.clear();
+               windowSizes = window;
+                               
+               //find pairs
+               bestfit = findPairs(query);
+                                                       
+               //make P into Q
+               for (int i = 0; i < probabilityProfile.size(); i++)  {  probabilityProfile[i] = 1 - probabilityProfile[i];  }  //cout << i << '\t' << probabilityProfile[i] << endl;
+       
+               //mask sequences if the user wants to 
+               if (seqMask != "") {
+                       decalc->runMask(query);
+                       decalc->runMask(bestfit);
+               }
+               
+               if (filter) {
+                       runFilter(query);
+                       runFilter(bestfit);
+               }
+               
+               //trim seq
+               decalc->trimSeqs(query, bestfit, trimmed);  
+               
+               //find windows
+               it = trimmed.begin();
+               windowsForeachQuery = decalc->findWindows(query, it->first, it->second, windowSizes, increment);
+
+               //find observed distance
+               obsDistance = decalc->calcObserved(query, bestfit, windowsForeachQuery, windowSizes);
+                               
+               Qav = decalc->findQav(windowsForeachQuery, windowSizes, probabilityProfile);
+
+               //find alpha                    
+               seqCoef = decalc->getCoef(obsDistance, Qav);
+               
+               //calculating expected distance
+               expectedDistance = decalc->calcExpected(Qav, seqCoef);
+               
+               //finding de
+               DE = decalc->calcDE(obsDistance, expectedDistance);
+               
+               //find distance between query and closest match
+               it = trimmed.begin();
+               deviation = decalc->calcDist(query, bestfit, it->first, it->second); 
                 
+               delete bestfit;
+                                                                       
                 return 0;
         }
         catch(exception& e) {
@@ -437,16 +314,13 @@ vector<float> Pintail::readFreq() {
  
  //***************************************************************************************************************
  //calculate the distances from each query sequence to all sequences in the template to find the closest sequence
-vector<Sequence*> Pintail::findPairs(int start, int end) {
+Sequence* Pintail::findPairs(Sequence* q) {
         try {
                 
-               vector<Sequence*> seqsMatches;  
+               Sequence* seqsMatches;  
                 
-               for(int i = start; i < end; i++){
-                       
-                       vector<Sequence*> copy = decalc->findClosest(querySeqs[i], templateSeqs, 1);
-                       seqsMatches.push_back(copy[0]);
-               }
+               vector<Sequence*> copy = decalc->findClosest(q, templateSeqs, 1);
+               seqsMatches = copy[0];
                 
                 return seqsMatches;
         
@@ -456,458 +330,7 @@ vector<Sequence*> Pintail::findPairs(int start, int end) {
                 exit(1);
         }
  }
-
-/**************************************************************************************************/
-
-void Pintail::createProcessesSpots() {
-       try {
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
-               int process = 0;
-               vector<int> processIDS;
-               
-               //loop through and create all the processes you want
-               while (process != processors) {
-                       int pid = fork();
-                       
-                       if (pid > 0) {
-                               processIDS.push_back(pid);  
-                               process++;
-                       }else if (pid == 0){
-                               
-                               for (int j = lines[process]->start; j < lines[process]->end; j++) {
-                               
-                                       //chops off beginning and end of sequences so they both start and end with a base
-                                       map<int, int> trim;
-
-                                       decalc->trimSeqs(querySeqs[j], bestfit[j], trim); 
-                                       trimmed[j] = trim;
-                                       
-                               }
-
-                               mothurOut("Finding window breaks for sequences " + toString(lines[process]->start) + " to " + toString(lines[process]->end)); mothurOutEndLine();
-                               for (int i = lines[process]->start; i < lines[process]->end; i++) {
-                                       it = trimmed[i].begin();
-                                       windowsForeachQuery[i] = decalc->findWindows(querySeqs[i], it->first, it->second, windowSizes[i], increment);
-                               }
-                               mothurOut("Done finding window breaks for sequences " + toString(lines[process]->start) + " to " + toString(lines[process]->end)); mothurOutEndLine();
-                               
-                               //write out data to file so parent can read it
-                               ofstream out;
-                               string s = toString(getpid()) + ".temp";
-                               openOutputFile(s, out);
-                               
-                               //output windowsForeachQuery
-                               for (int i = lines[process]->start; i < lines[process]->end; i++) {
-                                       out << windowsForeachQuery[i].size() << '\t';
-                                       for (int j = 0; j < windowsForeachQuery[i].size(); j++) {
-                                               out << windowsForeachQuery[i][j] << '\t';
-                                       }
-                                       out << endl;
-                               }
-                       
-                               //output windowSizes
-                               for (int i = lines[process]->start; i < lines[process]->end; i++) {
-                                       out << windowSizes[i] << '\t';
-                               }
-                               out << endl;
-                               
-                               //output trimmed values
-                               for (int i = lines[process]->start; i < lines[process]->end; i++) {
-                                       it = trimmed[i].begin();                                        
-                                       out << it->first << '\t' << it->second << endl;
-                               }
-                               out.close();
-                               
-                               exit(0);
-                       }else { mothurOut("unable to spawn the necessary processes."); mothurOutEndLine(); exit(0); }
-               }
-               
-               //force parent to wait until all the processes are done
-               for (int i=0;i<processors;i++) { 
-                       int temp = processIDS[i];
-                       wait(&temp);
-               }
-               
-               //get data created by processes
-               for (int i=0;i<processors;i++) { 
-                       ifstream in;
-                       string s = toString(processIDS[i]) + ".temp";
-                       openInputFile(s, in);
-                       
-                       int size = lines[i]->end - lines[i]->start;
-                                       
-                       int count = lines[i]->start;
-                       for (int m = 0; m < size; m++) {
-                               int num;
-                               in >> num;
-                       
-                               vector<int> win;  int w;
-                               for (int j = 0; j < num; j++) {
-                                       in >> w;
-                                       win.push_back(w);
-                               }
-                       
-                               windowsForeachQuery[count] = win;
-                               count++;
-                               gobble(in);
-                       }
-               
-                       gobble(in);
-                       count = lines[i]->start;
-                       for (int m = 0; m < size; m++) {
-                               int num;
-                               in >> num;
-                               
-                               windowSizes[count] = num;
-                               count++;
-                       }
-                       
-                       gobble(in);
-                       
-                       count = lines[i]->start;
-                       for (int m = 0; m < size; m++) {
-                               int front, back;
-                               in >> front >> back;
-                       
-                               map<int, int> t;
-                               
-                               t[front] = back;
-                               
-                               trimmed[count] = t;
-                               count++;
-                               
-                               gobble(in);
-                       }
-
-                       
-                       in.close();
-                       remove(s.c_str());
-               }
-                       
-       
-#else
-               for (int j = 0; j < bestfit.size(); j++) {
-                       //chops off beginning and end of sequences so they both start and end with a base
-                       decalc->trimSeqs(querySeqs[j], bestfit[j], trimmed[j]);  
-               }
-
-               for (int i = lines[0]->start; i < lines[0]->end; i++) {
-                               it = trimmed[i].begin();
-                               vector<int> win = decalc->findWindows(querySeqs[i], it->first, it->second, windowSizes[i], increment);
-                               windowsForeachQuery[i] = win;
-               }
-
-#endif         
-       }
-       catch(exception& e) {
-               errorOut(e, "Pintail", "createProcessesSpots");
-               exit(1);
-       }
-}
  /**************************************************************************************************/
-
-void Pintail::createProcessesPairs() {
-       try {
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
-               int process = 0;
-               vector<int> processIDS;
-               
-               //loop through and create all the processes you want
-               while (process != processors) {
-                       int pid = fork();
-                       
-                       if (pid > 0) {
-                               processIDS.push_back(pid);  
-                               process++;
-                       }else if (pid == 0){
-                               
-                               mothurOut("Finding pairs for sequences " + toString(lines[process]->start) + " to " + toString(lines[process]->end)); mothurOutEndLine();
-                               bestfit = findPairs(lines[process]->start, lines[process]->end);
-                               mothurOut("Done finding pairs for sequences " +  toString(lines[process]->start) + " to " + toString(lines[process]->end)); mothurOutEndLine();
-                               
-                               //write out data to file so parent can read it
-                               ofstream out;
-                               string s = toString(getpid()) + ".temp";
-                               openOutputFile(s, out);
-                               
-                               //output range and size
-                               out << bestfit.size() << endl;
-                               
-                               //output pairs
-                               for (int i = 0; i < bestfit.size(); i++) {
-                                       out << ">" << bestfit[i]->getName() << endl << bestfit[i]->getAligned() << endl;
-                               }
-                               out.close();
-                               
-                               exit(0);
-                       }else { mothurOut("unable to spawn the necessary processes."); mothurOutEndLine(); exit(0); }
-               }
-               
-               //force parent to wait until all the processes are done
-               for (int i=0;i<processors;i++) { 
-                       int temp = processIDS[i];
-                       wait(&temp);
-               }
-               
-               //get data created by processes
-               for (int i=0;i<processors;i++) { 
-                       ifstream in;
-                       string s = toString(processIDS[i]) + ".temp";
-                       openInputFile(s, in);
-                       
-                       int size;
-                       in >> size;  gobble(in);
-                               
-                       //get pairs
-                       int count = lines[i]->start;
-                       for (int m = 0; m < size; m++) {
-                               Sequence* temp = new Sequence(in);
-                               bestfit[count] = temp;
-                       
-                               count++;
-                               gobble(in);
-                       }
-                       
-                       in.close();
-                       remove(s.c_str());
-               }
-                       
-       
-#else
-               bestfit = findPairs(lines[0]->start, lines[0]->end);
-#endif         
-       }
-       catch(exception& e) {
-               errorOut(e, "Pintail", "createProcessesPairs");
-               exit(1);
-       }
-}
-/**************************************************************************************************/
-
-void Pintail::createProcesses() {
-       try {
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
-               int process = 0;
-               vector<int> processIDS;
-               
-               //loop through and create all the processes you want
-               while (process != processors) {
-                       int pid = fork();
-                       
-                       if (pid > 0) {
-                               processIDS.push_back(pid);  
-                               process++;
-                       }else if (pid == 0){
-                               
-                               mothurOut("Calculating observed, expected and de values for sequences " + toString(lines[process]->start) + " to " + toString(lines[process]->end)); mothurOutEndLine();
-                               for (int i = lines[process]->start; i < lines[process]->end; i++) {
-                                       
-                                       vector<float> obsi = decalc->calcObserved(querySeqs[i], bestfit[i], windowsForeachQuery[i], windowSizes[i]);
-                                       obsDistance[i] = obsi;
-                               
-                                       //calc Qav
-                                       vector<float> q = decalc->findQav(windowsForeachQuery[i], windowSizes[i], probabilityProfile);
-                                       
-                                       //get alpha
-                                       float alpha = decalc->getCoef(obsDistance[i], q);
-                                       
-                                       //find expected
-                                       vector<float> exp = decalc->calcExpected(q, alpha);
-                                       expectedDistance[i] = exp;
-                                       
-                                       //get de and deviation
-                                       float dei = decalc->calcDE(obsi, exp);
-                                       DE[i] = dei;
-                                       
-                                       it = trimmed[i].begin();
-                                       float dist = decalc->calcDist(querySeqs[i], bestfit[i], it->first, it->second); 
-                                       deviation[i] = dist;
-                               }
-                               mothurOut("Done calculating observed, expected and de values for sequences " + toString(lines[process]->start) + " to " + toString(lines[process]->end)); mothurOutEndLine();
-                               
-                               //write out data to file so parent can read it
-                               ofstream out;
-                               string s = toString(getpid()) + ".temp";
-                               openOutputFile(s, out);
-                               
-                               int size = lines[process]->end - lines[process]->start;
-                               out << size << endl;
-                                                               
-                               //output observed distances
-                               for (int i = lines[process]->start; i < lines[process]->end; i++) {
-                                       out << obsDistance[i].size() << '\t';
-                                       for (int j = 0; j < obsDistance[i].size(); j++) {
-                                               out << obsDistance[i][j] << '\t';
-                                       }
-                                       out << endl;
-                               }
-                               
-                               
-                               //output expected distances
-                               for (int i = lines[process]->start; i < lines[process]->end; i++) {
-                                       out << expectedDistance[i].size() << '\t';
-                                       for (int j = 0; j < expectedDistance[i].size(); j++) {
-                                               out << expectedDistance[i][j] << '\t';
-                                       }
-                                       out << endl;
-                               }
-
-                       
-                               //output de values
-                               for (int i = lines[process]->start; i < lines[process]->end; i++) {
-                                       out << DE[i] << '\t';
-                               }
-                               out << endl;    
-                               
-                               //output de values
-                               for (int i = lines[process]->start; i < lines[process]->end; i++) {
-                                       out << deviation[i] << '\t';
-                               }
-                               out << endl;
-                               
-                               out.close();
-
-                               exit(0);
-                       }else { mothurOut("unable to spawn the necessary processes."); mothurOutEndLine(); exit(0); }
-               }
-               
-               //force parent to wait until all the processes are done
-               for (int i=0;i<processors;i++) { 
-                       int temp = processIDS[i];
-                       wait(&temp);
-               }
-               
-               //get data created by processes
-               for (int i=0;i<processors;i++) { 
-                       ifstream in;
-                       string s = toString(processIDS[i]) + ".temp";
-                       openInputFile(s, in);
-                       
-                       int size;
-                       in >> size;  gobble(in);
-                       
-                       //get observed distances
-                       int count = lines[i]->start;
-                       for (int m = 0; m < size; m++) {
-                               int num;
-                               in >> num;
-                       
-                               vector<float> obs;  float w;
-                               for (int j = 0; j < num; j++) {
-                                       in >> w;
-                                       obs.push_back(w);
-                               }
-                       
-                               obsDistance[count] = obs;
-                               count++;
-                               gobble(in);
-                       }
-                       
-                       gobble(in);
-                       
-                       //get expected distances
-                       count = lines[i]->start;
-                       for (int m = 0; m < size; m++) {
-                               int num;
-                               in >> num;
-                       
-                               vector<float> exp;  float w;
-                               for (int j = 0; j < num; j++) {
-                                       in >> w;
-                                       exp.push_back(w);
-                               }
-                       
-                               expectedDistance[count] = exp;
-                               count++;
-                               gobble(in);
-                       }
-
-                       gobble(in);
-                       
-                       count = lines[i]->start;
-                       for (int m = 0; m < size; m++) {
-                               float num;
-                               in >> num;
-                               
-                               DE[count] = num;
-                               count++;
-                       }
-                       
-                       gobble(in);
-                       
-                       count = lines[i]->start;
-                       for (int m = 0; m < size; m++) {
-                               float num;
-                               in >> num;
-                               
-                               deviation[count] = num;
-                               count++;
-                       }
-
-                       in.close();
-                       remove(s.c_str());
-               }
-
-                               
-#else
-                       mothurOut("Calculating observed distance... "); cout.flush();
-                       for (int i = lines[0]->start; i < lines[0]->end; i++) {
-                               vector<float> obsi = decalc->calcObserved(querySeqs[i], bestfit[i], windowsForeachQuery[i], windowSizes[i]);
-                               obsDistance[i] = obsi;
-                       }
-                       mothurOut("Done."); mothurOutEndLine();
-                       
-                       
-                       
-                       mothurOut("Finding variability... "); cout.flush();
-                       for (int i = lines[0]->start; i < lines[0]->end; i++) {
-                               vector<float> q = decalc->findQav(windowsForeachQuery[i], windowSizes[i], probabilityProfile);
-                               Qav[i] = q;
-                       }
-                       mothurOut("Done."); mothurOutEndLine();
-                       
-                       
-                       
-                       mothurOut("Calculating alpha... "); cout.flush();
-                       for (int i = lines[0]->start; i < lines[0]->end; i++) {
-                               float alpha = decalc->getCoef(obsDistance[i], Qav[i]);
-                               seqCoef.push_back(alpha);
-                       }
-                       mothurOut("Done."); mothurOutEndLine();
-               
-               
-               
-                       mothurOut("Calculating expected distance... "); cout.flush();
-                       for (int i = lines[0]->start; i < lines[0]->end; i++) {
-                               vector<float> exp = decalc->calcExpected(Qav[i], seqCoef[i]);
-                               expectedDistance[i] = exp;
-                       }
-                       mothurOut("Done."); mothurOutEndLine();
-                       
-                       
-                       
-                       mothurOut("Finding deviation... "); cout.flush();
-                       for (int i = lines[0]->start; i < lines[0]->end; i++) {
-                               float de = decalc->calcDE(obsDistance[i], expectedDistance[i]);
-                               DE[i] = de;
-                               
-                               it = trimmed[i].begin();
-                               float dist = decalc->calcDist(querySeqs[i], bestfit[i], it->first, it->second); 
-                               deviation[i] = dist;
-                       }
-                       mothurOut("Done."); mothurOutEndLine();
-
-#endif         
-       }
-       catch(exception& e) {
-               errorOut(e, "Pintail", "createProcesses");
-               exit(1);
-       }
-}
-
-
-/**************************************************************************************************/
-
  void Pintail::createProcessesQuan() {
         try {
  #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
diff --git a/pintail.h b/pintail.h

index 26154b2403176e14691fd86248a9edc4ac03a085..ed52c3aa0d010c70bd99bb045ac3715a5d8cb94e 100644 (file)
--- a/pintail.h
+++ b/pintail.h
@@ -24,10 +24,10 @@
  class Pintail : public Chimera {
         
         public:
-               Pintail(string, string, string);        
+               Pintail(string, string);        
                 ~Pintail();
                 
-               int getChimeras();
+               int getChimeras(Sequence*);
                 void print(ostream&);
                 
                 void setCons(string c)          { consfile = c;  }
@@ -39,44 +39,39 @@ class Pintail : public Chimera {
                 Dist* distcalculator;
                 DeCalculator* decalc;
                 int iters;
-               string fastafile, templateFile, consfile;
+               string fastafile, consfile;
                 
-               
-               vector<linePair*> lines;
                 vector<linePair*> templateLines;
-               vector<Sequence*> querySeqs;
-               vector<Sequence*> templateSeqs;
-               
-               vector<Sequence*> bestfit;  //bestfit[0] matches queryseqs[0]...
+               Sequence*querySeq;
+                               
+               Sequence* bestfit;  //closest match to query in template
                 
-               vector< vector<float> > obsDistance;  //obsDistance[0] is the vector of observed distances for queryseqs[0]... 
-               vector< vector<float> > expectedDistance;  //expectedDistance[0] is the vector of expected distances for queryseqs[0]... 
-               vector<float> deviation;  //deviation[0] is the percentage of mismatched pairs over the whole seq between querySeqs[0] and its best match.
-               vector< vector<int> > windowsForeachQuery;  // windowsForeachQuery[0] is a vector containing the starting spot in queryseqs[0] aligned sequence for each window.
+               vector<float>  obsDistance;  //obsDistance is the vector of observed distances for query 
+               vector<float>  expectedDistance;  //expectedDistance is the vector of expected distances for query
+               float deviation;  //deviation is the percentage of mismatched pairs over the whole seq between query and its best match.
+               vector<int>  windowsForeachQuery;  // windowsForeachQuery is a vector containing the starting spot in query aligned sequence for each window.
                                                                                 //this is needed so you can move by bases and not just spots in the alignment
                                                                                 
-               vector<int> windowSizes;                        //windowSizes[0] = window size of querySeqs[0]
+               int windowSizes;                        //windowSizes = window size of query
                 vector<int> windowSizesTemplate;    //windowSizesTemplate[0] = window size of templateSeqs[0]
                 
-               vector< map<int, int> > trimmed;    //trimmed[0] = start and stop of trimmed sequences for querySeqs[0]
+               map<int, int> trimmed;    //trimmed = start and stop of trimmed sequences for query
                 map<int, int>::iterator it;
                 
-               vector< vector<float> > Qav;    //Qav[0] is the vector of average variablility for queryseqs[0]... 
-               vector<float>  seqCoef;                         //seqCoef[0] is the coeff for queryseqs[0]...
-               vector<float> DE;                                       //DE[0] is the deviaation for queryseqs[0]...
+               vector<float>  Qav;     //Qav is the vector of average variablility for query
+               float  seqCoef;         //seqCoef is the coeff for query
+               float DE;                       //DE is the deviaation for query
                 vector<float> probabilityProfile;
                 vector< vector<float> > quantiles;  //quantiles[0] is the vector of deviations with ceiling score of 1, quantiles[1] is the vector of deviations with ceiling score of 2...
                 vector< vector<quanMember> > quantilesMembers;  //quantiles[0] is the vector of deviations with ceiling score of 1, quantiles[1] is the vector of deviations with ceiling score of 2...
-               vector< set<int> > h;
+               set<int>  h;
                 
                 
                 vector<float> readFreq();
-               vector<Sequence*> findPairs(int, int);
+               Sequence* findPairs(Sequence*);
                         
-               void createProcessesSpots();
-               void createProcessesPairs();
-               void createProcesses();
                 void createProcessesQuan();
+               void doPrep();
                 
  };
  
diff --git a/readtree.cpp b/readtree.cpp

index 25fee86518bc6e2177b11824aa7989168c38772f..cdaa38701e0593817d5af7b415e308429fe2e6a1 100644 (file)
--- a/readtree.cpp
+++ b/readtree.cpp
@@ -354,7 +354,9 @@ int ReadNewickTree::readNewickInt(istream& f, int& n, Tree* T) {
                                 group = "xxx";
                         }
                         
-                       T->tree[n1].setGroup(group);
+                       vector<string> tempGroup; tempGroup.push_back(group);
+                       
+                       T->tree[n1].setGroup(tempGroup);
                         T->tree[n1].setChildren(-1,-1);
                 
                         if(blen == 1){  
diff --git a/readtreecommand.cpp b/readtreecommand.cpp

index 09f899b947bf9bb4b455de86479d34ffa709d46f..a11465d669a55de0c5555f616c36f51caef74ea2 100644 (file)
--- a/readtreecommand.cpp
+++ b/readtreecommand.cpp
@@ -87,7 +87,7 @@ ReadTreeCommand::ReadTreeCommand(string option){
                         
                         namefile = validParameter.validFile(parameters, "name", true);
                         if (namefile == "not open") { abort = true; }
-                       else if (namefile == "not found") { treefile = ""; }
+                       else if (namefile == "not found") { namefile = ""; }
                         else { readNamesFile(); }       
                         
                         if (abort == false) {
@@ -162,6 +162,7 @@ int ReadTreeCommand::execute(){
                                         if (it == nameMap.end()) {
                                                 mothurOut(treeMap->namesOfSeqs[i] + " is in your groupfile and not in your tree. It will be disregarded."); mothurOutEndLine();
                                                 treeMap->removeSeq(treeMap->namesOfSeqs[i]);
+                                               i--; //need this because removeSeq removes name from namesOfSeqs
                                         }
                                 }
                         }
diff --git a/slayer.cpp b/slayer.cpp

index 8e9eb56c8e55bddd9405cc790759c45f2ed7dc12..66db22b6a36ae9a0f7f9dd383201cd3852a120b2 100644 (file)
--- a/slayer.cpp
+++ b/slayer.cpp
@@ -10,8 +10,8 @@
  #include "slayer.h"
  
  /***********************************************************************/
-Slayer::Slayer(int win, int increment, int parentThreshold, float div, int i) :
-               windowSize(win), windowStep(increment), parentFragmentThreshold(parentThreshold), divRThreshold(div), iters(i){}
+Slayer::Slayer(int win, int increment, int parentThreshold, float div, int i, int snp) :
+               windowSize(win), windowStep(increment), parentFragmentThreshold(parentThreshold), divRThreshold(div), iters(i), percentSNPSample(snp){}
  /***********************************************************************/
  string Slayer::getResults(Sequence* query, vector<Sequence*> refSeqs) {
         try {
@@ -20,13 +20,14 @@ string Slayer::getResults(Sequence* query, vector<Sequence*> refSeqs) {
                 for (int i = 0; i < refSeqs.size(); i++) {
                 
                         for (int j = i+1; j < refSeqs.size(); j++) {
-                       
+       
                                 //make copies of query and each parent because runBellerophon removes gaps and messes them up
                                 Sequence* q = new Sequence(query->getName(), query->getAligned());
                                 Sequence* leftParent = new Sequence(refSeqs[i]->getName(), refSeqs[i]->getAligned());
                                 Sequence* rightParent = new Sequence(refSeqs[j]->getName(), refSeqs[j]->getAligned());
                                 
-                               vector<data_struct> divs = runBellerophon(q, leftParent, rightParent);
+                               map<int, int> spots;  //map from spot in original sequence to spot in filtered sequence for query and both parents
+                               vector<data_struct> divs = runBellerophon(q, leftParent, rightParent, spots);
                                 
                                 vector<data_struct> selectedDivs;
                                 for (int k = 0; k < divs.size(); k++) {
@@ -39,16 +40,17 @@ string Slayer::getResults(Sequence* query, vector<Sequence*> refSeqs) {
                                         
                                         //require at least 3 SNPs on each side of the break
                                         if ((numSNPSLeft >= 3) && (numSNPSRight >= 3)) {
+                                       
+                                               //removed in 12/09 version of chimeraSlayer
+                                               //int winSizeLeft = divs[k].winLEnd - divs[k].winLStart + 1;
+                                               //int winSizeRight = divs[k].winREnd - divs[k].winRStart + 1;
                                                 
-                                               int winSizeLeft = divs[k].winLEnd - divs[k].winLStart + 1;
-                                               int winSizeRight = divs[k].winREnd - divs[k].winRStart + 1;
-                                               
-                                               float snpRateLeft = numSNPSLeft / (float) winSizeLeft;
-                                               float snpRateRight = numSNPSRight / (float) winSizeRight;
-                                               float logR = log(snpRateLeft / snpRateRight) / log(2.0);
+                                               //float snpRateLeft = numSNPSLeft / (float) winSizeLeft;
+                                               //float snpRateRight = numSNPSRight / (float) winSizeRight;
+                                               //float logR = log(snpRateLeft / snpRateRight) / log(2.0); 
                                                 
                                                 // do not accept excess snp ratio on either side of the break
-                                               if (abs(logR) < 1 ) {  
+                                               //if (abs(logR) < 1 ) {  
                                                         
                                                         float BS_A, BS_B;
                                                         bootstrapSNPS(snpsLeft, snpsRight, BS_A, BS_B);
@@ -59,9 +61,15 @@ string Slayer::getResults(Sequence* query, vector<Sequence*> refSeqs) {
                                                         divs[k].bsMax = max(BS_A, BS_B);
                                                 
                                                         divs[k].chimeraMax = max(divs[k].qla_qrb, divs[k].qlb_qra);
+                                                       
+                                                       //so results reflect orignal alignment
+                                                       divs[k].winLStart = spots[divs[k].winLStart];
+                                                       divs[k].winLEnd = spots[divs[k].winLEnd];  
+                                                       divs[k].winRStart = spots[divs[k].winRStart]; 
+                                                       divs[k].winREnd = spots[divs[k].winREnd]; 
                                                 
                                                         selectedDivs.push_back(divs[k]);
-                                               }
+                                               //}
                                         }
                                 }
                                 
@@ -74,7 +82,7 @@ string Slayer::getResults(Sequence* query, vector<Sequence*> refSeqs) {
                         }
                 }
                 
-               
+
                 // compute bootstrap support
                 if (all.size() > 0) {
                         //sort them
@@ -95,22 +103,31 @@ string Slayer::getResults(Sequence* query, vector<Sequence*> refSeqs) {
         }
  }
  /***********************************************************************/
-vector<data_struct> Slayer::runBellerophon(Sequence* q, Sequence* pA, Sequence* pB) {
+vector<data_struct> Slayer::runBellerophon(Sequence* q, Sequence* pA, Sequence* pB, map<int, int>& spots) {
         try{
                 
                 vector<data_struct> data;
-                               
+               
                 //vertical filter
                 vector<Sequence*> temp;
                 temp.push_back(q); temp.push_back(pA); temp.push_back(pB);
-               map<int, int> spots = verticalFilter(temp);
+               
+               //maps spot in new alignment to spot in alignment before filter
+               spots = verticalFilter(temp);  //fills baseSpots
                 
                 //get these to avoid numerous function calls
                 string query = q->getAligned();
                 string parentA = pA->getAligned();
                 string parentB = pB->getAligned();
                 int length = query.length();
-               
+//cout << q->getName() << endl << q->getAligned() << endl << endl;     
+//cout << pA->getName() << endl << pA->getAligned() << endl << endl;           
+//cout << pB->getName() << endl << pB->getAligned() << endl << endl;   
+//cout << " length = " << length << endl;
+//cout << q->getName() << endl;
+//cout << pA->getName() << '\t';
+//cout << pB->getName() << endl;
+       
                 //check window size
                 if (length < (2*windowSize+windowStep)) { 
                         mothurOut("Your window size is too large for " + q->getName() + ". I will make the window size " + toString(length/4) + " which is 1/4 the filtered length."); mothurOutEndLine();      
@@ -143,9 +160,11 @@ vector<data_struct> Slayer::runBellerophon(Sequence* q, Sequence* pA, Sequence*
                         //float avgQA_QB = ((QA*leftLength) + (QB*rightLength)) / (float) length;
                 
                         float divR_QLA_QRB = min((QLA_QRB/QA), (QLA_QRB/QB));
-               
                         float divR_QLB_QRA = min((QLB_QRA/QA), (QLB_QRA/QB));
+//cout << leftLength << '\t' << rightLength << '\t' << QLA << '\t' << QRB << '\t' << QLB << '\t' << QRA  << '\t' << LAB << '\t' << RAB << '\t' << AB << '\t' << QA << '\t' << QB << '\t' << QLA_QRB << '\t' <<  QLB_QRA <<    endl;                    
  
+//cout << divRThreshold << endl;
+//cout << breakpoint << '\t' << divR_QLA_QRB << '\t' << divR_QLB_QRA << endl;
                         //is one of them above the 
                         if (divR_QLA_QRB >= divRThreshold || divR_QLB_QRA >= divRThreshold) {
                                 
@@ -167,10 +186,10 @@ vector<data_struct> Slayer::runBellerophon(Sequence* q, Sequence* pA, Sequence*
                                         member.rab = RAB; 
                                         member.qra = QRA; 
                                         member.qlb = QLB; 
-                                       member.winLStart = spots[0];
-                                       member.winLEnd = spots[breakpoint];  //so breakpoint reflects spot in alignment before filter
-                                       member.winRStart = spots[breakpoint+1]; 
-                                       member.winREnd = spots[length-1]; 
+                                       member.winLStart = 0;
+                                       member.winLEnd = breakpoint;  
+                                       member.winRStart = breakpoint+1; 
+                                       member.winREnd = length-1; 
                                         member.querySeq = *(q); 
                                         member.parentA = *(pA);
                                         member.parentB = *(pB);
@@ -198,7 +217,7 @@ vector<snps> Slayer::getSNPS(string parentA, string query, string parentB, int l
         try {
         
                 vector<snps> data;
-
+//cout << left << '\t' << right << endl;
                 for (int i = left; i <= right; i++) {
                         
                         char A = parentA[i];
@@ -206,12 +225,31 @@ vector<snps> Slayer::getSNPS(string parentA, string query, string parentB, int l
                         char B = parentB[i];
                         
                         if ((A != Q) || (B != Q)) {
-                               snps member;
-                               member.queryChar = Q;
-                               member.parentAChar = A;
-                               member.parentBChar = B;
+//cout << "not equal " << Q << '\t' << A << '\t' << B << endl;
+                       
+                               //ensure not neighboring a gap. change to 12/09 release of chimeraSlayer - not sure what this adds, but it eliminates alot of SNPS
+                               if (
+                                       //did query loose a base here during filter??
+                                       ( i == 0 || abs (baseSpots[0][i] - baseSpots[0][i-1]) == 1) &&
+                                       ( i == query.length() || abs (baseSpots[0][i] - baseSpots[0][i+1]) == 1)
+                                       &&
+                                       //did parentA loose a base here during filter??
+                                       ( i == 0 || abs (baseSpots[1][i] - baseSpots[1][i-1]) == 1) &&
+                                       ( i == parentA.length() || abs (baseSpots[1][i] - baseSpots[1][i+1]) == 1) 
+                                       &&
+                                       //did parentB loose a base here during filter??
+                                       ( i == 0 || abs (baseSpots[2][i] - baseSpots[2][i-1]) == 1) &&
+                                       ( i == parentB.length() || abs (baseSpots[2][i] - baseSpots[2][i+1]) == 1)
+                                       ) 
+                               { 
                                 
-                               data.push_back(member);
+                                       snps member;
+                                       member.queryChar = Q;
+                                       member.parentAChar = A;
+                                       member.parentBChar = B;
+//cout << "not neighboring a gap " << Q << '\t' << A << '\t' << B << '\t' << baseSpots[0][i] << '\t' << baseSpots[0][i+1] << '\t' << baseSpots[0][i-1] << '\t' << baseSpots[1][i] << '\t' << baseSpots[1][i+1] << '\t' << baseSpots[1][i-1] << '\t' << baseSpots[2][i] << '\t' << baseSpots[2][i+1] << '\t' << baseSpots[2][i-1] << endl;                              
+                                       data.push_back(member);
+                               }
                         }
                 }
                 
@@ -232,9 +270,9 @@ void Slayer::bootstrapSNPS(vector<snps> left, vector<snps> right, float& BSA, fl
                 int count_A = 0; // sceneario QLA,QRB supported
                 int count_B = 0; // sceneario QLB,QRA supported
         
-               int numLeft = max(1, int(left.size()/10 +0.5));
-               int numRight = max(1, int(right.size()/10 + 0.5));
-               
+               int numLeft = max(1, int(left.size() * (percentSNPSample/(float)100) + 0.5));
+               int numRight = max(1, int(right.size() * (percentSNPSample/(float)100) + 0.5));
+
                 for (int i = 0; i < iters; i++) {
                         //random sampling with replacement.
                 
@@ -278,11 +316,29 @@ void Slayer::bootstrapSNPS(vector<snps> left, vector<snps> right, float& BSA, fl
                         if ((QLB > QLA) && (QRA > QRB)) {
                                 count_B++;
                         }
-               
+                       
+//cout << "selected left snp: \n";
+//for (int j = 0; j < selectedLeft.size(); j++) {  cout << selectedLeft[j].parentAChar;  } 
+//cout << endl;
+//for (int j = 0; j < selectedLeft.size(); j++) {  cout << selectedLeft[j].queryChar;  }
+//cout << endl;
+//for (int j = 0; j < selectedLeft.size(); j++) {  cout << selectedLeft[j].parentBChar;  }
+//cout << endl;
+//cout << "selected right snp: \n";
+//for (int j = 0; j < selectedRight.size(); j++) {  cout << selectedRight[j].parentAChar;  } 
+//cout << endl;
+//for (int i = 0; i < selectedRight.size(); i++) {  cout << selectedRight[i].queryChar;  }
+//cout << endl;
+//for (int i = 0; i < selectedRight.size(); i++) {  cout << selectedRight[i].parentBChar;  }
+//cout << endl;                
                 }
  
+
+
+
                 BSA = ((float) count_A / (float) iters) * 100;
                 BSB = ((float) count_B / (float) iters) * 100;
+//cout << "bsa = " << BSA << " bsb = " << BSB << endl;
         
         }
         catch(exception& e) {
@@ -359,7 +415,7 @@ float Slayer::computePercentID(string queryFrag, string parent, int left, int ri
         try {
                 int total = 0;
                 int matches = 0;
-       
+
                 for (int i = left; i <= right; i++) {
                         total++;
                         if (queryFrag[i] == parent[i]) {
@@ -380,6 +436,10 @@ float Slayer::computePercentID(string queryFrag, string parent, int left, int ri
  //remove columns that contain any gaps
  map<int, int> Slayer::verticalFilter(vector<Sequence*> seqs) {
         try {
+               //find baseSpots
+               baseSpots.clear(); 
+               baseSpots.resize(3);  //query, parentA, parentB
+       
                 vector<int> gaps;       gaps.resize(seqs[0]->getAligned().length(), 0);
                 
                 string filterString = (string(seqs[0]->getAligned().length(), '1'));
@@ -391,11 +451,11 @@ map<int, int> Slayer::verticalFilter(vector<Sequence*> seqs) {
                         
                         for (int j = 0; j < seqAligned.length(); j++) {
                                 //if this spot is a gap
-                               if ((seqAligned[j] == '-') || (seqAligned[j] == '.') || (toupper(seqAligned[j]) == 'N'))        {       gaps[j]++;      }
+                               if ((seqAligned[j] == '-') || (seqAligned[j] == '.') || (toupper(seqAligned[j]) == 'N'))        {   gaps[j]++;  }
                         }
                 }
                 
-               //zero out spot where all sequences have blanks
+               //zero out spot where any sequences have blanks
                 int numColRemoved = 0;
                 int count = 0;
                 map<int, int> maskMap; maskMap.clear();
@@ -407,16 +467,25 @@ map<int, int> Slayer::verticalFilter(vector<Sequence*> seqs) {
                                 count++;
                         }
                 }
-               
+
                 //for each sequence
                 for (int i = 0; i < seqs.size(); i++) {
                 
                         string seqAligned = seqs[i]->getAligned();
                         string newAligned = "";
                         
+                       int baseCount = 0;
+                       int count = 0;
                         for (int j = 0; j < seqAligned.length(); j++) {
+                               //are you a base
+                               if ((seqAligned[j] != '-') && (seqAligned[j] != '.') && (toupper(seqAligned[j]) != 'N'))        { baseCount++; }
+                       
                                 //if this spot is not a gap
-                               if (filterString[j] == '1') { newAligned += seqAligned[j]; }
+                               if (filterString[j] == '1') { 
+                                       newAligned += seqAligned[j]; 
+                                       baseSpots[i][count] = baseCount;
+                                       count++;
+                               }
                         }
                         
                         seqs[i]->setAligned(newAligned);
diff --git a/slayer.h b/slayer.h

index dc13e6daa3f1e4ce9e2340da2cf201c9881a3083..099cb208f1e69ee9f943ea81ee395aba9bbc4efc 100644 (file)
--- a/slayer.h
+++ b/slayer.h
@@ -64,7 +64,7 @@ class Slayer {
  
         public:
                 
-               Slayer(int, int, int, float, int);
+               Slayer(int, int, int, float, int, int);
                 ~Slayer() {};
                 
                 string getResults(Sequence*, vector<Sequence*>);
@@ -73,14 +73,15 @@ class Slayer {
                                 
         private:
                 
-               int windowSize, windowStep, parentFragmentThreshold, iters;
+               int windowSize, windowStep, parentFragmentThreshold, iters, percentSNPSample;
                 float divRThreshold; 
                 vector<data_struct>  outputResults;
+               vector< map<int, int> > baseSpots;
                 
                 map<int, int> verticalFilter(vector<Sequence*>);
                 float computePercentID(string, string, int, int);
                 
-               vector<data_struct> runBellerophon(Sequence*, Sequence*, Sequence*);
+               vector<data_struct> runBellerophon(Sequence*, Sequence*, Sequence*, map<int, int>&);
                 vector<snps> getSNPS(string, string, string, int, int);
                 void bootstrapSNPS(vector<snps>, vector<snps>, float&, float&);
                 float snpQA(vector<snps>);
diff --git a/tree.cpp b/tree.cpp

index 52b7322cb1064546fd0f446f63fd61ef61d868fe..731eb64d93f95b485f8607308e3e88345138813c 100644 (file)
--- a/tree.cpp
+++ b/tree.cpp
@@ -27,7 +27,8 @@ Tree::Tree() {
                         //initialize leaf nodes
                         if (i <= (numLeaves-1)) {
                                 tree[i].setName(globaldata->Treenames[i]);
-                               tree[i].setGroup(globaldata->gTreemap->getGroup(globaldata->Treenames[i]));
+                               vector<string> tempGroups; tempGroups.push_back(globaldata->gTreemap->getGroup(globaldata->Treenames[i]));
+                               tree[i].setGroup(tempGroups);
                                 //set pcount and pGroup for groupname to 1.
                                 tree[i].pcount[globaldata->gTreemap->getGroup(globaldata->Treenames[i])] = 1;
                                 tree[i].pGroups[globaldata->gTreemap->getGroup(globaldata->Treenames[i])] = 1;
@@ -37,7 +38,8 @@ Tree::Tree() {
                         //intialize non leaf nodes
                         }else if (i > (numLeaves-1)) {
                                 tree[i].setName("");
-                               tree[i].setGroup("");
+                               vector<string> tempGroups;
+                               tree[i].setGroup(tempGroups);
                         }
                 }
         }
@@ -119,6 +121,14 @@ void Tree::addNamesToCounts() {
                                         }
                                 }//end if
                                 
+                               //update groups to reflect all the groups this node represents
+                               vector<string> nodeGroups;
+                               map<string, int>::iterator itGroups;
+                               for (itGroups = tree[i].pcount.begin(); itGroups != tree[i].pcount.end(); itGroups++) {
+                                       nodeGroups.push_back(itGroups->first);
+                               }
+                               tree[i].setGroup(nodeGroups);
+                               
                         }//end else
                 }//end for
                                 
@@ -375,7 +385,7 @@ void Tree::randomLabels(vector<string> g) {
                                 tree[z].pGroups = (tree[i].pGroups);
                                 tree[i].pGroups = (lib_hold);
                                 
-                               string zgroup = tree[z].getGroup();
+                               vector<string> zgroup = tree[z].getGroup();
                                 tree[z].setGroup(tree[i].getGroup());
                                 tree[i].setGroup(zgroup);
                                 
@@ -394,7 +404,7 @@ void Tree::randomLabels(vector<string> g) {
                 exit(1);
         }
  }
-/**************************************************************************************************/
+/**************************************************************************************************
  
  void Tree::randomLabels(string groupA, string groupB) {
         try {
@@ -447,7 +457,9 @@ void Tree::assembleRandomUnifracTree(vector<string> g) {
  }
  /*************************************************************************************************/
  void Tree::assembleRandomUnifracTree(string groupA, string groupB) {
-       randomLabels(groupA, groupB);
+
+       vector<string> temp; temp.push_back(groupA); temp.push_back(groupB);
+       randomLabels(temp);
         assembleTree();
  }
  
@@ -584,7 +596,9 @@ void Tree::printBranch(int node, ostream& out, string mode) {
                                 }
                         }
                 }else { //you are a leaf
-                       out << tree[node].getGroup(); 
+                       string leafGroup = globaldata->gTreemap->getGroup(tree[node].getName());
+                       
+                       out << leafGroup; 
                         if (mode == "branch") {
                                 //if there is a branch length then print it
                                 if (tree[node].getBranchLength() != -1) {
diff --git a/tree.h b/tree.h

index a076cef8eb30e3cda0efd8d74fcb680ae007cb24..37028be57e90f3d5d38eeb3080b36a8538e8bf8e 100644 (file)
--- a/tree.h
+++ b/tree.h
@@ -55,7 +55,7 @@ private:
         void randomTopology();
         void randomBlengths();
         void randomLabels(vector<string>);
-       void randomLabels(string, string);
+       //void randomLabels(string, string);
         void printBranch(int, ostream&, string);  //recursively print out tree
         void parseTreeFile();   //parses through tree file to find names of nodes and number of them
                                                         //this is required in case user has sequences in the names file that are
diff --git a/treemap.cpp b/treemap.cpp

index e472570895414a4ec535e10c5f02bee10dfea982..cd046f9dced6bbbe33c0d611d603217e28884d7c 100644 (file)
--- a/treemap.cpp
+++ b/treemap.cpp
@@ -49,7 +49,7 @@ void TreeMap::removeSeq(string seqName) {
         //erase name from namesOfSeqs
         for (int i = 0; i < namesOfSeqs.size(); i++) {
                 if (namesOfSeqs[i] == seqName)  {
-                       namesOfSeqs.erase (namesOfSeqs.begin()+i);
+                       namesOfSeqs.erase(namesOfSeqs.begin()+i);
                         break;
                 }
         }
@@ -61,8 +61,6 @@ void TreeMap::removeSeq(string seqName) {
         //remove seq from treemap
         it = treemap.find(seqName);
         treemap.erase(it);
-       
-
  }
  /************************************************************/
  
diff --git a/treenode.cpp b/treenode.cpp

index 54cdae4e502250229a6e70e5ce937a631a843b1e..648707ab246725ce6f36e912d9b8509024e362b7 100644 (file)
--- a/treenode.cpp
+++ b/treenode.cpp
@@ -25,7 +25,7 @@ Node::Node() {
  /****************************************************************/
  void Node::setName(string Name) {  name = Name; }
  /****************************************************************/
-void Node::setGroup(string groups)  { group =groups; }
+void Node::setGroup(vector<string> groups)  { group =groups; }
  /****************************************************************/
  void Node::setBranchLength(float l) { branchLength = l; }
  /****************************************************************/
@@ -41,7 +41,7 @@ void Node::setChildren(int lc, int rc) { lchild = lc; rchild = rc; }  //leftchild
  /****************************************************************/
  string Node::getName() { return name; }
  /****************************************************************/
-string Node::getGroup() { return group; }
+vector<string> Node::getGroup() { return group; }
  /****************************************************************/
  float Node::getBranchLength() { return branchLength; }
  /****************************************************************/
@@ -60,12 +60,16 @@ int Node::getIndex() { return vectorIndex; }
  //to be used by printTree in the Tree class to print the leaf info                     
  void Node::printNode() {
         try{
-               mothurOut(toString(parent) + " " + toString(lchild) + " " + toString(rchild) + " " + group);
+               mothurOut(toString(parent) + " " + toString(lchild) + " " + toString(rchild) + " ");
+               
+               for (int i = 0; i < group.size(); i++) {  mothurOut( group[i] + " "); }
+               
                 //there is a branch length
                 if (branchLength != -1) { 
                         mothurOut(" " + toString(branchLength)); 
                 }
                 mothurOut(" |");
+               
                 map<string, int>::iterator it;
                 for(it=pGroups.begin();it!=pGroups.end();it++){
                         mothurOut(" " + it->first + ":" + toString(it->second));
diff --git a/treenode.h b/treenode.h

index d6163a2b1275f9816a576659019d178005042e6d..c25806b7445cb90cc97ae45ab33d917a3981257e 100644 (file)
--- a/treenode.h
+++ b/treenode.h
@@ -21,7 +21,7 @@ class Node  {
                 ~Node() { pGroups.clear(); pcount.clear(); };
                 
                 void setName(string);
-               void setGroup(string);  
+               void setGroup(vector<string>);  
                 void setBranchLength(float);
                 void setLabel(float);
                 void setParent(int);
@@ -30,7 +30,7 @@ class Node  {
                 void setLengthToLeaves(float);
                 
                 string getName();
-               string getGroup();  
+               vector<string> getGroup();  
                 float getBranchLength();
                 float getLengthToLeaves();
                 float getLabel();
@@ -52,7 +52,7 @@ class Node  {
                         
         private:
                 string                  name;
-               string                  group;
+               vector<string>  group; 
                 float                   branchLength, length2leaf, label;
                 int                             parent;
                 int                             lchild;
diff --git a/unweighted.cpp b/unweighted.cpp

index c39e6629df1c2749928833fe24af65d48e853d64..e6ed0eb83e7a71d2fd34e4b0fc7627c1368b5d6e 100644 (file)
--- a/unweighted.cpp
+++ b/unweighted.cpp
@@ -48,14 +48,8 @@ EstOutput Unweighted::getValues(Tree* t) {
                                 //groups in this combo
                                 groups.push_back(globaldata->Groups[a]); groups.push_back(globaldata->Groups[l]);
                 
-                               for(int i=t->getNumLeaves();i<t->getNumNodes();i++){
-               
-                                       int lc = t->tree[i].getLChild();  //lc = vector index of left child
-                                       int rc = t->tree[i].getRChild();  //rc = vector index of right child
-                       
-                                       /**********************************************************************/
-                                       //This section adds in all lengths that are non leaf
-                       
+                               for(int i=0;i<t->getNumNodes();i++){
+       
                                         copyIpcount = t->tree[i].pcount;
                                         for (it = copyIpcount.begin(); it != copyIpcount.end();) {
                                                 if (inUsersGroups(it->first, groups) != true) { 
@@ -73,30 +67,10 @@ EstOutput Unweighted::getValues(Tree* t) {
                                                 totalBL += abs(t->tree[i].getBranchLength()); 
                                         }
                         
-                                       /**********************************************************************/
-                                       //This section adds in all lengths that are leaf
-                       
-                                       //if i's chidren are leaves
-                                       if (t->tree[rc].getRChild() == -1) {
-                                               //if rc is a valid group and rc has a BL
-                                               if ((inUsersGroups(t->tree[rc].getGroup(), groups) == true) && (t->tree[rc].getBranchLength() != -1)) {
-                                                       UniqueBL += abs(t->tree[rc].getBranchLength());
-                                                       totalBL += abs(t->tree[rc].getBranchLength()); 
-                                               }
-                                       }
-                       
-                                       if (t->tree[lc].getLChild() == -1) {
-                                               //if lc is a valid group and lc has a BL
-                                               if ((inUsersGroups(t->tree[lc].getGroup(), groups) == true) && (t->tree[lc].getBranchLength() != -1)) {
-                                                       UniqueBL += abs(t->tree[lc].getBranchLength());
-                                                       totalBL += abs(t->tree[lc].getBranchLength()); 
-                                               }
-                                       }
-                       
-                                       /**********************************************************************/
                                 }
                 
                                 UW = (UniqueBL / totalBL);  
+               //cout << globaldata->Groups[a] << globaldata->Groups[l] << '\t' << UniqueBL << '\t' << totalBL << endl;
         
                                 if (isnan(UW) || isinf(UW)) { UW = 0; }
         
@@ -126,14 +100,8 @@ EstOutput Unweighted::getValues(Tree* t) {
                         UW = 0.00;              //Unweighted Value = UniqueBL / totalBL;
                         copyIpcount.clear();
                                 
-                       for(int i=t->getNumLeaves();i<t->getNumNodes();i++){
-               
-                               int lc = t->tree[i].getLChild();  //lc = vector index of left child
-                               int rc = t->tree[i].getRChild();  //rc = vector index of right child
-                       
-                               /**********************************************************************/
-                               //This section adds in all lengths that are non leaf
-                       
+                       for(int i=0;i<t->getNumNodes();i++){
+                               
                                 copyIpcount = t->tree[i].pcount;
                                 for (it = copyIpcount.begin(); it != copyIpcount.end();) {
                                         if (inUsersGroups(it->first, groups) != true) { 
@@ -151,27 +119,6 @@ EstOutput Unweighted::getValues(Tree* t) {
                                         totalBL += abs(t->tree[i].getBranchLength()); 
                                 }
                         
-                               /**********************************************************************/
-                               //This section adds in all lengths that are leaf
-                       
-                               //if i's chidren are leaves
-                               if (t->tree[rc].getRChild() == -1) {
-                                       //if rc is a valid group and rc has a BL
-                                       if ((inUsersGroups(t->tree[rc].getGroup(), groups) == true) && (t->tree[rc].getBranchLength() != -1)) {
-                                               UniqueBL += abs(t->tree[rc].getBranchLength());
-                                               totalBL += abs(t->tree[rc].getBranchLength()); 
-                                       }
-                               }
-                       
-                               if (t->tree[lc].getLChild() == -1) {
-                                       //if lc is a valid group and lc has a BL
-                                       if ((inUsersGroups(t->tree[lc].getGroup(), groups) == true) && (t->tree[lc].getBranchLength() != -1)) {
-                                               UniqueBL += abs(t->tree[lc].getBranchLength());
-                                               totalBL += abs(t->tree[lc].getBranchLength()); 
-                                       }
-                               }
-                       
-                               /**********************************************************************/
                         }
                 
                         UW = (UniqueBL / totalBL);  
@@ -236,12 +183,9 @@ EstOutput Unweighted::getValues(Tree* t, string groupA, string groupB) {
                                 copyTree->assembleRandomUnifracTree(groups[0], groups[1]);
                                 
                                 //copyTree->createNewickFile("random"+groupA+toString(count));
-               
-                               for(int i=copyTree->getNumLeaves();i<copyTree->getNumNodes();i++){
-               
-                                       int lc = copyTree->tree[i].getLChild();  //lc = vector index of left child
-                                       int rc = copyTree->tree[i].getRChild();  //rc = vector index of right child
-                       
+                               
+                               for(int i=0;i<copyTree->getNumNodes();i++){
+                                               
                                         /**********************************************************************/
                                         //This section adds in all lengths that are non leaf
                                         copyIpcount = copyTree->tree[i].pcount;
@@ -261,27 +205,6 @@ EstOutput Unweighted::getValues(Tree* t, string groupA, string groupB) {
                                                 totalBL += abs(copyTree->tree[i].getBranchLength()); 
                                         }
                         
-                                       /**********************************************************************/
-                                       //This section adds in all lengths that are leaf
-                       
-                                       //if i's chidren are leaves
-                                       if (copyTree->tree[rc].getRChild() == -1) {
-                                               //if rc is a valid group and rc has a BL
-                                               if ((inUsersGroups(copyTree->tree[rc].getGroup(), groups) == true) && (copyTree->tree[rc].getBranchLength() != -1)) {
-                                                       UniqueBL += abs(copyTree->tree[rc].getBranchLength());
-                                                       totalBL += abs(copyTree->tree[rc].getBranchLength()); 
-                                               }
-                                       }
-                       
-                                       if (copyTree->tree[lc].getLChild() == -1) {
-                                               //if lc is a valid group and lc has a BL
-                                               if ((inUsersGroups(copyTree->tree[lc].getGroup(), groups) == true) && (copyTree->tree[lc].getBranchLength() != -1)) {
-                                                       UniqueBL += abs(copyTree->tree[lc].getBranchLength());
-                                                       totalBL += abs(copyTree->tree[lc].getBranchLength()); 
-                                               }
-                                       }
-                       
-                                       /**********************************************************************/
                                 }
                 
                                 UW = (UniqueBL / totalBL);  
@@ -320,13 +243,7 @@ EstOutput Unweighted::getValues(Tree* t, string groupA, string groupB) {
                         //swap labels in all the groups you want to compare
                         copyTree->assembleRandomUnifracTree(groups);
  
-                       for(int i=copyTree->getNumLeaves();i<copyTree->getNumNodes();i++){
-               
-                               int lc = copyTree->tree[i].getLChild();  //lc = vector index of left child
-                               int rc = copyTree->tree[i].getRChild();  //rc = vector index of right child
-                       
-                               /**********************************************************************/
-                               //This section adds in all lengths that are non leaf
+                       for(int i=0;i<copyTree->getNumNodes();i++){
                         
                                 copyIpcount = copyTree->tree[i].pcount;
                                 for (it = copyIpcount.begin(); it != copyIpcount.end();) {
@@ -345,27 +262,6 @@ EstOutput Unweighted::getValues(Tree* t, string groupA, string groupB) {
                                         totalBL += abs(copyTree->tree[i].getBranchLength()); 
                                 }
                         
-                               /**********************************************************************/
-                               //This section adds in all lengths that are leaf
-                       
-                               //if i's chidren are leaves
-                               if (copyTree->tree[rc].getRChild() == -1) {
-                                       //if rc is a valid group and rc has a BL
-                                       if ((inUsersGroups(copyTree->tree[rc].getGroup(), groups) == true) && (copyTree->tree[rc].getBranchLength() != -1)) {
-                                               UniqueBL += abs(copyTree->tree[rc].getBranchLength());
-                                               totalBL += abs(copyTree->tree[rc].getBranchLength()); 
-                                       }
-                               }
-                       
-                               if (copyTree->tree[lc].getLChild() == -1) {
-                                       //if lc is a valid group and lc has a BL
-                                       if ((inUsersGroups(copyTree->tree[lc].getGroup(), groups) == true) && (copyTree->tree[lc].getBranchLength() != -1)) {
-                                               UniqueBL += abs(copyTree->tree[lc].getBranchLength());
-                                               totalBL += abs(copyTree->tree[lc].getBranchLength()); 
-                                       }
-                               }
-                       
-                               /**********************************************************************/
                         }
                 
                         UW = (UniqueBL / totalBL);  
diff --git a/weighted.cpp b/weighted.cpp

index d1bc40ea72b096e6ae8ffce9f5488dec31a85000..8a312297cc677ffb78aec597bf8128efbf500a0f 100644 (file)
--- a/weighted.cpp
+++ b/weighted.cpp
@@ -26,6 +26,8 @@ EstOutput Weighted::getValues(Tree* t) {
                                 //initialize weighted scores
                                 WScore[globaldata->Groups[i]+globaldata->Groups[l]] = 0.0;
                                 
+                               vector<string> groups; groups.push_back(globaldata->Groups[i]); groups.push_back(globaldata->Groups[l]);
+                               
                                 D.push_back(0.0000); //initialize a spot in D for each combination
                                 
                                 /********************************************************/
@@ -52,9 +54,25 @@ EstOutput Weighted::getValues(Tree* t) {
                                         //is this sum from a sequence which is in one of the users groups
                                         if (inUsersGroups(t->tree[v].getGroup(), globaldata->Groups) == true) {
                                                 //is this sum from a sequence which is in this groupCombo
-                                               if ((t->tree[v].getGroup() == globaldata->Groups[i]) || (t->tree[v].getGroup() == globaldata->Groups[l])) {
-                                                       sum /= (double)tmap->seqsPerGroup[t->tree[v].getGroup()];
-                                                       D[count] += sum; 
+                                               if (inUsersGroups(t->tree[v].getGroup(), groups)) {
+                                                       int numSeqsInGroupI, numSeqsInGroupL;
+                                                       
+                                                       map<string, int>::iterator it;
+                                                       it = t->tree[v].pcount.find(groups[0]);
+                                                       if (it != t->tree[v].pcount.end()) { //this leaf node contains seqs from group i
+                                                               numSeqsInGroupI = it->second;
+                                                       }else{ numSeqsInGroupI = 0; }
+                                                       
+                                                       it = t->tree[v].pcount.find(groups[1]);
+                                                       if (it != t->tree[v].pcount.end()) { //this leaf node contains seqs from group l
+                                                               numSeqsInGroupL = it->second;
+                                                       }else{ numSeqsInGroupL = 0; }
+                                                       
+                                                       double weightedSum = ((numSeqsInGroupI * sum) / (double)tmap->seqsPerGroup[groups[0]]) + ((numSeqsInGroupL * sum) / (double)tmap->seqsPerGroup[groups[1]]);
+
+                                                       //sum /= (double)tmap->seqsPerGroup[t->tree[v].getGroup()];
+                                                       
+                                                       D[count] += weightedSum; 
                                                 }
                                         }
                                 }
@@ -125,6 +143,7 @@ EstOutput Weighted::getValues(Tree* t, string groupA, string groupB) {
                 WScore[(groupA+groupB)] = 0.0;
                 float D = 0.0;
                 
+               vector<string> groups; groups.push_back(groupA); groups.push_back(groupB);
                                                 
                 /********************************************************/
                 //calculate a D value for the group combo
@@ -147,9 +166,25 @@ EstOutput Weighted::getValues(Tree* t, string groupA, string groupB) {
                                 sum += abs(t->tree[index].getBranchLength());
                         }
                                                 
-                       if ((t->tree[v].getGroup() == groupA) || (t->tree[v].getGroup() == groupB)) {
-                               sum /= (double)tmap->seqsPerGroup[t->tree[v].getGroup()];
-                               D += sum; 
+                       if (inUsersGroups(t->tree[v].getGroup(), groups)) {
+                               int numSeqsInGroupI, numSeqsInGroupL;
+                                                       
+                               map<string, int>::iterator it;
+                               it = t->tree[v].pcount.find(groups[0]);
+                               if (it != t->tree[v].pcount.end()) { //this leaf node contains seqs from group i
+                                       numSeqsInGroupI = it->second;
+                               }else{ numSeqsInGroupI = 0; }
+                               
+                               it = t->tree[v].pcount.find(groups[1]);
+                               if (it != t->tree[v].pcount.end()) { //this leaf node contains seqs from group l
+                                       numSeqsInGroupL = it->second;
+                               }else{ numSeqsInGroupL = 0; }
+                               
+                               double weightedSum = ((numSeqsInGroupI * sum) / (double)tmap->seqsPerGroup[groups[0]]) + ((numSeqsInGroupL * sum) / (double)tmap->seqsPerGroup[groups[1]]);
+                               
+                               //sum /= (double)tmap->seqsPerGroup[t->tree[v].getGroup()];
+                               
+                               D += weightedSum; 
                         }
                 }
                 /********************************************************/
author	westcott <westcott>
	Fri, 12 Feb 2010 19:50:54 +0000 (19:50 +0000)
committer	westcott <westcott>
	Fri, 12 Feb 2010 19:50:54 +0000 (19:50 +0000)
Mothur.xcodeproj/project.pbxproj		patch \| blob \| history
aligncommand.cpp		patch \| blob \| history
bellerophon.cpp		patch \| blob \| history
blastdb.cpp		patch \| blob \| history
blastdb.hpp		patch \| blob \| history
ccode.cpp		patch \| blob \| history
ccode.h		patch \| blob \| history
chimera.cpp		patch \| blob \| history
chimera.h		patch \| blob \| history
chimeracheckrdp.cpp		patch \| blob \| history
chimeracheckrdp.h		patch \| blob \| history
chimerarealigner.cpp	[new file with mode: 0644]	patch \| blob
chimerarealigner.h	[new file with mode: 0644]	patch \| blob
chimeraseqscommand.cpp		patch \| blob \| history
chimeraseqscommand.h		patch \| blob \| history
chimeraslayer.cpp		patch \| blob \| history
chimeraslayer.h		patch \| blob \| history
commandfactory.cpp		patch \| blob \| history
database.hpp		patch \| blob \| history
decalc.cpp		patch \| blob \| history
decalc.h		patch \| blob \| history
engine.cpp		patch \| blob \| history
globaldata.cpp		patch \| blob \| history
maligner.cpp		patch \| blob \| history
maligner.h		patch \| blob \| history
mergefilecommand.cpp		patch \| blob \| history
mothur.cpp		patch \| blob \| history
mothur.h		patch \| blob \| history
nast.cpp		patch \| blob \| history
otuhierarchycommand.cpp		patch \| blob \| history
otuhierarchycommand.h		patch \| blob \| history
pintail.cpp		patch \| blob \| history
pintail.h		patch \| blob \| history
readtree.cpp		patch \| blob \| history
readtreecommand.cpp		patch \| blob \| history
slayer.cpp		patch \| blob \| history
slayer.h		patch \| blob \| history
tree.cpp		patch \| blob \| history
tree.h		patch \| blob \| history
treemap.cpp		patch \| blob \| history
treenode.cpp		patch \| blob \| history
treenode.h		patch \| blob \| history
unweighted.cpp		patch \| blob \| history
weighted.cpp		patch \| blob \| history