added list.seqs command

author westcott <westcott>

Wed, 8 Jul 2009 13:31:28 +0000 (13:31 +0000)

committer westcott <westcott>

Wed, 8 Jul 2009 13:31:28 +0000 (13:31 +0000)
author westcott <westcott>
Wed, 8 Jul 2009 13:31:28 +0000 (13:31 +0000)
committer westcott <westcott>
Wed, 8 Jul 2009 13:31:28 +0000 (13:31 +0000)
diff --git a/Mothur.xcodeproj/project.pbxproj b/Mothur.xcodeproj/project.pbxproj

index b8a10577b717fc26f6d167c0fec1589b24bbc3d4..f5bc8ad01ae02e054c66ca4d87ea6d1b0ba478e8 100644 (file)
--- a/Mothur.xcodeproj/project.pbxproj
+++ b/Mothur.xcodeproj/project.pbxproj
@@ -76,6 +76,7 @@
                 37AD4DCA0F28F3DD00AA2D49 /* readtree.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 37AD4DC90F28F3DD00AA2D49 /* readtree.cpp */; };
                 37AFC71F0F445386005F492D /* sharedsobscollectsummary.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 37AFC71E0F445386005F492D /* sharedsobscollectsummary.cpp */; };
                 37B28F680F27590100808A62 /* deconvolutecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 37B28F670F27590100808A62 /* deconvolutecommand.cpp */; };
+               37B73C761004BEFD008C4B41 /* listseqscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 37B73C751004BEFD008C4B41 /* listseqscommand.cpp */; };
                 37C1D9730F86506E0059E3F0 /* binsequencecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 37C1D9720F86506E0059E3F0 /* binsequencecommand.cpp */; };
                 37C753CE0FB3415200DBD02E /* distancecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 37C753CD0FB3415200DBD02E /* distancecommand.cpp */; };
                 37D928550F21331F001D4494 /* ace.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 37D927B80F21331F001D4494 /* ace.cpp */; };
@@ -318,6 +319,8 @@
                 37AFC71E0F445386005F492D /* sharedsobscollectsummary.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = sharedsobscollectsummary.cpp; sourceTree = SOURCE_ROOT; };
                 37B28F660F27590100808A62 /* deconvolutecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = deconvolutecommand.h; sourceTree = SOURCE_ROOT; };
                 37B28F670F27590100808A62 /* deconvolutecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = deconvolutecommand.cpp; sourceTree = SOURCE_ROOT; };
+               37B73C741004BEFD008C4B41 /* listseqscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = listseqscommand.h; sourceTree = "<group>"; };
+               37B73C751004BEFD008C4B41 /* listseqscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = listseqscommand.cpp; sourceTree = "<group>"; };
                 37C1D9710F86506E0059E3F0 /* binsequencecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = binsequencecommand.h; sourceTree = SOURCE_ROOT; };
                 37C1D9720F86506E0059E3F0 /* binsequencecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = binsequencecommand.cpp; sourceTree = SOURCE_ROOT; };
                 37C753CC0FB3415200DBD02E /* distancecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = distancecommand.h; sourceTree = SOURCE_ROOT; };
@@ -788,6 +791,8 @@
                                 37D927E30F21331F001D4494 /* helpcommand.cpp */,
                                 375873F40F7D648F0040F377 /* libshuffcommand.h */,
                                 375873F30F7D648F0040F377 /* libshuffcommand.cpp */,
+                               37B73C741004BEFD008C4B41 /* listseqscommand.h */,
+                               37B73C751004BEFD008C4B41 /* listseqscommand.cpp */,
                                 21E859D60FC4632E005E1A48 /* matrixoutputcommand.h */,
                                 21E859D70FC4632E005E1A48 /* matrixoutputcommand.cpp */,
                                 7E5A17AD0FE57292003C6A03 /* mergefilecommand.h */,
@@ -1111,6 +1116,7 @@
                                 378599100FDD7E8E00EF9D03 /* optionparser.cpp in Sources */,
                                 7E5A17AF0FE57292003C6A03 /* mergefilecommand.cpp in Sources */,
                                 379D3D510FF90E090068C1C0 /* chimeraseqscommand.cpp in Sources */,
+                               37B73C761004BEFD008C4B41 /* listseqscommand.cpp in Sources */,
                         );
                         runOnlyForDeploymentPostprocessing = 0;
                 };
diff --git a/aligncommand.cpp b/aligncommand.cpp

index 0f5c86f5bf1cdc73f048a4c6a7a52b928875bbee..5af8e959a6caa3edd98a33a21fa680bd5815d2dc 100644 (file)
--- a/aligncommand.cpp
+++ b/aligncommand.cpp
@@ -195,10 +195,12 @@ int AlignCommand::execute(){
                         ifstream inFASTA;
                         openInputFile(candidateFileName, inFASTA);
                         
+                       string input;
                         while(!inFASTA.eof()){
-                               char c = inFASTA.get();
-                               if(c == '>'){   int pos = inFASTA.tellg(); positions.push_back(pos-1);  }
-                               while (!inFASTA.eof())  {       c = inFASTA.get(); if (c == 10 || c == 13){     break;  }       } // get rest of line if there's any crap there
+                               getline(inFASTA, input);
+                               if (input.length() != 0) {
+                                       if(input[0] == '>'){    int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1);       }
+                               }
                         }
                         inFASTA.close();
                         
diff --git a/chimeraseqscommand.cpp b/chimeraseqscommand.cpp

index 182c25e4b909ba2dd49fc1971beb7abee3b99188..f19fe11bcb907b86bc67429388fddfaa485601aa 100644 (file)
--- a/chimeraseqscommand.cpp
+++ b/chimeraseqscommand.cpp
@@ -8,6 +8,7 @@
   */
  
  #include "chimeraseqscommand.h"
+#include "eachgapdist.h"
  
  //***************************************************************************************************************
  
@@ -20,7 +21,7 @@ ChimeraSeqsCommand::ChimeraSeqsCommand(string option){
                 
                 else {
                         //valid paramters for this command
-                       string Array[] =  {"fasta", "filter", "correction", "processors", "method" };
+                       string Array[] =  {"fasta", "filter", "correction", "processors", "method", "window", "increment" };
                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
                         
                         OptionParser parser(option);
@@ -39,16 +40,24 @@ ChimeraSeqsCommand::ChimeraSeqsCommand(string option){
                         else if (fastafile == "not found") { fastafile = ""; mothurOut("fasta is a required parameter for the chimera.seqs command."); mothurOutEndLine(); abort = true;  }     
                         
                         string temp;
-                       temp = validParameter.validFile(parameters, "filter", false);                   if (temp == "not found") { temp = "F"; }
+                       temp = validParameter.validFile(parameters, "filter", false);                   if (temp == "not found") { temp = "T"; }
                         filter = isTrue(temp);
                         
                         temp = validParameter.validFile(parameters, "correction", false);               if (temp == "not found") { temp = "T"; }
                         correction = isTrue(temp);
                         
-                       temp = validParameter.validFile(parameters, "processors", true);                if (temp == "not found") { temp = "1"; }
+                       temp = validParameter.validFile(parameters, "processors", false);               if (temp == "not found") { temp = "1"; }
                         convert(temp, processors);
                         
+                       temp = validParameter.validFile(parameters, "window", false);                   if (temp == "not found") { temp = "0"; }
+                       convert(temp, window);
+                                       
+                       temp = validParameter.validFile(parameters, "increment", false);                        if (temp == "not found") { temp = "10"; }
+                       convert(temp, increment);
+                               
                         method = validParameter.validFile(parameters, "method", false);         if (method == "not found") { method = "bellerophon"; }
+                       
+                       if (method != "bellerophon") { mothurOut(method + " is not a valid method."); mothurOutEndLine();  abort = true; }
  
                 }
         }
@@ -77,6 +86,11 @@ void ChimeraSeqsCommand::help(){
                 exit(1);
         }
  }
+//********************************************************************************************************************
+//sorts highest score to lowest
+inline bool comparePref(Preference left, Preference right){
+       return (left.score[0] > right.score[0]);        
+}
  
  //***************************************************************************************************************
  
@@ -89,9 +103,10 @@ int ChimeraSeqsCommand::execute(){
                 
                 if (abort == true) { return 0; }
                 
+               
                 //do soft filter
                 if (filter)  {
-                       string optionString = "fasta=" + fastafile + ", soft=50.0, vertical=F";
+                       string optionString = "fasta=" + fastafile + ", soft=50, vertical=F";
                         filterSeqs = new FilterSeqsCommand(optionString);
                         filterSeqs->execute();
                         delete filterSeqs;
@@ -100,76 +115,169 @@ int ChimeraSeqsCommand::execute(){
                         fastafile = getRootName(fastafile) + "filter.fasta";
                 }
                 
+               distCalculator = new eachGapDist();
+               
                 //read in sequences
                 readSeqs();
                 
-               //int numSeqs = seqs.size();
+               int numSeqs = seqs.size();
                 
-               //find average midpoint of seqs
-               midpoint = findAverageMidPoint();
+               if (numSeqs == 0) { mothurOut("Error in reading you sequences."); mothurOutEndLine(); return 0; }
                 
-               //create 2 vectors of sequences, 1 for left side and one for right side
-               vector<Sequence> left;  vector<Sequence> right;
+               //set default window to 25% of sequence length
+               string seq0 = seqs[0].getAligned();
+               if (window == 0) { window = seq0.length() / 4;  }
+               else if (window > (seq0.length() / 2)) {  
+                       mothurOut("Your sequence length is = " + toString(seq0.length()) + ". You have selected a window size greater than the length of half your aligned sequence. I will run it with a window size of " + toString((seq0.length() / 2))); mothurOutEndLine();
+                       window = (seq0.length() / 2);
+               }
                 
-               for (int i = 0; i < seqs.size(); i++) {
-                       //save left side
-                       string seqLeft = seqs[i].getAligned();
-                       seqLeft = seqLeft.substr(0, midpoint);
-                       Sequence tempLeft(seqs[i].getName(), seqLeft);
-                       left.push_back(tempLeft);
+               if (increment > (seqs[0].getAlignLength() - (2*window))) { 
+                       if (increment != 10) {
                         
-                       //save right side
-                       string seqRight = seqs[i].getAligned();
-                       seqRight = seqRight.substr(midpoint+1, (seqRight.length()-midpoint-1));
-                       Sequence tempRight(seqs[i].getName(), seqRight);
-                       right.push_back(tempRight);
+                               mothurOut("You have selected a increment that is too large. I will use the default."); mothurOutEndLine();
+                               increment = 10;
+                               if (increment > (seqs[0].getAlignLength() - (2*window))) {  increment = 0;  }
+                               
+                       }else{ increment = 0; }
+               }
+cout << "increment = " << increment << endl;           
+               if (increment == 0) { iters = 1; }
+               else { iters = ((seqs[0].getAlignLength() - (2*window)) / increment); }
+               
+               //initialize pref
+               pref.resize(numSeqs);  
+               
+               for (int i = 0; i < numSeqs; i++ ) { 
+                       pref[i].leftParent.resize(2); pref[i].rightParent.resize(2); pref[i].score.resize(2);   pref[i].closestLeft.resize(2); pref[i].closestRight.resize(3);
+                       pref[i].name = seqs[i].getName();
+                       pref[i].score[0] = 0.0;  pref[i].score[1] = 0.0; 
+                       pref[i].closestLeft[0] = 100000.0;  pref[i].closestLeft[1] = 100000.0;  
+                       pref[i].closestRight[0] = 100000.0;  pref[i].closestRight[1] = 100000.0;  
                 }
+
+               int midpoint = window;
+               int count = 0;
+               while (count < iters) {
+                               
+                               //create 2 vectors of sequences, 1 for left side and one for right side
+                               vector<Sequence> left;  vector<Sequence> right;
+                               
+                               for (int i = 0; i < seqs.size(); i++) {
+//cout << "whole = " << seqs[i].getAligned() << endl;
+                                       //save left side
+                                       string seqLeft = seqs[i].getAligned().substr(midpoint-window, window);
+                                       Sequence tempLeft;
+                                       tempLeft.setName(seqs[i].getName());
+                                       tempLeft.setAligned(seqLeft);
+                                       left.push_back(tempLeft);
+//cout << "left = " << tempLeft.getAligned() << endl;                  
+                                       //save right side
+                                       string seqRight = seqs[i].getAligned().substr(midpoint, window);
+                                       Sequence tempRight;
+                                       tempRight.setName(seqs[i].getName());
+                                       tempRight.setAligned(seqRight);
+                                       right.push_back(tempRight);
+//cout << "right = " << seqRight << endl;      
+                               }
+                               
+                               //adjust midpoint by increment
+                               midpoint += increment;
+                               
+                               
+                               //this should be parallelized
+                               //perference = sum of (| distance of my left to sequence j's left - distance of my right to sequence j's right | )
+                               //create a matrix containing the distance from left to left and right to right
+                               //calculate distances
+                               SparseMatrix* SparseLeft = new SparseMatrix();
+                               SparseMatrix* SparseRight = new SparseMatrix();
+                               
+                               createSparseMatrix(0, left.size(), SparseLeft, left);
+                               createSparseMatrix(0, right.size(), SparseRight, right);
+                               
+                               vector<SeqMap> distMapRight;
+                               vector<SeqMap> distMapLeft;
+                               
+                               // Create a data structure to quickly access the distance information.
+                               // It consists of a vector of distance maps, where each map contains
+                               // all distances of a certain sequence. Vector and maps are accessed
+                               // via the index of a sequence in the distance matrix
+                               distMapRight = vector<SeqMap>(numSeqs); 
+                               distMapLeft = vector<SeqMap>(numSeqs); 
+                               //cout << "left" << endl << endl;
+                               for (MatData currentCell = SparseLeft->begin(); currentCell != SparseLeft->end(); currentCell++) {
+                                       distMapLeft[currentCell->row][currentCell->column] = currentCell->dist;
+                                       //cout << " i = " << currentCell->row << " j = " << currentCell->column << " dist = " << currentCell->dist << endl;
+                               }
+                               //cout << "right" << endl << endl;
+                               for (MatData currentCell = SparseRight->begin(); currentCell != SparseRight->end(); currentCell++) {
+                                       distMapRight[currentCell->row][currentCell->column] = currentCell->dist;
+                                       //cout << " i = " << currentCell->row << " j = " << currentCell->column << " dist = " << currentCell->dist << endl;
+                               }
+                               
+                               delete SparseLeft;
+                               delete SparseRight;
                                 
-               //this should be parallelized
-               //perference = sum of (| distance of my left to sequence j's left - distance of my right to sequence j's right | )
-               //create a matrix containing the distance from left to left and right to right
-               //calculate distances
-               SparseMatrix* SparseLeft = new SparseMatrix();
-               SparseMatrix* SparseRight = new SparseMatrix();
+                               
+                               //fill preference structure
+                               generatePreferences(distMapLeft, distMapRight, midpoint);
+                               
+                               count++;
+                               
+               }
                 
-               createSparseMatrix(0, left.size(), SparseLeft, left);
-               createSparseMatrix(0, right.size(), SparseRight, right);
+               delete distCalculator;
                 
+               //find average pref score across windows
+               //if (increment != 0) {
+                
+                       //for (int i = 0; i < pref.size(); i++) {
+                               //pref[i].score[0] = pref[i].score[0] / iters;
+                       //}
+               //}
                 
-               //vector<SeqMap> distMapRight;
-               //vector<SeqMap> distMapLeft;
+               //sort Preferences highest to lowest
+               sort(pref.begin(), pref.end(), comparePref);
                 
-               // Create a data structure to quickly access the distance information.
-               // It consists of a vector of distance maps, where each map contains
-               // all distances of a certain sequence. Vector and maps are accessed
-               // via the index of a sequence in the distance matrix
-               //distMapRight = vector<SeqMap>(globaldata->gListVector->size()); 
-               //distMapLeft = vector<SeqMap>(globaldata->gListVector->size()); 
-               for (MatData currentCell = SparseLeft->begin(); currentCell != SparseLeft->end(); currentCell++) {
-                       //distMapLeft[currentCell->row][currentCell->column] = currentCell->dist;
-               }
-               for (MatData currentCell = SparseRight->begin(); currentCell != SparseRight->end(); currentCell++) {
-                       //distMapRight[currentCell->row][currentCell->column] = currentCell->dist;
-               }
-
+               string outputFileName = getRootName(fastafile) + "chimeras";
+               ofstream out;
+               openOutputFile(outputFileName, out);
                 
-               //fill preference structure
-               //generatePreferences(distMapLeft, distMapRight);
+               int above1 = 0;
+               out << "Name\tScore\tLeft\tRight\t" << endl;
+               //output prefenence structure to .chimeras file
+               for (int i = 0; i < pref.size(); i++) {
+                       out << pref[i].name << '\t' << pref[i].score[0] << '\t' << pref[i].leftParent[0] << '\t' << pref[i].rightParent[0] << endl;
+                       
+                       //calc # of seqs with preference above 1.0
+                       if (pref[i].score[0] > 1.0) { 
+                               above1++; 
+                               mothurOut(pref[i].name + " is a suspected chimera at breakpoint " + toString(pref[i].midpoint)); mothurOutEndLine();
+                               mothurOut("It's score is " + toString(pref[i].score[0]) + " with suspected left parent " + pref[i].leftParent[0] + " and right parent " + pref[i].rightParent[0]); mothurOutEndLine();
+                       }
+                       
+                       
+               }
                 
-                               
-               //output results to screen                                              
+               //output results to screen
                 mothurOutEndLine();
-               mothurOut("\t\t"); mothurOutEndLine();
-               //mothurOut("Minimum:\t" + toString(startPosition[0]) + "\t" + toString(endPosition[0]) + "\t" + toString(seqLength[0]) + "\t" + toString(ambigBases[0]) + "\t" + toString(longHomoPolymer[0])); mothurOutEndLine();
-               //mothurOut("2.5%-tile:\t" + toString(startPosition[ptile0_25]) + "\t" + toString(endPosition[ptile0_25]) + "\t" + toString(seqLength[ptile0_25]) + "\t" + toString(ambigBases[ptile0_25]) + "\t"+ toString(longHomoPolymer[ptile0_25])); mothurOutEndLine();
-               //mothurOut("25%-tile:\t" + toString(startPosition[ptile25]) + "\t" + toString(endPosition[ptile25]) + "\t" + toString(seqLength[ptile25]) + "\t" + toString(ambigBases[ptile25]) + "\t" + toString(longHomoPolymer[ptile25])); mothurOutEndLine();
-               //mothurOut("Median: \t" + toString(startPosition[ptile50]) + "\t" + toString(endPosition[ptile50]) + "\t" + toString(seqLength[ptile50]) + "\t" + toString(ambigBases[ptile50]) + "\t" + toString(longHomoPolymer[ptile50])); mothurOutEndLine();
-               //mothurOut("75%-tile:\t" + toString(startPosition[ptile75]) + "\t" + toString(endPosition[ptile75]) + "\t" + toString(seqLength[ptile75]) + "\t" + toString(ambigBases[ptile75]) + "\t" + toString(longHomoPolymer[ptile75])); mothurOutEndLine();
-               //mothurOut("97.5%-tile:\t" + toString(startPosition[ptile97_5]) + "\t" + toString(endPosition[ptile97_5]) + "\t" + toString(seqLength[ptile97_5]) + "\t" + toString(ambigBases[ptile97_5]) + "\t" + toString(longHomoPolymer[ptile97_5])); mothurOutEndLine();
-               //mothurOut("Maximum:\t" + toString(startPosition[ptile100]) + "\t" + toString(endPosition[ptile100]) + "\t" + toString(seqLength[ptile100]) + "\t" + toString(ambigBases[ptile100]) + "\t" + toString(longHomoPolymer[ptile100])); mothurOutEndLine();
-               //mothurOut("# of Seqs:\t" + toString(numSeqs)); mothurOutEndLine();
+               mothurOut("Sequence with preference score above 1.0: " + toString(above1)); mothurOutEndLine();
+               int spot;
+               spot = pref.size()-1;
+               mothurOut("Minimum:\t" + toString(pref[spot].score[0])); mothurOutEndLine();
+               spot = pref.size() * 0.975;
+               mothurOut("2.5%-tile:\t" + toString(pref[spot].score[0])); mothurOutEndLine();
+               spot = pref.size() * 0.75;
+               mothurOut("25%-tile:\t" + toString(pref[spot].score[0])); mothurOutEndLine();
+               spot = pref.size() * 0.50;
+               mothurOut("Median: \t" + toString(pref[spot].score[0])); mothurOutEndLine();
+               spot = pref.size() * 0.25;
+               mothurOut("75%-tile:\t" + toString(pref[spot].score[0])); mothurOutEndLine();
+               spot = pref.size() * 0.025;
+               mothurOut("97.5%-tile:\t" + toString(pref[spot].score[0])); mothurOutEndLine();
+               spot = 0;
+               mothurOut("Maximum:\t" + toString(pref[spot].score[0])); mothurOutEndLine();
                 
-               //outSummary.close();
                 return 0;
         }
         catch(exception& e) {
@@ -188,6 +296,8 @@ void ChimeraSeqsCommand::readSeqs(){
                 while(!inFASTA.eof()){
                         Sequence current(inFASTA);
                         
+                       if (current.getAligned() == "") { current.setAligned(current.getUnaligned()); }
+                       
                         seqs.push_back(current);
                         
                         gobble(inFASTA);
@@ -201,53 +311,6 @@ void ChimeraSeqsCommand::readSeqs(){
         }
  }
  
-
-//***************************************************************************************************************
-int ChimeraSeqsCommand::findAverageMidPoint(){
-       try {
-               int totalMids = 0;
-               int averageMid = 0;
-               
-               //loop through the seqs and find midpoint
-               for (int i = 0; i < seqs.size(); i++) {
-                       
-                       //get unaligned sequence
-                       seqs[i].setUnaligned(seqs[i].getUnaligned());  //if you read an aligned file the unaligned is really aligned, so we need to make sure its unaligned
-                       
-                       string unaligned = seqs[i].getUnaligned();
-                       string aligned = seqs[i].getAligned();
-                       
-                       //find midpoint of this seq
-                       int count = 0;
-                       int thismid = 0;
-                       for (int j = 0; j < aligned.length(); j++) {
-                               
-                               thismid++;
-                               
-                               //if you are part of the unaligned sequence increment
-                               if (isalpha(aligned[j])) {  count++;  }
-                               
-                               //if you have reached the halfway point stop
-                               if (count >= (unaligned.length() / 2)) { break; }
-                       }
-                       
-                       //add this mid to total
-                       totalMids += thismid;
-               
-               }
-               
-               averageMid = (totalMids / seqs.size());
-               
-               return averageMid; 
-       
-       
-       }
-       catch(exception& e) {
-               errorOut(e, "ChimeraSeqsCommand", "findAverageMidPoint");
-               exit(1);
-       }
-}
-
  /***************************************************************************************************************/
  int ChimeraSeqsCommand::createSparseMatrix(int startSeq, int endSeq, SparseMatrix* sparse, vector<Sequence> s){
         try {
@@ -256,9 +319,9 @@ int ChimeraSeqsCommand::createSparseMatrix(int startSeq, int endSeq, SparseMatri
                         
                         for(int j=0;j<i;j++){
                         
-                               //distCalculator->calcDist(s.get(i), s.get(j));
+                               distCalculator->calcDist(s[i], s[j]);
                                 float dist = distCalculator->getDist();
-                               
+                       
                                 PCell temp(i, j, dist);
                                 sparse->addCell(temp);
                                 
@@ -273,23 +336,123 @@ int ChimeraSeqsCommand::createSparseMatrix(int startSeq, int endSeq, SparseMatri
                 exit(1);
         }
  }
-/***************************************************************************************************************
-void ChimeraSeqsCommand::generatePreferences(vector<SeqMap> left, vector<SeqMap> right){
+/***************************************************************************************************************/
+void ChimeraSeqsCommand::generatePreferences(vector<SeqMap> left, vector<SeqMap> right, int mid){
         try {
+               
+               float dme = 0.0;
+               SeqMap::iterator itR;
+               SeqMap::iterator itL;
+               
+               //initialize pref[i]
+               for (int i = 0; i < pref.size(); i++) {
+                       pref[i].score[1] = 0.0;
+                       pref[i].closestLeft[1] = 100000.0; 
+                       pref[i].closestRight[1] = 100000.0; 
+                       pref[i].leftParent[1] = "";
+                       pref[i].rightParent[1] = "";
+               }
  
+//cout << "in generate left.size() = " << left.size() << endl;         
                 for (int i = 0; i < left.size(); i++) {
                         
-                       int iscore = 0;
-                       float closestLeft = 100000.0;
-                       float closestRight = 100000.0;
+                       SeqMap currentLeft = left[i];    //example i = 3;   currentLeft is a map of 0 to the distance of sequence 3 to sequence 0,
+                                                                                               //                                                                              1 to the distance of sequence 3 to sequence 1,
+                                                                                               //                                                                              2 to the distance of sequence 3 to sequence 2.
+                       SeqMap currentRight = right[i];         // same as left but with distances on the right side.
                         
-                       for (int j = 0; j < left.size(); j++) {
+                       for (int j = 0; j < i; j++) {
                                 
-                               //iscore += abs(left
-                       
+                               itL = currentLeft.find(j);
+                               itR = currentRight.find(j);
+cout << " i = " << i << " j = " << j << " distLeft = " << itL->second << endl;
+cout << " i = " << i << " j = " << j << " distright = " << itR->second << endl;
+                               
+                               //if you can find this entry update the preferences
+                               if ((itL != currentLeft.end()) && (itR != currentRight.end())) {
+                               
+                                       if (!correction) {
+                                               pref[i].score[1] += abs((itL->second - itR->second));
+                                               pref[j].score[1] += abs((itL->second - itR->second));
+cout << "left " << i << " " << j << " = " << itL->second << " right " << i << " " << j << " = " << itR->second << endl;
+cout << "abs = " << abs((itL->second - itR->second)) << endl;
+cout << i << " score = " << pref[i].score[1] << endl;
+cout << j << " score = " << pref[j].score[1] << endl;
+                                       }else {
+                                               pref[i].score[1] += abs((sqrt(itL->second) - sqrt(itR->second)));
+                                               pref[j].score[1] += abs((sqrt(itL->second) - sqrt(itR->second)));
+cout << "left " << i << " " << j << " = " << itL->second << " right " << i << " " << j << " = " << itR->second << endl;
+cout << "abs = " << abs((sqrt(itL->second) - sqrt(itR->second))) << endl;
+cout << i << " score = " << pref[i].score[1] << endl;
+cout << j << " score = " << pref[j].score[1] << endl;
+                                       }
+cout << "pref[" << i << "].closestLeft[1] = "  <<      pref[i].closestLeft[1] << " parent = " << pref[i].leftParent[1] << endl;                        
+                                       //are you the closest left sequence
+                                       if (itL->second < pref[i].closestLeft[1]) {  
+
+                                               pref[i].closestLeft[1] = itL->second;
+                                               pref[i].leftParent[1] = seqs[j].getName();
+cout << "updating closest left to " << pref[i].leftParent[1] << endl;
+                                       }
+cout << "pref[" << j << "].closestLeft[1] = "  <<      pref[j].closestLeft[1] << " parent = " << pref[j].leftParent[1] << endl;        
+                                       if (itL->second < pref[j].closestLeft[1]) { 
+                                               pref[j].closestLeft[1] = itL->second;
+                                               pref[j].leftParent[1] = seqs[i].getName();
+cout << "updating closest left to " << pref[j].leftParent[1] << endl;
+                                       }
+                                       
+                                       //are you the closest right sequence
+                                       if (itR->second < pref[i].closestRight[1]) {   
+                                               pref[i].closestRight[1] = itR->second;
+                                               pref[i].rightParent[1] = seqs[j].getName();
+                                       }
+                                       if (itR->second < pref[j].closestRight[1]) {   
+                                               pref[j].closestRight[1] = itR->second;
+                                               pref[j].rightParent[1] = seqs[i].getName();
+                                       }
+                                       
+                               }
                         }
                 
                 }
+               
+               
+                 
+               //calculate the dme
+               int count0 = 0;
+               for (int i = 0; i < pref.size(); i++) {  dme += pref[i].score[1];  if (pref[i].score[1] == 0.0) { count0++; }  }
+               
+               float expectedPercent = 1 / (float) (pref.size() - count0);
+cout << endl << "dme = " << dme << endl;
+               //recalculate prefernences based on dme
+               for (int i = 0; i < pref.size(); i++) {
+cout << "unadjusted pref " << i << " = " << pref[i].score[1] << endl;  
+                       // gives the actual percentage of the dme this seq adds
+                       pref[i].score[1] = pref[i].score[1] / dme;
+                       
+                       //how much higher or lower is this than expected
+                       pref[i].score[1] = pref[i].score[1] / expectedPercent;
+                       
+                       //so a non chimeric sequence would be around 1, and a chimeric would be signifigantly higher.
+cout << "adjusted pref " << i << " = " << pref[i].score[1] << endl;                                    
+               }
+               
+               //is this score bigger then the last score
+               for (int i = 0; i < pref.size(); i++) {  
+                       
+                       //update biggest score
+                       if (pref[i].score[1] > pref[i].score[0]) {
+                               pref[i].score[0] = pref[i].score[1];
+                               pref[i].leftParent[0] = pref[i].leftParent[1];
+                               pref[i].rightParent[0] = pref[i].rightParent[1];
+                               pref[i].closestLeft[0] = pref[i].closestLeft[1];
+                               pref[i].closestRight[0] = pref[i].closestRight[1];
+                               pref[i].midpoint = mid;
+                       }
+                       
+                       //total of preference scores across windows
+                       //pref[i].score[0] += pref[i].score[1];
+               }
  
         }
         catch(exception& e) {
diff --git a/chimeraseqscommand.h b/chimeraseqscommand.h

index 5b55120e0d0981b432f71332dc1db57e8fae5ce7..2398ce63b257935d857e06bf6525b6b94e8efd05 100644 (file)
--- a/chimeraseqscommand.h
+++ b/chimeraseqscommand.h
@@ -20,6 +20,19 @@
  typedef list<PCell>::iterator MatData;
  typedef map<int, float> SeqMap;  //maps sequence to all distance for that seqeunce
  
+struct Preference {
+               string name;
+               vector<string> leftParent; //keep the name of closest left associated with the two scores
+               vector<string> rightParent; //keep the name of closest right associated with the two scores
+               vector<float> score;  //so you can keep last score and calc this score and keep whichever is bigger.
+               vector<float> closestLeft;  //keep the closest left associated with the two scores
+               vector<float> closestRight; //keep the closest right associated with the two scores
+               int midpoint;
+
+};
+
+
+
  /***********************************************************/
  
  class ChimeraSeqsCommand : public Command {
@@ -29,28 +42,21 @@ public:
         int execute();
         void help();
         
+               
  private:
-       //Dist* distCalculator;
         
-       struct Preference {
-               string leftParent;
-               string rightParent;
-               float score;
-
-       };
-
         Dist* distCalculator;
         bool abort;
         string method, fastafile;
         bool filter, correction;
-       int processors, midpoint;
+       int processors, midpoint, averageLeft, averageRight, window, iters, increment;
         FilterSeqsCommand* filterSeqs;
+       ListVector* list;
         vector<Sequence> seqs;
         vector<Preference> pref;
         
-       int findAverageMidPoint();
         void readSeqs();
-       void generatePreferences(SparseMatrix*, SparseMatrix*);
+       void generatePreferences(vector<SeqMap>, vector<SeqMap>, int);
         int createSparseMatrix(int, int, SparseMatrix*, vector<Sequence>);
         
  
diff --git a/commandfactory.cpp b/commandfactory.cpp

index 43f6769f92de125e9b76074e2a2d91cdb9ca3889..3d90269590b3f42e2d140821ba39220ccde9c251 100644 (file)
--- a/commandfactory.cpp
+++ b/commandfactory.cpp
@@ -49,6 +49,8 @@
  #include "reversecommand.h"
  #include "trimseqscommand.h"
  #include "mergefilecommand.h"
+#include "chimeraseqscommand.h"
+#include "listseqscommand.h"
  
  /***********************************************************/
  
@@ -96,6 +98,8 @@ CommandFactory::CommandFactory(){
         commands["screen.seqs"]                 = "screen.seqs";
         commands["reverse.seqs"]                = "reverse.seqs";
         commands["trim.seqs"]                   = "trim.seqs";
+       commands["chimera.seqs"]                = "chimera.seqs";
+       commands["list.seqs"]                   = "list.seqs";
         commands["quit"]                                = "quit"; 
  
  }
@@ -145,13 +149,15 @@ Command* CommandFactory::getCommand(string commandName, string optionString){
                 else if(commandName == "tree.shared")                   {   command = new TreeGroupCommand(optionString);                       }
                 else if(commandName == "dist.shared")                   {   command = new MatrixOutputCommand(optionString);            }
                 else if(commandName == "bootstrap.shared")              {   command = new BootSharedCommand(optionString);                      }
-               //else if(commandName == "consensus")                           {   command = new ConcensusCommand(optionString);                       }
+               //else if(commandName == "consensus")                   {   command = new ConcensusCommand(optionString);                       }
                 else if(commandName == "dist.seqs")                             {   command = new DistanceCommand(optionString);                        }
                 else if(commandName == "align.seqs")                    {   command = new AlignCommand(optionString);                           }
                 else if(commandName == "summary.seqs")                  {       command = new SeqSummaryCommand(optionString);                  }
                 else if(commandName == "screen.seqs")                   {       command = new ScreenSeqsCommand(optionString);                  }
                 else if(commandName == "reverse.seqs")                  {       command = new ReverseSeqsCommand(optionString);                 }
                 else if(commandName == "trim.seqs")                             {       command = new TrimSeqsCommand(optionString);                    }
+               else if(commandName == "chimera.seqs")                  {       command = new ChimeraSeqsCommand(optionString);                 }
+               else if(commandName == "list.seqs")                             {       command = new ListSeqsCommand(optionString);                    }
                 else if(commandName == "merge.files")                   {       command = new MergeFileCommand(optionString);                   }
                 else                                                                                    {       command = new NoCommand(optionString);                                  }
  
diff --git a/eachgapdist.h b/eachgapdist.h

index d21e55ca1acb4e950b1126ade38a3237a7684ad9..ac9b45302c75cd7cec5ac28613eeaf2c344a21f7 100644 (file)
--- a/eachgapdist.h
+++ b/eachgapdist.h
@@ -23,6 +23,7 @@ public:
                 
                 string seqA = A.getAligned();
                 string seqB = B.getAligned();
+
                 int alignLength = seqA.length();
                 
                 for(int i=0; i<alignLength; i++){
@@ -47,6 +48,7 @@ public:
                 
                 if(length == 0) {       dist = 1.0000;                                                          }
                 else                    {       dist = ((double)diff  / (double)length);        }
+
                 
         }
  };
diff --git a/globaldata.hpp b/globaldata.hpp

index ff09a36c31727bb6d04e3b2dbe50b50574a2a19f..759bcae6fd7631988bf15407e7234330c2c9f659 100644 (file)
--- a/globaldata.hpp
+++ b/globaldata.hpp
@@ -42,7 +42,7 @@ public:
         TreeMap* gTreemap;
         SequenceDB* gSequenceDB;
         string inputFileName, argv;
-       bool allLines, runParse;
+       bool allLines, runParse, jumble;
         vector<string>  Estimators, Groups; //holds estimators to be used
         set<int> lines; //hold lines to be used
         set<string> labels; //holds labels to be used
@@ -57,18 +57,13 @@ public:
         string getNameFile();   //do we need this?
         string getGroupFile();  //do we need this?
         string getOrderFile();
-//     string getFastaFile();
         string getTreeFile();
         string getSharedFile();
         string getFormat();     //do we need this?
-//     string getCandidateFile();
-//     string getTemplateFile();
+
  
         void setListFile(string);
-//     void setFastaFile(string);
         void setTreeFile(string);
-//     void setCandidateFile(string);
-//     void setTemplateFile(string);
         void setGroupFile(string);              //do we need this?
         void setPhylipFile(string);
         void setColumnFile(string);
diff --git a/listseqscommand.cpp b/listseqscommand.cpp

new file mode 100644 (file)

index 0000000..a7d79f3
--- /dev/null
+++ b/listseqscommand.cpp
@@ -0,0 +1,240 @@
+/*
+ *  listseqscommand.cpp
+ *  Mothur
+ *
+ *  Created by Sarah Westcott on 7/8/09.
+ *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
+ *
+ */
+
+#include "listseqscommand.h"
+#include "sequence.hpp"
+
+//**********************************************************************************************************************
+
+ListSeqsCommand::ListSeqsCommand(string option){
+       try {
+               abort = false;
+               
+               //allow user to run help
+               if(option == "help") { help(); abort = true; }
+               
+               else {
+                       //valid paramters for this command
+                       string Array[] =  {"fasta","name", "group", "align" };
+                       vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
+                       
+                       OptionParser parser(option);
+                       map<string,string> parameters = parser.getParameters();
+                       
+                       ValidParameters validParameter;
+                       
+                       //check to make sure all parameters are valid for command
+                       for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
+                               if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
+                       }
+                       
+                       //check for required parameters
+                       fastafile = validParameter.validFile(parameters, "fasta", true);
+                       if (fastafile == "not open") { abort = true; }
+                       else if (fastafile == "not found") {  fastafile = "";  }        
+                       
+                       namefile = validParameter.validFile(parameters, "name", true);
+                       if (namefile == "not open") { abort = true; }
+                       else if (namefile == "not found") {  namefile = "";  }  
+                       
+                       groupfile = validParameter.validFile(parameters, "group", true);
+                       if (groupfile == "not open") { abort = true; }
+                       else if (groupfile == "not found") {  groupfile = "";  }        
+                       
+                       alignfile = validParameter.validFile(parameters, "align", true);
+                       if (alignfile == "not open") { abort = true; }
+                       else if (alignfile == "not found") {  alignfile = "";  }
+                       
+                       if ((fastafile == "") && (namefile == "") && (groupfile == "") && (alignfile == ""))  { mothurOut("You must provide a file."); mothurOutEndLine(); abort = true; }
+                       
+                       if (parameters.size() > 1) { mothurOut("You may only enter one file."); mothurOutEndLine(); abort = true;  }
+               }
+
+       }
+       catch(exception& e) {
+               errorOut(e, "ListSeqsCommand", "ListSeqsCommand");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+
+void ListSeqsCommand::help(){
+       try {
+               mothurOut("The list.seqs command reads a fasta, name, group or alignreport file and outputs a .accnos file containing sequence names.\n");
+               mothurOut("The list.seqs command parameters are fasta, name, group and align.  You must provide one of these parameters.\n");
+               mothurOut("The list.seqs command should be in the following format: list.seqs(fasta=yourFasta).\n");
+               mothurOut("Example list.seqs(fasta=amazon.fasta).\n");
+               mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n\n");
+       }
+       catch(exception& e) {
+               errorOut(e, "ListSeqsCommand", "help");
+               exit(1);
+       }
+}
+
+//**********************************************************************************************************************
+
+int ListSeqsCommand::execute(){
+       try {
+               
+               if (abort == true) { return 0; }
+               
+               //read functions fill names vector
+               if (fastafile != "")            {       inputFileName = fastafile;      readFasta();    }
+               else if (namefile != "")        {       inputFileName = namefile;       readName();             }
+               else if (groupfile != "")       {       inputFileName = groupfile;      readGroup();    }
+               else if (alignfile != "")       {       inputFileName = alignfile;      readAlign();    }
+               
+               //sort in alphabetical order
+               sort(names.begin(), names.end());
+               
+               string outputFileName = getRootName(inputFileName) + "accnos";
+               ofstream out;
+               openOutputFile(outputFileName, out);
+               
+               //output to .accnos file
+               for (int i = 0; i < names.size(); i++) {
+                       out << names[i] << endl;
+               }
+               out.close();
+               
+               return 0;               
+       }
+
+       catch(exception& e) {
+               errorOut(e, "ListSeqsCommand", "execute");
+               exit(1);
+       }
+}
+
+//**********************************************************************************************************************
+void ListSeqsCommand::readFasta(){
+       try {
+               
+               ifstream in;
+               openInputFile(fastafile, in);
+               string name;
+               
+               while(!in.eof()){
+                       Sequence currSeq(in);
+                       name = currSeq.getName();
+                       
+                       names.push_back(name);
+                       
+                       gobble(in);
+               }
+               in.close();             
+
+       }
+       catch(exception& e) {
+               errorOut(e, "ListSeqsCommand", "readFasta");
+               exit(1);
+       }
+}
+
+//**********************************************************************************************************************
+void ListSeqsCommand::readName(){
+       try {
+               
+               ifstream in;
+               openInputFile(namefile, in);
+               string name, firstCol, secondCol;
+               
+               while(!in.eof()){
+
+                       in >> firstCol;                         
+                       in >> secondCol;                        
+                       
+                       //parse second column saving each name
+                       while (secondCol.find_first_of(',') != -1) { 
+                               name = secondCol.substr(0,secondCol.find_first_of(','));
+                               secondCol = secondCol.substr(secondCol.find_first_of(',')+1, secondCol.length());
+                               names.push_back(name);
+                       }
+                       
+                       //get name after last ,
+                       names.push_back(secondCol);
+                       
+                       gobble(in);
+               }
+               in.close();
+               
+       }
+       catch(exception& e) {
+               errorOut(e, "ListSeqsCommand", "readName");
+               exit(1);
+       }
+}
+
+//**********************************************************************************************************************
+void ListSeqsCommand::readGroup(){
+       try {
+       
+               ifstream in;
+               openInputFile(groupfile, in);
+               string name, group;
+               
+               while(!in.eof()){
+
+                       in >> name;                             //read from first column
+                       in >> group;                    //read from second column
+                       
+                       names.push_back(name);
+                                       
+                       gobble(in);
+               }
+               in.close();
+
+       }
+       catch(exception& e) {
+               errorOut(e, "ListSeqsCommand", "readGroup");
+               exit(1);
+       }
+}
+
+//**********************************************************************************************************************
+//alignreport file has a column header line then all other lines contain 16 columns.  we just want the first column since that contains the name
+void ListSeqsCommand::readAlign(){
+       try {
+       
+               ifstream in;
+               openInputFile(alignfile, in);
+               string name, junk;
+               
+               //read column headers
+               for (int i = 0; i < 16; i++) {  
+                       if (!in.eof())  {       in >> junk;             }
+                       else                    {       break;                  }
+               }
+               
+               
+               while(!in.eof()){
+
+                       in >> name;                             //read from first column
+                       
+                       //read rest
+                       for (int i = 0; i < 15; i++) {  
+                               if (!in.eof())  {       in >> junk;             }
+                               else                    {       break;                  }
+                       }
+                       
+                       names.push_back(name);
+                                       
+                       gobble(in);
+               }
+               in.close();
+
+               
+       }
+       catch(exception& e) {
+               errorOut(e, "ListSeqsCommand", "readAlign");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
diff --git a/listseqscommand.h b/listseqscommand.h

new file mode 100644 (file)

index 0000000..4e0febd
--- /dev/null
+++ b/listseqscommand.h
@@ -0,0 +1,37 @@
+#ifndef LISTSEQSCOMMAND_H
+#define LISTSEQSCOMMAND_H
+
+/*
+ *  listseqscommand.h
+ *  Mothur
+ *
+ *  Created by Sarah Westcott on 7/8/09.
+ *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
+ *
+ */
+
+#include "command.hpp"
+
+class ListSeqsCommand : public Command {
+       
+       public:
+       
+               ListSeqsCommand(string);        
+               ~ListSeqsCommand(){};
+               int execute();
+               void help();    
+               
+       private:
+               vector<string> names;
+               string fastafile, namefile, groupfile, alignfile, inputFileName;
+               bool abort;
+               
+               void readFasta();
+               void readName();
+               void readGroup();
+               void readAlign();
+               
+};
+
+#endif
+
diff --git a/mothur.h b/mothur.h

index 4d22e98e2a2bb3eac4dcbbe98b81694b58a00bcb..5d9dc96bc65428391374abb47e3d8c61e114000b 100644 (file)
--- a/mothur.h
+++ b/mothur.h
@@ -231,7 +231,7 @@ inline void errorOut(exception& e, string object, string function) {
         
                 mothurOut("Error: ");
                 mothurOut(toString(e.what()));
-               mothurOut(" has occurred in the " + object + " class function " + function + "Please contact Pat Schloss at pschloss@microbio.umass.edu, and be sure to include the mothur.logFile with your inquiry.");
+               mothurOut(" has occurred in the " + object + " class function " + function + ". Please contact Pat Schloss at pschloss@microbio.umass.edu, and be sure to include the mothur.logFile with your inquiry.");
                 mothurOutEndLine();
         
  }
diff --git a/nastreport.cpp b/nastreport.cpp

index d7be4dd57c272b69dc0aa09fd35b7d4c2f9d4f0b..aa7d3934ae11c3c4490ed3beaaceaf320ddfc6be 100644 (file)
--- a/nastreport.cpp
+++ b/nastreport.cpp
@@ -41,7 +41,7 @@ void NastReport::print(){
         candidateReportFile << alignmentMethod << '\t' << candidateStartPosition << "\t" << candidateEndPosition << '\t';
         candidateReportFile << templateStartPosition << "\t" << templateEndPosition << '\t';
         candidateReportFile << pairwiseAlignmentLength << '\t' << totalGapsInQuery << '\t' << totalGapsInTemplate << '\t';
-       candidateReportFile << longestInsert << '\t';;
+       candidateReportFile << longestInsert << '\t';
         candidateReportFile << setprecision(2) << similarityToTemplate;
         
         candidateReportFile << endl;
diff --git a/rarefact.cpp b/rarefact.cpp

index f1b43fc26553cd739d32d2baeff36bc457df006a..5cd9b99c5051be03b00757df6a99bfb479adabfe 100644 (file)
--- a/rarefact.cpp
+++ b/rarefact.cpp
@@ -81,14 +81,19 @@ try {
                         rcd->registerDisplay(displays[i]);
                 }
                 
+               //if jumble is false all iters will be the same
+               if (globaldata->jumble == false)  {  nIters = 1;  }
+               
                 for(int iter=0;iter<nIters;iter++){
                 
                         for(int i=0;i<displays.size();i++){
                                 displays[i]->init(label);                 
                         }
-
-                       //randomize the groups
-                       random_shuffle(lookup.begin(), lookup.end());
+                       
+                       if (globaldata->jumble == true)  {
+                               //randomize the groups
+                               random_shuffle(lookup.begin(), lookup.end());
+                       }
                         
                         //make merge the size of lookup[0]
                         SharedRAbundVector* merge = new SharedRAbundVector(lookup[0]->size());
diff --git a/rarefact.h b/rarefact.h

index aff5cb63effc7e806a368d3bd08545d2301bd0ba..9507d5af84eeadec5046a4fe1967eda508913968 100644 (file)
--- a/rarefact.h
+++ b/rarefact.h
@@ -5,6 +5,7 @@
  #include "raredisplay.h"
  #include "ordervector.hpp"
  #include "mothur.h"
+#include "globaldata.hpp"
  
  
  class Rarefact {
@@ -13,13 +14,14 @@ public:
         Rarefact(OrderVector* o, vector<Display*> disp) :
                         numSeqs(o->getNumSeqs()), order(o), displays(disp), label(o->getLabel())  {};
         Rarefact(vector<SharedRAbundVector*> shared, vector<Display*> disp) :
-                                        lookup(shared), displays(disp) {};
+                                        lookup(shared), displays(disp) {  globaldata = GlobalData::getInstance();  };
  
         ~Rarefact(){};
         void getCurve(int, int);
         void getSharedCurve(int, int);
         
  private:
+       GlobalData* globaldata;
         OrderVector* order;
         vector<Display*> displays;
         int numSeqs, numGroupComb;
diff --git a/rarefactsharedcommand.cpp b/rarefactsharedcommand.cpp

index 71bfaf1d0f79b4866cebe71309e058a61bd2535e..5914f156a1a01831ce7cf6d2f3c52696c529f466 100644 (file)
--- a/rarefactsharedcommand.cpp
+++ b/rarefactsharedcommand.cpp
@@ -29,7 +29,7 @@ RareFactSharedCommand::RareFactSharedCommand(string option){
                 
                 else {
                         //valid paramters for this command
-                       string Array[] =  {"iters","line","label","calc","groups"};
+                       string Array[] =  {"iters","line","label","calc","groups", "jumble"};
                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
                         
                         OptionParser parser(option);
@@ -92,6 +92,11 @@ RareFactSharedCommand::RareFactSharedCommand(string option){
                         temp = validParameter.validFile(parameters, "iters", false);                    if (temp == "not found") { temp = "1000"; }
                         convert(temp, nIters); 
                         
+                       temp = validParameter.validFile(parameters, "jumble", false);                   if (temp == "not found") { temp = "T"; }
+                       if (isTrue(temp)) { jumble = true; }
+                       else { jumble = false; }
+                       globaldata->jumble = jumble;
+                       
                         if (abort == false) {
                         
                                 string fileNameRoot = getRootName(globaldata->inputFileName);
@@ -125,12 +130,12 @@ RareFactSharedCommand::RareFactSharedCommand(string option){
  void RareFactSharedCommand::help(){
         try {
                 mothurOut("The rarefaction.shared command can only be executed after a successful read.otu command.\n");
-               mothurOut("The rarefaction.shared command parameters are label, line, iters, groups and calc.  No parameters are required, but you may not use \n");
+               mothurOut("The rarefaction.shared command parameters are label, line, iters, groups, jumble and calc.  No parameters are required, but you may not use \n");
                 mothurOut("both the line and label parameters at the same time. The rarefaction command should be in the following format: \n");
-               mothurOut("rarefaction.shared(label=yourLabel, line=yourLines, iters=yourIters, calc=yourEstimators, groups=yourGroups).\n");
-               mothurOut("Example rarefaction.shared(label=unique-.01-.03, line=0-5-10, iters=10000, groups=B-C, calc=sharedobserved).\n");
+               mothurOut("rarefaction.shared(label=yourLabel, line=yourLines, iters=yourIters, calc=yourEstimators, jumble=yourJumble, groups=yourGroups).\n");
+               mothurOut("Example rarefaction.shared(label=unique-.01-.03, line=0-5-10, iters=10000, groups=B-C, jumble=T, calc=sharedobserved).\n");
                 mothurOut("The default values for iters is 1000, freq is 100, and calc is sharedobserved which calculates the shared rarefaction curve for the observed richness.\n");
-               mothurOut("The default value for groups is all the groups in your groupfile.\n");
+               mothurOut("The default value for groups is all the groups in your groupfile, and jumble is true.\n");
                 validCalculator->printCalc("sharedrarefaction", cout);
                 mothurOut("The label and line parameters are used to analyze specific lines in your input.\n");
                 mothurOut("The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed.  You must enter at least 2 valid groups.\n");
diff --git a/rarefactsharedcommand.h b/rarefactsharedcommand.h

index c7290f4d85ec4f8bd671d4df914cd2738dd3c4c0..b0109579d81ba55ba14de4e92d9a4f1e3cc1efca 100644 (file)
--- a/rarefactsharedcommand.h
+++ b/rarefactsharedcommand.h
@@ -49,7 +49,7 @@ private:
         int freq, nIters;
         string format;
  
-       bool abort, allLines;
+       bool abort, allLines, jumble;
         set<int> lines; //hold lines to be used
         set<string> labels; //holds labels to be used
         string line, label, calc, groups;
diff --git a/sequence.cpp b/sequence.cpp

index aa1f1c7eae3c0b1cb08105a070a172169ecf320e..685f0726e6ec5042d32379874e54b83e8a6eff26 100644 (file)
--- a/sequence.cpp
+++ b/sequence.cpp
@@ -53,7 +53,7 @@ Sequence::Sequence(ifstream& fastaFile){
                 }
         }
  
-       if(sequence.find_first_of('-') != string::npos){        //      if there are any gaps in the sequence, assume that it is
+       if((sequence.find_first_of('-') != string::npos) || (sequence.find_first_of('.') != string::npos)) {    //      if there are any gaps in the sequence, assume that it is
                 setAligned(sequence);                                                   //      an alignment file
         }
         setUnaligned(sequence);                                                         //      also set the unaligned sequence file
author	westcott <westcott>
	Wed, 8 Jul 2009 13:31:28 +0000 (13:31 +0000)
committer	westcott <westcott>
	Wed, 8 Jul 2009 13:31:28 +0000 (13:31 +0000)
Mothur.xcodeproj/project.pbxproj		patch \| blob \| history
aligncommand.cpp		patch \| blob \| history
chimeraseqscommand.cpp		patch \| blob \| history
chimeraseqscommand.h		patch \| blob \| history
commandfactory.cpp		patch \| blob \| history
eachgapdist.h		patch \| blob \| history
globaldata.hpp		patch \| blob \| history
listseqscommand.cpp	[new file with mode: 0644]	patch \| blob
listseqscommand.h	[new file with mode: 0644]	patch \| blob
mothur.h		patch \| blob \| history
nastreport.cpp		patch \| blob \| history
rarefact.cpp		patch \| blob \| history
rarefact.h		patch \| blob \| history
rarefactsharedcommand.cpp		patch \| blob \| history
rarefactsharedcommand.h		patch \| blob \| history
sequence.cpp		patch \| blob \| history