]> git.donarmstrong.com Git - mothur.git/commitdiff
added list.seqs command
authorwestcott <westcott>
Wed, 8 Jul 2009 13:31:28 +0000 (13:31 +0000)
committerwestcott <westcott>
Wed, 8 Jul 2009 13:31:28 +0000 (13:31 +0000)
16 files changed:
Mothur.xcodeproj/project.pbxproj
aligncommand.cpp
chimeraseqscommand.cpp
chimeraseqscommand.h
commandfactory.cpp
eachgapdist.h
globaldata.hpp
listseqscommand.cpp [new file with mode: 0644]
listseqscommand.h [new file with mode: 0644]
mothur.h
nastreport.cpp
rarefact.cpp
rarefact.h
rarefactsharedcommand.cpp
rarefactsharedcommand.h
sequence.cpp

index b8a10577b717fc26f6d167c0fec1589b24bbc3d4..f5bc8ad01ae02e054c66ca4d87ea6d1b0ba478e8 100644 (file)
@@ -76,6 +76,7 @@
                37AD4DCA0F28F3DD00AA2D49 /* readtree.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 37AD4DC90F28F3DD00AA2D49 /* readtree.cpp */; };
                37AFC71F0F445386005F492D /* sharedsobscollectsummary.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 37AFC71E0F445386005F492D /* sharedsobscollectsummary.cpp */; };
                37B28F680F27590100808A62 /* deconvolutecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 37B28F670F27590100808A62 /* deconvolutecommand.cpp */; };
+               37B73C761004BEFD008C4B41 /* listseqscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 37B73C751004BEFD008C4B41 /* listseqscommand.cpp */; };
                37C1D9730F86506E0059E3F0 /* binsequencecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 37C1D9720F86506E0059E3F0 /* binsequencecommand.cpp */; };
                37C753CE0FB3415200DBD02E /* distancecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 37C753CD0FB3415200DBD02E /* distancecommand.cpp */; };
                37D928550F21331F001D4494 /* ace.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 37D927B80F21331F001D4494 /* ace.cpp */; };
                37AFC71E0F445386005F492D /* sharedsobscollectsummary.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = sharedsobscollectsummary.cpp; sourceTree = SOURCE_ROOT; };
                37B28F660F27590100808A62 /* deconvolutecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = deconvolutecommand.h; sourceTree = SOURCE_ROOT; };
                37B28F670F27590100808A62 /* deconvolutecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = deconvolutecommand.cpp; sourceTree = SOURCE_ROOT; };
+               37B73C741004BEFD008C4B41 /* listseqscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = listseqscommand.h; sourceTree = "<group>"; };
+               37B73C751004BEFD008C4B41 /* listseqscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = listseqscommand.cpp; sourceTree = "<group>"; };
                37C1D9710F86506E0059E3F0 /* binsequencecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = binsequencecommand.h; sourceTree = SOURCE_ROOT; };
                37C1D9720F86506E0059E3F0 /* binsequencecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = binsequencecommand.cpp; sourceTree = SOURCE_ROOT; };
                37C753CC0FB3415200DBD02E /* distancecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = distancecommand.h; sourceTree = SOURCE_ROOT; };
                                37D927E30F21331F001D4494 /* helpcommand.cpp */,
                                375873F40F7D648F0040F377 /* libshuffcommand.h */,
                                375873F30F7D648F0040F377 /* libshuffcommand.cpp */,
+                               37B73C741004BEFD008C4B41 /* listseqscommand.h */,
+                               37B73C751004BEFD008C4B41 /* listseqscommand.cpp */,
                                21E859D60FC4632E005E1A48 /* matrixoutputcommand.h */,
                                21E859D70FC4632E005E1A48 /* matrixoutputcommand.cpp */,
                                7E5A17AD0FE57292003C6A03 /* mergefilecommand.h */,
                                378599100FDD7E8E00EF9D03 /* optionparser.cpp in Sources */,
                                7E5A17AF0FE57292003C6A03 /* mergefilecommand.cpp in Sources */,
                                379D3D510FF90E090068C1C0 /* chimeraseqscommand.cpp in Sources */,
+                               37B73C761004BEFD008C4B41 /* listseqscommand.cpp in Sources */,
                        );
                        runOnlyForDeploymentPostprocessing = 0;
                };
index 0f5c86f5bf1cdc73f048a4c6a7a52b928875bbee..5af8e959a6caa3edd98a33a21fa680bd5815d2dc 100644 (file)
@@ -195,10 +195,12 @@ int AlignCommand::execute(){
                        ifstream inFASTA;
                        openInputFile(candidateFileName, inFASTA);
                        
+                       string input;
                        while(!inFASTA.eof()){
-                               char c = inFASTA.get();
-                               if(c == '>'){   int pos = inFASTA.tellg(); positions.push_back(pos-1);  }
-                               while (!inFASTA.eof())  {       c = inFASTA.get(); if (c == 10 || c == 13){     break;  }       } // get rest of line if there's any crap there
+                               getline(inFASTA, input);
+                               if (input.length() != 0) {
+                                       if(input[0] == '>'){    int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1);       }
+                               }
                        }
                        inFASTA.close();
                        
index 182c25e4b909ba2dd49fc1971beb7abee3b99188..f19fe11bcb907b86bc67429388fddfaa485601aa 100644 (file)
@@ -8,6 +8,7 @@
  */
 
 #include "chimeraseqscommand.h"
+#include "eachgapdist.h"
 
 //***************************************************************************************************************
 
@@ -20,7 +21,7 @@ ChimeraSeqsCommand::ChimeraSeqsCommand(string option){
                
                else {
                        //valid paramters for this command
-                       string Array[] =  {"fasta", "filter", "correction", "processors", "method" };
+                       string Array[] =  {"fasta", "filter", "correction", "processors", "method", "window", "increment" };
                        vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
                        
                        OptionParser parser(option);
@@ -39,16 +40,24 @@ ChimeraSeqsCommand::ChimeraSeqsCommand(string option){
                        else if (fastafile == "not found") { fastafile = ""; mothurOut("fasta is a required parameter for the chimera.seqs command."); mothurOutEndLine(); abort = true;  }     
                        
                        string temp;
-                       temp = validParameter.validFile(parameters, "filter", false);                   if (temp == "not found") { temp = "F"; }
+                       temp = validParameter.validFile(parameters, "filter", false);                   if (temp == "not found") { temp = "T"; }
                        filter = isTrue(temp);
                        
                        temp = validParameter.validFile(parameters, "correction", false);               if (temp == "not found") { temp = "T"; }
                        correction = isTrue(temp);
                        
-                       temp = validParameter.validFile(parameters, "processors", true);                if (temp == "not found") { temp = "1"; }
+                       temp = validParameter.validFile(parameters, "processors", false);               if (temp == "not found") { temp = "1"; }
                        convert(temp, processors);
                        
+                       temp = validParameter.validFile(parameters, "window", false);                   if (temp == "not found") { temp = "0"; }
+                       convert(temp, window);
+                                       
+                       temp = validParameter.validFile(parameters, "increment", false);                        if (temp == "not found") { temp = "10"; }
+                       convert(temp, increment);
+                               
                        method = validParameter.validFile(parameters, "method", false);         if (method == "not found") { method = "bellerophon"; }
+                       
+                       if (method != "bellerophon") { mothurOut(method + " is not a valid method."); mothurOutEndLine();  abort = true; }
 
                }
        }
@@ -77,6 +86,11 @@ void ChimeraSeqsCommand::help(){
                exit(1);
        }
 }
+//********************************************************************************************************************
+//sorts highest score to lowest
+inline bool comparePref(Preference left, Preference right){
+       return (left.score[0] > right.score[0]);        
+}
 
 //***************************************************************************************************************
 
@@ -89,9 +103,10 @@ int ChimeraSeqsCommand::execute(){
                
                if (abort == true) { return 0; }
                
+               
                //do soft filter
                if (filter)  {
-                       string optionString = "fasta=" + fastafile + ", soft=50.0, vertical=F";
+                       string optionString = "fasta=" + fastafile + ", soft=50, vertical=F";
                        filterSeqs = new FilterSeqsCommand(optionString);
                        filterSeqs->execute();
                        delete filterSeqs;
@@ -100,76 +115,169 @@ int ChimeraSeqsCommand::execute(){
                        fastafile = getRootName(fastafile) + "filter.fasta";
                }
                
+               distCalculator = new eachGapDist();
+               
                //read in sequences
                readSeqs();
                
-               //int numSeqs = seqs.size();
+               int numSeqs = seqs.size();
                
-               //find average midpoint of seqs
-               midpoint = findAverageMidPoint();
+               if (numSeqs == 0) { mothurOut("Error in reading you sequences."); mothurOutEndLine(); return 0; }
                
-               //create 2 vectors of sequences, 1 for left side and one for right side
-               vector<Sequence> left;  vector<Sequence> right;
+               //set default window to 25% of sequence length
+               string seq0 = seqs[0].getAligned();
+               if (window == 0) { window = seq0.length() / 4;  }
+               else if (window > (seq0.length() / 2)) {  
+                       mothurOut("Your sequence length is = " + toString(seq0.length()) + ". You have selected a window size greater than the length of half your aligned sequence. I will run it with a window size of " + toString((seq0.length() / 2))); mothurOutEndLine();
+                       window = (seq0.length() / 2);
+               }
                
-               for (int i = 0; i < seqs.size(); i++) {
-                       //save left side
-                       string seqLeft = seqs[i].getAligned();
-                       seqLeft = seqLeft.substr(0, midpoint);
-                       Sequence tempLeft(seqs[i].getName(), seqLeft);
-                       left.push_back(tempLeft);
+               if (increment > (seqs[0].getAlignLength() - (2*window))) { 
+                       if (increment != 10) {
                        
-                       //save right side
-                       string seqRight = seqs[i].getAligned();
-                       seqRight = seqRight.substr(midpoint+1, (seqRight.length()-midpoint-1));
-                       Sequence tempRight(seqs[i].getName(), seqRight);
-                       right.push_back(tempRight);
+                               mothurOut("You have selected a increment that is too large. I will use the default."); mothurOutEndLine();
+                               increment = 10;
+                               if (increment > (seqs[0].getAlignLength() - (2*window))) {  increment = 0;  }
+                               
+                       }else{ increment = 0; }
+               }
+cout << "increment = " << increment << endl;           
+               if (increment == 0) { iters = 1; }
+               else { iters = ((seqs[0].getAlignLength() - (2*window)) / increment); }
+               
+               //initialize pref
+               pref.resize(numSeqs);  
+               
+               for (int i = 0; i < numSeqs; i++ ) { 
+                       pref[i].leftParent.resize(2); pref[i].rightParent.resize(2); pref[i].score.resize(2);   pref[i].closestLeft.resize(2); pref[i].closestRight.resize(3);
+                       pref[i].name = seqs[i].getName();
+                       pref[i].score[0] = 0.0;  pref[i].score[1] = 0.0; 
+                       pref[i].closestLeft[0] = 100000.0;  pref[i].closestLeft[1] = 100000.0;  
+                       pref[i].closestRight[0] = 100000.0;  pref[i].closestRight[1] = 100000.0;  
                }
+
+               int midpoint = window;
+               int count = 0;
+               while (count < iters) {
+                               
+                               //create 2 vectors of sequences, 1 for left side and one for right side
+                               vector<Sequence> left;  vector<Sequence> right;
+                               
+                               for (int i = 0; i < seqs.size(); i++) {
+//cout << "whole = " << seqs[i].getAligned() << endl;
+                                       //save left side
+                                       string seqLeft = seqs[i].getAligned().substr(midpoint-window, window);
+                                       Sequence tempLeft;
+                                       tempLeft.setName(seqs[i].getName());
+                                       tempLeft.setAligned(seqLeft);
+                                       left.push_back(tempLeft);
+//cout << "left = " << tempLeft.getAligned() << endl;                  
+                                       //save right side
+                                       string seqRight = seqs[i].getAligned().substr(midpoint, window);
+                                       Sequence tempRight;
+                                       tempRight.setName(seqs[i].getName());
+                                       tempRight.setAligned(seqRight);
+                                       right.push_back(tempRight);
+//cout << "right = " << seqRight << endl;      
+                               }
+                               
+                               //adjust midpoint by increment
+                               midpoint += increment;
+                               
+                               
+                               //this should be parallelized
+                               //preference = sum of (| distance of my left to sequence j's left - distance of my right to sequence j's right | )
+                               //create a matrix containing the distance from left to left and right to right
+                               //calculate distances
+                               SparseMatrix* SparseLeft = new SparseMatrix();
+                               SparseMatrix* SparseRight = new SparseMatrix();
+                               
+                               createSparseMatrix(0, left.size(), SparseLeft, left);
+                               createSparseMatrix(0, right.size(), SparseRight, right);
+                               
+                               vector<SeqMap> distMapRight;
+                               vector<SeqMap> distMapLeft;
+                               
+                               // Create a data structure to quickly access the distance information.
+                               // It consists of a vector of distance maps, where each map contains
+                               // all distances of a certain sequence. Vector and maps are accessed
+                               // via the index of a sequence in the distance matrix
+                               distMapRight = vector<SeqMap>(numSeqs); 
+                               distMapLeft = vector<SeqMap>(numSeqs); 
+                               //cout << "left" << endl << endl;
+                               for (MatData currentCell = SparseLeft->begin(); currentCell != SparseLeft->end(); currentCell++) {
+                                       distMapLeft[currentCell->row][currentCell->column] = currentCell->dist;
+                                       //cout << " i = " << currentCell->row << " j = " << currentCell->column << " dist = " << currentCell->dist << endl;
+                               }
+                               //cout << "right" << endl << endl;
+                               for (MatData currentCell = SparseRight->begin(); currentCell != SparseRight->end(); currentCell++) {
+                                       distMapRight[currentCell->row][currentCell->column] = currentCell->dist;
+                                       //cout << " i = " << currentCell->row << " j = " << currentCell->column << " dist = " << currentCell->dist << endl;
+                               }
+                               
+                               delete SparseLeft;
+                               delete SparseRight;
                                
-               //this should be parallelized
-               //perference = sum of (| distance of my left to sequence j's left - distance of my right to sequence j's right | )
-               //create a matrix containing the distance from left to left and right to right
-               //calculate distances
-               SparseMatrix* SparseLeft = new SparseMatrix();
-               SparseMatrix* SparseRight = new SparseMatrix();
+                               
+                               //fill preference structure
+                               generatePreferences(distMapLeft, distMapRight, midpoint);
+                               
+                               count++;
+                               
+               }
                
-               createSparseMatrix(0, left.size(), SparseLeft, left);
-               createSparseMatrix(0, right.size(), SparseRight, right);
+               delete distCalculator;
                
+               //find average pref score across windows
+               //if (increment != 0) {
+                
+                       //for (int i = 0; i < pref.size(); i++) {
+                               //pref[i].score[0] = pref[i].score[0] / iters;
+                       //}
+               //}
                
-               //vector<SeqMap> distMapRight;
-               //vector<SeqMap> distMapLeft;
+               //sort Preferences highest to lowest
+               sort(pref.begin(), pref.end(), comparePref);
                
-               // Create a data structure to quickly access the distance information.
-               // It consists of a vector of distance maps, where each map contains
-               // all distances of a certain sequence. Vector and maps are accessed
-               // via the index of a sequence in the distance matrix
-               //distMapRight = vector<SeqMap>(globaldata->gListVector->size()); 
-               //distMapLeft = vector<SeqMap>(globaldata->gListVector->size()); 
-               for (MatData currentCell = SparseLeft->begin(); currentCell != SparseLeft->end(); currentCell++) {
-                       //distMapLeft[currentCell->row][currentCell->column] = currentCell->dist;
-               }
-               for (MatData currentCell = SparseRight->begin(); currentCell != SparseRight->end(); currentCell++) {
-                       //distMapRight[currentCell->row][currentCell->column] = currentCell->dist;
-               }
-
+               string outputFileName = getRootName(fastafile) + "chimeras";
+               ofstream out;
+               openOutputFile(outputFileName, out);
                
-               //fill preference structure
-               //generatePreferences(distMapLeft, distMapRight);
+               int above1 = 0;
+               out << "Name\tScore\tLeft\tRight\t" << endl;
+               //output preference structure to .chimeras file
+               for (int i = 0; i < pref.size(); i++) {
+                       out << pref[i].name << '\t' << pref[i].score[0] << '\t' << pref[i].leftParent[0] << '\t' << pref[i].rightParent[0] << endl;
+                       
+                       //calc # of seqs with preference above 1.0
+                       if (pref[i].score[0] > 1.0) { 
+                               above1++; 
+                               mothurOut(pref[i].name + " is a suspected chimera at breakpoint " + toString(pref[i].midpoint)); mothurOutEndLine();
+                               mothurOut("It's score is " + toString(pref[i].score[0]) + " with suspected left parent " + pref[i].leftParent[0] + " and right parent " + pref[i].rightParent[0]); mothurOutEndLine();
+                       }
+                       
+                       
+               }
                
-                               
-               //output results to screen                                              
+               //output results to screen
                mothurOutEndLine();
-               mothurOut("\t\t"); mothurOutEndLine();
-               //mothurOut("Minimum:\t" + toString(startPosition[0]) + "\t" + toString(endPosition[0]) + "\t" + toString(seqLength[0]) + "\t" + toString(ambigBases[0]) + "\t" + toString(longHomoPolymer[0])); mothurOutEndLine();
-               //mothurOut("2.5%-tile:\t" + toString(startPosition[ptile0_25]) + "\t" + toString(endPosition[ptile0_25]) + "\t" + toString(seqLength[ptile0_25]) + "\t" + toString(ambigBases[ptile0_25]) + "\t"+ toString(longHomoPolymer[ptile0_25])); mothurOutEndLine();
-               //mothurOut("25%-tile:\t" + toString(startPosition[ptile25]) + "\t" + toString(endPosition[ptile25]) + "\t" + toString(seqLength[ptile25]) + "\t" + toString(ambigBases[ptile25]) + "\t" + toString(longHomoPolymer[ptile25])); mothurOutEndLine();
-               //mothurOut("Median: \t" + toString(startPosition[ptile50]) + "\t" + toString(endPosition[ptile50]) + "\t" + toString(seqLength[ptile50]) + "\t" + toString(ambigBases[ptile50]) + "\t" + toString(longHomoPolymer[ptile50])); mothurOutEndLine();
-               //mothurOut("75%-tile:\t" + toString(startPosition[ptile75]) + "\t" + toString(endPosition[ptile75]) + "\t" + toString(seqLength[ptile75]) + "\t" + toString(ambigBases[ptile75]) + "\t" + toString(longHomoPolymer[ptile75])); mothurOutEndLine();
-               //mothurOut("97.5%-tile:\t" + toString(startPosition[ptile97_5]) + "\t" + toString(endPosition[ptile97_5]) + "\t" + toString(seqLength[ptile97_5]) + "\t" + toString(ambigBases[ptile97_5]) + "\t" + toString(longHomoPolymer[ptile97_5])); mothurOutEndLine();
-               //mothurOut("Maximum:\t" + toString(startPosition[ptile100]) + "\t" + toString(endPosition[ptile100]) + "\t" + toString(seqLength[ptile100]) + "\t" + toString(ambigBases[ptile100]) + "\t" + toString(longHomoPolymer[ptile100])); mothurOutEndLine();
-               //mothurOut("# of Seqs:\t" + toString(numSeqs)); mothurOutEndLine();
+               mothurOut("Sequence with preference score above 1.0: " + toString(above1)); mothurOutEndLine();
+               int spot;
+               spot = pref.size()-1;
+               mothurOut("Minimum:\t" + toString(pref[spot].score[0])); mothurOutEndLine();
+               spot = pref.size() * 0.975;
+               mothurOut("2.5%-tile:\t" + toString(pref[spot].score[0])); mothurOutEndLine();
+               spot = pref.size() * 0.75;
+               mothurOut("25%-tile:\t" + toString(pref[spot].score[0])); mothurOutEndLine();
+               spot = pref.size() * 0.50;
+               mothurOut("Median: \t" + toString(pref[spot].score[0])); mothurOutEndLine();
+               spot = pref.size() * 0.25;
+               mothurOut("75%-tile:\t" + toString(pref[spot].score[0])); mothurOutEndLine();
+               spot = pref.size() * 0.025;
+               mothurOut("97.5%-tile:\t" + toString(pref[spot].score[0])); mothurOutEndLine();
+               spot = 0;
+               mothurOut("Maximum:\t" + toString(pref[spot].score[0])); mothurOutEndLine();
                
-               //outSummary.close();
                return 0;
        }
        catch(exception& e) {
@@ -188,6 +296,8 @@ void ChimeraSeqsCommand::readSeqs(){
                while(!inFASTA.eof()){
                        Sequence current(inFASTA);
                        
+                       if (current.getAligned() == "") { current.setAligned(current.getUnaligned()); }
+                       
                        seqs.push_back(current);
                        
                        gobble(inFASTA);
@@ -201,53 +311,6 @@ void ChimeraSeqsCommand::readSeqs(){
        }
 }
 
-
-//***************************************************************************************************************
-int ChimeraSeqsCommand::findAverageMidPoint(){
-       try {
-               int totalMids = 0;
-               int averageMid = 0;
-               
-               //loop through the seqs and find midpoint
-               for (int i = 0; i < seqs.size(); i++) {
-                       
-                       //get unaligned sequence
-                       seqs[i].setUnaligned(seqs[i].getUnaligned());  //if you read an aligned file the unaligned is really aligned, so we need to make sure its unaligned
-                       
-                       string unaligned = seqs[i].getUnaligned();
-                       string aligned = seqs[i].getAligned();
-                       
-                       //find midpoint of this seq
-                       int count = 0;
-                       int thismid = 0;
-                       for (int j = 0; j < aligned.length(); j++) {
-                               
-                               thismid++;
-                               
-                               //if you are part of the unaligned sequence increment
-                               if (isalpha(aligned[j])) {  count++;  }
-                               
-                               //if you have reached the halfway point stop
-                               if (count >= (unaligned.length() / 2)) { break; }
-                       }
-                       
-                       //add this mid to total
-                       totalMids += thismid;
-               
-               }
-               
-               averageMid = (totalMids / seqs.size());
-               
-               return averageMid; 
-       
-       
-       }
-       catch(exception& e) {
-               errorOut(e, "ChimeraSeqsCommand", "findAverageMidPoint");
-               exit(1);
-       }
-}
-
 /***************************************************************************************************************/
 int ChimeraSeqsCommand::createSparseMatrix(int startSeq, int endSeq, SparseMatrix* sparse, vector<Sequence> s){
        try {
@@ -256,9 +319,9 @@ int ChimeraSeqsCommand::createSparseMatrix(int startSeq, int endSeq, SparseMatri
                        
                        for(int j=0;j<i;j++){
                        
-                               //distCalculator->calcDist(s.get(i), s.get(j));
+                               distCalculator->calcDist(s[i], s[j]);
                                float dist = distCalculator->getDist();
-                               
+                       
                                PCell temp(i, j, dist);
                                sparse->addCell(temp);
                                
@@ -273,23 +336,123 @@ int ChimeraSeqsCommand::createSparseMatrix(int startSeq, int endSeq, SparseMatri
                exit(1);
        }
 }
-/***************************************************************************************************************
-void ChimeraSeqsCommand::generatePreferences(vector<SeqMap> left, vector<SeqMap> right){
+/***************************************************************************************************************/
+void ChimeraSeqsCommand::generatePreferences(vector<SeqMap> left, vector<SeqMap> right, int mid){
        try {
+               
+               float dme = 0.0;
+               SeqMap::iterator itR;
+               SeqMap::iterator itL;
+               
+               //initialize pref[i]
+               for (int i = 0; i < pref.size(); i++) {
+                       pref[i].score[1] = 0.0;
+                       pref[i].closestLeft[1] = 100000.0; 
+                       pref[i].closestRight[1] = 100000.0; 
+                       pref[i].leftParent[1] = "";
+                       pref[i].rightParent[1] = "";
+               }
 
+//cout << "in generate left.size() = " << left.size() << endl;         
                for (int i = 0; i < left.size(); i++) {
                        
-                       int iscore = 0;
-                       float closestLeft = 100000.0;
-                       float closestRight = 100000.0;
+                       SeqMap currentLeft = left[i];    //example i = 3;   currentLeft is a map of 0 to the distance of sequence 3 to sequence 0,
+                                                                                               //                                                                              1 to the distance of sequence 3 to sequence 1,
+                                                                                               //                                                                              2 to the distance of sequence 3 to sequence 2.
+                       SeqMap currentRight = right[i];         // same as left but with distances on the right side.
                        
-                       for (int j = 0; j < left.size(); j++) {
+                       for (int j = 0; j < i; j++) {
                                
-                               //iscore += abs(left
-                       
+                               itL = currentLeft.find(j);
+                               itR = currentRight.find(j);
+cout << " i = " << i << " j = " << j << " distLeft = " << itL->second << endl;
+cout << " i = " << i << " j = " << j << " distright = " << itR->second << endl;
+                               
+                               //if you can find this entry update the preferences
+                               if ((itL != currentLeft.end()) && (itR != currentRight.end())) {
+                               
+                                       if (!correction) {
+                                               pref[i].score[1] += abs((itL->second - itR->second));
+                                               pref[j].score[1] += abs((itL->second - itR->second));
+cout << "left " << i << " " << j << " = " << itL->second << " right " << i << " " << j << " = " << itR->second << endl;
+cout << "abs = " << abs((itL->second - itR->second)) << endl;
+cout << i << " score = " << pref[i].score[1] << endl;
+cout << j << " score = " << pref[j].score[1] << endl;
+                                       }else {
+                                               pref[i].score[1] += abs((sqrt(itL->second) - sqrt(itR->second)));
+                                               pref[j].score[1] += abs((sqrt(itL->second) - sqrt(itR->second)));
+cout << "left " << i << " " << j << " = " << itL->second << " right " << i << " " << j << " = " << itR->second << endl;
+cout << "abs = " << abs((sqrt(itL->second) - sqrt(itR->second))) << endl;
+cout << i << " score = " << pref[i].score[1] << endl;
+cout << j << " score = " << pref[j].score[1] << endl;
+                                       }
+cout << "pref[" << i << "].closestLeft[1] = "  <<      pref[i].closestLeft[1] << " parent = " << pref[i].leftParent[1] << endl;                        
+                                       //are you the closest left sequence
+                                       if (itL->second < pref[i].closestLeft[1]) {  
+
+                                               pref[i].closestLeft[1] = itL->second;
+                                               pref[i].leftParent[1] = seqs[j].getName();
+cout << "updating closest left to " << pref[i].leftParent[1] << endl;
+                                       }
+cout << "pref[" << j << "].closestLeft[1] = "  <<      pref[j].closestLeft[1] << " parent = " << pref[j].leftParent[1] << endl;        
+                                       if (itL->second < pref[j].closestLeft[1]) { 
+                                               pref[j].closestLeft[1] = itL->second;
+                                               pref[j].leftParent[1] = seqs[i].getName();
+cout << "updating closest left to " << pref[j].leftParent[1] << endl;
+                                       }
+                                       
+                                       //are you the closest right sequence
+                                       if (itR->second < pref[i].closestRight[1]) {   
+                                               pref[i].closestRight[1] = itR->second;
+                                               pref[i].rightParent[1] = seqs[j].getName();
+                                       }
+                                       if (itR->second < pref[j].closestRight[1]) {   
+                                               pref[j].closestRight[1] = itR->second;
+                                               pref[j].rightParent[1] = seqs[i].getName();
+                                       }
+                                       
+                               }
                        }
                
                }
+               
+               
+                 
+               //calculate the dme
+               int count0 = 0;
+               for (int i = 0; i < pref.size(); i++) {  dme += pref[i].score[1];  if (pref[i].score[1] == 0.0) { count0++; }  }
+               
+               float expectedPercent = 1 / (float) (pref.size() - count0);
+cout << endl << "dme = " << dme << endl;
+               //recalculate prefernences based on dme
+               for (int i = 0; i < pref.size(); i++) {
+cout << "unadjusted pref " << i << " = " << pref[i].score[1] << endl;  
+                       // gives the actual percentage of the dme this seq adds
+                       pref[i].score[1] = pref[i].score[1] / dme;
+                       
+                       //how much higher or lower is this than expected
+                       pref[i].score[1] = pref[i].score[1] / expectedPercent;
+                       
+                       //so a non chimeric sequence would be around 1, and a chimeric would be signifigantly higher.
+cout << "adjusted pref " << i << " = " << pref[i].score[1] << endl;                                    
+               }
+               
+               //is this score bigger then the last score
+               for (int i = 0; i < pref.size(); i++) {  
+                       
+                       //update biggest score
+                       if (pref[i].score[1] > pref[i].score[0]) {
+                               pref[i].score[0] = pref[i].score[1];
+                               pref[i].leftParent[0] = pref[i].leftParent[1];
+                               pref[i].rightParent[0] = pref[i].rightParent[1];
+                               pref[i].closestLeft[0] = pref[i].closestLeft[1];
+                               pref[i].closestRight[0] = pref[i].closestRight[1];
+                               pref[i].midpoint = mid;
+                       }
+                       
+                       //total of preference scores across windows
+                       //pref[i].score[0] += pref[i].score[1];
+               }
 
        }
        catch(exception& e) {
index 5b55120e0d0981b432f71332dc1db57e8fae5ce7..2398ce63b257935d857e06bf6525b6b94e8efd05 100644 (file)
 typedef list<PCell>::iterator MatData;
 typedef map<int, float> SeqMap;  //maps sequence to all distance for that seqeunce
 
+// Per-sequence chimera preference record.  ChimeraSeqsCommand::generatePreferences indexes each
+// vector below with two slots: [1] holds the values computed for the window currently being
+// processed, [0] holds the values copied from the highest-scoring window seen so far.
+struct Preference {
+               string name;  //NOTE(review): presumably the sequence name; it is not assigned in the code visible here - confirm
+               vector<string> leftParent; //keep the name of closest left associated with the two scores
+               vector<string> rightParent; //keep the name of closest right associated with the two scores
+               vector<float> score;  //so you can keep last score and calc this score and keep whichever is bigger.
+               vector<float> closestLeft;  //keep the closest left associated with the two scores
+               vector<float> closestRight; //keep the closest right associated with the two scores
+               int midpoint;  //the mid argument of the generatePreferences call whose score was kept in score[0]
+
+};
+
+
+
 /***********************************************************/
 
 class ChimeraSeqsCommand : public Command {
@@ -29,28 +42,21 @@ public:
        int execute();
        void help();
        
+               
 private:
-       //Dist* distCalculator;
        
-       struct Preference {
-               string leftParent;
-               string rightParent;
-               float score;
-
-       };
-
        Dist* distCalculator;
        bool abort;
        string method, fastafile;
        bool filter, correction;
-       int processors, midpoint;
+       int processors, midpoint, averageLeft, averageRight, window, iters, increment;
        FilterSeqsCommand* filterSeqs;
+       ListVector* list;
        vector<Sequence> seqs;
        vector<Preference> pref;
        
-       int findAverageMidPoint();
        void readSeqs();
-       void generatePreferences(SparseMatrix*, SparseMatrix*);
+       void generatePreferences(vector<SeqMap>, vector<SeqMap>, int);
        int createSparseMatrix(int, int, SparseMatrix*, vector<Sequence>);
        
 
index 43f6769f92de125e9b76074e2a2d91cdb9ca3889..3d90269590b3f42e2d140821ba39220ccde9c251 100644 (file)
@@ -49,6 +49,8 @@
 #include "reversecommand.h"
 #include "trimseqscommand.h"
 #include "mergefilecommand.h"
+#include "chimeraseqscommand.h"
+#include "listseqscommand.h"
 
 /***********************************************************/
 
@@ -96,6 +98,8 @@ CommandFactory::CommandFactory(){
        commands["screen.seqs"]                 = "screen.seqs";
        commands["reverse.seqs"]                = "reverse.seqs";
        commands["trim.seqs"]                   = "trim.seqs";
+       commands["chimera.seqs"]                = "chimera.seqs";
+       commands["list.seqs"]                   = "list.seqs";
        commands["quit"]                                = "quit"; 
 
 }
@@ -145,13 +149,15 @@ Command* CommandFactory::getCommand(string commandName, string optionString){
                else if(commandName == "tree.shared")                   {   command = new TreeGroupCommand(optionString);                       }
                else if(commandName == "dist.shared")                   {   command = new MatrixOutputCommand(optionString);            }
                else if(commandName == "bootstrap.shared")              {   command = new BootSharedCommand(optionString);                      }
-               //else if(commandName == "consensus")                           {   command = new ConcensusCommand(optionString);                       }
+               //else if(commandName == "consensus")                   {   command = new ConcensusCommand(optionString);                       }
                else if(commandName == "dist.seqs")                             {   command = new DistanceCommand(optionString);                        }
                else if(commandName == "align.seqs")                    {   command = new AlignCommand(optionString);                           }
                else if(commandName == "summary.seqs")                  {       command = new SeqSummaryCommand(optionString);                  }
                else if(commandName == "screen.seqs")                   {       command = new ScreenSeqsCommand(optionString);                  }
                else if(commandName == "reverse.seqs")                  {       command = new ReverseSeqsCommand(optionString);                 }
                else if(commandName == "trim.seqs")                             {       command = new TrimSeqsCommand(optionString);                    }
+               else if(commandName == "chimera.seqs")                  {       command = new ChimeraSeqsCommand(optionString);                 }
+               else if(commandName == "list.seqs")                             {       command = new ListSeqsCommand(optionString);                    }
                else if(commandName == "merge.files")                   {       command = new MergeFileCommand(optionString);                   }
                else                                                                                    {       command = new NoCommand(optionString);                                  }
 
index d21e55ca1acb4e950b1126ade38a3237a7684ad9..ac9b45302c75cd7cec5ac28613eeaf2c344a21f7 100644 (file)
@@ -23,6 +23,7 @@ public:
                
                string seqA = A.getAligned();
                string seqB = B.getAligned();
+
                int alignLength = seqA.length();
                
                for(int i=0; i<alignLength; i++){
@@ -47,6 +48,7 @@ public:
                
                if(length == 0) {       dist = 1.0000;                                                          }
                else                    {       dist = ((double)diff  / (double)length);        }
+
                
        }
 };
index ff09a36c31727bb6d04e3b2dbe50b50574a2a19f..759bcae6fd7631988bf15407e7234330c2c9f659 100644 (file)
@@ -42,7 +42,7 @@ public:
        TreeMap* gTreemap;
        SequenceDB* gSequenceDB;
        string inputFileName, argv;
-       bool allLines, runParse;
+       bool allLines, runParse, jumble;
        vector<string>  Estimators, Groups; //holds estimators to be used
        set<int> lines; //hold lines to be used
        set<string> labels; //holds labels to be used
@@ -57,18 +57,13 @@ public:
        string getNameFile();   //do we need this?
        string getGroupFile();  //do we need this?
        string getOrderFile();
-//     string getFastaFile();
        string getTreeFile();
        string getSharedFile();
        string getFormat();     //do we need this?
-//     string getCandidateFile();
-//     string getTemplateFile();
+
 
        void setListFile(string);
-//     void setFastaFile(string);
        void setTreeFile(string);
-//     void setCandidateFile(string);
-//     void setTemplateFile(string);
        void setGroupFile(string);              //do we need this?
        void setPhylipFile(string);
        void setColumnFile(string);
diff --git a/listseqscommand.cpp b/listseqscommand.cpp
new file mode 100644 (file)
index 0000000..a7d79f3
--- /dev/null
@@ -0,0 +1,240 @@
+/*
+ *  listseqscommand.cpp
+ *  Mothur
+ *
+ *  Created by Sarah Westcott on 7/8/09.
+ *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
+ *
+ */
+
+#include "listseqscommand.h"
+#include "sequence.hpp"
+
+//**********************************************************************************************************************
+
+// Parses the option string for list.seqs.  "help" prints the usage text and sets abort.  Otherwise
+// the fasta, name, group and align parameters are validated; abort is set when a parameter is not
+// recognised, a file cannot be opened, no file is given, or more than one parameter is supplied.
+ListSeqsCommand::ListSeqsCommand(string option){
+       try {
+               abort = false;
+               
+               //a help request prints the usage text and leaves the command aborted
+               if (option == "help") { help(); abort = true; return; }
+               
+               //parameter names this command accepts
+               string Array[] =  {"fasta","name", "group", "align" };
+               vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
+               
+               OptionParser parser(option);
+               map<string,string> parameters = parser.getParameters();
+               
+               ValidParameters validParameter;
+               
+               //flag the command as aborted if any supplied parameter is not valid
+               map<string,string>::iterator it = parameters.begin();
+               while (it != parameters.end()) {
+                       if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
+                       it++;
+               }
+               
+               //each parameter key paired with the member that receives its file name;
+               //processed in the order fasta, name, group, align
+               const char* fileKeys[] = { "fasta", "name", "group", "align" };
+               string* fileSlots[]    = { &fastafile, &namefile, &groupfile, &alignfile };
+               bool noFileGiven = true;
+               
+               for (int i = 0; i < 4; i++) {
+                       string& slot = *fileSlots[i];
+                       slot = validParameter.validFile(parameters, fileKeys[i], true);
+                       
+                       if (slot == "not open")         { abort = true; }
+                       else if (slot == "not found")   { slot = "";    }
+                       
+                       if (slot != "") { noFileGiven = false; }
+               }
+               
+               if (noFileGiven) {
+                       mothurOut("You must provide a file.");
+                       mothurOutEndLine();
+                       abort = true;
+               }
+               
+               if (parameters.size() > 1) {
+                       mothurOut("You may only enter one file.");
+                       mothurOutEndLine();
+                       abort = true;
+               }
+
+       }
+       catch(exception& e) {
+               errorOut(e, "ListSeqsCommand", "ListSeqsCommand");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+
+// Prints the usage text for the list.seqs command through mothurOut.
+void ListSeqsCommand::help(){
+       try {
+               //usage text, emitted in this order
+               const string usage[] = {
+                       "The list.seqs command reads a fasta, name, group or alignreport file and outputs a .accnos file containing sequence names.\n",
+                       "The list.seqs command parameters are fasta, name, group and align.  You must provide one of these parameters.\n",
+                       "The list.seqs command should be in the following format: list.seqs(fasta=yourFasta).\n",
+                       "Example list.seqs(fasta=amazon.fasta).\n",
+                       "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n\n"
+               };
+               const int numLines = sizeof(usage) / sizeof(usage[0]);
+               
+               for (int i = 0; i < numLines; i++) {
+                       mothurOut(usage[i]);
+               }
+       }
+       catch(exception& e) {
+               errorOut(e, "ListSeqsCommand", "help");
+               exit(1);
+       }
+}
+
+//**********************************************************************************************************************
+
+// Runs list.seqs: fills names from whichever input file was supplied, sorts them, and writes one
+// name per line to <root of input file name>accnos.  Returns 0, including when abort is set.
+int ListSeqsCommand::execute(){
+       try {
+               
+               //nothing to do when the constructor flagged a problem or help was requested
+               if (abort) { return 0; }
+               
+               //pick the first supplied file (fasta, then name, then group, then align);
+               //the matching read function fills the names vector
+               if (fastafile != "") {
+                       inputFileName = fastafile;
+                       readFasta();
+               }
+               else if (namefile != "") {
+                       inputFileName = namefile;
+                       readName();
+               }
+               else if (groupfile != "") {
+                       inputFileName = groupfile;
+                       readGroup();
+               }
+               else if (alignfile != "") {
+                       inputFileName = alignfile;
+                       readAlign();
+               }
+               
+               //alphabetical order
+               sort(names.begin(), names.end());
+               
+               ofstream out;
+               string outputFileName = getRootName(inputFileName) + "accnos";
+               openOutputFile(outputFileName, out);
+               
+               //one name per line in the .accnos file
+               for (vector<string>::const_iterator it = names.begin(); it != names.end(); ++it) {
+                       out << *it << endl;
+               }
+               out.close();
+               
+               return 0;
+       }
+
+       catch(exception& e) {
+               errorOut(e, "ListSeqsCommand", "execute");
+               exit(1);
+       }
+}
+
+//**********************************************************************************************************************
+// Reads fastafile one Sequence at a time and appends each sequence's name to names.
+void ListSeqsCommand::readFasta(){
+       try {
+               
+               ifstream in;
+               openInputFile(fastafile, in);
+               
+               while (!in.eof()) {
+                       //constructing a Sequence from the stream consumes the next record
+                       Sequence currSeq(in);
+                       names.push_back(currSeq.getName());
+                       
+                       gobble(in);
+               }
+               in.close();
+
+       }
+       catch(exception& e) {
+               errorOut(e, "ListSeqsCommand", "readFasta");
+               exit(1);
+       }
+}
+
+//**********************************************************************************************************************
+// Reads namefile and appends every sequence name listed in its second column to names.
+// Each record is two whitespace-separated columns; the second column is a comma-separated
+// list of names.  The first column is read only to step past it.
+void ListSeqsCommand::readName(){
+       try {
+               
+               ifstream in;
+               openInputFile(namefile, in);
+               string firstCol, secondCol;
+               
+               //the extraction itself is the loop condition, so an empty file or a trailing partial
+               //record ends the loop instead of pushing a blank or stale name; operator>> skips
+               //leading whitespace, so no separate whitespace-eating step is needed
+               while (in >> firstCol >> secondCol) {
+                       
+                       //split the second column on commas, saving each name
+                       string::size_type start = 0;
+                       string::size_type comma = secondCol.find(',');
+                       while (comma != string::npos) {
+                               names.push_back(secondCol.substr(start, comma - start));
+                               start = comma + 1;
+                               comma = secondCol.find(',', start);
+                       }
+                       
+                       //get name after last , (the whole column when it holds no comma)
+                       names.push_back(secondCol.substr(start));
+               }
+               in.close();
+               
+       }
+       catch(exception& e) {
+               errorOut(e, "ListSeqsCommand", "readName");
+               exit(1);
+       }
+}
+
+//**********************************************************************************************************************
+// Reads groupfile and appends the first column (the sequence name) of every record to names.
+// Each record is two whitespace-separated columns: name, then group.
+void ListSeqsCommand::readGroup(){
+       try {
+       
+               ifstream in;
+               openInputFile(groupfile, in);
+               string name, group;
+               
+               //looping on the extraction means an empty or whitespace-only file adds nothing,
+               //rather than one blank name from a read that failed after the eof test passed
+               while (in >> name) {
+                       
+                       in >> group;                    //second column is read only to step past it
+                       
+                       names.push_back(name);
+               }
+               in.close();
+
+       }
+       catch(exception& e) {
+               errorOut(e, "ListSeqsCommand", "readGroup");
+               exit(1);
+       }
+}
+
+//**********************************************************************************************************************
+//alignreport file has a column header line then all other lines contain 16 columns.  we just want the first column since that contains the name.
+//Reads alignfile token by token: skips the header tokens, then for every record appends the first column to names and discards the rest.
+void ListSeqsCommand::readAlign(){
+       try {
+       
+               const int numColumns = 16;      //columns per line in an align report
+               
+               ifstream in;
+               openInputFile(alignfile, in);
+               string name, junk;
+               
+               //read column headers, stopping early if the file ends
+               for (int i = 0; i < numColumns; i++) {
+                       if (!(in >> junk)) { break; }
+               }
+               
+               //looping on the extraction means a header-only (or empty) report adds nothing,
+               //rather than one blank name from a read that failed after the eof test passed
+               while (in >> name) {            //read from first column
+                       
+                       //read rest
+                       for (int i = 0; i < numColumns - 1; i++) {
+                               if (!(in >> junk)) { break; }
+                       }
+                       
+                       names.push_back(name);
+               }
+               in.close();
+
+               
+       }
+       catch(exception& e) {
+               errorOut(e, "ListSeqsCommand", "readAlign");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
diff --git a/listseqscommand.h b/listseqscommand.h
new file mode 100644 (file)
index 0000000..4e0febd
--- /dev/null
@@ -0,0 +1,37 @@
+#ifndef LISTSEQSCOMMAND_H
+#define LISTSEQSCOMMAND_H
+
+/*
+ *  listseqscommand.h
+ *  Mothur
+ *
+ *  Created by Sarah Westcott on 7/8/09.
+ *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
+ *
+ */
+
+#include "command.hpp"
+
+// Command behind list.seqs: gathers the sequence names found in one input file (fasta, name,
+// group or align report) and writes them, sorted, to a .accnos file.
+class ListSeqsCommand : public Command {
+       
+       public:
+       
+               ListSeqsCommand(string);        //takes the option string; sets abort on "help" or when validation fails
+               ~ListSeqsCommand(){};
+               int execute();  //reads the supplied file, sorts names and writes the .accnos file; returns 0
+               void help();    //prints the usage text
+               
+       private:
+               vector<string> names;  //sequence names collected by the read functions
+               string fastafile, namefile, groupfile, alignfile, inputFileName;  //the four possible inputs, plus the one execute() selected
+               bool abort;  //when true, execute() returns immediately
+               
+               void readFasta();  //appends the name of each Sequence read from fastafile
+               void readName();   //appends every comma-separated name in the second column of namefile
+               void readGroup();  //appends the first column of each record in groupfile
+               void readAlign();  //appends the first column of each record in alignfile, after the header
+               
+};
+
+#endif
+
index 4d22e98e2a2bb3eac4dcbbe98b81694b58a00bcb..5d9dc96bc65428391374abb47e3d8c61e114000b 100644 (file)
--- a/mothur.h
+++ b/mothur.h
@@ -231,7 +231,7 @@ inline void errorOut(exception& e, string object, string function) {
        
                mothurOut("Error: ");
                mothurOut(toString(e.what()));
-               mothurOut(" has occurred in the " + object + " class function " + function + "Please contact Pat Schloss at pschloss@microbio.umass.edu, and be sure to include the mothur.logFile with your inquiry.");
+               mothurOut(" has occurred in the " + object + " class function " + function + "Please contact Pat Schloss at pschloss@microbio.umass.edu, and be sure to include the mothur.logFile with your inquiry.");
                mothurOutEndLine();
        
 }
index d7be4dd57c272b69dc0aa09fd35b7d4c2f9d4f0b..aa7d3934ae11c3c4490ed3beaaceaf320ddfc6be 100644 (file)
@@ -41,7 +41,7 @@ void NastReport::print(){
        candidateReportFile << alignmentMethod << '\t' << candidateStartPosition << "\t" << candidateEndPosition << '\t';
        candidateReportFile << templateStartPosition << "\t" << templateEndPosition << '\t';
        candidateReportFile << pairwiseAlignmentLength << '\t' << totalGapsInQuery << '\t' << totalGapsInTemplate << '\t';
-       candidateReportFile << longestInsert << '\t';;
+       candidateReportFile << longestInsert << '\t';
        candidateReportFile << setprecision(2) << similarityToTemplate;
        
        candidateReportFile << endl;
index f1b43fc26553cd739d32d2baeff36bc457df006a..5cd9b99c5051be03b00757df6a99bfb479adabfe 100644 (file)
@@ -81,14 +81,19 @@ try {
                        rcd->registerDisplay(displays[i]);
                }
                
+               //if jumble is false all iters will be the same
+               if (globaldata->jumble == false)  {  nIters = 1;  }
+               
                for(int iter=0;iter<nIters;iter++){
                
                        for(int i=0;i<displays.size();i++){
                                displays[i]->init(label);                 
                        }
-
-                       //randomize the groups
-                       random_shuffle(lookup.begin(), lookup.end());
+                       
+                       if (globaldata->jumble == true)  {
+                               //randomize the groups
+                               random_shuffle(lookup.begin(), lookup.end());
+                       }
                        
                        //make merge the size of lookup[0]
                        SharedRAbundVector* merge = new SharedRAbundVector(lookup[0]->size());
index aff5cb63effc7e806a368d3bd08545d2301bd0ba..9507d5af84eeadec5046a4fe1967eda508913968 100644 (file)
@@ -5,6 +5,7 @@
 #include "raredisplay.h"
 #include "ordervector.hpp"
 #include "mothur.h"
+#include "globaldata.hpp"
 
 
 class Rarefact {
@@ -13,13 +14,14 @@ public:
        Rarefact(OrderVector* o, vector<Display*> disp) :
                        numSeqs(o->getNumSeqs()), order(o), displays(disp), label(o->getLabel())  {};
        Rarefact(vector<SharedRAbundVector*> shared, vector<Display*> disp) :
-                                        lookup(shared), displays(disp) {};
+                                        lookup(shared), displays(disp) {  globaldata = GlobalData::getInstance();  };
 
        ~Rarefact(){};
        void getCurve(int, int);
        void getSharedCurve(int, int);
        
 private:
+       GlobalData* globaldata;
        OrderVector* order;
        vector<Display*> displays;
        int numSeqs, numGroupComb;
index 71bfaf1d0f79b4866cebe71309e058a61bd2535e..5914f156a1a01831ce7cf6d2f3c52696c529f466 100644 (file)
@@ -29,7 +29,7 @@ RareFactSharedCommand::RareFactSharedCommand(string option){
                
                else {
                        //valid paramters for this command
-                       string Array[] =  {"iters","line","label","calc","groups"};
+                       string Array[] =  {"iters","line","label","calc","groups", "jumble"};
                        vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
                        
                        OptionParser parser(option);
@@ -92,6 +92,11 @@ RareFactSharedCommand::RareFactSharedCommand(string option){
                        temp = validParameter.validFile(parameters, "iters", false);                    if (temp == "not found") { temp = "1000"; }
                        convert(temp, nIters); 
                        
+                       temp = validParameter.validFile(parameters, "jumble", false);                   if (temp == "not found") { temp = "T"; }
+                       if (isTrue(temp)) { jumble = true; }
+                       else { jumble = false; }
+                       globaldata->jumble = jumble;
+                       
                        if (abort == false) {
                        
                                string fileNameRoot = getRootName(globaldata->inputFileName);
@@ -125,12 +130,12 @@ RareFactSharedCommand::RareFactSharedCommand(string option){
 void RareFactSharedCommand::help(){
        try {
                mothurOut("The rarefaction.shared command can only be executed after a successful read.otu command.\n");
-               mothurOut("The rarefaction.shared command parameters are label, line, iters, groups and calc.  No parameters are required, but you may not use \n");
+               mothurOut("The rarefaction.shared command parameters are label, line, iters, groups, jumble and calc.  No parameters are required, but you may not use \n");
                mothurOut("both the line and label parameters at the same time. The rarefaction command should be in the following format: \n");
-               mothurOut("rarefaction.shared(label=yourLabel, line=yourLines, iters=yourIters, calc=yourEstimators, groups=yourGroups).\n");
-               mothurOut("Example rarefaction.shared(label=unique-.01-.03, line=0-5-10, iters=10000, groups=B-C, calc=sharedobserved).\n");
+               mothurOut("rarefaction.shared(label=yourLabel, line=yourLines, iters=yourIters, calc=yourEstimators, jumble=yourJumble, groups=yourGroups).\n");
+               mothurOut("Example rarefaction.shared(label=unique-.01-.03, line=0-5-10, iters=10000, groups=B-C, jumble=T, calc=sharedobserved).\n");
                mothurOut("The default values for iters is 1000, freq is 100, and calc is sharedobserved which calculates the shared rarefaction curve for the observed richness.\n");
-               mothurOut("The default value for groups is all the groups in your groupfile.\n");
+               mothurOut("The default value for groups is all the groups in your groupfile, and jumble is true.\n");
                validCalculator->printCalc("sharedrarefaction", cout);
                mothurOut("The label and line parameters are used to analyze specific lines in your input.\n");
                mothurOut("The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed.  You must enter at least 2 valid groups.\n");
index c7290f4d85ec4f8bd671d4df914cd2738dd3c4c0..b0109579d81ba55ba14de4e92d9a4f1e3cc1efca 100644 (file)
@@ -49,7 +49,7 @@ private:
        int freq, nIters;
        string format;
 
-       bool abort, allLines;
+       bool abort, allLines, jumble;
        set<int> lines; //hold lines to be used
        set<string> labels; //holds labels to be used
        string line, label, calc, groups;
index aa1f1c7eae3c0b1cb08105a070a172169ecf320e..685f0726e6ec5042d32379874e54b83e8a6eff26 100644 (file)
@@ -53,7 +53,7 @@ Sequence::Sequence(ifstream& fastaFile){
                }
        }
 
-       if(sequence.find_first_of('-') != string::npos){        //      if there are any gaps in the sequence, assume that it is
+       if((sequence.find_first_of('-') != string::npos) || (sequence.find_first_of('.') != string::npos)) {    //      if there are any gaps in the sequence, assume that it is
                setAligned(sequence);                                                   //      an alignment file
        }
        setUnaligned(sequence);                                                         //      also set the unaligned sequence file