]> git.donarmstrong.com Git - mothur.git/commitdiff
started work on sffinfo command. fixed bug across all paralellized commands if the...
authorwestcott <westcott>
Wed, 14 Jul 2010 11:45:40 +0000 (11:45 +0000)
committerwestcott <westcott>
Wed, 14 Jul 2010 11:45:40 +0000 (11:45 +0000)
37 files changed:
Mothur.xcodeproj/project.pbxproj
aligncommand.cpp
aligncommand.h
alignmentdb.cpp
bayesian.cpp
bellerophon.cpp
bellerophon.h
chimera.cpp
chimera.h
chimeraccodecommand.cpp
chimeraccodecommand.h
chimeracheckcommand.cpp
chimeracheckcommand.h
chimerapintailcommand.cpp
chimerapintailcommand.h
chimeraslayercommand.cpp
chimeraslayercommand.h
classify.cpp
classifyseqscommand.cpp
classifyseqscommand.h
commandfactory.cpp
endiannessmacros.h [new file with mode: 0644]
filterseqscommand.cpp
filterseqscommand.h
makefile
mothur.cpp
mothur.h
phylosummary.cpp
phylotree.cpp
screenseqscommand.cpp
screenseqscommand.h
seqsummarycommand.cpp
seqsummarycommand.h
sffinfocommand.cpp [new file with mode: 0644]
sffinfocommand.h [new file with mode: 0644]
trimseqscommand.cpp
trimseqscommand.h

index 6770cecbd4a72f81f5f1a3a69910ab3b0b97d266..dae9fe3c9f0ba8bc02939e20c54fccc85e206da0 100644 (file)
@@ -26,6 +26,8 @@
                A72B3A63118B37FD004B9F8D /* phylodiversitycommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = phylodiversitycommand.cpp; sourceTree = "<group>"; };
                A72B3A7B118B4D1B004B9F8D /* phylodiversity.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = phylodiversity.h; sourceTree = "<group>"; };
                A72B3A7C118B4D1B004B9F8D /* phylodiversity.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = phylodiversity.cpp; sourceTree = "<group>"; };
+               A732505E11E49EF100484B90 /* sffinfocommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = sffinfocommand.h; sourceTree = "<group>"; };
+               A732505F11E49EF100484B90 /* sffinfocommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = sffinfocommand.cpp; sourceTree = "<group>"; };
                A73953DA11987ED100B0B160 /* chopseqscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = chopseqscommand.h; sourceTree = "<group>"; };
                A73953DB11987ED100B0B160 /* chopseqscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = chopseqscommand.cpp; sourceTree = "<group>"; };
                A747E79B1163442A00FB9042 /* chimeracheckcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = chimeracheckcommand.h; sourceTree = "<group>"; };
@@ -37,6 +39,7 @@
                A76AAD03117F322B003D8DA1 /* phylosummary.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = phylosummary.cpp; sourceTree = "<group>"; };
                A76C4A1011876BAF0009460B /* setlogfilecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = setlogfilecommand.h; sourceTree = SOURCE_ROOT; };
                A76C4A1111876BAF0009460B /* setlogfilecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = setlogfilecommand.cpp; sourceTree = SOURCE_ROOT; };
+               A780E6CB11E7745D00BB5718 /* endiannessmacros.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = endiannessmacros.h; sourceTree = "<group>"; };
                A78254461164D7790002E2DD /* chimerapintailcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = chimerapintailcommand.h; sourceTree = "<group>"; };
                A78254471164D7790002E2DD /* chimerapintailcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = chimerapintailcommand.cpp; sourceTree = "<group>"; };
                A7825502116519F70002E2DD /* chimerabellerophoncommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = chimerabellerophoncommand.h; sourceTree = "<group>"; };
                                A7DA203C113FECD400BF472F /* display.h */,
                                A7DA2042113FECD400BF472F /* dlibshuff.cpp */,
                                A7DA2043113FECD400BF472F /* dlibshuff.h */,
+                               A780E6CB11E7745D00BB5718 /* endiannessmacros.h */,
                                A7DA2048113FECD400BF472F /* engine.cpp */,
                                A7DA2049113FECD400BF472F /* engine.hpp */,
                                A7DA204C113FECD400BF472F /* fileoutput.cpp */,
                A7CB593E11402F110010EB83 /* commands */ = {
                        isa = PBXGroup;
                        children = (
-                               7EA299BA11E384940022D8D3 /* sensspeccommand.h */,
-                               7EA299BB11E384940022D8D3 /* sensspeccommand.cpp */,
                                A7DA202B113FECD400BF472F /* command.hpp */,
                                A7DA1FEF113FECD400BF472F /* aligncommand.h */,
                                A7DA1FEE113FECD400BF472F /* aligncommand.cpp */,
                                A7DA20F8113FECD400BF472F /* screenseqscommand.cpp */,
                                A7DA20FB113FECD400BF472F /* secondarystructurecommand.h */,
                                A7DA20FA113FECD400BF472F /* secondarystructurecommand.cpp */,
+                               7EA299BA11E384940022D8D3 /* sensspeccommand.h */,
+                               7EA299BB11E384940022D8D3 /* sensspeccommand.cpp */,
                                A7DA20FD113FECD400BF472F /* seqsummarycommand.h */,
                                A7DA20FC113FECD400BF472F /* seqsummarycommand.cpp */,
                                A7DA2103113FECD400BF472F /* setdircommand.h */,
                                A7DA2102113FECD400BF472F /* setdircommand.cpp */,
                                A76C4A1011876BAF0009460B /* setlogfilecommand.h */,
                                A76C4A1111876BAF0009460B /* setlogfilecommand.cpp */,
+                               A732505E11E49EF100484B90 /* sffinfocommand.h */,
+                               A732505F11E49EF100484B90 /* sffinfocommand.cpp */,
                                A7DA2110113FECD400BF472F /* sharedcommand.h */,
                                A7DA210F113FECD400BF472F /* sharedcommand.cpp */,
                                A71D924511AEB42400D00CBC /* splitabundcommand.h */,
index 16006f72e602e37270d3f2f8396245f921bd064f..8d1b40f37cd7eb9edfc708e3b9e856092de5eb5a 100644 (file)
@@ -386,7 +386,7 @@ int AlignCommand::execute(){
                                }
                        }
                        else{
-                               vector<int> positions;
+                               vector<unsigned long int> positions;
                                processIDS.resize(0);
                                
                                ifstream inFASTA;
@@ -396,7 +396,7 @@ int AlignCommand::execute(){
                                while(!inFASTA.eof()){
                                        input = getline(inFASTA);
                                        if (input.length() != 0) {
-                                               if(input[0] == '>'){    long int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1);  }
+                                               if(input[0] == '>'){    unsigned long int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1); }
                                        }
                                }
                                inFASTA.close();
@@ -406,7 +406,7 @@ int AlignCommand::execute(){
                                int numSeqsPerProcessor = numFastaSeqs / processors;
                                
                                for (int i = 0; i < processors; i++) {
-                                       long int startPos = positions[ i * numSeqsPerProcessor ];
+                                       unsigned long int startPos = positions[ i * numSeqsPerProcessor ];
                                        if(i == processors - 1){
                                                numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor;
                                        }
index b100287269f16781839c884bd83121c620e279ba..186ca63b15cbd09f881d29041de379dc5cf6b6b9 100644 (file)
@@ -26,9 +26,9 @@ public:
        
 private:
        struct linePair {
-               int start;
+               unsigned long int start;
                int numSeqs;
-               linePair(long int i, int j) : start(i), numSeqs(j) {}
+               linePair(unsigned long int i, int j) : start(i), numSeqs(j) {}
        };
        vector<int> processIDS;   //processid
        vector<linePair*> lines;
index 9b2977dd91d11604b990860f1b85299aff50b147..bf46bec168858853a58730fd80b4309798982fa6 100644 (file)
@@ -26,7 +26,7 @@ AlignmentDB::AlignmentDB(string fastaFileName, string s, int kmerSize, float gap
                \r
                #ifdef USE_MPI  \r
                        int pid, processors;\r
-                       vector<long> positions;\r
+                       vector<unsigned long int> positions;\r
                \r
                        MPI_Status status; \r
                        MPI_File inMPI;\r
index 17264776740a0e2f7e1ad2a797118bdb389be817..655d7027b5dabbe3a7ebeae94edbc95724d371c5 100644 (file)
@@ -368,8 +368,8 @@ void Bayesian::readProbFile(ifstream& in, ifstream& inNum, string inName, string
                #ifdef USE_MPI
                        
                        int pid, num, num2, processors;
-                       vector<long> positions;
-                       vector<long> positions2;
+                       vector<unsigned long int> positions;
+                       vector<unsigned long int> positions2;
                        
                        MPI_Status status; 
                        MPI_File inMPI;
index aa26d296e614adf04d69a2d73fb341dd798205a0..743c6d779c8042ea2d47690cdc77ec839bf21bab 100644 (file)
@@ -246,7 +246,7 @@ int Bellerophon::getChimeras() {
                numSeqsPerProcessor = iters / processors;
                
                //each process hits this only once
-               int startPos = pid * numSeqsPerProcessor;
+               unsigned long int startPos = pid * numSeqsPerProcessor;
                if(pid == processors - 1){
                                numSeqsPerProcessor = iters - pid * numSeqsPerProcessor;
                }
@@ -326,7 +326,7 @@ int Bellerophon::getChimeras() {
                                int numSeqsPerProcessor = iters / processors;
                                
                                for (int i = 0; i < processors; i++) {
-                                       int startPos = i * numSeqsPerProcessor;
+                                       unsigned long int startPos = i * numSeqsPerProcessor;
                                        if(i == processors - 1){
                                                numSeqsPerProcessor = iters - i * numSeqsPerProcessor;
                                        }
index 1333ec8f16d8d0985a2b289b65294ef1f94a95f3..41c10191a730fc7291644fd4c4fbaddc4b9231e8 100644 (file)
@@ -36,9 +36,9 @@ class Bellerophon : public Chimera {
                
        private:
                struct linePair {
-                       int start;
+                       unsigned long int start;
                        int num;
-                       linePair(long int i, int j) : start(i), num(j) {}
+                       linePair(unsigned long int i, int j) : start(i), num(j) {}
                };
                
                vector<linePair> lines;
index 3ab95892b9df8d1ad9692c2bca8b413412d3e0e5..685d6dbdbb990867dd4e4e49fe9b3aa4e46a270d 100644 (file)
@@ -104,7 +104,7 @@ vector<Sequence*> Chimera::readSeqs(string file) {
                
                #ifdef USE_MPI  
                        int pid, processors;
-                       vector<long> positions;
+                       vector<unsigned long int> positions;
                        int numSeqs;
                        int tag = 2001;
                
index 11bc435938b99d63df86958667b801f34318a63a..7120478f1df89cb9038ddea7f848cad376ca83d7 100644 (file)
--- a/chimera.h
+++ b/chimera.h
@@ -77,9 +77,9 @@ struct sim {
 };
 
 struct linePair {
-                       int start;
-                       int end;
-                       linePair(int i, int j) : start(i), end(j) {}
+                       unsigned long int start;
+                       unsigned long int end;
+                       linePair(unsigned long int i, unsigned long int j) : start(i), end(j) {}
                        linePair(){}
 };
 
index 3d3c9aea99401c492676e210173d7b397e205a70..578c7d69880f0e163685e6e4fe5782477e3a158e 100644 (file)
@@ -332,7 +332,7 @@ int ChimeraCcodeCommand::execute(){
                                        }
                                        
                                }else{
-                                       vector<int> positions;
+                                       vector<unsigned long int> positions;
                                        processIDS.resize(0);
                                        
                                        ifstream inFASTA;
@@ -342,7 +342,7 @@ int ChimeraCcodeCommand::execute(){
                                        while(!inFASTA.eof()){
                                                input = getline(inFASTA);
                                                if (input.length() != 0) {
-                                                       if(input[0] == '>'){    long int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1);  }
+                                                       if(input[0] == '>'){    unsigned long int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1); }
                                                }
                                        }
                                        inFASTA.close();
@@ -352,7 +352,7 @@ int ChimeraCcodeCommand::execute(){
                                        int numSeqsPerProcessor = numSeqs / processors;
                                        
                                        for (int i = 0; i < processors; i++) {
-                                               long int startPos = positions[ i * numSeqsPerProcessor ];
+                                               unsigned long int startPos = positions[ i * numSeqsPerProcessor ];
                                                if(i == processors - 1){
                                                        numSeqsPerProcessor = numSeqs - i * numSeqsPerProcessor;
                                                }
index c3d71c920ee2d84e8c6a2738e21cbcb840d4a8c5..c740a2f383cb5828e34c8bd2123e935f829de4a7 100644 (file)
@@ -28,9 +28,9 @@ public:
 private:
 
        struct linePair {
-               int start;
+               unsigned long int start;
                int numSeqs;
-               linePair(long int i, int j) : start(i), numSeqs(j) {}
+               linePair(unsigned long int i, int j) : start(i), numSeqs(j) {}
        };
        vector<int> processIDS;   //processid
        vector<linePair*> lines;
index 25f9c0fe4fa68425b9fcf6f407357fbc79027cfb..098896ef6d0ce514c394d677090ba2e38888de84 100644 (file)
@@ -338,7 +338,7 @@ int ChimeraCheckCommand::execute(){
                                        }
                                                                        
                                }else{
-                                       vector<int> positions;
+                                       vector<unsigned long int> positions;
                                        processIDS.resize(0);
                                        
                                        ifstream inFASTA;
@@ -348,7 +348,7 @@ int ChimeraCheckCommand::execute(){
                                        while(!inFASTA.eof()){
                                                input = getline(inFASTA);
                                                if (input.length() != 0) {
-                                                       if(input[0] == '>'){    long int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1);  }
+                                                       if(input[0] == '>'){    unsigned long int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1); }
                                                }
                                        }
                                        inFASTA.close();
@@ -358,7 +358,7 @@ int ChimeraCheckCommand::execute(){
                                        int numSeqsPerProcessor = numSeqs / processors;
                                        
                                        for (int j = 0; j < processors; j++) {
-                                               long int startPos = positions[ j * numSeqsPerProcessor ];
+                                               unsigned long int startPos = positions[ j * numSeqsPerProcessor ];
                                                if(j == processors - 1){
                                                        numSeqsPerProcessor = numSeqs - j * numSeqsPerProcessor;
                                                }
index b3b49d30bad83f82d5c48b3cf1acf82f80ac766e..7c4934470f448e8fee497fbbdafe5f5cda57115e 100644 (file)
@@ -28,9 +28,9 @@ public:
 private:
 
        struct linePair {
-               int start;
+               unsigned long int start;
                int numSeqs;
-               linePair(long int i, int j) : start(i), numSeqs(j) {}
+               linePair(unsigned long int i, int j) : start(i), numSeqs(j) {}
        };
        vector<int> processIDS;   //processid
        vector<linePair*> lines;
index 6cf581b9308f21159186ece928bcbdf8e4142803..8dffd3a222565f39f6b029e96ce65afd6c47a4e4 100644 (file)
@@ -359,7 +359,7 @@ int ChimeraPintailCommand::execute(){
                                        }
                                        
                                }else{
-                                       vector<int> positions;
+                                       vector<unsigned long int> positions;
                                        processIDS.resize(0);
                                        
                                        ifstream inFASTA;
@@ -369,7 +369,7 @@ int ChimeraPintailCommand::execute(){
                                        while(!inFASTA.eof()){
                                                input = getline(inFASTA);
                                                if (input.length() != 0) {
-                                                       if(input[0] == '>'){    long int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1);  }
+                                                       if(input[0] == '>'){    unsigned long int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1); }
                                                }
                                        }
                                        inFASTA.close();
@@ -379,7 +379,7 @@ int ChimeraPintailCommand::execute(){
                                        int numSeqsPerProcessor = numSeqs / processors;
                                        
                                        for (int i = 0; i < processors; i++) {
-                                               long int startPos = positions[ i * numSeqsPerProcessor ];
+                                               unsigned long int startPos = positions[ i * numSeqsPerProcessor ];
                                                if(i == processors - 1){
                                                        numSeqsPerProcessor = numSeqs - i * numSeqsPerProcessor;
                                                }
index 65e67fe0f92fb86df0abd757c4b06167dc024992..9fc6f437c6724f3424e412b3af5ad0d59c06066e 100644 (file)
@@ -29,9 +29,9 @@ public:
 private:
 
        struct linePair {
-               int start;
+               unsigned long int start;
                int numSeqs;
-               linePair(long int i, int j) : start(i), numSeqs(j) {}
+               linePair(unsigned long int i, int j) : start(i), numSeqs(j) {}
        };
        vector<int> processIDS;   //processid
        vector<linePair*> lines;
index ae98ff3075e691e0db5c5fcedd288f10da14cb9f..74fd3aa7cc2be4707146df3452415eb192a7de79 100644 (file)
@@ -357,7 +357,7 @@ int ChimeraSlayerCommand::execute(){
                                        }
                                        
                                }else{
-                                       vector<int> positions;
+                                       vector<unsigned long int> positions;
                                        processIDS.resize(0);
                                        
                                        ifstream inFASTA;
@@ -367,7 +367,7 @@ int ChimeraSlayerCommand::execute(){
                                        while(!inFASTA.eof()){
                                                input = getline(inFASTA);
                                                if (input.length() != 0) {
-                                                       if(input[0] == '>'){    long int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1);  }
+                                                       if(input[0] == '>'){    unsigned long int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1); }
                                                }
                                        }
                                        inFASTA.close();
@@ -377,7 +377,7 @@ int ChimeraSlayerCommand::execute(){
                                        int numSeqsPerProcessor = numSeqs / processors;
                                        
                                        for (int i = 0; i < processors; i++) {
-                                               long int startPos = positions[ i * numSeqsPerProcessor ];
+                                               unsigned long int startPos = positions[ i * numSeqsPerProcessor ];
                                                if(i == processors - 1){
                                                        numSeqsPerProcessor = numSeqs - i * numSeqsPerProcessor;
                                                }
index 7c20fb912bd321a121a410da45ed9abc87fce474..c2d9c45cfa931e9510981a4ff0c82c1d1e0fa890 100644 (file)
@@ -28,9 +28,9 @@ public:
 private:
 
        struct linePair {
-               int start;
+               unsigned long int start;
                int numSeqs;
-               linePair(long int i, int j) : start(i), numSeqs(j) {}
+               linePair(unsigned long int i, int j) : start(i), numSeqs(j) {}
        };
        vector<int> processIDS;   //processid
        vector<linePair*> lines;
index 2eb3ec1f800654ddd7e28cd8c72b2c5e996774d3..9a2de1fa46d78e99f8909abd729fb6f24e00bdf0 100644 (file)
@@ -28,7 +28,7 @@ void Classify::generateDatabaseAndNames(string tfile, string tempFile, string me
                m->mothurOut("Generating search database...    "); cout.flush();
 #ifdef USE_MPI 
                        int pid, processors;
-                       vector<long> positions;
+                       vector<unsigned long int> positions;
                        int tag = 2001;
                
                        MPI_Status status; 
@@ -183,7 +183,7 @@ int Classify::readTaxonomy(string file) {
 
 #ifdef USE_MPI 
                int pid, num, processors;
-               vector<long> positions;
+               vector<unsigned long int> positions;
                int tag = 2001;
                
                MPI_Status status; 
index 6c5d827d6553eac7f8ca8696091dd3ab7c87e4f3..2a210b01ad3a8fb0812c1877595028a851bdbee7 100644 (file)
@@ -478,7 +478,7 @@ int ClassifySeqsCommand::execute(){
                                driver(lines[0], newTaxonomyFile, tempTaxonomyFile, fastaFileNames[s]);
                        }
                        else{
-                               vector<int> positions;
+                               vector<unsigned long int> positions;
                                processIDS.resize(0);
                                
                                ifstream inFASTA;
@@ -488,7 +488,7 @@ int ClassifySeqsCommand::execute(){
                                while(!inFASTA.eof()){
                                        input = getline(inFASTA);
                                        if (input.length() != 0) {
-                                               if(input[0] == '>'){    int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1);       }
+                                               if(input[0] == '>'){    unsigned long int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1); }
                                        }
                                }
                                inFASTA.close();
@@ -496,9 +496,9 @@ int ClassifySeqsCommand::execute(){
                                numFastaSeqs = positions.size();
                                
                                int numSeqsPerProcessor = numFastaSeqs / processors;
-                               
+       
                                for (int i = 0; i < processors; i++) {
-                                       int startPos = positions[ i * numSeqsPerProcessor ];
+                                       unsigned long int startPos = positions[ i * numSeqsPerProcessor ];
                                        if(i == processors - 1){
                                                numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor;
                                        }
@@ -761,9 +761,9 @@ int ClassifySeqsCommand::driver(linePair* line, string taxFName, string tempTFNa
 
                for(int i=0;i<line->numSeqs;i++){
                        if (m->control_pressed) { return 0; }
-                       
-                       Sequence* candidateSeq = new Sequence(inFASTA);
-                       
+               
+                       Sequence* candidateSeq = new Sequence(inFASTA); gobble(inFASTA);
+               
                        if (candidateSeq->getName() != "") {
                                taxonomy = classify->getTaxonomy(candidateSeq);
                                
index 1dc4c1bf0353609f3a18bc547ba947587d965fa7..3294118343bbf763fa93dc56b6a05a3f8043c5ca 100644 (file)
@@ -35,9 +35,9 @@ public:
        
 private:
        struct linePair {
-               int start;
+               unsigned long int start;
                int numSeqs;
-               linePair(int i, int j) : start(i), numSeqs(j) {}
+               linePair(unsigned long int i, int j) : start(i), numSeqs(j) {}
        };
        vector<int> processIDS;   //processid
        vector<linePair*> lines;
index 725ce8cb7d2c4c9685e0f95ae6f67bc5cbb5fc83..fc20ee5abbcda080d7965c133199e9a86e4a74cb 100644 (file)
@@ -83,6 +83,7 @@
 #include "degapseqscommand.h"
 #include "getrelabundcommand.h"
 #include "sensspeccommand.h"
+#include "sffinfocommand.h"
 
 /*******************************************************/
 
@@ -171,6 +172,7 @@ CommandFactory::CommandFactory(){
        commands["classify.otu"]                = "classify.otu";
        commands["degap.seqs"]                  = "degap.seqs";
        commands["get.relabund"]                = "get.relabund";
+       commands["sffinfo"]                             = "sffinfo";
        commands["classify.seqs"]               = "MPIEnabled"; 
        commands["dist.seqs"]                   = "MPIEnabled";
        commands["filter.seqs"]                 = "MPIEnabled";
@@ -300,6 +302,7 @@ Command* CommandFactory::getCommand(string commandName, string optionString){
                else if(commandName == "degap.seqs")                    {       command = new DegapSeqsCommand(optionString);                           }
                else if(commandName == "get.relabund")                  {       command = new GetRelAbundCommand(optionString);                         }
                else if(commandName == "sens.spec")                             {       command = new SensSpecCommand(optionString);                            }
+               else if(commandName == "sffinfo")                               {       command = new SffInfoCommand(optionString);                                     }
                else                                                                                    {       command = new NoCommand(optionString);                                          }
 
                return command;
diff --git a/endiannessmacros.h b/endiannessmacros.h
new file mode 100644 (file)
index 0000000..07981ba
--- /dev/null
@@ -0,0 +1,189 @@
+#ifndef EDIANNESSMACROS_H
+#define EDIANNESSMACROS_H
+
+/*
+ *   endiannessmacros.h
+ *  Mothur
+ *
+ *  Created by westcott on 7/9/10.
+ *  Copyright 2010 Schloss Lab. All rights reserved.
+ *
+ */
+
+/*********************************************************************/
+// The following is copied from the staden io_lib-1.12.4 os.h
+/*
+ * Author: 
+ *         MRC Laboratory of Molecular Biology
+ *        Hills Road
+ *        Cambridge CB2 2QH
+ *        United Kingdom
+ *
+ * Description: operating system specific type definitions
+ *
+ */
+
+#include <limits.h>
+#include <inttypes.h>
+
+/*-----------------------------------------------------------------------------
+ * Detection of endianness. The main part of this is done in autoconf, but
+ * for the case of MacOS FAT binaries we fall back on auto-sensing based on
+ * processor type too.
+ */
+
+/* Set by autoconf */
+#define SP_LITTLE_ENDIAN
+
+/* Mac FAT binaries or unknown. Auto detect based on CPU type */
+#if !defined(SP_BIG_ENDIAN) && !defined(SP_LITTLE_ENDIAN)
+
+/*
+ * x86 equivalents
+ */
+#if defined(__i386__) || defined(__i386) || defined(__amd64__) || defined(__amd64) || defined(__x86_64__) || defined(__x86_64) || defined(__i686__) || defined(__i686)
+#  if defined(SP_BIG_ENDIAN)
+#    undef SP_BIG_ENDIAN
+#  endif
+#  define SP_LITTLE_ENDIAN
+#endif
+
+/*
+ * DEC Alpha
+ */
+#if defined(__alpha__) || defined(__alpha)
+#  if defined(SP_LITTLE_ENDIAN)
+#    undef SP_LITTLE_ENDIAN
+#  endif
+#  define SP_BIG_ENDIAN
+#endif
+
+/*
+ * SUN Sparc
+ */
+#if defined(__sparc__) || defined(__sparc)
+#  if defined(SP_LITTLE_ENDIAN)
+#    undef SP_LITTLE_ENDIAN
+#  endif
+#  define SP_BIG_ENDIAN
+#endif
+
+/*
+ * PowerPC
+ */
+#if defined(__ppc__) || defined(__ppc)
+#  if defined(SP_LITTLE_ENDIAN)
+#    undef SP_LITTLE_ENDIAN
+#  endif
+#  define SP_BIG_ENDIAN
+#endif
+
+/* Some catch-alls */
+#if defined(__LITTLE_ENDIAN__) || defined(__LITTLEENDIAN__)
+#    define SP_LITTLE_ENDIAN
+#endif
+
+#if defined(__BIG_ENDIAN__) || defined(__BIGENDIAN__)
+#    define SP_BIG_ENDIAN
+#endif
+
+#if defined(SP_BIG_ENDIAN) && defined(SP_LITTLE_ENDIAN)
+#    error Both BIG and LITTLE endian defined. Fix os.h and/or Makefile
+#endif
+
+#if !defined(SP_BIG_ENDIAN) && !defined(SP_LITTLE_ENDIAN)
+#    error Neither BIG nor LITTLE endian defined. Fix os.h and/or Makefile
+#endif
+
+#endif
+
+/*-----------------------------------------------------------------------------
+ * Byte swapping macros
+ */
+
+/*
+ * Our new swap runs at the same speed on Ultrix, but substantially faster
+ * (300% for swap_int4, ~50% for swap_int2) on an Alpha (due to the lack of
+ * decent 'char' support).
+ *
+ * They also have the ability to swap in situ (src == dst). Newer code now
+ * relies on this so don't change back!
+ */
+#define iswap_int8(x) \
+    (((x & 0x00000000000000ffLL) << 56) + \
+     ((x & 0x000000000000ff00LL) << 40) + \
+     ((x & 0x0000000000ff0000LL) << 24) + \
+     ((x & 0x00000000ff000000LL) <<  8) + \
+     ((x & 0x000000ff00000000LL) >>  8) + \
+     ((x & 0x0000ff0000000000LL) >> 24) + \
+     ((x & 0x00ff000000000000LL) >> 40) + \
+     ((x & 0xff00000000000000LL) >> 56))
+
+#define iswap_int4(x) \
+    (((x & 0x000000ff) << 24) + \
+     ((x & 0x0000ff00) <<  8) + \
+     ((x & 0x00ff0000) >>  8) + \
+     ((x & 0xff000000) >> 24))
+
+#define iswap_int2(x) \
+    (((x & 0x00ff) << 8) + \
+     ((x & 0xff00) >> 8))
+
+#define swap_int8(src, dst) ((dst) = iswap_int8(src))
+#define swap_int4(src, dst) ((dst) = iswap_int4(src))
+#define swap_int2(src, dst) ((dst) = iswap_int2(src))
+
+
+/*
+ * Linux systems may use byteswap.h to get assembly versions of byte-swap
+ * on intel systems. This can be as trivial as the bswap opcode, which works
+ * out at over 2-times faster than iswap_int4 above.
+ */
+#if 0
+#if defined(__linux__)
+#    include <byteswap.h>
+#    undef iswap_int8
+#    undef iswap_int4
+#    undef iswap_int2
+#    define iswap_int8 bswap_64
+#    define iswap_int4 bswap_32
+#    define iswap_int2 bswap_16
+#endif
+#endif
+
+
+/*
+ * Macros to specify that data read in is of a particular endianness.
+ * The macros here swap to the appropriate order for the particular machine
+ * running the macro and return the new answer. These may also be used when
+ * writing to a file to specify that we wish to write in (eg) big endian
+ * format.
+ *
+ * This leads to efficient code as most of the time these macros are
+ * trivial.
+ */
+#ifdef SP_BIG_ENDIAN
+#define be_int8(x) (x)
+#define be_int4(x) (x)
+#define be_int2(x) (x)
+#define be_int1(x) (x)
+
+#define le_int8(x) iswap_int8((x))
+#define le_int4(x) iswap_int4((x))
+#define le_int2(x) iswap_int2((x))
+#define le_int1(x) (x)
+#endif
+
+#ifdef SP_LITTLE_ENDIAN
+#define be_int8(x) iswap_int8((x))
+#define be_int4(x) iswap_int4((x))
+#define be_int2(x) iswap_int2((x))
+#define be_int1(x) (x)
+
+#define le_int8(x) (x)
+#define le_int4(x) (x)
+#define le_int2(x) (x)
+#define le_int1(x) (x)
+#endif
+
+#endif
\ No newline at end of file
index 33790fe97632525cbb6c155ab953da531f770078..fada19aeba83054f7d41b42d2827147e86215414 100644 (file)
@@ -872,7 +872,7 @@ int FilterSeqsCommand::createProcessesCreateFilter(Filters& F, string filename)
 int FilterSeqsCommand::setLines(string filename) {
        try {
                
-               vector<long int> positions;
+               vector<unsigned long int> positions;
                bufferSizes.clear();
                
                ifstream inFASTA;
@@ -883,7 +883,7 @@ int FilterSeqsCommand::setLines(string filename) {
                        input = getline(inFASTA);
 
                        if (input.length() != 0) {
-                               if(input[0] == '>'){ long int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1);     }
+                               if(input[0] == '>'){ unsigned long int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1);    }
                        }
                }
                inFASTA.close();
@@ -891,7 +891,7 @@ int FilterSeqsCommand::setLines(string filename) {
                int numFastaSeqs = positions.size();
        
                FILE * pFile;
-               long size;
+               unsigned long int size;
                
                //get num bytes in file
                pFile = fopen (filename.c_str(),"rb");
@@ -908,12 +908,12 @@ int FilterSeqsCommand::setLines(string filename) {
                
                for (int i = 0; i < processors; i++) {
 
-                       long int startPos = positions[ i * numSeqsPerProcessor ];
+                       unsigned long int startPos = positions[ i * numSeqsPerProcessor ];
                        if(i == processors - 1){
                                numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor;
                                bufferSizes.push_back(size - startPos);
                        }else{  
-                               long int myEnd = positions[ (i+1) * numSeqsPerProcessor ];
+                               unsigned long int myEnd = positions[ (i+1) * numSeqsPerProcessor ];
                                bufferSizes.push_back(myEnd-startPos);
                        }
                        lines.push_back(new linePair(startPos, numSeqsPerProcessor));
index 848d08a30e29c665f028a31c9f9dd97e0a113c5d..2ec152bb557eb27970c3660136b1b879f658949d 100644 (file)
@@ -24,9 +24,9 @@ public:
        \r
 private:\r
        struct linePair {\r
-               int start;\r
+               unsigned long int start;\r
                int num;\r
-               linePair(long int i, long int j) : start(i), num(j) {}\r
+               linePair(unsigned long int i, long int j) : start(i), num(j) {}\r
        };\r
        vector<linePair*> lines;\r
        vector<int> processIDS;\r
index 33cce4d996c4e9a586be09fa9096366658d1cb0f..efac3fb5b84fc3bde1178520a8ca15009b1ef3dd 100644 (file)
--- a/makefile
+++ b/makefile
@@ -23,6 +23,7 @@ endif
 \r
 ifeq  ($(strip $(64BIT_VERSION)),yes)\r
     TARGET_ARCH += -arch x86_64\r
+        CXXFLAGS += -DBIT_VERSION\r
 endif\r
 \r
 # if you do not want to use the readline library, set this to no.\r
index d5f838745a3f65b13e1d9dcb40c0829f3bb1fc7c..1e5b41bd7ca20879113e604dac03e9ffd7e6ab36 100644 (file)
@@ -72,6 +72,14 @@ int main(int argc, char *argv[]){
                        m->mothurOutEndLine(); m->mothurOutEndLine();\r
                #endif\r
                \r
+               #ifdef BIT_VERSION\r
+                       m->mothurOutJustToLog("Running 64Bit Version");\r
+                       m->mothurOutEndLine(); m->mothurOutEndLine();\r
+               #else\r
+                       m->mothurOutJustToLog("Running 32Bit Version");\r
+                       m->mothurOutEndLine(); m->mothurOutEndLine();\r
+               #endif\r
+               \r
                //header\r
                m->mothurOut("mothur v.1.11.0");\r
                m->mothurOutEndLine();          \r
index 0a73f1cf65ae4c0f24bb04d556ef4aa124f29326..1434482e9a9785485e0f898f88e352e6f7757659 100644 (file)
--- a/mothur.h
+++ b/mothur.h
@@ -992,9 +992,9 @@ inline string sortFile(string distFile, string outputDir){
        }       
 }
 /**************************************************************************************************/
-inline vector<long> setFilePosFasta(string filename, int& num) {
+inline vector<unsigned long int> setFilePosFasta(string filename, int& num) {
 
-                       vector<long> positions;
+                       vector<unsigned long int> positions;
                        ifstream inFASTA;
                        openInputFile(filename, inFASTA);
                                
@@ -1002,7 +1002,7 @@ inline vector<long> setFilePosFasta(string filename, int& num) {
                        while(!inFASTA.eof()){
                                input = getline(inFASTA); gobble(inFASTA);
                                if (input.length() != 0) {
-                                       if(input[0] == '>'){    long pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1);      }
+                                       if(input[0] == '>'){    unsigned long int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1); }
                                }
                        }
                        inFASTA.close();
@@ -1021,7 +1021,7 @@ inline vector<long> setFilePosFasta(string filename, int& num) {
                                fclose (pFile);
                        }*/
                        
-                       long size = positions[(positions.size()-1)];
+                       unsigned long int size = positions[(positions.size()-1)];
                        ifstream in;
                        openInputFile(filename, in);
                        
@@ -1038,18 +1038,18 @@ inline vector<long> setFilePosFasta(string filename, int& num) {
                        return positions;
 }
 /**************************************************************************************************/
-inline vector<long> setFilePosEachLine(string filename, int& num) {
+inline vector<unsigned long int> setFilePosEachLine(string filename, int& num) {
 
-                       vector<long> positions;
+                       vector<unsigned long int> positions;
                        ifstream in;
                        openInputFile(filename, in);
                                
                        string input;
                        while(!in.eof()){
-                               long lastpos = in.tellg();
+                               unsigned long int lastpos = in.tellg();
                                input = getline(in); gobble(in);
                                if (input.length() != 0) {
-                                       long pos = in.tellg(); 
+                                       unsigned long int pos = in.tellg(); 
                                        if (pos != -1) { positions.push_back(pos - input.length() - 1); }
                                        else {  positions.push_back(lastpos);  }
                                }
@@ -1059,7 +1059,7 @@ inline vector<long> setFilePosEachLine(string filename, int& num) {
                        num = positions.size();
                
                        FILE * pFile;
-                       long size;
+                       unsigned long int size;
                
                        //get num bytes in file
                        pFile = fopen (filename.c_str(),"rb");
index 84b0861e38cc450b50b481e7efdaaa76e0831e4e..004c131d6779de0ab5d47df75308ad8b835fde2f 100644 (file)
@@ -172,6 +172,9 @@ void PhyloSummary::print(ofstream& out){
                //print labels
                out << "taxlevel\t rankID\t taxon\t daughterlevels\t total\t";
                if (groupmap != NULL) {
+                       //so the labels match the counts below, since the map sorts them automatically...
+                       sort(groupmap->namesOfGroups.begin(), groupmap->namesOfGroups.end());
+                       
                        for (int i = 0; i < groupmap->namesOfGroups.size(); i++) {
                                out << groupmap->namesOfGroups[i] << '\t';
                        }
index d6c740ae791b494ea64951a03405b38535593152..d085f6c52ce3c733c7666d1c7df28e95a0575647 100644 (file)
@@ -125,7 +125,7 @@ PhyloTree::PhyloTree(string tfile){
                
                #ifdef USE_MPI
                        int pid, num, processors;
-                       vector<long> positions;
+                       vector<unsigned long int> positions;
                        
                        MPI_Status status; 
                        MPI_File inMPI;
index 66e0cb07e066c27724480f1f1c6c523ff755e8d6..09b82cebe3138f44533aa477b0d24fa9165c0e08 100644 (file)
@@ -305,7 +305,7 @@ int ScreenSeqsCommand::execute(){
                                if (m->control_pressed) { remove(goodSeqFile.c_str()); remove(badSeqFile.c_str()); return 0; }
                                
                        }else{
-                               vector<int> positions;
+                               vector<unsigned long int> positions;
                                processIDS.resize(0);
                                
                                ifstream inFASTA;
@@ -315,21 +315,22 @@ int ScreenSeqsCommand::execute(){
                                while(!inFASTA.eof()){
                                        input = getline(inFASTA);
                                        if (input.length() != 0) {
-                                               if(input[0] == '>'){    long int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1);  }
+                                               if(input[0] == '>'){    unsigned long int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1); }
                                        }
                                }
                                inFASTA.close();
                                
                                numFastaSeqs = positions.size();
-                               
+                       
                                int numSeqsPerProcessor = numFastaSeqs / processors;
-                               
+                                       
                                for (int i = 0; i < processors; i++) {
-                                       long int startPos = positions[ i * numSeqsPerProcessor ];
+                                       unsigned long int startPos = positions[ i * numSeqsPerProcessor ];
                                        if(i == processors - 1){
                                                numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor;
                                        }
                                        lines.push_back(new linePair(startPos, numSeqsPerProcessor));
+               
                                }
                                
                                createProcesses(goodSeqFile, badSeqFile, badAccnosFile, fastafile, badSeqNames); 
@@ -708,7 +709,7 @@ int ScreenSeqsCommand::driver(linePair* line, string goodFName, string badFName,
                inFASTA.seekg(line->start);
        
                for(int i=0;i<line->numSeqs;i++){
-                       
+               
                        if (m->control_pressed) {  return 0; }
                        
                        Sequence currSeq(inFASTA);
index f1b72051250d07a389ed332b0690cbcda3b807c0..314e6617f6b2ec76033636ec8a05f722b3274e49 100644 (file)
@@ -23,9 +23,9 @@ public:
 private:
 
        struct linePair {
-               int start;
+               unsigned long int start;
                int numSeqs;
-               linePair(long int i, int j) : start(i), numSeqs(j) {}
+               linePair(unsigned long int i, int j) : start(i), numSeqs(j) {}
        };
        vector<int> processIDS;   //processid
        vector<linePair*> lines;
index 7226f9ddac16e61ebc2239ff4d868474b990ec25..66405394859d182134884e4141b53d377c7ae245 100644 (file)
@@ -453,7 +453,7 @@ int SeqSummaryCommand::createProcessesCreateSummary(vector<int>& startPosition,
 int SeqSummaryCommand::setLines(string filename) {
        try {
                
-               vector<long int> positions;
+               vector<unsigned long int> positions;
                
                ifstream inFASTA;
                openInputFile(filename, inFASTA);
@@ -463,7 +463,7 @@ int SeqSummaryCommand::setLines(string filename) {
                        input = getline(inFASTA);
 
                        if (input.length() != 0) {
-                               if(input[0] == '>'){ long int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1);     }
+                               if(input[0] == '>'){ unsigned long int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1);    }
                        }
                }
                inFASTA.close();
@@ -471,7 +471,7 @@ int SeqSummaryCommand::setLines(string filename) {
                int numFastaSeqs = positions.size();
        
                FILE * pFile;
-               long size;
+               unsigned long int size;
                
                //get num bytes in file
                pFile = fopen (filename.c_str(),"rb");
@@ -486,11 +486,11 @@ int SeqSummaryCommand::setLines(string filename) {
                
                for (int i = 0; i < processors; i++) {
 
-                       long int startPos = positions[ i * numSeqsPerProcessor ];
+                       unsigned long int startPos = positions[ i * numSeqsPerProcessor ];
                        if(i == processors - 1){
                                numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor;
                        }else{  
-                               long int myEnd = positions[ (i+1) * numSeqsPerProcessor ];
+                               unsigned long int myEnd = positions[ (i+1) * numSeqsPerProcessor ];
                        }
                        lines.push_back(new linePair(startPos, numSeqsPerProcessor));
                }
index 58fbc0a8b4e9dddec4745f65882ac9b6b6dd8002..3e576a0b8aa276c6134cefb83fea9831d0520661 100644 (file)
@@ -26,9 +26,9 @@ private:
        int processors;
        
        struct linePair {
-               int start;
+               unsigned long int start;
                int num;
-               linePair(long int i, long int j) : start(i), num(j) {}
+               linePair(unsigned long int i, long int j) : start(i), num(j) {}
        };
        vector<linePair*> lines;
        vector<int> processIDS;
diff --git a/sffinfocommand.cpp b/sffinfocommand.cpp
new file mode 100644 (file)
index 0000000..2d6af87
--- /dev/null
@@ -0,0 +1,469 @@
+/*
+ *  sffinfocommand.cpp
+ *  Mothur
+ *
+ *  Created by westcott on 7/7/10.
+ *  Copyright 2010 Schloss Lab. All rights reserved.
+ *
+ */
+
+#include "sffinfocommand.h"
+#include "endiannessmacros.h"
+
+//**********************************************************************************************************************
+
+SffInfoCommand::SffInfoCommand(string option)  {
+       try {
+               abort = false;
+               
+               //allow user to run help
+               if(option == "help") { help(); abort = true; }
+               
+               else {
+                       //valid paramters for this command
+                       string Array[] =  {"sff","outputdir","inputdir", "outputdir"};
+                       vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
+                       
+                       OptionParser parser(option);
+                       map<string, string> parameters = parser.getParameters();
+                       
+                       ValidParameters validParameter;
+                       //check to make sure all parameters are valid for command
+                       for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
+                               if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
+                       }
+                       
+                       //if the user changes the output directory command factory will send this info to us in the output parameter 
+                       outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
+                       
+                       //if the user changes the input directory command factory will send this info to us in the output parameter 
+                       string inputDir = validParameter.validFile(parameters, "inputdir", false);        if (inputDir == "not found"){ inputDir = "";          }
+
+                       sffFilename = validParameter.validFile(parameters, "sff", false);
+                       if (sffFilename == "not found") { m->mothurOut("sff is a required parameter for the sffinfo command."); m->mothurOutEndLine(); abort = true;  }
+                       else { 
+                               splitAtDash(sffFilename, filenames);
+                               
+                               //go through files and make sure they are good, if not, then disregard them
+                               for (int i = 0; i < filenames.size(); i++) {
+                                       if (inputDir != "") {
+                                               string path = hasPath(filenames[i]);
+                                               //if the user has not given a path then, add inputdir. else leave path alone.
+                                               if (path == "") {       filenames[i] = inputDir + filenames[i];         }
+                                       }
+       
+                                       ifstream in;
+                                       int ableToOpen = openInputFile(filenames[i], in);
+                                       in.close();
+                                       
+                                       if (ableToOpen == 1) { 
+                                               m->mothurOut(filenames[i] + " will be disregarded."); m->mothurOutEndLine(); 
+                                               //erase from file list
+                                               filenames.erase(filenames.begin()+i);
+                                               i--;
+                                       }
+                               }
+                               
+                               //make sure there is at least one valid file left
+                               if (filenames.size() == 0) { m->mothurOut("no valid files."); m->mothurOutEndLine(); abort = true; }
+                       }
+               }
+       }
+       catch(exception& e) {
+               m->errorOut(e, "SffInfoCommand", "SffInfoCommand");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+
+void SffInfoCommand::help(){
+       try {
+               m->mothurOut("The sffinfo command reads a sff file and outputs a .sff.txt file.\n");
+               
+               m->mothurOut("Example sffinfo(sff=...).\n");
+               m->mothurOut("Note: No spaces between parameter labels (i.e. sff), '=' and parameters (i.e.yourSffFileName).\n\n");
+       }
+       catch(exception& e) {
+               m->errorOut(e, "SffInfoCommand", "help");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+
+SffInfoCommand::~SffInfoCommand(){}
+
+//**********************************************************************************************************************
+int SffInfoCommand::execute(){
+       try {
+               
+               if (abort == true) { return 0; }
+               
+               for (int s = 0; s < filenames.size(); s++) {
+                       
+                       if (m->control_pressed) {  for (int i = 0; i < outputNames.size(); i++) {       remove(outputNames[i].c_str());         } return 0; }
+                       
+                       m->mothurOut("Extracting info from " + filenames[s] + " ..." ); m->mothurOutEndLine();
+                       
+                       if (outputDir == "") {  outputDir += hasPath(filenames[s]); }
+                       string outputFileName = outputDir + getRootName(getSimpleName(filenames[s])) + "sff.txt";
+                                               
+                       extractSffInfo(filenames[s], outputFileName);
+                       
+                       outputNames.push_back(outputFileName);
+
+                       m->mothurOut("Done."); m->mothurOutEndLine();
+               }
+               
+               if (m->control_pressed) {  for (int i = 0; i < outputNames.size(); i++) {       remove(outputNames[i].c_str());         } return 0; }
+               
+               //report output filenames
+               m->mothurOutEndLine();
+               m->mothurOut("Output File Names: "); m->mothurOutEndLine();
+               for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
+               m->mothurOutEndLine();
+
+               return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "SffInfoCommand", "execute");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+int SffInfoCommand::extractSffInfo(string input, string output){
+       try {
+               ofstream out;
+               openOutputFile(output, out);
+               
+               ifstream in;
+               in.open(input.c_str(), ios::binary);
+               
+               CommonHeader* header = readCommonHeader(in);
+               
+               //cout << "magic = " << header->magicNumber << endl << "version = " << header->version << endl << "index offset = " << header->indexOffset << endl << "index length = "<< header->indexLength << endl << "numreads = " << header->numReads << endl << "header length = " << header->headerLength << endl << "key length = " << header->keyLength << endl;
+//cout << "numflowreads = "<< header->numFlowsPerRead << endl << "flow format code = "<< header->flogramFormatCode << endl << "flow chars = " << header->flowChars << endl << "key sequence = " << header->keySequence << endl << endl;                
+               cout << in.tellg() << endl;
+               //read through the sff file
+               while (!in.eof()) {
+               
+                       //read header
+                       Header* readheader = readHeader(in);
+                       
+                       //read data
+                       seqRead* read = readSeqData(in, header->numFlowsPerRead, readheader->numBases);
+                       
+                       cout << in.tellg() << endl;
+                       
+                       //print common header
+                       printCommonHeader(out, header, true);
+                       
+                       //print header
+                       printHeader(out, readheader, true);
+                       
+                       //print data
+                       printSeqData(out, read, true);
+       
+               }
+               
+               
+               in.close();
+               out.close();
+               
+               return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "SffInfoCommand", "extractSffInfo");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+CommonHeader* SffInfoCommand::readCommonHeader(ifstream& in){
+       try {
+               CommonHeader* header = new CommonHeader();
+       
+               if (!in.eof()) {
+                       string tempBuf = "";
+                       
+                       //read magic number
+                       char* buffer = new char(sizeof(header->magicNumber));
+                       in.read(buffer, sizeof(header->magicNumber));
+                       header->magicNumber = be_int4(*(uint32_t *)(buffer));
+                       delete[] buffer;
+                       
+                       //read version
+                       header->version = new char(4);
+                       in.read(header->version, 4);
+                       tempBuf = buffer;
+                       if (tempBuf.length() > 4) { tempBuf = tempBuf.substr(0, 4); strcpy(header->version, tempBuf.c_str());  }
+                       
+                       //read offset
+                       buffer = new char(sizeof(header->indexOffset));
+                       in.read(buffer, sizeof(header->indexOffset));
+                       header->indexOffset =  be_int8(*(uint64_t *)(buffer));
+                       delete[] buffer;
+                       
+                       //read index length
+                       buffer = new char(sizeof(header->indexLength));
+                       in.read(buffer, sizeof(header->indexLength));
+                       header->indexLength =  be_int4(*(uint32_t *)(buffer));
+                       delete[] buffer;
+                       
+                       //read num reads
+                       buffer = new char(sizeof(header->numReads));
+                       in.read(buffer, sizeof(header->numReads));
+                       header->numReads =  be_int4(*(uint32_t *)(buffer));
+                       delete[] buffer;        
+                       
+                       //read header length
+                       buffer = new char(sizeof(header->headerLength));
+                       in.read(buffer, sizeof(header->headerLength));
+                       header->headerLength =  be_int2(*(uint16_t *)(buffer));
+                       delete[] buffer;
+                       
+                       //read key length
+                       buffer = new char(sizeof(header->keyLength));
+                       in.read(buffer, sizeof(header->keyLength));
+                       header->keyLength = be_int2(*(uint16_t *)(buffer));
+                       delete[] buffer;
+
+                       //read number of flow reads
+                       buffer = new char(sizeof(header->numFlowsPerRead));
+                       in.read(buffer, sizeof(header->numFlowsPerRead));
+                       header->numFlowsPerRead =  be_int2(*(uint16_t *)(buffer));
+                       delete[] buffer;
+                       
+                       //read format code
+                       buffer = new char(sizeof(header->flogramFormatCode));
+                       in.read(buffer, sizeof(header->flogramFormatCode));
+                       header->flogramFormatCode = be_int1(*(uint8_t *)(buffer));
+                       delete[] buffer;
+                       
+                       //read flow chars
+                       header->flowChars = new char(header->numFlowsPerRead);
+                       in.read(header->flowChars, header->numFlowsPerRead);
+                       tempBuf = buffer;
+                       if (tempBuf.length() > header->numFlowsPerRead) { tempBuf = tempBuf.substr(0, header->numFlowsPerRead); strcpy(header->flowChars, tempBuf.c_str());  }
+                       
+                       //read key
+                       header->keySequence = new char(header->keyLength);
+                       in.read(header->keySequence, header->keyLength);
+                       tempBuf = header->keySequence;
+                       if (tempBuf.length() > header->keyLength) { tempBuf = tempBuf.substr(0, header->keyLength); strcpy(header->keySequence, tempBuf.c_str());  }
+                       
+               }else{
+                       m->mothurOut("Error reading sff common header."); m->mothurOutEndLine();
+               }
+
+               return header;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "SffInfoCommand", "readCommonHeader");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+Header* SffInfoCommand::readHeader(ifstream& in){
+       try {
+               Header* header = new Header();
+       
+               if (!in.eof()) {
+                       string tempBuf = "";
+                       
+                       //read header length
+                       char* buffer = new char(sizeof(header->headerLength));
+                       in.read(buffer, sizeof(header->headerLength));
+                       header->headerLength = be_int2(*(unsigned short *)(buffer));
+                       delete[] buffer;
+                                               
+                       //read name length
+                       buffer = new char(sizeof(header->nameLength));
+                       in.read(buffer, sizeof(header->nameLength));
+                       header->nameLength = be_int2(*(unsigned short *)(buffer));
+                       delete[] buffer;
+
+                       //read num bases
+                       buffer = new char(sizeof(header->numBases));
+                       in.read(buffer, sizeof(header->numBases));
+                       header->numBases =  be_int4(*(unsigned int *)(buffer));
+                       delete[] buffer;
+                       
+                       //read clip qual left
+                       buffer = new char(sizeof(header->clipQualLeft));
+                       in.read(buffer, sizeof(header->clipQualLeft));
+                       header->clipQualLeft =  be_int2(*(unsigned short *)(buffer));
+                       delete[] buffer;
+                       
+                       //read clip qual right
+                       buffer = new char(sizeof(header->clipQualRight));
+                       in.read(buffer, sizeof(header->clipQualRight));
+                       header->clipQualRight =  be_int2(*(unsigned short *)(buffer));
+                       delete[] buffer;
+                       
+                       //read clipAdapterLeft
+                       buffer = new char(sizeof(header->clipAdapterLeft));
+                       in.read(buffer, sizeof(header->clipAdapterLeft));
+                       header->clipAdapterLeft = be_int2(*(unsigned short *)(buffer));
+                       delete[] buffer;
+
+                       //read clipAdapterRight
+                       buffer = new char(sizeof(header->clipAdapterRight));
+                       in.read(buffer, sizeof(header->clipAdapterRight));
+                       header->clipAdapterRight =  be_int2(*(unsigned short *)(buffer));
+                       delete[] buffer;
+                       
+                       //read name
+                       header->name = new char(header->nameLength);
+                       in.read(header->name, header->nameLength);
+                       tempBuf = header->name;
+                       if (tempBuf.length() > header->nameLength) { tempBuf = tempBuf.substr(0, header->nameLength); strcpy(header->name, tempBuf.c_str());  }
+       
+               }else{
+                       m->mothurOut("Error reading sff header info."); m->mothurOutEndLine();
+               }
+
+               return header;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "SffInfoCommand", "readHeader");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+seqRead* SffInfoCommand::readSeqData(ifstream& in, int numFlowReads, int numBases){
+       try {
+               seqRead* read = new seqRead();
+       
+               if (!in.eof()) {
+                                               
+                       string tempBuf = "";
+                       char* buffer;
+                       
+                       //read flowgram
+                       read->flowgram.resize(numFlowReads);
+                       for (int i = 0; i < numFlowReads; i++) {  
+                               buffer = new char((sizeof(unsigned short)));
+                               in.read(buffer, (sizeof(unsigned short)));
+                               read->flowgram[i] = be_int2(*(unsigned short *)(buffer));
+                               delete[] buffer;
+                       }
+                       
+                       //read flowgram
+                       read->flowIndex.resize(numBases);
+                       for (int i = 0; i < numBases; i++) {  
+                               buffer = new char(1);
+                               in.read(buffer, 1);
+                               read->flowgram[i] = be_int1(*(unsigned int *)(buffer));
+                               delete[] buffer;
+                       }
+                       
+                       //read bases
+                       read->bases = new char(numBases);
+                       in.read(read->bases, numBases);
+                       tempBuf = buffer;
+                       if (tempBuf.length() > numBases) { tempBuf = tempBuf.substr(0, numBases); strcpy(read->bases, tempBuf.c_str());  }
+                       
+
+                       //read flowgram
+                       read->qualScores.resize(numBases);
+                       for (int i = 0; i < numBases; i++) {  
+                               buffer = new char(1);
+                               in.read(buffer, 1);
+                               read->qualScores[i] = be_int1(*(unsigned int *)(buffer));
+                               delete[] buffer;
+                       }
+                       
+                       /* Pad to 8 chars */
+                       int spotInFile = in.tellg();
+       cout << spotInFile << endl;
+                       int spot = floor((spotInFile + 7) /(float) 8) * 8;
+       cout << spot << endl;
+                       in.seekg(spot);
+                       
+               }else{
+                       m->mothurOut("Error reading."); m->mothurOutEndLine();
+               }
+
+               return read;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "SffInfoCommand", "readSeqData");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+int SffInfoCommand::printCommonHeader(ofstream& out, CommonHeader* header, bool debug) {
+       try {
+       
+               string output = "Common Header:\nMagic Number: ";
+               output += toString(header->magicNumber) + '\n';
+               output += "Version: " + toString(header->version) + '\n';
+               output += "Index Offset: " + toString(header->indexOffset) + '\n';
+               output += "Index Length: " + toString(header->indexLength) + '\n';      
+               output += "Number of Reads: " + toString(header->numReads) + '\n';
+               output += "Header Length: " + toString(header->headerLength) + '\n';
+               output += "Key Length: " + toString(header->keyLength) + '\n';
+               output += "Number of Flows: " + toString(header->numFlowsPerRead) + '\n';
+               output += "Format Code: " + toString(header->flogramFormatCode) + '\n';
+               output += "Flow Chars: " + toString(header->flowChars) + '\n';
+               output += "Key Sequence: " + toString(header->keySequence) + '\n';
+               
+               out << output << endl;
+               
+               if (debug) { cout << output << endl; }
+                       
+               return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "SffInfoCommand", "printCommonHeader");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+int SffInfoCommand::printHeader(ofstream& out, Header* header, bool debug) {
+       try {
+               string name = header->name;
+               string output = ">" + name + "\nRead Header Length: " + toString(header->headerLength) + '\n';
+               output += "Name Length: " + toString(header->nameLength) + '\n';
+               output += "Number of Bases: " + toString(header->numBases) + '\n';
+               output += "Clip Qual Left: " + toString(header->clipQualLeft) + '\n';
+               output += "Clip Qual Right: " + toString(header->clipQualLeft) + '\n';
+               output += "Clip Adap Left: " + toString(header->clipQualLeft) + '\n';
+               output += "Clip Adap Right: " + toString(header->clipQualLeft) + '\n';
+               
+               out << output << endl;
+               
+               if (debug) { cout << output << endl; }
+
+               return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "SffInfoCommand", "printHeader");
+               exit(1);
+       }
+}
+
+//**********************************************************************************************************************
+int SffInfoCommand::printSeqData(ofstream& out, seqRead* read, bool debug) {
+       try {
+
+               string output = "FlowGram: ";
+               for (int i = 0; i < read->flowgram.size(); i++) {  output += toString(read->flowgram[i]) +'\t';  }
+               output += "\nFlow Indexes: ";
+               for (int i = 0; i < read->flowIndex.size(); i++) {  output += toString(read->flowIndex[i]) +'\t';  }
+               string bases = read->bases;
+               output += "\nBases: " + bases + '\n';
+               for (int i = 0; i < read->qualScores.size(); i++) {  output += toString(read->qualScores[i]) +'\t';  }
+               output += '\n';
+               
+               out << output << endl;
+               
+               if (debug) { cout << output << endl; }
+               
+               return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "SffInfoCommand", "printSeqData");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************/
diff --git a/sffinfocommand.h b/sffinfocommand.h
new file mode 100644 (file)
index 0000000..2385d58
--- /dev/null
@@ -0,0 +1,84 @@
+#ifndef SFFINFOCOMMAND_H
+#define SFFINFOCOMMAND_H
+
+/*
+ *  sffinfocommand.h
+ *  Mothur
+ *
+ *  Created by westcott on 7/7/10.
+ *  Copyright 2010 Schloss Lab. All rights reserved.
+ *
+ */
+
+#include "command.hpp"
+
+#define SFF_MAGIC   0x2e736666 /* ".sff" */
+#define SFF_VERSION "\0\0\0\1"
+
+/**********************************************************/
+struct CommonHeader {
+       uint32_t magicNumber;
+       char* version;
+       uint64_t indexOffset;
+       uint32_t indexLength;
+       uint32_t numReads;
+       uint16_t headerLength;
+       uint16_t keyLength;
+       uint16_t numFlowsPerRead;
+       uint8_t flogramFormatCode;
+       char* flowChars; //length depends on number flow reads
+       char* keySequence; //length depends on key length
+       
+       CommonHeader() { magicNumber=0; indexOffset=0; indexLength=0; numReads=0; headerLength=0; keyLength=0; numFlowsPerRead=0; flogramFormatCode='s'; }
+};
+/**********************************************************/
+struct Header {
+       unsigned short headerLength;
+       unsigned short nameLength;
+       unsigned int numBases;
+       unsigned short clipQualLeft;
+       unsigned short clipQualRight;
+       unsigned short clipAdapterLeft;
+       unsigned short clipAdapterRight;
+       char* name; //length depends on nameLength
+
+       Header() { headerLength=0; nameLength=0; numBases=0; clipQualLeft=0; clipQualRight=0; clipAdapterLeft=0; clipAdapterRight=0; }
+};
+/**********************************************************/
+struct seqRead {
+       vector<unsigned short> flowgram;
+       vector<unsigned int> flowIndex;
+       char* bases;
+       vector<unsigned int> qualScores;
+};
+/**********************************************************/
+
+class SffInfoCommand : public Command {
+       
+public:
+       SffInfoCommand(string);
+       ~SffInfoCommand();
+       int execute();
+       void help();
+       
+private:
+       string sffFilename, outputDir;
+       vector<string> filenames, outputNames;
+       bool abort;
+       
+       int extractSffInfo(string, string);
+       CommonHeader* readCommonHeader(ifstream&);
+       Header* readHeader(ifstream&);
+       seqRead* readSeqData(ifstream&, int, int);
+       
+       int printCommonHeader(ofstream&, CommonHeader*, bool); //bool is debug mode
+       int printHeader(ofstream&, Header*, bool);
+       int printSeqData(ofstream&, seqRead*, bool);
+               
+};
+
+/**********************************************************/
+#endif
+
+
index cbf5e13603ced805d185196fad9129eacb573d22..af3bfc8fcbf5ee10cb1a50b192ff414063bd740e 100644 (file)
@@ -519,7 +519,7 @@ int TrimSeqsCommand::setLines(string filename, vector<linePair*>& lines) {
                
                lines.clear();
                
-               vector<long int> positions;
+               vector<unsigned long int> positions;
                
                ifstream inFASTA;
                openInputFile(filename, inFASTA);
@@ -529,7 +529,7 @@ int TrimSeqsCommand::setLines(string filename, vector<linePair*>& lines) {
                        input = getline(inFASTA);
 
                        if (input.length() != 0) {
-                               if(input[0] == '>'){ long int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1);     }
+                               if(input[0] == '>'){ unsigned long int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1);    }
                        }
                }
                inFASTA.close();
@@ -537,7 +537,7 @@ int TrimSeqsCommand::setLines(string filename, vector<linePair*>& lines) {
                int numFastaSeqs = positions.size();
        
                FILE * pFile;
-               long size;
+               unsigned long int size;
                
                //get num bytes in file
                pFile = fopen (filename.c_str(),"rb");
@@ -552,11 +552,11 @@ int TrimSeqsCommand::setLines(string filename, vector<linePair*>& lines) {
                
                for (int i = 0; i < processors; i++) {
 
-                       long int startPos = positions[ i * numSeqsPerProcessor ];
+                       unsigned long int startPos = positions[ i * numSeqsPerProcessor ];
                        if(i == processors - 1){
                                numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor;
                        }else{  
-                               long int myEnd = positions[ (i+1) * numSeqsPerProcessor ];
+                               unsigned long int myEnd = positions[ (i+1) * numSeqsPerProcessor ];
                        }
                        lines.push_back(new linePair(startPos, numSeqsPerProcessor));
                }
index fefe53490cc4b0c1275a006c0946f022c24e366a..ab692d3ee0dbd3ce77ce158ab9a0e82aca7719e6 100644 (file)
@@ -24,9 +24,9 @@ public:
 private:
 
        struct linePair {
-               int start;
+               unsigned long int start;
                int num;
-               linePair(long int i, int j) : start(i), num(j) {}
+               linePair(unsigned long int i, int j) : start(i), num(j) {}
        };
 
        void getOligos(vector<string>&);