]> git.donarmstrong.com Git - bamtools.git/commitdiff
Began rolling over bamtools_sam into bamtools_convert. Kinda hacky setup for now...
authorDerek <derekwbarnett@gmail.com>
Mon, 7 Jun 2010 17:58:46 +0000 (13:58 -0400)
committerDerek <derekwbarnett@gmail.com>
Mon, 7 Jun 2010 17:58:46 +0000 (13:58 -0400)
bamtools_convert.cpp

index bf54b03b9e9eba7d6a44c141fb169a37c9c8b064..ace819104906d305bca806bc177faaa3f7a9286a 100644 (file)
@@ -3,24 +3,42 @@
 // Marth Lab, Department of Biology, Boston College
 // All rights reserved.
 // ---------------------------------------------------------------------------
-// Last modified: 2 June 2010
+// Last modified: 7 June 2010
 // ---------------------------------------------------------------------------
 // Converts between BAM and a number of other formats
 // ***************************************************************************
 
 #include <iostream>
+#include <sstream>
 #include <string>
 #include <vector>
 
 #include "bamtools_convert.h"
 #include "bamtools_format.h"
 #include "bamtools_options.h"
+#include "BGZF.h"
 #include "BamReader.h"
 #include "BamMultiReader.h"
 
 using namespace std;
 using namespace BamTools;
   
+// reference sequences of the BAM file being converted; assigned in
+// ConvertTool::Run() and read by PrintSAM() to map reference IDs to names
+static RefVector references;
+
+namespace BamTools {
+
+    // output format names, compared against the requested format in ConvertTool::Run()
+    static const string FORMAT_FASTA("fasta");
+    static const string FORMAT_FASTQ("fastq");
+    static const string FORMAT_JSON("json");
+    static const string FORMAT_SAM("sam");
+
+    // per-format output routines, one alignment per call (defined at the end of this file)
+    void PrintFASTA(const BamAlignment& a);
+    void PrintFASTQ(const BamAlignment& a);
+    void PrintJSON(const BamAlignment& a);
+    void PrintSAM(const BamAlignment& a);
+
+} // namespace BamTools
+  
 // ---------------------------------------------
 // ConvertSettings implementation
 
@@ -74,19 +92,193 @@ int ConvertTool::Help(void) {
 
+// Runs the convert tool: parses the command line, opens the input BAM file and
+// prints its alignments in the requested format. Only SAM output is implemented
+// so far; the FASTA/FASTQ/JSON branches just print a notice.
+// Returns 0 on success and 1 when the requested format is not recognized.
 int ConvertTool::Run(int argc, char* argv[]) {
   
+    bool convertedOk = true;
+  
     // parse command line arguments
     Options::Parse(argc, argv, 1);
     
     // open files
     BamReader reader;
     reader.Open(m_settings->InputFilename);
+    // keep the reference list at file scope; PrintSAM() uses it to resolve reference names
+    references = reader.GetReferenceData();
         
-    // do conversion
+    // ----------------------------------------
+    // do conversion, depending on desired output format
+
+    // FASTA
+    if ( m_settings->Format == FORMAT_FASTA ) {
+        cout << "Converting to FASTA" << endl;
+    }
     
+    // FASTQ
+    else if ( m_settings->Format == FORMAT_FASTQ ) {
+        cout << "Converting to FASTQ" << endl;
+    }
     
+    // JSON
+    else if ( m_settings->Format == FORMAT_JSON ) {
+        cout << "Converting to JSON" << endl;
+    }
     
+    // SAM
+    else if ( m_settings->Format == FORMAT_SAM ) {
+        BamAlignment alignment;
+        while ( reader.GetNextAlignment(alignment) ) {
+            PrintSAM(alignment);
+        }
+    }
     
+    // unrecognized format
+    else {
+        cerr << "Unrecognized format: " << m_settings->Format << endl;
+        cerr << "Please see help|README (?) for details on supported formats " << endl;
+        convertedOk = false;
+    }
+    
+    // ------------------------
     // clean up & exit
     reader.Close();
-    return 0;
-}
\ No newline at end of file
+    // 0 means success, as this function returned before the format dispatch was
+    // added; casting the bool directly would report 1 on success and 0 on failure
+    return ( convertedOk ? 0 : 1 );
+}
+
+// ----------------------------------------------------------
+// Conversion/output methods
+// ----------------------------------------------------------
+
+// print BamAlignment in FASTA format
+// NOTE: stub - the body is empty, so nothing is printed for the alignment yet
+void BamTools::PrintFASTA(const BamAlignment& a) { 
+
+}
+
+// print BamAlignment in FASTQ format
+// NOTE: stub - the body is empty, so nothing is printed for the alignment yet
+void BamTools::PrintFASTQ(const BamAlignment& a) { 
+
+}
+
+// print BamAlignment in JSON format
+// NOTE: stub - the body is empty, so nothing is printed for the alignment yet
+void BamTools::PrintJSON(const BamAlignment& a) { 
+
+}
+
+// returns the number of bytes taken by the value of a tag with the given type
+// code: 1/2/4/8 for the fixed-width types, 0 for the NUL-terminated string
+// types ('Z', 'H'), and -1 for a type code PrintSAM() does not know how to print
+static int TagValueWidth(const char type) {
+    switch (type) {
+        case('A') :
+        case('C') :
+        case('c') : return 1;
+        case('S') :
+        case('s') : return 2;
+        case('I') :
+        case('i') :
+        case('f') : return 4;
+        case('d') : return 8;
+        case('Z') :
+        case('H') : return 0;
+        default   : return -1;
+    }
+}
+
+// print BamAlignment in SAM format
+//
+// Writes one alignment to stdout as a single tab-delimited record followed by a
+// newline. Reference names are looked up in the file-level 'references' list;
+// a RefID outside that list is printed as "*". Tag parsing stops at the first
+// tag that is truncated or has an unknown type code.
+void BamTools::PrintSAM(const BamAlignment& a) {
+
+    // tab-delimited
+    // <QNAME> <FLAG> <RNAME> <POS> <MAPQ> <CIGAR> <MRNM> <MPOS> <ISIZE> <SEQ> <QUAL> [ <TAG>:<VTYPE>:<VALUE> [...] ]
+
+    // write name & alignment flag
+    cout << a.Name << "\t" << a.AlignmentFlag << "\t";
+
+    // write reference name
+    if ( (a.RefID >= 0) && (a.RefID < (int)references.size()) ) cout << references[a.RefID].RefName << "\t";
+    else cout << "*\t";
+
+    // write position & map quality (position is printed as Position+1)
+    cout << a.Position+1 << "\t" << a.MapQuality << "\t";
+
+    // write CIGAR
+    const vector<CigarOp>& cigarData = a.CigarData;
+    if ( cigarData.empty() ) cout << "*\t";
+    else {
+        vector<CigarOp>::const_iterator cigarIter = cigarData.begin();
+        vector<CigarOp>::const_iterator cigarEnd  = cigarData.end();
+        for ( ; cigarIter != cigarEnd; ++cigarIter ) {
+            const CigarOp& op = (*cigarIter);
+            cout << op.Length << op.Type;
+        }
+        cout << "\t";
+    }
+
+    // write mate reference name, mate position, & insert size
+    if ( a.IsPaired() && (a.MateRefID >= 0) && (a.MateRefID < (int)references.size()) ) {
+        if ( a.MateRefID == a.RefID ) cout << "=\t";
+        else cout << references[a.MateRefID].RefName << "\t";
+        cout << a.MatePosition+1 << "\t" << a.InsertSize << "\t";
+    }
+    else cout << "*\t0\t0\t";
+
+    // write sequence
+    if ( a.QueryBases.empty() ) cout << "*\t";
+    else cout << a.QueryBases << "\t";
+
+    // write qualities
+    if ( a.Qualities.empty() ) cout << "*";
+    else cout << a.Qualities;
+
+    // write tag data
+    // each entry is parsed as: 2-char tag name, 1-char type code, then the value
+    const char* tagData = a.TagData.c_str();
+    const size_t tagDataLength = a.TagData.length();
+    size_t index = 0;
+    while ( index < tagDataLength ) {
+
+        // a tag needs at least its name (2 bytes) and type code (1 byte)
+        if ( tagDataLength - index < 3 ) break;
+        const char type = tagData[index + 2];
+
+        // the value width tells us where the next tag starts, so an unknown type
+        // code or a value running past the end of the data means we must stop
+        const int width = TagValueWidth(type);
+        if ( (width < 0) || ((size_t)width > (tagDataLength - index - 3)) ) break;
+
+        // write tag name, then step over name & type code
+        cout << "\t" << a.TagData.substr(index, 2) << ":";
+        index += 3;
+
+        switch (type) {
+            case('A') :
+                cout << "A:" << tagData[index];
+                break;
+
+            // 8-bit integers are stored as a raw byte, not as text: print the
+            // byte's numeric value rather than running atoi() over binary data
+            case('C') :
+                cout << "i:" << (int)(unsigned char)tagData[index];
+                break;
+
+            case('c') :
+                cout << "i:" << (int)(signed char)tagData[index];
+                break;
+
+            case('S') :
+                cout << "i:" << BgzfData::UnpackUnsignedShort(&tagData[index]);
+                break;
+
+            case('s') :
+                cout << "i:" << BgzfData::UnpackSignedShort(&tagData[index]);
+                break;
+
+            case('I') :
+                cout << "i:" << BgzfData::UnpackUnsignedInt(&tagData[index]);
+                break;
+
+            case('i') :
+                cout << "i:" << BgzfData::UnpackSignedInt(&tagData[index]);
+                break;
+
+            case('f') :
+                cout << "f:" << BgzfData::UnpackFloat(&tagData[index]);
+                break;
+
+            case('d') :
+                cout << "d:" << BgzfData::UnpackDouble(&tagData[index]);
+                break;
+
+            case('Z') :
+            case('H') :
+                cout << type << ":";
+                // copy characters up to the terminating NUL (or the end of the data)
+                while ( (index < tagDataLength) && tagData[index] ) {
+                    cout << tagData[index];
+                    ++index;
+                }
+                ++index;  // step over the terminating NUL
+                break;
+        }
+
+        // step over a fixed-width value (width is 0 for 'Z'/'H', handled above)
+        index += width;
+    }
+
+    // end the record
+    cout << endl;
+}