git.donarmstrong.com Git - bamtools.git/commitdiff
Added new RevertTool to the toolkit
author derek <derekwbarnett@gmail.com>
Mon, 6 Dec 2010 04:11:03 +0000 (23:11 -0500)
committer derek <derekwbarnett@gmail.com>
Mon, 6 Dec 2010 04:11:03 +0000 (23:11 -0500)
  * "$ bamtools revert ... " will clear the IsDuplicate flag on
BamAlignments and replace the Qualities with the contents of the OQ tag.
  * Suggested by and draft implementation contributed by Al Ward.

src/toolkit/CMakeLists.txt
src/toolkit/bamtools.cpp
src/toolkit/bamtools_revert.cpp [new file with mode: 0644]
src/toolkit/bamtools_revert.h [new file with mode: 0644]

index 7e0fad0c9f5f7fff3fadde71a981fe8f8e6b727e..3923bc9e93e0f02cfd2656bca69cc0025a6fa726 100644 (file)
@@ -21,6 +21,7 @@ add_executable ( bamtools
                  bamtools_index.cpp
                  bamtools_merge.cpp
                  bamtools_random.cpp
+                bamtools_revert.cpp
                  bamtools_sort.cpp
                  bamtools_split.cpp
                  bamtools_stats.cpp
index 2be9d720c622de3d286568febf27479ebe553490..c7491d71d90ed899a85de274a236ac7b5a77f4d1 100644 (file)
@@ -3,7 +3,7 @@
 // Marth Lab, Department of Biology, Boston College
 // All rights reserved.
 // ---------------------------------------------------------------------------
-// Last modified: 22 November 2010
+// Last modified: 5 December 2010
 // ---------------------------------------------------------------------------
 // Integrates a number of BamTools functionalities into a single executable.
 // ***************************************************************************
@@ -26,6 +26,7 @@
 #include "bamtools_index.h"
 #include "bamtools_merge.h"
 #include "bamtools_random.h"
+#include "bamtools_revert.h"
 #include "bamtools_sort.h"
 #include "bamtools_split.h"
 #include "bamtools_stats.h"
@@ -42,6 +43,7 @@ static const string HEADER   = "header";
 static const string INDEX    = "index";
 static const string MERGE    = "merge";
 static const string RANDOM   = "random";
+static const string REVERT   = "revert";
 static const string SORT     = "sort";
 static const string SPLIT    = "split";
 static const string STATS    = "stats";
@@ -80,6 +82,7 @@ AbstractTool* CreateTool(const string& arg) {
     if ( arg == INDEX )    return new IndexTool;
     if ( arg == MERGE )    return new MergeTool;
     if ( arg == RANDOM )   return new RandomTool;
+    if ( arg == REVERT )   return new RevertTool;
     if ( arg == SORT )     return new SortTool;
     if ( arg == SPLIT )    return new SplitTool;
     if ( arg == STATS )    return new StatsTool;
diff --git a/src/toolkit/bamtools_revert.cpp b/src/toolkit/bamtools_revert.cpp
new file mode 100644 (file)
index 0000000..0a6c08a
--- /dev/null
@@ -0,0 +1,178 @@
+// ***************************************************************************
+// bamtools_revert.cpp (c) 2010 Derek Barnett, Alistair Ward
+// Marth Lab, Department of Biology, Boston College
+// All rights reserved.
+// ---------------------------------------------------------------------------
+// Last modified: 5 December 2010
+// ---------------------------------------------------------------------------
+// Removes duplicate marks and restores original (non-recalibrated) base qualities.
+// ***************************************************************************
+
+#include <iostream>
+#include <string>
+#include "bamtools_revert.h"
+#include "bamtools_options.h"
+#include "bamtools_utilities.h"
+#include "BamReader.h"
+#include "BamWriter.h"
+using namespace std;
+using namespace BamTools;
+
+// ---------------------------------------------
+// RevertSettings implementation
+
+struct RevertTool::RevertSettings {
+    // Option values for 'bamtools revert'; each member is handed to the option parser in the RevertTool constructor.
+    // flags
+    bool HasInput;             // passed to the "-in" value option
+    bool HasOutput;            // passed to the "-out" value option
+    bool IsForceCompression;   // "-forceCompression": compress the output even when it goes to stdout
+    bool IsKeepDuplicateFlag;  // "-keepDuplicate": leave the IsDuplicate flag untouched
+    bool IsKeepQualities;      // "-keepQualities": do not replace Qualities with the OQ tag contents
+
+    // filenames
+    string InputFilename;      // starts out as Options::StandardIn()
+    string OutputFilename;     // starts out as Options::StandardOut()
+    
+    // constructor: all flags off, input/output set to the standard streams
+    RevertSettings(void)
+        : HasInput(false)
+       , HasOutput(false)
+       , IsForceCompression(false)
+       , IsKeepDuplicateFlag(false)
+       , IsKeepQualities(false)
+       , InputFilename(Options::StandardIn())
+       , OutputFilename(Options::StandardOut())
+    { }
+};  
+
+// ---------------------------------------------
+// RevertToolPrivate implementation
+
+struct RevertTool::RevertToolPrivate {
+    // Private implementation behind RevertTool::Run(): reads the input BAM, reverts each alignment, writes the output BAM.
+    // ctor & dtor
+    public:
+        RevertToolPrivate(RevertTool::RevertSettings* settings);  // keeps the pointer; the settings object is not copied
+        ~RevertToolPrivate(void);
+  
+    // 'public' interface
+    public:
+        bool Run(void);  // true on success, false if the input or output file could not be opened
+        
+    // internal methods
+    private:
+       void RevertAlignment(BamAlignment& al);  // modifies 'al' in place according to the settings
+        
+    // data members
+    private:
+        RevertTool::RevertSettings* m_settings;  // not deleted by this struct; RevertTool's destructor deletes it
+       string m_OQ;  // the tag name "OQ", used for tag lookup and removal
+};
+
+RevertTool::RevertToolPrivate::RevertToolPrivate(RevertTool::RevertSettings* settings)  // 'settings' must outlive this object
+    : m_settings(settings)  // pointer is stored as-is, no copy is made
+    , m_OQ("OQ")            // tag name handed to GetTag()/RemoveTag() in RevertAlignment()
+{ }
+
+RevertTool::RevertToolPrivate::~RevertToolPrivate(void) { }  // nothing to release: m_settings is deleted by RevertTool's destructor
+
+// Reverts a single BAM alignment in place.
+// Default behavior (for now): replace Qualities with the OQ tag contents (then remove the tag) and clear the IsDuplicate flag.
+// Either step can be switched off with the -keepQualities / -keepDuplicate command line options.
+void RevertTool::RevertToolPrivate::RevertAlignment(BamAlignment& al) {
+
+    // replace Qualities with OQ, unless -keepQualities was given
+    if ( !m_settings->IsKeepQualities ) {
+       string originalQualities;
+       if ( al.GetTag(m_OQ, originalQualities) ) {  // when GetTag() returns false the alignment keeps its current Qualities
+           al.Qualities = originalQualities;
+           al.RemoveTag(m_OQ);  // tag is dropped once its contents have been moved into Qualities
+       }
+    }
+
+    // clear duplicate flag, unless -keepDuplicate was given
+    if ( !m_settings->IsKeepDuplicateFlag )
+       al.SetIsDuplicate(false);
+}
+
+bool RevertTool::RevertToolPrivate::Run(void) {
+    // Reads every alignment from the input BAM, reverts it, and writes it to the output BAM; returns false if either file cannot be opened.
+    // opens the BAM file without checking for indexes
+    BamReader reader;
+    if ( !reader.Open(m_settings->InputFilename) ) {
+       cerr << "Could not open input BAM file... quitting." << endl;
+        return false;
+    }
+
+    // get BAM file metadata (passed unchanged to the writer below)
+    const string& headerText = reader.GetHeaderText();
+    const RefVector& references = reader.GetReferenceData();
+    
+    // open writer; output is left uncompressed only when it goes to stdout and -forceCompression was not given
+    BamWriter writer;
+    bool writeUncompressed = ( m_settings->OutputFilename == Options::StandardOut() && !m_settings->IsForceCompression );
+    if ( !writer.Open(m_settings->OutputFilename, headerText, references, writeUncompressed) ) {
+        cerr << "Could not open " << m_settings->OutputFilename << " for writing." << endl;
+        return false;
+    }
+
+    // plow through file, reverting alignments
+    BamAlignment al;
+    while ( reader.GetNextAlignment(al) ) {
+       RevertAlignment(al);  // modifies 'al' in place before it is written
+        writer.SaveAlignment(al);
+    }
+    
+    // clean and exit
+    reader.Close();
+    writer.Close();
+    return true; 
+}
+
+// ---------------------------------------------
+// RevertTool implementation
+
+RevertTool::RevertTool(void)
+    : AbstractTool()
+    , m_settings(new RevertSettings)  // owned by this tool; deleted in the destructor
+    , m_impl(0)                       // created later, in Run()
+{
+    // set program details (name, description, usage string shown in the help text)
+    Options::SetProgramInfo("bamtools revert", "removes duplicate marks and restores original (non-recalibrated) base qualities", "[-in <filename> ... ]");
+    
+    // set up options; each one is given the RevertSettings member it controls (presumably filled in when Options::Parse() runs in Run())
+    OptionGroup* IO_Opts = Options::CreateOptionGroup("Input & Output");
+    Options::AddValueOption("-in",  "BAM filename", "the input BAM file",  "", m_settings->HasInput,  m_settings->InputFilename,  IO_Opts, Options::StandardIn());
+    Options::AddValueOption("-out", "BAM filename", "the output BAM file", "", m_settings->HasOutput, m_settings->OutputFilename, IO_Opts, Options::StandardOut());
+    Options::AddOption("-forceCompression", "if results are sent to stdout (like when piping to another tool), default behavior is to leave output uncompressed. Use this flag to override and force compression", m_settings->IsForceCompression, IO_Opts);
+
+    OptionGroup* RevertOpts = Options::CreateOptionGroup("Revert Options");
+    Options::AddOption("-keepDuplicate", "keep duplicates marked", m_settings->IsKeepDuplicateFlag, RevertOpts);
+    Options::AddOption("-keepQualities", "keep base qualities (do not replace with OQ contents)", m_settings->IsKeepQualities, RevertOpts);
+}
+
+RevertTool::~RevertTool(void) {  // releases the settings object and the private implementation
+    delete m_settings;  // allocated in the constructor
+    m_settings = 0;
+    
+    delete m_impl;      // still 0 if Run() was never called; deleting a null pointer is a no-op
+    m_impl = 0;
+}
+
+int RevertTool::Help(void) {  // shows the help text via Options::DisplayHelp(); always returns 0
+    Options::DisplayHelp();
+    return 0;
+}
+
+int RevertTool::Run(int argc, char* argv[]) {
+    // Entry point for 'bamtools revert': parses the arguments, runs the private implementation, returns 0 on success and 1 on failure.
+    // parse command line arguments
+    Options::Parse(argc, argv, 1);
+
+    // run internal RevertTool implementation, return success/fail
+    m_impl = new RevertToolPrivate(m_settings);  // deleted in ~RevertTool(); NOTE(review): a second call to Run() would overwrite the previous pointer without deleting it
+    
+    if ( m_impl->Run() ) return 0;
+    else return 1;
+}
diff --git a/src/toolkit/bamtools_revert.h b/src/toolkit/bamtools_revert.h
new file mode 100644 (file)
index 0000000..b97d47b
--- /dev/null
@@ -0,0 +1,38 @@
+// ***************************************************************************
+// bamtools_revert.h (c) 2010 Derek Barnett, Alistair Ward
+// Marth Lab, Department of Biology, Boston College
+// All rights reserved.
+// ---------------------------------------------------------------------------
+// Last modified: 5 December 2010
+// ---------------------------------------------------------------------------
+// Removes duplicate marks and restores original (non-recalibrated) base qualities.
+// ***************************************************************************
+
+#ifndef BAMTOOLS_REVERT_H
+#define BAMTOOLS_REVERT_H
+
+#include "bamtools_tool.h"
+
+namespace BamTools {
+  
+class RevertTool : public AbstractTool {
+    // Toolkit entry for 'bamtools revert'; settings and implementation are hidden behind forward-declared structs defined in the .cpp file.
+    public:
+        RevertTool(void);   // allocates the settings object
+        ~RevertTool(void);  // deletes m_settings and m_impl
+  
+    public:
+        int Help(void);                   // displays the help text, returns 0
+        int Run(int argc, char* argv[]);  // returns 0 on success, 1 on failure
+        
+    private:
+        struct RevertSettings;
+        RevertSettings* m_settings;       // owned; created in the constructor
+        
+        struct RevertToolPrivate;
+        RevertToolPrivate* m_impl;        // owned; null until Run() creates it
+};
+  
+} // namespace BamTools
+
+#endif // BAMTOOLS_REVERT_H