From 7664c745b7787707501ca7ee5bf412ceff2d39b8 Mon Sep 17 00:00:00 2001 From: derek Date: Sun, 5 Dec 2010 23:11:03 -0500 Subject: [PATCH] Added new RevertTool to the toolkit * "$ bamtools revert ... " will clear the IsDuplicate flag on BamAlignments and replace the Qualities with the contents of the OQ tag. * Suggested by and draft implementation contributed by Al Ward. --- src/toolkit/CMakeLists.txt | 1 + src/toolkit/bamtools.cpp | 5 +- src/toolkit/bamtools_revert.cpp | 178 ++++++++++++++++++++++++++++++++ src/toolkit/bamtools_revert.h | 38 +++++++ 4 files changed, 221 insertions(+), 1 deletion(-) create mode 100644 src/toolkit/bamtools_revert.cpp create mode 100644 src/toolkit/bamtools_revert.h diff --git a/src/toolkit/CMakeLists.txt b/src/toolkit/CMakeLists.txt index 7e0fad0..3923bc9 100644 --- a/src/toolkit/CMakeLists.txt +++ b/src/toolkit/CMakeLists.txt @@ -21,6 +21,7 @@ add_executable ( bamtools bamtools_index.cpp bamtools_merge.cpp bamtools_random.cpp + bamtools_revert.cpp bamtools_sort.cpp bamtools_split.cpp bamtools_stats.cpp diff --git a/src/toolkit/bamtools.cpp b/src/toolkit/bamtools.cpp index 2be9d72..c7491d7 100644 --- a/src/toolkit/bamtools.cpp +++ b/src/toolkit/bamtools.cpp @@ -3,7 +3,7 @@ // Marth Lab, Department of Biology, Boston College // All rights reserved. // --------------------------------------------------------------------------- -// Last modified: 22 November 2010 +// Last modified: 5 December 2010 // --------------------------------------------------------------------------- // Integrates a number of BamTools functionalities into a single executable. 
// *************************************************************************** @@ -26,6 +26,7 @@ #include "bamtools_index.h" #include "bamtools_merge.h" #include "bamtools_random.h" +#include "bamtools_revert.h" #include "bamtools_sort.h" #include "bamtools_split.h" #include "bamtools_stats.h" @@ -42,6 +43,7 @@ static const string HEADER = "header"; static const string INDEX = "index"; static const string MERGE = "merge"; static const string RANDOM = "random"; +static const string REVERT = "revert"; static const string SORT = "sort"; static const string SPLIT = "split"; static const string STATS = "stats"; @@ -80,6 +82,7 @@ AbstractTool* CreateTool(const string& arg) { if ( arg == INDEX ) return new IndexTool; if ( arg == MERGE ) return new MergeTool; if ( arg == RANDOM ) return new RandomTool; + if ( arg == REVERT ) return new RevertTool; if ( arg == SORT ) return new SortTool; if ( arg == SPLIT ) return new SplitTool; if ( arg == STATS ) return new StatsTool; diff --git a/src/toolkit/bamtools_revert.cpp b/src/toolkit/bamtools_revert.cpp new file mode 100644 index 0000000..0a6c08a --- /dev/null +++ b/src/toolkit/bamtools_revert.cpp @@ -0,0 +1,178 @@ +// *************************************************************************** +// bamtools_revert.cpp (c) 2010 Derek Barnett, Alistair Ward +// Marth Lab, Department of Biology, Boston College +// All rights reserved. +// --------------------------------------------------------------------------- +// Last modified: 5 December 2010 +// --------------------------------------------------------------------------- +// Removes duplicate marks and restores original (non-recalibrated) base qualities. 
+// *************************************************************************** + +#include +#include +#include "bamtools_revert.h" +#include "bamtools_options.h" +#include "bamtools_utilities.h" +#include "BamReader.h" +#include "BamWriter.h" +using namespace std; +using namespace BamTools; + +// --------------------------------------------- +// RevertSettings implementation + +struct RevertTool::RevertSettings { + + // flags + bool HasInput; + bool HasOutput; + bool IsForceCompression; + bool IsKeepDuplicateFlag; + bool IsKeepQualities; + + // filenames + string InputFilename; + string OutputFilename; + + // constructor + RevertSettings(void) + : HasInput(false) + , HasOutput(false) + , IsForceCompression(false) + , IsKeepDuplicateFlag(false) + , IsKeepQualities(false) + , InputFilename(Options::StandardIn()) + , OutputFilename(Options::StandardOut()) + { } +}; + +// --------------------------------------------- +// RevertToolPrivate implementation + +struct RevertTool::RevertToolPrivate { + + // ctor & dtor + public: + RevertToolPrivate(RevertTool::RevertSettings* settings); + ~RevertToolPrivate(void); + + // 'public' interface + public: + bool Run(void); + + // internal methods + private: + void RevertAlignment(BamAlignment& al); + + // data members + private: + RevertTool::RevertSettings* m_settings; + string m_OQ; +}; + +RevertTool::RevertToolPrivate::RevertToolPrivate(RevertTool::RevertSettings* settings) + : m_settings(settings) + , m_OQ("OQ") +{ } + +RevertTool::RevertToolPrivate::~RevertToolPrivate(void) { } + +// reverts a BAM alignment +// default behavior (for now) is : replace Qualities with OQ, clear IsDuplicate flag +// can override default behavior using command line options +void RevertTool::RevertToolPrivate::RevertAlignment(BamAlignment& al) { + + // replace Qualities with OQ, if requested + if ( !m_settings->IsKeepQualities ) { + string originalQualities; + if ( al.GetTag(m_OQ, originalQualities) ) { + al.Qualities = originalQualities; + 
al.RemoveTag(m_OQ); + } + } + + // clear duplicate flag, if requested + if ( !m_settings->IsKeepDuplicateFlag ) + al.SetIsDuplicate(false); +} + +bool RevertTool::RevertToolPrivate::Run(void) { + + // opens the BAM file without checking for indexes + BamReader reader; + if ( !reader.Open(m_settings->InputFilename) ) { + cerr << "Could not open input BAM file... quitting." << endl; + return false; + } + + // get BAM file metadata + const string& headerText = reader.GetHeaderText(); + const RefVector& references = reader.GetReferenceData(); + + // open writer + BamWriter writer; + bool writeUncompressed = ( m_settings->OutputFilename == Options::StandardOut() && !m_settings->IsForceCompression ); + if ( !writer.Open(m_settings->OutputFilename, headerText, references, writeUncompressed) ) { + cerr << "Could not open " << m_settings->OutputFilename << " for writing." << endl; + return false; + } + + // plow through file, reverting alignments + BamAlignment al; + while ( reader.GetNextAlignment(al) ) { + RevertAlignment(al); + writer.SaveAlignment(al); + } + + // clean and exit + reader.Close(); + writer.Close(); + return true; +} + +// --------------------------------------------- +// RevertTool implementation + +RevertTool::RevertTool(void) + : AbstractTool() + , m_settings(new RevertSettings) + , m_impl(0) +{ + // set program details + Options::SetProgramInfo("bamtools revert", "removes duplicate marks and restores original (non-recalibrated) base qualities", "[-in ... 
]"); + + // set up options + OptionGroup* IO_Opts = Options::CreateOptionGroup("Input & Output"); + Options::AddValueOption("-in", "BAM filename", "the input BAM file", "", m_settings->HasInput, m_settings->InputFilename, IO_Opts, Options::StandardIn()); + Options::AddValueOption("-out", "BAM filename", "the output BAM file", "", m_settings->HasOutput, m_settings->OutputFilename, IO_Opts, Options::StandardOut()); + Options::AddOption("-forceCompression", "if results are sent to stdout (like when piping to another tool), default behavior is to leave output uncompressed. Use this flag to override and force compression", m_settings->IsForceCompression, IO_Opts); + + OptionGroup* RevertOpts = Options::CreateOptionGroup("Revert Options"); + Options::AddOption("-keepDuplicate", "keep duplicates marked", m_settings->IsKeepDuplicateFlag, RevertOpts); + Options::AddOption("-keepQualities", "keep base qualities (do not replace with OQ contents)", m_settings->IsKeepQualities, RevertOpts); +} + +RevertTool::~RevertTool(void) { + delete m_settings; + m_settings = 0; + + delete m_impl; + m_impl = 0; +} + +int RevertTool::Help(void) { + Options::DisplayHelp(); + return 0; +} + +int RevertTool::Run(int argc, char* argv[]) { + + // parse command line arguments + Options::Parse(argc, argv, 1); + + // run internal RevertTool implementation, return success/fail + m_impl = new RevertToolPrivate(m_settings); + + if ( m_impl->Run() ) return 0; + else return 1; +} diff --git a/src/toolkit/bamtools_revert.h b/src/toolkit/bamtools_revert.h new file mode 100644 index 0000000..b97d47b --- /dev/null +++ b/src/toolkit/bamtools_revert.h @@ -0,0 +1,38 @@ +// *************************************************************************** +// bamtools_revert.h (c) 2010 Derek Barnett, Alistair Ward +// Marth Lab, Department of Biology, Boston College +// All rights reserved. 
+// --------------------------------------------------------------------------- +// Last modified: 5 December 2010 +// --------------------------------------------------------------------------- +// Removes duplicate marks and restores original (non-recalibrated) base qualities. +// *************************************************************************** + +#ifndef BAMTOOLS_REVERT_H +#define BAMTOOLS_REVERT_H + +#include "bamtools_tool.h" + +namespace BamTools { + +class RevertTool : public AbstractTool { + + public: + RevertTool(void); + ~RevertTool(void); + + public: + int Help(void); + int Run(int argc, char* argv[]); + + private: + struct RevertSettings; + RevertSettings* m_settings; + + struct RevertToolPrivate; + RevertToolPrivate* m_impl; +}; + +} // namespace BamTools + +#endif // BAMTOOLS_REVERT_H -- 2.39.2