1 // ***************************************************************************
2 // bamtools_merge.cpp (c) 2010 Derek Barnett, Erik Garrison
3 // Marth Lab, Department of Biology, Boston College
4 // All rights reserved.
5 // ---------------------------------------------------------------------------
6 // Last modified: 21 March 2011
7 // ---------------------------------------------------------------------------
8 // Merges multiple BAM files into one.
9 // ***************************************************************************
11 #include "bamtools_merge.h"
13 #include <api/BamMultiReader.h>
14 #include <api/BamWriter.h>
15 #include <utils/bamtools_options.h>
16 #include <utils/bamtools_utilities.h>
17 using namespace BamTools;
24 // ---------------------------------------------
25 // MergeSettings implementation
27 struct MergeTool::MergeSettings {
30 bool HasInputBamFilename;
31 bool HasOutputBamFilename;
32 bool IsForceCompression;
36 vector<string> InputFiles;
39 string OutputFilename;
44 : HasInputBamFilename(false)
45 , HasOutputBamFilename(false)
46 , IsForceCompression(false)
48 , OutputFilename(Options::StandardOut())
52 // ---------------------------------------------
53 // MergeTool implementation
55 MergeTool::MergeTool(void)
57 , m_settings(new MergeSettings)
59 // set program details
60 Options::SetProgramInfo("bamtools merge", "merges multiple BAM files into one", "[-in <filename> -in <filename> ...] [-out <filename> | [-forceCompression]] [-region <REGION>]");
63 OptionGroup* IO_Opts = Options::CreateOptionGroup("Input & Output");
64 Options::AddValueOption("-in", "BAM filename", "the input BAM file(s)", "", m_settings->HasInputBamFilename, m_settings->InputFiles, IO_Opts);
65 Options::AddValueOption("-out", "BAM filename", "the output BAM file", "", m_settings->HasOutputBamFilename, m_settings->OutputFilename, IO_Opts);
66 Options::AddOption("-forceCompression", "if results are sent to stdout (like when piping to another tool), default behavior is to leave output uncompressed. Use this flag to override and force compression", m_settings->IsForceCompression, IO_Opts);
67 Options::AddValueOption("-region", "REGION", "genomic region. See README for more details", "", m_settings->HasRegion, m_settings->Region, IO_Opts);
70 MergeTool::~MergeTool(void) {
75 int MergeTool::Help(void) {
76 Options::DisplayHelp();
80 int MergeTool::Run(int argc, char* argv[]) {
82 // parse command line arguments
83 Options::Parse(argc, argv, 1);
85 // set to default input if none provided
86 if ( !m_settings->HasInputBamFilename )
87 m_settings->InputFiles.push_back(Options::StandardIn());
89 // opens the BAM files (by default without checking for indexes)
90 BamMultiReader reader;
91 if ( !reader.Open(m_settings->InputFiles) ) {
92 cerr << "bamtools merge ERROR: could not open input BAM file(s)... Aborting." << endl;
96 // retrieve header & reference dictionary info
97 std::string mergedHeader = reader.GetHeaderText();
98 RefVector references = reader.GetReferenceData();
100 // determine compression mode for BamWriter
101 bool writeUncompressed = ( m_settings->OutputFilename == Options::StandardOut() &&
102 !m_settings->IsForceCompression );
103 BamWriter::CompressionMode compressionMode = BamWriter::Compressed;
104 if ( writeUncompressed ) compressionMode = BamWriter::Uncompressed;
108 writer.SetCompressionMode(compressionMode);
109 if ( !writer.Open(m_settings->OutputFilename, mergedHeader, references) ) {
110 cerr << "bamtools merge ERROR: could not open " << m_settings->OutputFilename << " for writing." << endl;
115 // if no region specified, store entire contents of file(s)
116 if ( !m_settings->HasRegion ) {
118 while ( reader.GetNextAlignmentCore(al) )
119 writer.SaveAlignment(al);
122 // otherwise attempt to use region as constraint
125 // if region string parses OK
127 if ( Utilities::ParseRegionString(m_settings->Region, reader, region) ) {
129 // attempt to find index files
130 reader.LocateIndexes();
132 // if index data available for all BAM files, we can use SetRegion
133 if ( reader.HasIndexes() ) {
135 // attempt to use SetRegion(), if failed report error
136 if ( !reader.SetRegion(region.LeftRefID, region.LeftPosition, region.RightRefID, region.RightPosition) ) {
137 cerr << "bamtools merge ERROR: set region failed. Check that REGION describes a valid range" << endl;
142 // everything checks out, just iterate through specified region, storing alignments
144 while ( reader.GetNextAlignmentCore(al) )
145 writer.SaveAlignment(al);
148 // no index data available, we have to iterate through until we
149 // find overlapping alignments
152 while ( reader.GetNextAlignmentCore(al) ) {
153 if ( (al.RefID >= region.LeftRefID) && ( (al.Position + al.Length) >= region.LeftPosition ) &&
154 (al.RefID <= region.RightRefID) && ( al.Position <= region.RightPosition) )
156 writer.SaveAlignment(al);
162 // error parsing REGION string
164 cerr << "bamtools merge ERROR: could not parse REGION - " << m_settings->Region << endl;
165 cerr << "Check that REGION is in valid format (see documentation) and that the coordinates are valid"