From: Derek Barnett Date: Wed, 24 Jul 2013 20:00:39 +0000 (-0400) Subject: Merge branch 'master' of https://github.com/pezmaster31/bamtools X-Git-Url: https://git.donarmstrong.com/?p=bamtools.git;a=commitdiff_plain;h=9220032eb9f9db7e1226c130757d2d91de35e9e6;hp=f6c30c48b16ad4c084c98aa9c81288f726a499fd Merge branch 'master' of https://github.com/pezmaster31/bamtools --- diff --git a/src/api/CMakeLists.txt b/src/api/CMakeLists.txt index 66eb35f..a9dd092 100644 --- a/src/api/CMakeLists.txt +++ b/src/api/CMakeLists.txt @@ -13,7 +13,7 @@ add_definitions( -DBAMTOOLS_API_LIBRARY ) # (for proper exporting of library sym add_definitions( -fPIC ) # (attempt to force PIC compiling on CentOS, not being set on shared libs by CMake) # fetch all internal source files -add_subdirectory ( internal ) +add_subdirectory( internal ) # make list of all API source files set( BamToolsAPISources @@ -44,11 +44,11 @@ set_target_properties( BamTools-static PROPERTIES PREFIX "lib" ) # link libraries automatically with zlib (and Winsock2, if applicable) -if( _WIN32 ) +if( WIN32 ) set( APILibs z ws2_32 ) -else( _WIN32 ) +else() set( APILibs z ) -endif( _WIN32 ) +endif() target_link_libraries( BamTools ${APILibs} ) target_link_libraries( BamTools-static ${APILibs} ) diff --git a/src/api/internal/index/CMakeLists.txt b/src/api/internal/index/CMakeLists.txt index 1c78cb9..d6a7df6 100644 --- a/src/api/internal/index/CMakeLists.txt +++ b/src/api/internal/index/CMakeLists.txt @@ -5,9 +5,9 @@ # src/api/internal/index # ========================== -set ( InternalIndexDir "${InternalDir}/index" ) +set( InternalIndexDir "${InternalDir}/index" ) -set ( InternalIndexSources +set( InternalIndexSources ${InternalIndexDir}/BamIndexFactory_p.cpp ${InternalIndexDir}/BamStandardIndex_p.cpp ${InternalIndexDir}/BamToolsIndex_p.cpp diff --git a/src/api/internal/io/CMakeLists.txt b/src/api/internal/io/CMakeLists.txt index d9da416..28153d5 100644 --- a/src/api/internal/io/CMakeLists.txt +++ 
b/src/api/internal/io/CMakeLists.txt @@ -5,12 +5,12 @@ # src/api/internal/io # ========================== -set ( InternalIODir "${InternalDir}/io" ) +set( InternalIODir "${InternalDir}/io" ) #-------------------------- # platform-independent IO #-------------------------- -set ( CommonIOSources +set( CommonIOSources ${InternalIODir}/BamDeviceFactory_p.cpp ${InternalIODir}/BamFile_p.cpp ${InternalIODir}/BamFtp_p.cpp @@ -30,21 +30,17 @@ set ( CommonIOSources #------------------------ # platform-dependent IO #------------------------ -if ( _WIN32 ) - set ( PlatformIOSources - ${InternalIODir}/TcpSocketEngine_win_p.cpp - ) -else ( _WIN32 ) - set ( PlatformIOSources - ${InternalIODir}/TcpSocketEngine_unix_p.cpp - ) -endif ( _WIN32 ) +if( WIN32 ) + set( PlatformIOSources ${InternalIODir}/TcpSocketEngine_win_p.cpp ) +else() + set( PlatformIOSources ${InternalIODir}/TcpSocketEngine_unix_p.cpp ) +endif() #--------------------------- # make build-specific list #--------------------------- -set ( InternalIOSources - ${CommonIOSources} +set( InternalIOSources + ${CommonIOSources} ${PlatformIOSources} PARENT_SCOPE # <-- leave this last diff --git a/src/api/internal/sam/CMakeLists.txt b/src/api/internal/sam/CMakeLists.txt index 4b2bce2..2f303bd 100644 --- a/src/api/internal/sam/CMakeLists.txt +++ b/src/api/internal/sam/CMakeLists.txt @@ -5,9 +5,9 @@ # src/api/internal/sam # ========================== -set ( InternalSamDir "${InternalDir}/sam" ) +set( InternalSamDir "${InternalDir}/sam" ) -set ( InternalSamSources +set( InternalSamSources ${InternalSamDir}/SamFormatParser_p.cpp ${InternalSamDir}/SamFormatPrinter_p.cpp ${InternalSamDir}/SamHeaderValidator_p.cpp diff --git a/src/api/internal/utils/CMakeLists.txt b/src/api/internal/utils/CMakeLists.txt index 38a6957..4b1e2c2 100644 --- a/src/api/internal/utils/CMakeLists.txt +++ b/src/api/internal/utils/CMakeLists.txt @@ -5,9 +5,9 @@ # src/api/internal/utils # ========================== -set ( InternalUtilsDir "${InternalDir}/utils" 
) +set( InternalUtilsDir "${InternalDir}/utils" ) -set ( InternalUtilsSources +set( InternalUtilsSources ${InternalUtilsDir}/BamException_p.cpp PARENT_SCOPE # <-- leave this last diff --git a/src/toolkit/bamtools_coverage.cpp b/src/toolkit/bamtools_coverage.cpp index c0ecd8f..6a4493d 100644 --- a/src/toolkit/bamtools_coverage.cpp +++ b/src/toolkit/bamtools_coverage.cpp @@ -2,7 +2,7 @@ // bamtools_coverage.cpp (c) 2010 Derek Barnett, Erik Garrison // Marth Lab, Department of Biology, Boston College // --------------------------------------------------------------------------- -// Last modified: 7 April 2011 +// Last modified: 24 July 2013 // --------------------------------------------------------------------------- // Prints coverage data for a single BAM file // *************************************************************************** @@ -137,6 +137,7 @@ bool CoverageTool::CoverageToolPrivate::Run(void) { BamAlignment al; while ( reader.GetNextAlignment(al) ) pileup.AddAlignment(al); + pileup.Flush(); // clean up reader.Close(); diff --git a/src/toolkit/bamtools_random.cpp b/src/toolkit/bamtools_random.cpp index 5282f15..367ac58 100644 --- a/src/toolkit/bamtools_random.cpp +++ b/src/toolkit/bamtools_random.cpp @@ -2,7 +2,7 @@ // bamtools_random.cpp (c) 2010 Derek Barnett, Erik Garrison // Marth Lab, Department of Biology, Boston College // --------------------------------------------------------------------------- -// Last modified: 10 December 2012 (DB) +// Last modified: 24 July 2013 (DB) // --------------------------------------------------------------------------- // Grab a random subset of alignments (testing tool) // *************************************************************************** @@ -46,6 +46,7 @@ struct RandomTool::RandomSettings { bool HasInput; bool HasInputFilelist; bool HasOutput; + bool HasRandomNumberSeed; bool HasRegion; bool IsForceCompression; @@ -54,6 +55,7 @@ struct RandomTool::RandomSettings { vector InputFiles; string 
InputFilelist; string OutputFilename; + unsigned int RandomNumberSeed; string Region; // constructor @@ -62,10 +64,12 @@ struct RandomTool::RandomSettings { , HasInput(false) , HasInputFilelist(false) , HasOutput(false) + , HasRandomNumberSeed(false) , HasRegion(false) , IsForceCompression(false) , AlignmentCount(RANDOM_MAX_ALIGNMENT_COUNT) , OutputFilename(Options::StandardOut()) + , RandomNumberSeed(0) { } }; @@ -165,7 +169,10 @@ bool RandomTool::RandomToolPrivate::Run(void) { } // seed our random number generator - srand( time(NULL) ); + if ( m_settings->HasRandomNumberSeed ) + srand( m_settings->RandomNumberSeed ); + else + srand( time(NULL) ); // grab random alignments BamAlignment al; @@ -235,14 +242,17 @@ RandomTool::RandomTool(void) // set up options OptionGroup* IO_Opts = Options::CreateOptionGroup("Input & Output"); - Options::AddValueOption("-in", "BAM filename", "the input BAM file", "", m_settings->HasInput, m_settings->InputFiles, IO_Opts, Options::StandardIn()); - Options::AddValueOption("-list", "filename", "the input BAM file list, one line per file", "", m_settings->HasInputFilelist, m_settings->InputFilelist, IO_Opts); - Options::AddValueOption("-out", "BAM filename", "the output BAM file", "", m_settings->HasOutput, m_settings->OutputFilename, IO_Opts, Options::StandardOut()); + Options::AddValueOption("-in", "BAM filename", "the input BAM file", "", m_settings->HasInput, m_settings->InputFiles, IO_Opts, Options::StandardIn()); + Options::AddValueOption("-list", "filename", "the input BAM file list, one line per file", "", m_settings->HasInputFilelist, m_settings->InputFilelist, IO_Opts); + Options::AddValueOption("-out", "BAM filename", "the output BAM file", "", m_settings->HasOutput, m_settings->OutputFilename, IO_Opts, Options::StandardOut()); + Options::AddValueOption("-region", "REGION", "only pull random alignments from within this genomic region. Index file is recommended for better performance, and is used automatically if it exists. 
See \'bamtools help index\' for more details on creating one", "", m_settings->HasRegion, m_settings->Region, IO_Opts); Options::AddOption("-forceCompression", "if results are sent to stdout (like when piping to another tool), default behavior is to leave output uncompressed. Use this flag to override and force compression", m_settings->IsForceCompression, IO_Opts); - Options::AddValueOption("-region", "REGION", "only pull random alignments from within this genomic region. Index file is recommended for better performance, and is used automatically if it exists. See \'bamtools help index\' for more details on creating one", "", m_settings->HasRegion, m_settings->Region, IO_Opts); OptionGroup* SettingsOpts = Options::CreateOptionGroup("Settings"); - Options::AddValueOption("-n", "count", "number of alignments to grab. Note - no duplicate checking is performed", "", m_settings->HasAlignmentCount, m_settings->AlignmentCount, SettingsOpts, RANDOM_MAX_ALIGNMENT_COUNT); + Options::AddValueOption("-n", "count", "number of alignments to grab. Note - no duplicate checking is performed", "", + m_settings->HasAlignmentCount, m_settings->AlignmentCount, SettingsOpts, RANDOM_MAX_ALIGNMENT_COUNT); + Options::AddValueOption("-seed", "unsigned integer", "random number generator seed (for repeatable results). 
Current time is used if no seed value is provided.", "", + m_settings->HasRandomNumberSeed, m_settings->RandomNumberSeed, SettingsOpts); } RandomTool::~RandomTool(void) { diff --git a/src/toolkit/bamtools_resolve.cpp b/src/toolkit/bamtools_resolve.cpp index cb42f5b..9e5fb84 100644 --- a/src/toolkit/bamtools_resolve.cpp +++ b/src/toolkit/bamtools_resolve.cpp @@ -2,7 +2,7 @@ // bamtools_resolve.cpp (c) 2011 // Marth Lab, Department of Biology, Boston College // --------------------------------------------------------------------------- -// Last modified: 14 October 2011 +// Last modified: 24 July 2013 (DB) // --------------------------------------------------------------------------- // Resolves paired-end reads (marking the IsProperPair flag as needed). // *************************************************************************** @@ -73,6 +73,20 @@ static const string OPTION_FORCEMARKREADGROUPS = "ForceMarkReadGroups"; static const string RG_FIELD_DESCRIPTION = "# "; +static const string MODEL_DESCRIPTION = + "# ------------- Model Types Description ---------------\n" + "#\n" + "# ID Position Orientation \n" + "# 1 mate1 < mate2 mate1:forward, mate2:forward \n" + "# 2 mate1 < mate2 mate1:forward, mate2:reverse \n" + "# 3 mate1 < mate2 mate1:reverse, mate2:forward \n" + "# 4 mate1 < mate2 mate1:reverse, mate2:reverse \n" + "# 5 mate2 < mate1 mate2:forward, mate1:forward \n" + "# 6 mate2 < mate1 mate2:forward, mate1:reverse \n" + "# 7 mate2 < mate1 mate2:reverse, mate1:forward \n" + "# 8 mate2 < mate1 mate2:reverse, mate1:reverse \n" + "# -----------------------------------------------------\n"; + // -------------------------------------------------------------------------- // unique readname file constants // -------------------------------------------------------------------------- @@ -731,9 +745,13 @@ void ResolveTool::StatsFileWriter::WriteHeader(void) { << BAMTOOLS_VERSION_BUILD; // # bamtools resolve (vX.Y.Z) + // # + // # MODEL DESCRIPTION - see above for 
actual text // \n m_stream << COMMENT_CHAR << " bamtools resolve (" << versionStream.str() << ")" << endl + << COMMENT_CHAR << endl + << MODEL_DESCRIPTION << endl; } diff --git a/src/toolkit/bamtools_split.cpp b/src/toolkit/bamtools_split.cpp index e6602a9..6425e95 100644 --- a/src/toolkit/bamtools_split.cpp +++ b/src/toolkit/bamtools_split.cpp @@ -2,7 +2,7 @@ // bamtools_split.cpp (c) 2010 Derek Barnett, Erik Garrison // Marth Lab, Department of Biology, Boston College // --------------------------------------------------------------------------- -// Last modified: 8 December 2011 (DB) +// Last modified: 24 July 2013 (DB) // --------------------------------------------------------------------------- // Splits a BAM file on user-specified property, creating a new BAM output // file for each value found @@ -33,6 +33,7 @@ static const string SPLIT_UNMAPPED_TOKEN = ".UNMAPPED"; static const string SPLIT_PAIRED_TOKEN = ".PAIRED_END"; static const string SPLIT_SINGLE_TOKEN = ".SINGLE_END"; static const string SPLIT_REFERENCE_TOKEN = ".REF_"; +static const string SPLIT_TAG_TOKEN = ".TAG_"; string GetTimestampString(void) { @@ -70,6 +71,7 @@ struct SplitTool::SplitSettings { bool HasInputFilename; bool HasCustomOutputStub; bool HasCustomRefPrefix; + bool HasCustomTagPrefix; bool IsSplittingMapped; bool IsSplittingPaired; bool IsSplittingReference; @@ -78,6 +80,7 @@ struct SplitTool::SplitSettings { // string args string CustomOutputStub; string CustomRefPrefix; + string CustomTagPrefix; string InputFilename; string TagToSplit; @@ -86,12 +89,14 @@ struct SplitTool::SplitSettings { : HasInputFilename(false) , HasCustomOutputStub(false) , HasCustomRefPrefix(false) + , HasCustomTagPrefix(false) , IsSplittingMapped(false) , IsSplittingPaired(false) , IsSplittingReference(false) , IsSplittingTag(false) , CustomOutputStub("") , CustomRefPrefix("") + , CustomTagPrefix("") , InputFilename(Options::StandardIn()) , TagToSplit("") { } @@ -454,6 +459,16 @@ bool 
SplitTool::SplitToolPrivate::SplitTagImpl(BamAlignment& al) { WriterMap outputFiles; WriterMapIterator writerIter; + // determine tag prefix + string tagPrefix = SPLIT_TAG_TOKEN; + if ( m_settings->HasCustomTagPrefix ) + tagPrefix = m_settings->CustomTagPrefix; + + // make sure prefix starts with '.' + const size_t dotFound = tagPrefix.find('.'); + if ( dotFound != 0 ) + tagPrefix = string(".") + tagPrefix; + // local variables const string tag = m_settings->TagToSplit; BamWriter* writer; @@ -464,7 +479,7 @@ bool SplitTool::SplitToolPrivate::SplitTagImpl(BamAlignment& al) { if ( al.GetTag(tag, currentValue) ) { // open new BamWriter, save first alignment - outputFilenameStream << m_outputFilenameStub << ".TAG_" << tag << "_" << currentValue << ".bam"; + outputFilenameStream << m_outputFilenameStub << tagPrefix << tag << "_" << currentValue << ".bam"; writer = new BamWriter; if ( !writer->Open(outputFilenameStream.str(), m_header, m_references) ) { cerr << "bamtools split ERROR: could not open " << outputFilenameStream.str() @@ -493,7 +508,7 @@ bool SplitTool::SplitToolPrivate::SplitTagImpl(BamAlignment& al) { if ( writerIter == outputFiles.end() ) { // open new BamWriter - outputFilenameStream << m_outputFilenameStub << ".TAG_" << tag << "_" << currentValue << ".bam"; + outputFilenameStream << m_outputFilenameStub << tagPrefix << tag << "_" << currentValue << ".bam"; writer = new BamWriter; if ( !writer->Open(outputFilenameStream.str(), m_header, m_references) ) { cerr << "bamtool split ERROR: could not open " << outputFilenameStream.str() @@ -542,6 +557,8 @@ SplitTool::SplitTool(void) Options::AddValueOption("-in", "BAM filename", "the input BAM file", "", m_settings->HasInputFilename, m_settings->InputFilename, IO_Opts, Options::StandardIn()); Options::AddValueOption("-refPrefix", "string", "custom prefix for splitting by references. Currently files end with REF_.bam. 
This option allows you to replace \"REF_\" with a prefix of your choosing.", "", m_settings->HasCustomRefPrefix, m_settings->CustomRefPrefix, IO_Opts); + Options::AddValueOption("-tagPrefix", "string", "custom prefix for splitting by tags. Current files end with TAG_<tagname>_<tagvalue>.bam. This option allows you to replace \"TAG_\" with a prefix of your choosing.", "", + m_settings->HasCustomTagPrefix, m_settings->CustomTagPrefix, IO_Opts); Options::AddValueOption("-stub", "filename stub", "prefix stub for output BAM files (default behavior is to use input filename, without .bam extension, as stub). If input is stdin and no stub provided, a timestamp is generated as the stub.", "", m_settings->HasCustomOutputStub, m_settings->CustomOutputStub, IO_Opts);