X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=src%2Fapi%2FBamAlignment.cpp;h=c95e896c7333d527d726c953a7fcd660f7f42ffc;hb=c3a7c31347d42a926214e2508d713975d124e8c6;hp=78d7d6b22b8801e374d168a4684ce06cca83d86b;hpb=9f1ce8c47aeadb6dc1320b52ee671c3341b97935;p=bamtools.git

diff --git a/src/api/BamAlignment.cpp b/src/api/BamAlignment.cpp
index 78d7d6b..c95e896 100644
--- a/src/api/BamAlignment.cpp
+++ b/src/api/BamAlignment.cpp
@@ -2,7 +2,7 @@
 // BamAlignment.cpp (c) 2009 Derek Barnett
 // Marth Lab, Department of Biology, Boston College
 // ---------------------------------------------------------------------------
-// Last modified: 10 October 2011 (DB)
+// Last modified: 13 October 2011 (DB)
 // ---------------------------------------------------------------------------
 // Provides the BamAlignment data structure
 // ***************************************************************************
@@ -365,9 +365,18 @@ bool BamAlignment::FindTag(const std::string& tag,
             return true;
 
         // get the storage class and find the next tag
-        if ( *pTagStorageType == '\0' ) return false;
-        if ( !SkipToNextTag(*pTagStorageType, pTagData, numBytesParsed) ) return false;
-        if ( *pTagData == '\0' ) return false;
+        if ( *pTagStorageType == '\0' ) {
+            ErrorString = "unexpected null found - 1";
+            return false;
+        }
+        if ( !SkipToNextTag(*pTagStorageType, pTagData, numBytesParsed) ) {
+            ErrorString = "could not skip to next tag";
+            return false;
+        }
+        if ( *pTagData == '\0' ) {
+            ErrorString = "unexpected null found - 2";
+            return false;
+        }
     }
 
     // checked all tags, none match
@@ -443,6 +452,94 @@ std::string BamAlignment::GetErrorString(void) const {
     return ErrorString;
 }
 
+/*! \fn bool BamAlignment::GetSoftClips(std::vector<int>& clipSizes, std::vector<int>& readPositions, std::vector<int>& genomePositions, bool usePadded = false) const
+    \brief Identifies if an alignment has a soft clip. If so, identifies the
+           sizes of the soft clips, as well as their positions in the read and reference.
+
+    \param[out] clipSizes       vector of the sizes of each soft clip in the alignment
+    \param[out] readPositions   vector of the 0-based read locations of each soft clip in the alignment.
+                                These positions are basically indexes within the read, not genomic positions.
+    \param[out] genomePositions vector of the 0-based genome locations of each soft clip in the alignment
+    \param[in]  usePadded       inserted bases affect reported position. Default is false, so that
+                                reported position stays 'sync-ed' with reference coordinates.
+
+    \return \c true if any soft clips were found in the alignment
+*/
+bool BamAlignment::GetSoftClips(vector<int>& clipSizes,
+                                vector<int>& readPositions,
+                                vector<int>& genomePositions,
+                                bool usePadded) const
+{
+    // initialize positions & flags
+    int refPosition = Position;
+    int readPosition = 0;
+    bool softClipFound = false;
+    bool firstCigarOp = true;
+
+    // iterate over cigar operations
+    vector<CigarOp>::const_iterator cigarIter = CigarData.begin();
+    vector<CigarOp>::const_iterator cigarEnd = CigarData.end();
+    for ( ; cigarIter != cigarEnd; ++cigarIter) {
+        const CigarOp& op = (*cigarIter);
+
+        switch ( op.Type ) {
+
+            // increase both read & genome positions on CIGAR chars [DMXN=]
+            case Constants::BAM_CIGAR_DEL_CHAR :
+            case Constants::BAM_CIGAR_MATCH_CHAR :
+            case Constants::BAM_CIGAR_MISMATCH_CHAR :
+            case Constants::BAM_CIGAR_REFSKIP_CHAR :
+            case Constants::BAM_CIGAR_SEQMATCH_CHAR :
+                refPosition += op.Length;
+                readPosition += op.Length;
+                break;
+
+            // increase read position on insertion, genome position only if @usePadded is true
+            case Constants::BAM_CIGAR_INS_CHAR :
+                readPosition += op.Length;
+                if ( usePadded )
+                    refPosition += op.Length;
+                break;
+
+            case Constants::BAM_CIGAR_SOFTCLIP_CHAR :
+
+                softClipFound = true;
+
+                //////////////////////////////////////////////////////////////////////////////
+                // if we are dealing with the *first* CIGAR operation
+                // for this alignment, we increment the read position so that
+                // the read and genome position of the clip are referring to the same base.
+                // For example, in the alignment below, the ref position would be 4, yet
+                // the read position would be 0. Thus, to "sync" the two,
+                // we need to increment the read position by the length of the
+                // soft clip.
+                // Read:  ATCGTTTCGTCCCTGC
+                // Ref:   GGGATTTCGTCCCTGC
+                // Cigar: SSSSMMMMMMMMMMMM
+                //
+                // NOTE: This only needs to be done if the soft clip is the _first_ CIGAR op.
+                //////////////////////////////////////////////////////////////////////////////
+                if ( firstCigarOp )
+                    readPosition += op.Length;
+
+                // track the soft clip's size, read position, and genome position
+                clipSizes.push_back(op.Length);
+                readPositions.push_back(readPosition);
+                genomePositions.push_back(refPosition);
+
+            // any other CIGAR operations have no effect
+            default :
+                break;
+        }
+
+        // clear our "first pass" flag
+        firstCigarOp = false;
+    }
+
+    // return whether any soft clips found
+    return softClipFound;
+}
+
 /*! \fn bool BamAlignment::GetTagType(const std::string& tag, char& type) const
     \brief Retrieves the BAM tag type-code associated with requested tag name.
 