// Marth Lab, Department of Biology, Boston College
// All rights reserved.
// ---------------------------------------------------------------------------
-// Last modified: 16 September 2010
+// Last modified: 23 September 2010
// ---------------------------------------------------------------------------
// Converts between BAM and a number of other formats
// ***************************************************************************
// PileupVisitor interface implementation
public:
-// void Visit(const int& refId, const int& position, const vector<BamAlignment>& alignments);
-
void Visit(const PileupPosition& pileupData);
// data members
// >BamAlignment.Name
// BamAlignment.QueryBases (up to FASTA_LINE_MAX bases per line)
// ...
+ //
+ // N.B. - QueryBases are reverse-complemented if aligned to reverse strand
// print header
m_out << "> " << a.Name << endl;
+ // handle reverse strand alignment - bases
+ string sequence = a.QueryBases;
+ if ( a.IsReverseStrand() )
+ Utilities::ReverseComplement(sequence);
+
// if sequence fits on single line
- if ( a.QueryBases.length() <= FASTA_LINE_MAX )
- m_out << a.QueryBases << endl;
+ if ( sequence.length() <= FASTA_LINE_MAX )
+ m_out << sequence << endl;
// else split over multiple lines
else {
size_t position = 0;
- size_t seqLength = a.QueryBases.length();
+ size_t seqLength = sequence.length(); // handle reverse strand alignment - bases & qualitiesth();
// write subsequences to each line
while ( position < (seqLength - FASTA_LINE_MAX) ) {
- m_out << a.QueryBases.substr(position, FASTA_LINE_MAX) << endl;
+ m_out << sequence.substr(position, FASTA_LINE_MAX) << endl;
position += FASTA_LINE_MAX;
}
// write final subsequence
- m_out << a.QueryBases.substr(position) << endl;
+ m_out << sequence.substr(position) << endl;
}
}
// BamAlignment.QueryBases
// +
// BamAlignment.Qualities
+ //
+ // N.B. - QueryBases are reverse-complemented (& Qualities reversed) if aligned to reverse strand .
+ // Name is appended "/1" or "/2" if paired-end, to reflect which mate this entry is.
+
+ // handle paired-end alignments
+ string name = a.Name;
+ if ( a.IsPaired() )
+ name.append( (a.IsFirstMate() ? "/1" : "/2") );
+
+ // handle reverse strand alignment - bases & qualities
+ string qualities = a.Qualities;
+ string sequence = a.QueryBases;
+ if ( a.IsReverseStrand() ) {
+ Utilities::Reverse(qualities);
+ Utilities::ReverseComplement(sequence);
+ }
- m_out << "@" << a.Name << endl
- << a.QueryBases << endl
- << "+" << endl
- << a.Qualities << endl;
+ // write to output stream
+ m_out << "@" << name << endl
+ << sequence << endl
+ << "+" << endl
+ << qualities << endl;
}
// print BamAlignment in JSON format
// Marth Lab, Department of Biology, Boston College
// All rights reserved.
// ---------------------------------------------------------------------------
-// Last modified: 3 September 2010
+// Last modified: 23 September 2010
// ---------------------------------------------------------------------------
// Provides general utilities used by BamTools sub-tools.
// ***************************************************************************
+#include <algorithm>
#include <cstdlib>
#include <fstream>
#include <iostream>
using namespace std;
using namespace BamTools;
+namespace BamTools {
+
+const char REVCOMP_LOOKUP[] = {'T', 0, 'G', 'H', 0, 0, 'C', 'D', 0, 0, 0, 0, 'K', 'N', 0, 0, 0, 'Y', 'W', 'A', 'A', 'B', 'S', 'X', 'R', 0 };
+
+} // namespace BamTools
+
// check if a file exists
bool Utilities::FileExists(const std::string& filename) {
ifstream f(filename.c_str(), ifstream::in);
return true;
}
+
+void Utilities::Reverse(string& sequence) {
+ reverse(sequence.begin(), sequence.end());
+}
+
+void Utilities::ReverseComplement(std::string& sequence) {
+
+ // do complement
+ size_t seqLength = sequence.length();
+ for ( size_t i = 0; i < seqLength; ++i )
+ sequence.replace(i, 1, 1, REVCOMP_LOOKUP[(int)sequence.at(i) - 65]);
+
+ // reverse it
+ Reverse(sequence);
+}
// Marth Lab, Department of Biology, Boston College
// All rights reserved.
// ---------------------------------------------------------------------------
-// Last modified: 30 August 2010
+// Last modified: 23 September 2010
// ---------------------------------------------------------------------------
// Provides general utilities used by BamTools sub-tools.
// ***************************************************************************
// Same as above, but accepts a BamMultiReader
static bool ParseRegionString(const std::string& regionString, const BamMultiReader& reader, BamRegion& region);
-
+ // sequence utilities
+ static void Reverse(std::string& sequence);
+ static void ReverseComplement(std::string& sequence);
};
} // namespace BamTools