1 // ***************************************************************************
2 // bamtools_sam.h (c) 2010 Derek Barnett, Erik Garrison
3 // Marth Lab, Department of Biology, Boston College
4 // All rights reserved.
5 // ---------------------------------------------------------------------------
6 // Last modified: 26 May 2010
7 // ---------------------------------------------------------------------------
8 // Prints a BAM file in the text-based SAM format.
9 // ***************************************************************************
11 #ifndef BAMTOOLS_SAM_H
12 #define BAMTOOLS_SAM_H
18 #include "BamReader.h"
19 #include "bamtools_getopt.h"
// Prints the usage text for the 'bamtools sam' sub-command to stderr.
//
// Returns 0 unconditionally, so a caller can bail out with
// 'return BamSamHelp();'.
int BamSamHelp(void) {

    // Open design questions:
    //  - '--head' makes more sense than '--num' from a Unix perspective, but
    //    could be confused with SAM header info. It is also only the default
    //    case (reading from the beginning of the file).
    //  - Do we want to add a region specifier, e.g. 'chr2:1000..1500'? In that
    //    case '--num' still makes sense (give me up to N alignments from this
    //    region).

    std::cerr << std::endl;
    std::cerr << "usage:\tbamtools sam [--in BAM file] [--num N] [--no_header]" << std::endl;
    std::cerr << "\t-i, --in\tInput BAM file to generate SAM-format\t\t\t[default=stdin]" << std::endl;
    // Fully qualified std::endl, matching every other line of this function.
    std::cerr << "\t-n, --num N\tOnly print up to N alignments from beginning of file\t\t[default=50*]" << std::endl;
    std::cerr << "\t--no_header\tOmits SAM header information from output (alignments only)\t[default=off]" << std::endl;
    std::cerr << std::endl;
    std::cerr << "* - By default bamtools sam will print all alignments in SAM format." << std::endl;
    std::cerr << " However if '-n' or '--num' is included with no N, the default of 50 is used." << std::endl;
    std::cerr << std::endl;

    return 0;
}
41 static RefVector references;
43 void PrintSAM(const BamAlignment& a) {
46 // <QNAME> <FLAG> <RNAME> <POS> <MAPQ> <CIGAR> <MRNM> <MPOS> <ISIZE> <SEQ> <QUAL> [ <TAG>:<VTYPE>:<VALUE> [...] ]
48 // ******************************* //
49 // ** NOT FULLY IMPLEMENTED YET ** //
50 //******************************** //
52 // Todo : build CIGAR string
54 // there are some quirks, per the spec, regarding when to use '=' or not
56 // ******************************* //
59 // do validity check on RefID / MateRefID ??
63 std::string cigarString("CIGAR:NOT YET");
66 std::string tagString("TAG:NOT YET");
68 // print BamAlignment to stdout in SAM format
69 std::cout << a.Name << '\t'
70 << a.AlignmentFlag << '\t'
71 << references[a.RefID].RefName << '\t'
73 << a.MapQuality << '\t'
74 << cigarString << '\t'
75 << ( a.IsPaired() ? references[a.MateRefID].RefName : "*" ) << '\t'
76 << ( a.IsPaired() ? a.MatePosition : 0 ) << '\t'
77 << ( a.IsPaired() ? a.InsertSize : 0 ) << '\t'
78 << a.QueryBases << '\t'
79 << a.Qualities << '\t'
80 << tagString << std::endl;
83 int RunBamSam(int argc, char* argv[]) {
85 // else parse command line for args
86 GetOpt options(argc, argv, 1);
88 std::string inputFilename;
89 options.addOption('i', "in", &inputFilename);
91 std::string numberString;
92 options.addOptionalOption('n', "num", &numberString, "50");
94 bool isOmittingHeader;
95 options.addSwitch("no_header", &isOmittingHeader);
97 if ( !options.parse() ) return BamCoverageHelp();
98 if ( inputFilename.empty() ) { inputFilename = "stdin"; }
100 // maxNumberOfAlignments = all (if nothing specified)
101 // = 50 (if '-n' or '--num' but no N)
102 // = N (if '-n N' or '--num N')
103 int maxNumberOfAlignments = -1;
104 if ( !numberString.empty() ) { maxNumberOfAlignments = atoi(numberString.c_str()); }
106 // open our BAM reader
108 reader.Open(inputFilename);
110 // if header desired, retrieve and print to stdout
111 if ( !isOmittingHeader ) {
112 std::string header = reader.GetHeaderText();
113 std::cout << header << std::endl;
116 // store reference data
117 references = reader.GetReferenceData();
119 // print all alignments to stdout in SAM format
120 if ( maxNumberOfAlignments < 0 ) {
122 while( reader.GetNextAlignment(ba) ) {
127 // print first N alignments to stdout in SAM format
130 int alignmentsPrinted = 0;
131 while ( reader.GetNextAlignment(ba) && (alignmentsPrinted < maxNumberOfAlignments) ) {
142 } // namespace BamTools
144 #endif // BAMTOOLS_SAM_H