// ***************************************************************************
// SamFormatParser.cpp (c) 2010 Derek Barnett
// Marth Lab, Department of Biology, Boston College
-// All rights reserved.
// ---------------------------------------------------------------------------
-// Last modified: 23 December 2010 (DB)
+// Last modified: 10 October 2011 (DB)
// ---------------------------------------------------------------------------
// Provides functionality for parsing SAM header text into SamHeader object
// ***************************************************************************
-#include <api/SamConstants.h>
-#include <api/SamHeader.h>
-#include <api/internal/SamFormatParser_p.h>
+#include "api/SamConstants.h"
+#include "api/SamHeader.h"
+#include "api/internal/BamException_p.h"
+#include "api/internal/SamFormatParser_p.h"
using namespace BamTools;
using namespace BamTools::Internal;
string headerLine("");
while ( getline(headerStream, headerLine) )
ParseSamLine(headerLine);
- return;
}
void SamFormatParser::ParseSamLine(const string& line) {
// skip if line is not long enough to contain true values
- if (line.length() < 5 ) return;
+ if ( line.length() < 5 ) return;
// determine token at beginning of line
const string firstToken = line.substr(0,3);
else if ( firstToken == Constants::SAM_RG_BEGIN_TOKEN) ParseRGLine(restOfLine);
else if ( firstToken == Constants::SAM_PG_BEGIN_TOKEN) ParsePGLine(restOfLine);
else if ( firstToken == Constants::SAM_CO_BEGIN_TOKEN) ParseCOLine(restOfLine);
- else
- cerr << "SamFormatParser ERROR: unknown token: " << firstToken << endl;
-
- return;
+ else {
+ const string message = string("unknown token: ") + firstToken;
+ throw BamException("SamFormatParser::ParseSamLine", message);
+ }
}
void SamFormatParser::ParseHDLine(const string& line) {
// set header contents
if ( tokenTag == Constants::SAM_HD_VERSION_TAG ) m_header.Version = tokenValue;
- else if ( tokenTag == Constants::SAM_HD_GROUPORDER_TAG ) m_header.GroupOrder = tokenValue;
else if ( tokenTag == Constants::SAM_HD_SORTORDER_TAG ) m_header.SortOrder = tokenValue;
- else
- cerr << "SamFormatParser ERROR: unknown HD tag: " << tokenTag << endl;
+ else if ( tokenTag == Constants::SAM_HD_GROUPORDER_TAG ) m_header.GroupOrder = tokenValue;
+ else {
+ const string message = string("unknown HD tag: ") + tokenTag;
+ throw BamException("SamFormatParser::ParseHDLine", message);
+ }
}
- // if @HD line exists, VN must be provided
- if ( !m_header.HasVersion() ) {
- cerr << "SamFormatParser ERROR: @HD line is missing VN tag" << endl;
- return;
- }
+ // check for required tags
+ if ( !m_header.HasVersion() )
+ throw BamException("SamFormatParser::ParseHDLine", "@HD line is missing VN tag");
}
void SamFormatParser::ParseSQLine(const string& line) {
if ( tokenTag == Constants::SAM_SQ_NAME_TAG ) seq.Name = tokenValue;
else if ( tokenTag == Constants::SAM_SQ_LENGTH_TAG ) seq.Length = tokenValue;
else if ( tokenTag == Constants::SAM_SQ_ASSEMBLYID_TAG ) seq.AssemblyID = tokenValue;
- else if ( tokenTag == Constants::SAM_SQ_URI_TAG ) seq.URI = tokenValue;
else if ( tokenTag == Constants::SAM_SQ_CHECKSUM_TAG ) seq.Checksum = tokenValue;
else if ( tokenTag == Constants::SAM_SQ_SPECIES_TAG ) seq.Species = tokenValue;
- else
- cerr << "SamFormatParser ERROR: unknown SQ tag: " << tokenTag << endl;
- }
-
- // if @SQ line exists, SN must be provided
- if ( !seq.HasName() ) {
- cerr << "SamFormatParser ERROR: @SQ line is missing SN tag" << endl;
- return;
+ else if ( tokenTag == Constants::SAM_SQ_URI_TAG ) seq.URI = tokenValue;
+ else {
+ const string message = string("unknown SQ tag: ") + tokenTag;
+ throw BamException("SamFormatParser::ParseSQLine", message);
+ }
}
- // if @SQ line exists, LN must be provided
- if ( !seq.HasLength() ) {
- cerr << "SamFormatParser ERROR: @SQ line is missing LN tag" << endl;
- return;
- }
+ // check for required tags
+ if ( !seq.HasName() )
+ throw BamException("SamFormatParser::ParseSQLine", "@SQ line is missing SN tag");
+ if ( !seq.HasLength() )
+ throw BamException("SamFormatParser::ParseSQLine", "@SQ line is missing LN tag");
// store SAM sequence entry
m_header.Sequences.Add(seq);
// set read group contents
if ( tokenTag == Constants::SAM_RG_ID_TAG ) rg.ID = tokenValue;
- else if ( tokenTag == Constants::SAM_RG_SAMPLE_TAG ) rg.Sample = tokenValue;
- else if ( tokenTag == Constants::SAM_RG_LIBRARY_TAG ) rg.Library = tokenValue;
else if ( tokenTag == Constants::SAM_RG_DESCRIPTION_TAG ) rg.Description = tokenValue;
+ else if ( tokenTag == Constants::SAM_RG_FLOWORDER_TAG ) rg.FlowOrder = tokenValue;
+ else if ( tokenTag == Constants::SAM_RG_KEYSEQUENCE_TAG ) rg.KeySequence = tokenValue;
+ else if ( tokenTag == Constants::SAM_RG_LIBRARY_TAG ) rg.Library = tokenValue;
else if ( tokenTag == Constants::SAM_RG_PLATFORMUNIT_TAG ) rg.PlatformUnit = tokenValue;
else if ( tokenTag == Constants::SAM_RG_PREDICTEDINSERTSIZE_TAG ) rg.PredictedInsertSize = tokenValue;
- else if ( tokenTag == Constants::SAM_RG_SEQCENTER_TAG ) rg.SequencingCenter = tokenValue;
else if ( tokenTag == Constants::SAM_RG_PRODUCTIONDATE_TAG ) rg.ProductionDate = tokenValue;
+ else if ( tokenTag == Constants::SAM_RG_PROGRAM_TAG ) rg.Program = tokenValue;
+ else if ( tokenTag == Constants::SAM_RG_SAMPLE_TAG ) rg.Sample = tokenValue;
+ else if ( tokenTag == Constants::SAM_RG_SEQCENTER_TAG ) rg.SequencingCenter = tokenValue;
else if ( tokenTag == Constants::SAM_RG_SEQTECHNOLOGY_TAG ) rg.SequencingTechnology = tokenValue;
- else
- cerr << "SamFormatParser ERROR: unknown RG tag: " << tokenTag << endl;
- }
-
- // if @RG line exists, ID must be provided
- if ( !rg.HasID() ) {
- cerr << "SamFormatParser ERROR: @RG line is missing ID tag" << endl;
- return;
+ else {
+ const string message = string("unknown RG tag: ") + tokenTag;
+ throw BamException("SamFormatParser::ParseRGLine", message);
+ }
}
- // if @RG line exists, SM must be provided
- if ( !rg.HasSample() ) {
- cerr << "SamFormatParser ERROR: @RG line is missing SM tag" << endl;
- return;
- }
+ // check for required tags
+ if ( !rg.HasID() )
+ throw BamException("SamFormatParser::ParseRGLine", "@RG line is missing ID tag");
// store SAM read group entry
m_header.ReadGroups.Add(rg);
void SamFormatParser::ParsePGLine(const string& line) {
+ SamProgram pg;
+
// split string into tokens
vector<string> tokens = Split(line, Constants::SAM_TAB);
const string tokenTag = (*tokenIter).substr(0,2);
const string tokenValue = (*tokenIter).substr(3);
- // set header contents
- if ( tokenTag == Constants::SAM_PG_NAME_TAG ) m_header.ProgramName = tokenValue;
- else if ( tokenTag == Constants::SAM_PG_VERSION_TAG ) m_header.ProgramVersion = tokenValue;
- else if ( tokenTag == Constants::SAM_PG_COMMANDLINE_TAG ) m_header.ProgramCommandLine = tokenValue;
- else
- cerr << "SamFormatParser ERROR: unknown PG tag: " << tokenTag << endl;
+ // set program record contents
+ if ( tokenTag == Constants::SAM_PG_ID_TAG ) pg.ID = tokenValue;
+ else if ( tokenTag == Constants::SAM_PG_NAME_TAG ) pg.Name = tokenValue;
+ else if ( tokenTag == Constants::SAM_PG_COMMANDLINE_TAG ) pg.CommandLine = tokenValue;
+ else if ( tokenTag == Constants::SAM_PG_PREVIOUSPROGRAM_TAG ) pg.PreviousProgramID = tokenValue;
+ else if ( tokenTag == Constants::SAM_PG_VERSION_TAG ) pg.Version = tokenValue;
+ else {
+ const string message = string("unknown PG tag: ") + tokenTag;
+ throw BamException("SamFormatParser::ParsePGLine", message);
+ }
}
- // if @PG line exists, ID must be provided
- if ( !m_header.HasProgramName() ) {
- cerr << "SamFormatParser ERROR:- @PG line is missing ID tag" << endl;
- return;
- }
+ // check for required tags
+ if ( !pg.HasID() )
+ throw BamException("SamFormatParser::ParsePGLine", "@PG line is missing ID tag");
+
+ // store SAM program entry
+ m_header.Programs.Add(pg);
}
void SamFormatParser::ParseCOLine(const string& line) {