X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=src%2Fapi%2Finternal%2FSamFormatParser_p.cpp;h=195fdcd38b18336eb2344ff592b1d98c5a67d2fa;hb=9f1ce8c47aeadb6dc1320b52ee671c3341b97935;hp=aa690b883532f06ca173ec161095e4eb2c0dc014;hpb=ff5f2ec7c437660185a406d01739f42534105412;p=bamtools.git diff --git a/src/api/internal/SamFormatParser_p.cpp b/src/api/internal/SamFormatParser_p.cpp index aa690b8..195fdcd 100644 --- a/src/api/internal/SamFormatParser_p.cpp +++ b/src/api/internal/SamFormatParser_p.cpp @@ -1,16 +1,16 @@ // *************************************************************************** // SamFormatParser.cpp (c) 2010 Derek Barnett // Marth Lab, Department of Biology, Boston College -// All rights reserved. // --------------------------------------------------------------------------- -// Last modified: 23 December 2010 (DB) +// Last modified: 10 October 2011 (DB) // --------------------------------------------------------------------------- // Provides functionality for parsing SAM header text into SamHeader object // *************************************************************************** -#include -#include -#include +#include "api/SamConstants.h" +#include "api/SamHeader.h" +#include "api/internal/BamException_p.h" +#include "api/internal/SamFormatParser_p.h" using namespace BamTools; using namespace BamTools::Internal; @@ -36,16 +36,15 @@ void SamFormatParser::Parse(const string& headerText) { // other wise parse SAM lines istringstream headerStream(headerText); - string headerLine = ""; + string headerLine(""); while ( getline(headerStream, headerLine) ) ParseSamLine(headerLine); - return; } void SamFormatParser::ParseSamLine(const string& line) { // skip if line is not long enough to contain true values - if (line.length() < 5 ) return; + if ( line.length() < 5 ) return; // determine token at beginning of line const string firstToken = line.substr(0,3); @@ -55,8 +54,10 @@ void SamFormatParser::ParseSamLine(const string& line) { else if ( firstToken == Constants::SAM_RG_BEGIN_TOKEN) ParseRGLine(restOfLine); else if ( firstToken == Constants::SAM_PG_BEGIN_TOKEN) ParsePGLine(restOfLine); else if ( firstToken == Constants::SAM_CO_BEGIN_TOKEN) ParseCOLine(restOfLine); - else cerr << "SAM Format Error - unknown token: " << firstToken << endl; - return; + else { + const string message = string("unknown token: ") + firstToken; + throw BamException("SamFormatParser::ParseSamLine", message); + } } void SamFormatParser::ParseHDLine(const string& line) { @@ -75,17 +76,17 @@ void SamFormatParser::ParseHDLine(const string& line) { // set header contents if ( tokenTag == Constants::SAM_HD_VERSION_TAG ) m_header.Version = tokenValue; - else if ( tokenTag == Constants::SAM_HD_GROUPORDER_TAG ) m_header.GroupOrder = tokenValue; else if ( tokenTag == Constants::SAM_HD_SORTORDER_TAG ) m_header.SortOrder = tokenValue; - else - cerr << "SAM Format Error - unknown HD tag: " << tokenTag << endl; + else if ( tokenTag == Constants::SAM_HD_GROUPORDER_TAG ) m_header.GroupOrder = tokenValue; + else { + const string message = string("unknown HD tag: ") + tokenTag; + throw BamException("SamFormatParser::ParseHDLine", message); + } } - // if @HD line exists, VN must be provided - if ( !m_header.HasVersion() ) { - cerr << "SAM Format Error - @HD line is missing VN tag!" << endl; - return; - } + // check for required tags + if ( !m_header.HasVersion() ) + throw BamException("SamFormatParser::ParseHDLine", "@HD line is missing VN tag"); } void SamFormatParser::ParseSQLine(const string& line) { @@ -108,24 +109,20 @@ void SamFormatParser::ParseSQLine(const string& line) { if ( tokenTag == Constants::SAM_SQ_NAME_TAG ) seq.Name = tokenValue; else if ( tokenTag == Constants::SAM_SQ_LENGTH_TAG ) seq.Length = tokenValue; else if ( tokenTag == Constants::SAM_SQ_ASSEMBLYID_TAG ) seq.AssemblyID = tokenValue; - else if ( tokenTag == Constants::SAM_SQ_URI_TAG ) seq.URI = tokenValue; else if ( tokenTag == Constants::SAM_SQ_CHECKSUM_TAG ) seq.Checksum = tokenValue; else if ( tokenTag == Constants::SAM_SQ_SPECIES_TAG ) seq.Species = tokenValue; - else - cerr << "SAM Format Error - unknown SQ tag: " << tokenTag << endl; - } - - // if @SQ line exists, SN must be provided - if ( !seq.HasName() ) { - cerr << "SAM Format Error - @SQ line is missing SN tag!" << endl; - return; + else if ( tokenTag == Constants::SAM_SQ_URI_TAG ) seq.URI = tokenValue; + else { + const string message = string("unknown SQ tag: ") + tokenTag; + throw BamException("SamFormatParser::ParseSQLine", message); + } } - // if @SQ line exists, LN must be provided - if ( !seq.HasLength() ) { - cerr << "SAM Format Error - @SQ line is missing LN tag!" << endl; - return; - } + // check for required tags + if ( !seq.HasName() ) + throw BamException("SamFormatParser::ParseSQLine", "@SQ line is missing SN tag"); + if ( !seq.HasLength() ) + throw BamException("SamFormatParser::ParseSQLine", "@SQ line is missing LN tag"); // store SAM sequence entry m_header.Sequences.Add(seq); @@ -149,29 +146,26 @@ void SamFormatParser::ParseRGLine(const string& line) { // set read group contents if ( tokenTag == Constants::SAM_RG_ID_TAG ) rg.ID = tokenValue; - else if ( tokenTag == Constants::SAM_RG_SAMPLE_TAG ) rg.Sample = tokenValue; - else if ( tokenTag == Constants::SAM_RG_LIBRARY_TAG ) rg.Library = tokenValue; else if ( tokenTag == Constants::SAM_RG_DESCRIPTION_TAG ) rg.Description = tokenValue; + else if ( tokenTag == Constants::SAM_RG_FLOWORDER_TAG ) rg.FlowOrder = tokenValue; + else if ( tokenTag == Constants::SAM_RG_KEYSEQUENCE_TAG ) rg.KeySequence = tokenValue; + else if ( tokenTag == Constants::SAM_RG_LIBRARY_TAG ) rg.Library = tokenValue; else if ( tokenTag == Constants::SAM_RG_PLATFORMUNIT_TAG ) rg.PlatformUnit = tokenValue; else if ( tokenTag == Constants::SAM_RG_PREDICTEDINSERTSIZE_TAG ) rg.PredictedInsertSize = tokenValue; - else if ( tokenTag == Constants::SAM_RG_SEQCENTER_TAG ) rg.SequencingCenter = tokenValue; else if ( tokenTag == Constants::SAM_RG_PRODUCTIONDATE_TAG ) rg.ProductionDate = tokenValue; + else if ( tokenTag == Constants::SAM_RG_PROGRAM_TAG ) rg.Program = tokenValue; + else if ( tokenTag == Constants::SAM_RG_SAMPLE_TAG ) rg.Sample = tokenValue; + else if ( tokenTag == Constants::SAM_RG_SEQCENTER_TAG ) rg.SequencingCenter = tokenValue; else if ( tokenTag == Constants::SAM_RG_SEQTECHNOLOGY_TAG ) rg.SequencingTechnology = tokenValue; - else - cerr << "SAM Format Error - unknown RG tag: " << tokenTag << endl; - } - - // if @RG line exists, ID must be provided - if ( !rg.HasID() ) { - cerr << "SAM Format Error - @RG line is missing ID tag!" << endl; - return; + else { + const string message = string("unknown RG tag: ") + tokenTag; + throw BamException("SamFormatParser::ParseRGLine", message); + } } - // if @RG line exists, SM must be provided - if ( !rg.HasSample() ) { - cerr << "SAM Format Error - @RG line is missing SM tag!" << endl; - return; - } + // check for required tags + if ( !rg.HasID() ) + throw BamException("SamFormatParser::ParseRGLine", "@RG line is missing ID tag"); // store SAM read group entry m_header.ReadGroups.Add(rg); @@ -179,6 +173,8 @@ void SamFormatParser::ParseRGLine(const string& line) { void SamFormatParser::ParsePGLine(const string& line) { + SamProgram pg; + // split string into tokens vector tokens = Split(line, Constants::SAM_TAB); @@ -191,19 +187,24 @@ void SamFormatParser::ParsePGLine(const string& line) { const string tokenTag = (*tokenIter).substr(0,2); const string tokenValue = (*tokenIter).substr(3); - // set header contents - if ( tokenTag == Constants::SAM_PG_NAME_TAG ) m_header.ProgramName = tokenValue; - else if ( tokenTag == Constants::SAM_PG_VERSION_TAG ) m_header.ProgramVersion = tokenValue; - else if ( tokenTag == Constants::SAM_PG_COMMANDLINE_TAG ) m_header.ProgramCommandLine = tokenValue; - else - cerr << "SAM Format Error - unknown PG tag: " << tokenTag << endl; + // set program record contents + if ( tokenTag == Constants::SAM_PG_ID_TAG ) pg.ID = tokenValue; + else if ( tokenTag == Constants::SAM_PG_NAME_TAG ) pg.Name = tokenValue; + else if ( tokenTag == Constants::SAM_PG_COMMANDLINE_TAG ) pg.CommandLine = tokenValue; + else if ( tokenTag == Constants::SAM_PG_PREVIOUSPROGRAM_TAG ) pg.PreviousProgramID = tokenValue; + else if ( tokenTag == Constants::SAM_PG_VERSION_TAG ) pg.Version = tokenValue; + else { + const string message = string("unknown PG tag: ") + tokenTag; + throw BamException("SamFormatParser::ParsePGLine", message); + } } - // if @PG line exists, ID must be provided - if ( !m_header.HasProgramName() ) { - cerr << "SAM Format Error - @PG line is missing ID tag!" << endl; - return; - } + // check for required tags + if ( !pg.HasID() ) + throw BamException("SamFormatParser::ParsePGLine", "@PG line is missing ID tag"); + + // store SAM program entry + m_header.Programs.Add(pg); } void SamFormatParser::ParseCOLine(const string& line) {