From: Erik Garrison Date: Wed, 8 Sep 2010 19:36:39 +0000 (-0400) Subject: Merge branch 'master' of git://github.com/pezmaster31/bamtools X-Git-Url: https://git.donarmstrong.com/?a=commitdiff_plain;h=90bb3691f9aa2a2e8a4dd906c2439c7bc434eb78;hp=1ee9b6b3d98a4bfd5a60c583bc7847c545a60e32;p=bamtools.git Merge branch 'master' of git://github.com/pezmaster31/bamtools Conflicts: src/api/BamReader.cpp --- diff --git a/Makefile b/Makefile index 39873b1..1dc3f7b 100644 --- a/Makefile +++ b/Makefile @@ -21,16 +21,18 @@ export BAMTOOLS_VERSION = 0.7.0812 # define source subdirectories SUBDIRS = $(SRC_DIR)/api \ $(SRC_DIR)/utils \ - $(SRC_DIR)/toolkit + $(SRC_DIR)/third_party \ + $(SRC_DIR)/toolkit all: @echo "Building BamTools:" @echo "Version: $$BAMTOOLS_VERSION" @echo "=========================================================" - - mkdir -p $(BIN_DIR) - mkdir -p $(OBJ_DIR) - + @echo "" + @echo "- Creating target directories" + @mkdir -p $(BIN_DIR) + @mkdir -p $(OBJ_DIR) + @echo "" @for dir in $(SUBDIRS); do \ echo "- Building in $$dir"; \ $(MAKE) --no-print-directory -C $$dir; \ diff --git a/README b/README index b3bc1ea..53d80c0 100644 --- a/README +++ b/README @@ -4,59 +4,65 @@ README : BAMTOOLS BamTools: a C++ API & toolkit for reading/writing/manipulating BAM files. -I. Introduction - a. The API - b. The Toolkit +I. Introduction + a. The API + b. The Toolkit -II. Usage - a. The API - b. The Toolkit +II. Usage + a. The API + b. The Toolkit -III. Contact +III. License + +IV. Acknowledgements + +V. Contact ------------------------------------------------------------ I. Introduction: + BamTools provides both a programmer's API and an end-user's toolkit for handling BAM files. -Ia. The API + +Ia. The API: + The API consists of 2 main modules - BamReader and BamWriter. As you would expect, -BamReader provides read-access to BAM files, while BamWriter does the writing of BAM -files. 
BamReader provides an interface for random-access (jumping) in a BAM file, +BamReader provides read-access to BAM files, while BamWriter handles writing data to +BAM files. BamReader provides an interface for random-access (jumping) in a BAM file, as well as generating BAM index files. BamMultiReader is an extra module that allows you to manage multiple open BAM file for reading. It provides some validation & bookkeeping under the hood to keep all -files sync'ed for - -An additional file, BamAux.h, is included as well. -This file contains the common data structures and typedefs used throught the API. +files sync'ed up for you. -BGZF.h & BGZF.cpp contain our implementation of the Broad Institute's -BGZF compression format. +Additional files used by the API: + - BamAux.h : contains the common data structures and typedefs used throughout the API. + - BamIndex.* : implements both the standard BAM format index (".bai") as well as a + new BamTools-specific index (".bti"). + - BGZF.* : contains our implementation of the Broad Institute's BGZF compression format. -Ib. The Toolkit -If you've been using BamTools since the early days, you'll notice that our 'toy' API -examples (BamConversion, BamDump, and BamTrim) are now gone. In their place is a set -of features we hope you find useful. +Ib. The Toolkit: -** More explanation here ** +If you've been using BamTools since the early days, you'll notice that our 'toy' API +examples (BamConversion, BamDump, BamTrim,...) are now gone. 
We dumped these in favor of +a suite of small utilities that we hope both developers and end-users find useful: usage: bamtools [--help] COMMAND [ARGS] Available bamtools commands: convert Converts between BAM and a number of other formats - count Prints number of alignments in BAM file + count Prints number of alignments in BAM file(s) coverage Prints coverage statistics from the input BAM file filter Filters BAM file(s) by user-specified criteria header Prints BAM header information index Generates index for BAM file merge Merge multiple BAM files into single file - sam Prints the BAM file in SAM (text) format + random Select random alignments from existing BAM file(s) sort Sorts the BAM file according to some criteria - stats Prints some basic statistics from the input BAM file + stats Prints some basic statistics from input BAM file(s) See 'bamtools help COMMAND' for more information on a specific command. @@ -73,16 +79,16 @@ IIa. The API To use this API, you simply need to do 3 things: - 1 - Drop the BamTools files somewhere the compiler can find them. - (i.e. in your source tree, or somewhere else in your include path) + 1 - Drop the BamTools API files somewhere the compiler can find them. + (i.e. in your project's source tree, or somewhere else in your include path) 2 - Import BamTools API with the following lines of code - #include "BamReader.h" // as needed + #include "BamReader.h" // or "BamMultiReader.h", as needed #include "BamWriter.h" // as needed using namespace BamTools; 3 - Compile with '-lz' ('l' as in Lima) to access ZLIB compression library - (For VS users, I can provide you zlib headers - just contact me). + (For MSVC users, I can provide you modified zlib headers - just contact me). See any included programs and Makefile for more specific compiling/usage examples. See comments in the header files for more detailed API documentation. @@ -94,7 +100,23 @@ IIb. The Toolkit ------------------------------------------------------------ -III. 
Contact : +III. License : + +Both the BamTools API and toolkit are released under the MIT License. +Copyright (c) 2009-2010 Derek Barnett, Erik Garrison, Gabor Marth, Michael Stromberg +See file LICENSE for details. + +------------------------------------------------------------ + +IV. Acknowledgements : + + * Aaron Quinlan for several key feature ideas and bug fix contributions + * Baptiste Lepilleur for the public-domain JSON parser (JsonCPP) + * Heng Li, author of SAMtools - the original C-language BAM API/toolkit. + +------------------------------------------------------------ + +V. Contact : Feel free to contact me with any questions, comments, suggestions, bug reports, etc. - Derek Barnett @@ -105,3 +127,4 @@ Biology Dept., Boston College Email: barnetde@bc.edu Project Websites: http://github.com/pezmaster31/bamtools (ACTIVE SUPPORT) http://sourceforge.net/projects/bamtools (major updates only) + diff --git a/src/api/BamAux.h b/src/api/BamAux.h index a39abb0..7a7fb1c 100644 --- a/src/api/BamAux.h +++ b/src/api/BamAux.h @@ -3,7 +3,7 @@ // Marth Lab, Department of Biology, Boston College // All rights reserved. // --------------------------------------------------------------------------- -// Last modified: 27 July 2010 (DB) +// Last modified: 3 September 2010 (DB) // --------------------------------------------------------------------------- // Provides the basic constants, data structures, etc. 
for using BAM files // *************************************************************************** @@ -19,6 +19,8 @@ // C++ includes #include +#include +#include #include #include #include @@ -350,6 +352,11 @@ inline void SwapEndian_64p(char* data) { SwapEndian_64(value); } +inline bool FileExists(const std::string& filename) { + std::ifstream f(filename.c_str(), std::ifstream::in); + return !f.fail(); +} + // ---------------------------------------------------------------- // BamAlignment member methods diff --git a/src/api/BamIndex.cpp b/src/api/BamIndex.cpp index 5f636d1..59a1c9c 100644 --- a/src/api/BamIndex.cpp +++ b/src/api/BamIndex.cpp @@ -3,7 +3,7 @@ // Marth Lab, Department of Biology, Boston College // All rights reserved. // --------------------------------------------------------------------------- -// Last modified: 17 August 2010 (DB) +// Last modified: 3 September 2010 (DB) // --------------------------------------------------------------------------- // Provides index functionality - both for the default (standardized) BAM // index format (.bai) as well as a BamTools-specific (nonstandard) index @@ -13,7 +13,6 @@ #include #include #include -// #include #include #include "BamIndex.h" #include "BamReader.h" @@ -48,12 +47,12 @@ bool BamIndex::HasAlignments(const int& referenceID) { // ######################################################################################### // ------------------------------- -// BamDefaultIndex structs & typedefs +// BamStandardIndex structs & typedefs namespace BamTools { // -------------------------------------------------- -// BamDefaultIndex data structures & typedefs +// BamStandardIndex data structures & typedefs struct Chunk { // data members @@ -90,26 +89,26 @@ struct ReferenceIndex { { } }; -typedef vector BamDefaultIndexData; +typedef vector BamStandardIndexData; } // namespace BamTools // ------------------------------- -// BamDefaultIndex implementation +// BamStandardIndex implementation -struct 
BamDefaultIndex::BamDefaultIndexPrivate { +struct BamStandardIndex::BamStandardIndexPrivate { // ------------------------- // data members - BamDefaultIndexData m_indexData; - BamDefaultIndex* m_parent; + BamStandardIndexData m_indexData; + BamStandardIndex* m_parent; // ------------------------- // ctor & dtor - BamDefaultIndexPrivate(BamDefaultIndex* parent) : m_parent(parent) { } - ~BamDefaultIndexPrivate(void) { } + BamStandardIndexPrivate(BamStandardIndex* parent) : m_parent(parent) { } + ~BamStandardIndexPrivate(void) { m_indexData.clear(); } // ------------------------- // internal methods @@ -125,20 +124,19 @@ struct BamDefaultIndex::BamDefaultIndexPrivate { }; -BamDefaultIndex::BamDefaultIndex(BgzfData* bgzf, BamReader* reader, bool isBigEndian) +BamStandardIndex::BamStandardIndex(BgzfData* bgzf, BamReader* reader, bool isBigEndian) : BamIndex(bgzf, reader, isBigEndian) { - d = new BamDefaultIndexPrivate(this); + d = new BamStandardIndexPrivate(this); } -BamDefaultIndex::~BamDefaultIndex(void) { - d->m_indexData.clear(); +BamStandardIndex::~BamStandardIndex(void) { delete d; d = 0; } // calculate bins that overlap region -int BamDefaultIndex::BamDefaultIndexPrivate::BinsFromRegion(const BamRegion& region, const bool isRightBoundSpecified, uint16_t bins[MAX_BIN]) { +int BamStandardIndex::BamStandardIndexPrivate::BinsFromRegion(const BamRegion& region, const bool isRightBoundSpecified, uint16_t bins[MAX_BIN]) { // get region boundaries uint32_t begin = (unsigned int)region.LeftPosition; @@ -169,7 +167,7 @@ int BamDefaultIndex::BamDefaultIndexPrivate::BinsFromRegion(const BamRegion& reg return i; } -bool BamDefaultIndex::Build(void) { +bool BamStandardIndex::Build(void) { // be sure reader & BGZF file are valid & open for reading if ( m_reader == 0 || m_BGZF == 0 || !m_BGZF->IsOpen ) @@ -249,8 +247,8 @@ bool BamDefaultIndex::Build(void) { // update saveRefID saveRefID = bAlignment.RefID; - // if invalid RefID, break out (why?) 
- if ( saveRefID < 0 ) { break; } + // if invalid RefID, break out + if ( saveRefID < 0 ) break; } // make sure that current file pointer is beyond lastOffset @@ -280,8 +278,8 @@ bool BamDefaultIndex::Build(void) { // iterate through references in index // store whether reference has data & // sort offsets in linear offset vector - BamDefaultIndexData::iterator indexIter = d->m_indexData.begin(); - BamDefaultIndexData::iterator indexEnd = d->m_indexData.end(); + BamStandardIndexData::iterator indexIter = d->m_indexData.begin(); + BamStandardIndexData::iterator indexEnd = d->m_indexData.end(); for ( int i = 0; indexIter != indexEnd; ++indexIter, ++i ) { // get reference index data @@ -300,7 +298,7 @@ bool BamDefaultIndex::Build(void) { return m_reader->Rewind(); } -bool BamDefaultIndex::GetOffsets(const BamRegion& region, const bool isRightBoundSpecified, vector& offsets) { +bool BamStandardIndex::GetOffsets(const BamRegion& region, const bool isRightBoundSpecified, vector& offsets) { // calculate which bins overlap this region uint16_t* bins = (uint16_t*)calloc(MAX_BIN, 2); @@ -321,6 +319,7 @@ bool BamDefaultIndex::GetOffsets(const BamRegion& region, const bool isRightBoun map::const_iterator binIter = binMap.find(binKey); if ( (binIter != binMap.end()) && ((*binIter).first == binKey) ) { + // iterate over chunks const ChunkVector& chunks = (*binIter).second; std::vector::const_iterator chunksIter = chunks.begin(); std::vector::const_iterator chunksEnd = chunks.end(); @@ -345,7 +344,7 @@ bool BamDefaultIndex::GetOffsets(const BamRegion& region, const bool isRightBoun } // saves BAM bin entry for index -void BamDefaultIndex::BamDefaultIndexPrivate::InsertBinEntry(BamBinMap& binMap, +void BamStandardIndex::BamStandardIndexPrivate::InsertBinEntry(BamBinMap& binMap, const uint32_t& saveBin, const uint64_t& saveOffset, const uint64_t& lastOffset) @@ -371,7 +370,7 @@ void BamDefaultIndex::BamDefaultIndexPrivate::InsertBinEntry(BamBinMap& bin } // saves linear offset 
entry for index -void BamDefaultIndex::BamDefaultIndexPrivate::InsertLinearOffset(LinearOffsetVector& offsets, +void BamStandardIndex::BamStandardIndexPrivate::InsertLinearOffset(LinearOffsetVector& offsets, const BamAlignment& bAlignment, const uint64_t& lastOffset) { @@ -392,7 +391,7 @@ void BamDefaultIndex::BamDefaultIndexPrivate::InsertLinearOffset(LinearOffsetVec } } -bool BamDefaultIndex::Load(const string& filename) { +bool BamStandardIndex::Load(const string& filename) { // open index file, abort on error FILE* indexStream = fopen(filename.c_str(), "rb"); @@ -416,9 +415,9 @@ bool BamDefaultIndex::Load(const string& filename) { // get number of reference sequences uint32_t numRefSeqs; elementsRead = fread(&numRefSeqs, 4, 1, indexStream); - if ( m_isBigEndian ) { SwapEndian_32(numRefSeqs); } + if ( m_isBigEndian ) SwapEndian_32(numRefSeqs); - // intialize space for BamDefaultIndexData data structure + // intialize space for BamStandardIndexData data structure d->m_indexData.reserve(numRefSeqs); // iterate over reference sequences @@ -427,7 +426,7 @@ bool BamDefaultIndex::Load(const string& filename) { // get number of bins for this reference sequence int32_t numBins; elementsRead = fread(&numBins, 4, 1, indexStream); - if ( m_isBigEndian ) { SwapEndian_32(numBins); } + if ( m_isBigEndian ) SwapEndian_32(numBins); if ( numBins > 0 ) { RefData& refEntry = m_references[i]; @@ -482,12 +481,13 @@ bool BamDefaultIndex::Load(const string& filename) { binMap.insert( pair(binID, regionChunks) ); } + // ----------------------------------------------------- // load linear index for this reference sequence // get number of linear offsets int32_t numLinearOffsets; elementsRead = fread(&numLinearOffsets, 4, 1, indexStream); - if ( m_isBigEndian ) { SwapEndian_32(numLinearOffsets); } + if ( m_isBigEndian ) SwapEndian_32(numLinearOffsets); // intialize LinearOffsetVector LinearOffsetVector offsets; @@ -498,7 +498,7 @@ bool BamDefaultIndex::Load(const string& filename) { for 
( int j = 0; j < numLinearOffsets; ++j ) { // read a linear offset & store elementsRead = fread(&linearOffset, 8, 1, indexStream); - if ( m_isBigEndian ) { SwapEndian_64(linearOffset); } + if ( m_isBigEndian ) SwapEndian_64(linearOffset); offsets.push_back(linearOffset); } @@ -515,11 +515,11 @@ bool BamDefaultIndex::Load(const string& filename) { } // merges 'alignment chunks' in BAM bin (used for index building) -void BamDefaultIndex::BamDefaultIndexPrivate::MergeChunks(void) { +void BamStandardIndex::BamStandardIndexPrivate::MergeChunks(void) { // iterate over reference enties - BamDefaultIndexData::iterator indexIter = m_indexData.begin(); - BamDefaultIndexData::iterator indexEnd = m_indexData.end(); + BamStandardIndexData::iterator indexIter = m_indexData.begin(); + BamStandardIndexData::iterator indexEnd = m_indexData.end(); for ( ; indexIter != indexEnd; ++indexIter ) { // get BAM bin map for this reference @@ -533,7 +533,7 @@ void BamDefaultIndex::BamDefaultIndexPrivate::MergeChunks(void) { // get chunk vector for this bin ChunkVector& binChunks = (*binIter).second; - if ( binChunks.size() == 0 ) { continue; } + if ( binChunks.size() == 0 ) continue; ChunkVector mergedChunks; mergedChunks.push_back( binChunks[0] ); @@ -573,7 +573,7 @@ void BamDefaultIndex::BamDefaultIndexPrivate::MergeChunks(void) { // writes in-memory index data out to file // N.B. 
- (this is the original BAM filename, method will modify it to use applicable extension) -bool BamDefaultIndex::Write(const std::string& bamFilename) { +bool BamStandardIndex::Write(const std::string& bamFilename) { string indexFilename = bamFilename + ".bai"; FILE* indexStream = fopen(indexFilename.c_str(), "wb"); @@ -587,12 +587,12 @@ bool BamDefaultIndex::Write(const std::string& bamFilename) { // write number of reference sequences int32_t numReferenceSeqs = d->m_indexData.size(); - if ( m_isBigEndian ) { SwapEndian_32(numReferenceSeqs); } + if ( m_isBigEndian ) SwapEndian_32(numReferenceSeqs); fwrite(&numReferenceSeqs, 4, 1, indexStream); // iterate over reference sequences - BamDefaultIndexData::const_iterator indexIter = d->m_indexData.begin(); - BamDefaultIndexData::const_iterator indexEnd = d->m_indexData.end(); + BamStandardIndexData::const_iterator indexIter = d->m_indexData.begin(); + BamStandardIndexData::const_iterator indexEnd = d->m_indexData.end(); for ( ; indexIter != indexEnd; ++ indexIter ) { // get reference index data @@ -602,7 +602,7 @@ bool BamDefaultIndex::Write(const std::string& bamFilename) { // write number of bins int32_t binCount = binMap.size(); - if ( m_isBigEndian ) { SwapEndian_32(binCount); } + if ( m_isBigEndian ) SwapEndian_32(binCount); fwrite(&binCount, 4, 1, indexStream); // iterate over bins @@ -615,12 +615,12 @@ bool BamDefaultIndex::Write(const std::string& bamFilename) { const ChunkVector& binChunks = (*binIter).second; // save BAM bin key - if ( m_isBigEndian ) { SwapEndian_32(binKey); } + if ( m_isBigEndian ) SwapEndian_32(binKey); fwrite(&binKey, 4, 1, indexStream); // save chunk count int32_t chunkCount = binChunks.size(); - if ( m_isBigEndian ) { SwapEndian_32(chunkCount); } + if ( m_isBigEndian ) SwapEndian_32(chunkCount); fwrite(&chunkCount, 4, 1, indexStream); // iterate over chunks @@ -646,7 +646,7 @@ bool BamDefaultIndex::Write(const std::string& bamFilename) { // write linear offsets size int32_t offsetSize = 
offsets.size(); - if ( m_isBigEndian ) { SwapEndian_32(offsetSize); } + if ( m_isBigEndian ) SwapEndian_32(offsetSize); fwrite(&offsetSize, 4, 1, indexStream); // iterate over linear offsets @@ -656,7 +656,7 @@ bool BamDefaultIndex::Write(const std::string& bamFilename) { // write linear offset value uint64_t linearOffset = (*offsetIter); - if ( m_isBigEndian ) { SwapEndian_64(linearOffset); } + if ( m_isBigEndian ) SwapEndian_64(linearOffset); fwrite(&linearOffset, 8, 1, indexStream); } } @@ -760,7 +760,6 @@ bool BamToolsIndex::Build(void) { // if block is full, get offset for next block, reset currentBlockCount if ( currentBlockCount == d->m_blockSize ) { - d->m_indexData.push_back( BamToolsIndexEntry(blockStartOffset, blockStartId, blockStartPosition) ); blockStartOffset = m_BGZF->Tell(); currentBlockCount = 0; @@ -797,8 +796,7 @@ bool BamToolsIndex::GetOffsets(const BamRegion& region, const bool isRightBoundS } // no index was found - if ( previousOffset == -1 ) - return false; + if ( previousOffset == -1 ) return false; // store offset & return success offsets.push_back(previousOffset); @@ -828,12 +826,12 @@ bool BamToolsIndex::Load(const string& filename) { // read in block size elementsRead = fread(&d->m_blockSize, sizeof(d->m_blockSize), 1, indexStream); - if ( m_isBigEndian ) { SwapEndian_32(d->m_blockSize); } + if ( m_isBigEndian ) SwapEndian_32(d->m_blockSize); // read in number of offsets uint32_t numOffsets; elementsRead = fread(&numOffsets, sizeof(numOffsets), 1, indexStream); - if ( m_isBigEndian ) { SwapEndian_32(numOffsets); } + if ( m_isBigEndian ) SwapEndian_32(numOffsets); // reserve space for index data d->m_indexData.reserve(numOffsets); @@ -885,12 +883,12 @@ bool BamToolsIndex::Write(const std::string& bamFilename) { // write block size int32_t blockSize = d->m_blockSize; - if ( m_isBigEndian ) { SwapEndian_32(blockSize); } + if ( m_isBigEndian ) SwapEndian_32(blockSize); fwrite(&blockSize, sizeof(blockSize), 1, indexStream); // write number 
of offset entries uint32_t numOffsets = d->m_indexData.size(); - if ( m_isBigEndian ) { SwapEndian_32(numOffsets); } + if ( m_isBigEndian ) SwapEndian_32(numOffsets); fwrite(&numOffsets, sizeof(numOffsets), 1, indexStream); // iterate over offset entries diff --git a/src/api/BamIndex.h b/src/api/BamIndex.h index b9ce7d0..d92fe65 100644 --- a/src/api/BamIndex.h +++ b/src/api/BamIndex.h @@ -3,16 +3,16 @@ // Marth Lab, Department of Biology, Boston College // All rights reserved. // --------------------------------------------------------------------------- -// Last modified: 17 August 2010 (DB) +// Last modified: 3 September 2010 (DB) // --------------------------------------------------------------------------- -// Provides index functionality - both for the default (standardized) BAM -// index format (.bai) as well as a BamTools-specific (nonstandard) index -// format (.bti). +// Provides index functionality - both for the standardized BAM index format +// (".bai") as well as a BamTools-specific (nonstandard) index format (".bti"). 
// *************************************************************************** #ifndef BAM_INDEX_H #define BAM_INDEX_H +#include #include #include #include "BamAux.h" @@ -26,25 +26,49 @@ class BgzfData; // BamIndex base class class BamIndex { + // ctor & dtor public: - BamIndex(BamTools::BgzfData* bgzf, - BamTools::BamReader* reader, - bool isBigEndian); + BamIndex(BamTools::BgzfData* bgzf, BamTools::BamReader* reader, bool isBigEndian); virtual ~BamIndex(void) { } + // index interface public: // creates index data (in-memory) from current reader data virtual bool Build(void) =0; + // returns supported file extension + virtual const std::string Extension(void) const =0; // calculates offset(s) for a given region virtual bool GetOffsets(const BamTools::BamRegion& region, const bool isRightBoundSpecified, std::vector& offsets) =0; - // loads existing data from file into memory - virtual bool Load(const std::string& filename) =0; // returns whether reference has alignments or no virtual bool HasAlignments(const int& referenceID); + // loads existing data from file into memory + virtual bool Load(const std::string& filename) =0; // writes in-memory index data out to file // N.B. 
- (this is the original BAM filename, method will modify it to use applicable extension) virtual bool Write(const std::string& bamFilename) =0; + // factory methods for returning proper BamIndex-derived type based on available index files + public: + + // returns index based on BAM filename 'stub' + // checks first for preferred type, returns that type if found + // (if not found, attmempts to load other type(s), returns 0 if NONE found) + // + // ** default preferred type is BamToolsIndex ** use this anytime it exists + enum PreferredIndexType { BAMTOOLS = 0, STANDARD }; + static BamIndex* FromBamFilename(const std::string& bamFilename, + BamTools::BgzfData* bgzf, + BamTools::BamReader* reader, + bool isBigEndian, + const BamIndex::PreferredIndexType& type = BamIndex::BAMTOOLS); + + // returns index based on explicitly named index file (or 0 if not found) + static BamIndex* FromIndexFilename(const std::string& indexFilename, + BamTools::BgzfData* bgzf, + BamTools::BamReader* reader, + bool isBigEndian); + + // data members protected: BamTools::BgzfData* m_BGZF; BamTools::BamReader* m_reader; @@ -53,23 +77,25 @@ class BamIndex { }; // -------------------------------------------------- -// BamDefaultIndex class +// BamStandardIndex class // -// implements default (per SAM/BAM spec) index file ops -class BamDefaultIndex : public BamIndex { +// implements standardized (per SAM/BAM spec) index file ops +class BamStandardIndex : public BamIndex { // ctor & dtor public: - BamDefaultIndex(BamTools::BgzfData* bgzf, + BamStandardIndex(BamTools::BgzfData* bgzf, BamTools::BamReader* reader, bool isBigEndian); - ~BamDefaultIndex(void); + ~BamStandardIndex(void); // interface (implements BamIndex virtual methods) public: // creates index data (in-memory) from current reader data bool Build(void); + // returns supported file extension + const std::string Extension(void) const { return std::string(".bai"); } // calculates offset(s) for a given region bool GetOffsets(const 
BamTools::BamRegion& region, const bool isRightBoundSpecified, std::vector& offsets); // loads existing data from file into memory @@ -80,8 +106,8 @@ class BamDefaultIndex : public BamIndex { // internal implementation private: - struct BamDefaultIndexPrivate; - BamDefaultIndexPrivate* d; + struct BamStandardIndexPrivate; + BamStandardIndexPrivate* d; }; // -------------------------------------------------- @@ -101,6 +127,8 @@ class BamToolsIndex : public BamIndex { public: // creates index data (in-memory) from current reader data bool Build(void); + // returns supported file extension + const std::string Extension(void) const { return std::string(".bti"); } // calculates offset(s) for a given region bool GetOffsets(const BamTools::BamRegion& region, const bool isRightBoundSpecified, std::vector& offsets); // loads existing data from file into memory @@ -115,6 +143,65 @@ class BamToolsIndex : public BamIndex { BamToolsIndexPrivate* d; }; +// -------------------------------------------------- +// BamIndex factory methods +// +// return proper BamIndex-derived type based on available index files + +inline +BamIndex* BamIndex::FromBamFilename(const std::string& bamFilename, + BamTools::BgzfData* bgzf, + BamTools::BamReader* reader, + bool isBigEndian, + const BamIndex::PreferredIndexType& type) +{ + // --------------------------------------------------- + // attempt to load preferred type first + + const std::string bamtoolsIndexFilename = bamFilename + ".bti"; + const bool bamtoolsIndexExists = BamTools::FileExists(bamtoolsIndexFilename); + if ( (type == BamIndex::BAMTOOLS) && bamtoolsIndexExists ) + return new BamToolsIndex(bgzf, reader, isBigEndian); + + const std::string standardIndexFilename = bamFilename + ".bai"; + const bool standardIndexExists = BamTools::FileExists(standardIndexFilename); + if ( (type == BamIndex::STANDARD) && standardIndexExists ) + return new BamStandardIndex(bgzf, reader, isBigEndian); + + // 
---------------------------------------------------- + // preferred type could not be found, try other (non-preferred) types + // if none found, return 0 + + if ( bamtoolsIndexExists ) return new BamToolsIndex(bgzf, reader, isBigEndian); + if ( standardIndexExists ) return new BamStandardIndex(bgzf, reader, isBigEndian); + return 0; +} + +inline +BamIndex* BamIndex::FromIndexFilename(const std::string& indexFilename, + BamTools::BgzfData* bgzf, + BamTools::BamReader* reader, + bool isBigEndian) +{ + // see if specified file exists + const bool indexExists = BamTools::FileExists(indexFilename); + if ( !indexExists ) return 0; + + const std::string bamtoolsIndexExtension(".bti"); + const std::string standardIndexExtension(".bai"); + + // if has bamtoolsIndexExtension + if ( indexFilename.find(bamtoolsIndexExtension) == (indexFilename.length() - bamtoolsIndexExtension.length()) ) + return new BamToolsIndex(bgzf, reader, isBigEndian); + + // if has standardIndexExtension + if ( indexFilename.find(standardIndexExtension) == (indexFilename.length() - standardIndexExtension.length()) ) + return new BamStandardIndex(bgzf, reader, isBigEndian); + + // otherwise, unsupported file type + return 0; +} + } // namespace BamTools -#endif // BAM_INDEX_H \ No newline at end of file +#endif // BAM_INDEX_H diff --git a/src/api/BamMultiReader.cpp b/src/api/BamMultiReader.cpp index 005b0b0..8ee4080 100644 --- a/src/api/BamMultiReader.cpp +++ b/src/api/BamMultiReader.cpp @@ -3,7 +3,7 @@ // Marth Lab, Department of Biology, Boston College // All rights reserved. // --------------------------------------------------------------------------- -// Last modified: 20 July 2010 (DB) +// Last modified: 3 September 2010 (DB) // --------------------------------------------------------------------------- // Uses BGZF routines were adapted from the bgzf.c code developed at the Broad // Institute. 
@@ -18,11 +18,12 @@ // C++ includes #include +#include +#include #include +#include #include #include -#include -#include // BamTools includes #include "BGZF.h" @@ -42,42 +43,118 @@ BamMultiReader::BamMultiReader(void) // destructor BamMultiReader::~BamMultiReader(void) { - Close(); // close the bam files - // clean up reader objects - for (vector >::iterator it = readers.begin(); it != readers.end(); ++it) { - delete it->first; - delete it->second; - } + Close(); } // close the BAM files void BamMultiReader::Close(void) { + + // close all BAM readers and clean up pointers + vector >::iterator readerIter = readers.begin(); + vector >::iterator readerEnd = readers.end(); + for ( ; readerIter != readerEnd; ++readerIter) { + + BamReader* reader = (*readerIter).first; + BamAlignment* alignment = (*readerIter).second; + + // close the reader + if ( reader) reader->Close(); + + // delete reader pointer + delete reader; + reader = 0; + + // delete alignment pointer + delete alignment; + alignment = 0; + } + + // clear out the container + readers.clear(); +} + +// saves index data to BAM index files (".bai"/".bti") where necessary, returns success/fail +bool BamMultiReader::CreateIndexes(bool useDefaultIndex) { + bool result = true; for (vector >::iterator it = readers.begin(); it != readers.end(); ++it) { BamReader* reader = it->first; - reader->Close(); // close the reader + result &= reader->CreateIndex(useDefaultIndex); } + return result; } -// updates the reference id stored in the BamMultiReader -// to reflect the current state of the readers -void BamMultiReader::UpdateReferenceID(void) { - // the alignments are sorted by position, so the first alignment will always have the lowest reference ID - if (alignments.begin()->second.second->RefID != CurrentRefID) { - // get the next reference id - // while there aren't any readers at the next ref id - // increment the ref id - int nextRefID = CurrentRefID; - while (alignments.begin()->second.second->RefID != nextRefID) { 
- ++nextRefID; - } - //cerr << "updating reference id from " << CurrentRefID << " to " << nextRefID << endl; - CurrentRefID = nextRefID; +// for debugging +void BamMultiReader::DumpAlignmentIndex(void) { + for (AlignmentIndex::const_iterator it = alignments.begin(); it != alignments.end(); ++it) { + cerr << it->first.first << ":" << it->first.second << " " << it->second.first->GetFilename() << endl; } } -// checks if any readers still have alignments -bool BamMultiReader::HasOpenReaders() { - return alignments.size() > 0; +// makes a virtual, unified header for all the bam files in the multireader +const string BamMultiReader::GetHeaderText(void) const { + + string mergedHeader = ""; + map readGroups; + + // foreach extraction entry (each BAM file) + for (vector >::const_iterator rs = readers.begin(); rs != readers.end(); ++rs) { + + map currentFileReadGroups; + + BamReader* reader = rs->first; + + stringstream header(reader->GetHeaderText()); + vector lines; + string item; + while (getline(header, item)) + lines.push_back(item); + + for (vector::const_iterator it = lines.begin(); it != lines.end(); ++it) { + + // get next line from header, skip if empty + string headerLine = *it; + if ( headerLine.empty() ) { continue; } + + // if first file, save HD & SQ entries + if ( rs == readers.begin() ) { + if ( headerLine.find("@HD") == 0 || headerLine.find("@SQ") == 0) { + mergedHeader.append(headerLine.c_str()); + mergedHeader.append(1, '\n'); + } + } + + // (for all files) append RG entries if they are unique + if ( headerLine.find("@RG") == 0 ) { + stringstream headerLineSs(headerLine); + string part, readGroupPart, readGroup; + while(std::getline(headerLineSs, part, '\t')) { + stringstream partSs(part); + string subtag; + std::getline(partSs, subtag, ':'); + if (subtag == "ID") { + std::getline(partSs, readGroup, ':'); + break; + } + } + if (readGroups.find(readGroup) == readGroups.end()) { // prevents duplicate @RG entries + mergedHeader.append(headerLine.c_str() ); 
+ mergedHeader.append(1, '\n'); + readGroups[readGroup] = true; + currentFileReadGroups[readGroup] = true; + } else { + // warn iff we are reading one file and discover duplicated @RG tags in the header + // otherwise, we emit no warning, as we might be merging multiple BAM files with identical @RG tags + if (currentFileReadGroups.find(readGroup) != currentFileReadGroups.end()) { + cerr << "WARNING: duplicate @RG tag " << readGroup + << " entry in header of " << reader->GetFilename() << endl; + } + } + } + } + } + + // return merged header text + return mergedHeader; } // get next alignment among all files @@ -147,6 +224,51 @@ bool BamMultiReader::GetNextAlignmentCore(BamAlignment& nextAlignment) { } +// --------------------------------------------------------------------------------------- +// +// NB: The following GetReferenceX() functions assume that we have identical +// references for all BAM files. We enforce this by invoking the +// validation function (ValidateReaders, defined below) to verify that our reference data +// is the same across all files on Open, so we will not encounter a situation +// in which there is a mismatch and we are still live. 
+// +// --------------------------------------------------------------------------------------- + +// returns the number of reference sequences +const int BamMultiReader::GetReferenceCount(void) const { + return readers.front().first->GetReferenceCount(); +} + +// returns vector of reference objects +const BamTools::RefVector BamMultiReader::GetReferenceData(void) const { + return readers.front().first->GetReferenceData(); +} + +// returns refID from reference name +const int BamMultiReader::GetReferenceID(const string& refName) const { + return readers.front().first->GetReferenceID(refName); +} + +// --------------------------------------------------------------------------------------- + +// checks if any readers still have alignments +bool BamMultiReader::HasOpenReaders() { + return alignments.size() > 0; +} + +// returns whether underlying BAM readers ALL have an index loaded +// this is useful to indicate whether Jump() or SetRegion() are possible +bool BamMultiReader::IsIndexLoaded(void) const { + bool ok = true; + vector >::const_iterator readerIter = readers.begin(); + vector >::const_iterator readerEnd = readers.end(); + for ( ; readerIter != readerEnd; ++readerIter ) { + const BamReader* reader = (*readerIter).first; + if ( reader ) ok &= reader->IsIndexLoaded(); + } + return ok; +} + // jumps to specified region(refID, leftBound) in BAM files, returns success/fail bool BamMultiReader::Jump(int refID, int position) { @@ -167,77 +289,34 @@ bool BamMultiReader::Jump(int refID, int position) { return result; } -bool BamMultiReader::SetRegion(const int& leftRefID, const int& leftPosition, const int& rightRefID, const int& rightPosition) { - - BamRegion region(leftRefID, leftPosition, rightRefID, rightPosition); - - return SetRegion(region); - -} - -bool BamMultiReader::SetRegion(const BamRegion& region) { - - Region = region; - - // NB: While it may make sense to track readers in which we can - // successfully SetRegion, In practice a failure of SetRegion 
means "no - // alignments here." It makes sense to simply accept the failure, - // UpdateAlignments(), and continue. - - for (vector >::iterator it = readers.begin(); it != readers.end(); ++it) { - it->first->SetRegion(region); - } - - UpdateAlignments(); - - return true; - -} - -void BamMultiReader::UpdateAlignments(void) { - // Update Alignments - alignments.clear(); - for (vector >::iterator it = readers.begin(); it != readers.end(); ++it) { - BamReader* br = it->first; - BamAlignment* ba = it->second; - if (br->GetNextAlignment(*ba)) { - alignments.insert(make_pair(make_pair(ba->RefID, ba->Position), - make_pair(br, ba))); - } else { - // assume BamReader end of region / EOF - } - } -} - // opens BAM files bool BamMultiReader::Open(const vector filenames, bool openIndexes, bool coreMode, bool useDefaultIndex) { // for filename in filenames fileNames = filenames; // save filenames in our multireader for (vector::const_iterator it = filenames.begin(); it != filenames.end(); ++it) { - string filename = *it; + + const string filename = *it; BamReader* reader = new BamReader; bool openedOK = true; if (openIndexes) { - if (useDefaultIndex) - openedOK = reader->Open(filename, filename + ".bai"); - else - openedOK = reader->Open(filename, filename + ".bti"); - } else { - openedOK = reader->Open(filename); // for merging, jumping is disallowed - } + + // leave index filename empty + // this allows BamReader & BamIndex to search for any available + // useDefaultIndex gives hint to prefer BAI over BTI + openedOK = reader->Open(filename, "", true, useDefaultIndex); + } + + // ignoring index file(s) + else openedOK = reader->Open(filename); // if file opened ok, check that it can be read if ( openedOK ) { bool fileOK = true; BamAlignment* alignment = new BamAlignment; - if (coreMode) { - fileOK &= reader->GetNextAlignmentCore(*alignment); - } else { - fileOK &= reader->GetNextAlignment(*alignment); - } + fileOK &= ( coreMode ? 
reader->GetNextAlignmentCore(*alignment) : reader->GetNextAlignment(*alignment) ); if (fileOK) { readers.push_back(make_pair(reader, alignment)); // store pointers to our readers for cleanup @@ -248,10 +327,9 @@ bool BamMultiReader::Open(const vector filenames, bool openIndexes, bool // if only file available & could not be read, return failure if ( filenames.size() == 1 ) return false; } - } - // TODO; any more error handling on openedOK ?? + // TODO; any further error handling when openedOK is false ?? else return false; } @@ -269,13 +347,6 @@ void BamMultiReader::PrintFilenames(void) { } } -// for debugging -void BamMultiReader::DumpAlignmentIndex(void) { - for (AlignmentIndex::const_iterator it = alignments.begin(); it != alignments.end(); ++it) { - cerr << it->first.first << ":" << it->first.second << " " << it->second.first->GetFilename() << endl; - } -} - // returns BAM file pointers to beginning of alignment data bool BamMultiReader::Rewind(void) { bool result = true; @@ -286,81 +357,58 @@ bool BamMultiReader::Rewind(void) { return result; } -// saves index data to BAM index files (".bai"/".bti") where necessary, returns success/fail -bool BamMultiReader::CreateIndexes(bool useDefaultIndex) { - bool result = true; - for (vector >::iterator it = readers.begin(); it != readers.end(); ++it) { - BamReader* reader = it->first; - result &= reader->CreateIndex(useDefaultIndex); - } - return result; +bool BamMultiReader::SetRegion(const int& leftRefID, const int& leftPosition, const int& rightRefID, const int& rightPosition) { + BamRegion region(leftRefID, leftPosition, rightRefID, rightPosition); + return SetRegion(region); } -// makes a virtual, unified header for all the bam files in the multireader -const string BamMultiReader::GetHeaderText(void) const { - - string mergedHeader = ""; - map readGroups; - - // foreach extraction entry (each BAM file) - for (vector >::const_iterator rs = readers.begin(); rs != readers.end(); ++rs) { - - map currentFileReadGroups; 
- - BamReader* reader = rs->first; +bool BamMultiReader::SetRegion(const BamRegion& region) { - stringstream header(reader->GetHeaderText()); - vector lines; - string item; - while (getline(header, item)) - lines.push_back(item); + Region = region; - for (vector::const_iterator it = lines.begin(); it != lines.end(); ++it) { + // NB: While it may make sense to track readers in which we can + // successfully SetRegion, In practice a failure of SetRegion means "no + // alignments here." It makes sense to simply accept the failure, + // UpdateAlignments(), and continue. - // get next line from header, skip if empty - string headerLine = *it; - if ( headerLine.empty() ) { continue; } + for (vector >::iterator it = readers.begin(); it != readers.end(); ++it) { + it->first->SetRegion(region); + } - // if first file, save HD & SQ entries - if ( rs == readers.begin() ) { - if ( headerLine.find("@HD") == 0 || headerLine.find("@SQ") == 0) { - mergedHeader.append(headerLine.c_str()); - mergedHeader.append(1, '\n'); - } - } + UpdateAlignments(); + return true; +} - // (for all files) append RG entries if they are unique - if ( headerLine.find("@RG") == 0 ) { - stringstream headerLineSs(headerLine); - string part, readGroupPart, readGroup; - while(std::getline(headerLineSs, part, '\t')) { - stringstream partSs(part); - string subtag; - std::getline(partSs, subtag, ':'); - if (subtag == "ID") { - std::getline(partSs, readGroup, ':'); - break; - } - } - if (readGroups.find(readGroup) == readGroups.end()) { // prevents duplicate @RG entries - mergedHeader.append(headerLine.c_str() ); - mergedHeader.append(1, '\n'); - readGroups[readGroup] = true; - currentFileReadGroups[readGroup] = true; - } else { - // warn iff we are reading one file and discover duplicated @RG tags in the header - // otherwise, we emit no warning, as we might be merging multiple BAM files with identical @RG tags - if (currentFileReadGroups.find(readGroup) != currentFileReadGroups.end()) { - cerr << "WARNING: 
duplicate @RG tag " << readGroup - << " entry in header of " << reader->GetFilename() << endl; - } - } - } +void BamMultiReader::UpdateAlignments(void) { + // Update Alignments + alignments.clear(); + for (vector >::iterator it = readers.begin(); it != readers.end(); ++it) { + BamReader* br = it->first; + BamAlignment* ba = it->second; + if (br->GetNextAlignment(*ba)) { + alignments.insert(make_pair(make_pair(ba->RefID, ba->Position), + make_pair(br, ba))); + } else { + // assume BamReader end of region / EOF } } +} - // return merged header text - return mergedHeader; +// updates the reference id stored in the BamMultiReader +// to reflect the current state of the readers +void BamMultiReader::UpdateReferenceID(void) { + // the alignments are sorted by position, so the first alignment will always have the lowest reference ID + if (alignments.begin()->second.second->RefID != CurrentRefID) { + // get the next reference id + // while there aren't any readers at the next ref id + // increment the ref id + int nextRefID = CurrentRefID; + while (alignments.begin()->second.second->RefID != nextRefID) { + ++nextRefID; + } + //cerr << "updating reference id from " << CurrentRefID << " to " << nextRefID << endl; + CurrentRefID = nextRefID; + } } // ValidateReaders checks that all the readers point to BAM files representing @@ -398,23 +446,3 @@ void BamMultiReader::ValidateReaders(void) const { } } } - -// NB: The following functions assume that we have identical references for all -// BAM files. We enforce this by invoking the above validation function -// (ValidateReaders) to verify that our reference data is the same across all -// files on Open, so we will not encounter a situation in which there is a -// mismatch and we are still live. 
- -// returns the number of reference sequences -const int BamMultiReader::GetReferenceCount(void) const { - return readers.front().first->GetReferenceCount(); -} - -// returns vector of reference objects -const BamTools::RefVector BamMultiReader::GetReferenceData(void) const { - return readers.front().first->GetReferenceData(); -} - -const int BamMultiReader::GetReferenceID(const string& refName) const { - return readers.front().first->GetReferenceID(refName); -} diff --git a/src/api/BamMultiReader.h b/src/api/BamMultiReader.h index bd36d71..cc30326 100644 --- a/src/api/BamMultiReader.h +++ b/src/api/BamMultiReader.h @@ -3,7 +3,7 @@ // Marth Lab, Department of Biology, Boston College // All rights reserved. // --------------------------------------------------------------------------- -// Last modified: 20 July 2010 (DB) +// Last modified: 3 September 2010 (DB) // --------------------------------------------------------------------------- // Functionality for simultaneously reading multiple BAM files // *************************************************************************** @@ -59,8 +59,14 @@ class BamMultiReader { // indexes. // @coreMode - setup our first alignments using GetNextAlignmentCore(); // also useful for merging - bool Open(const vector filenames, bool openIndexes = true, bool coreMode = false, bool useDefaultIndex = true); - + // @preferStandardIndex - look for standard BAM index ".bai" first. If false, + // will look for BamTools index ".bti". 
+ bool Open(const vector filenames, bool openIndexes = true, bool coreMode = false, bool preferStandardIndex = true); + + // returns whether underlying BAM readers ALL have an index loaded + // this is useful to indicate whether Jump() or SetRegion() are possible + bool IsIndexLoaded(void) const; + // performs random-access jump to reference, position bool Jump(int refID, int position = 0); diff --git a/src/api/BamReader.cpp b/src/api/BamReader.cpp index bb70f1f..93a991b 100644 --- a/src/api/BamReader.cpp +++ b/src/api/BamReader.cpp @@ -3,7 +3,7 @@ // Marth Lab, Department of Biology, Boston College // All rights reserved. // --------------------------------------------------------------------------- -// Last modified: 15 July 2010 (DB) +// Last modified: 7 September 2010 (DB) // --------------------------------------------------------------------------- // Uses BGZF routines were adapted from the bgzf.c code developed at the Broad // Institute. @@ -81,8 +81,11 @@ struct BamReader::BamReaderPrivate { // file operations void Close(void); - bool Jump(int refID, int position = 0); - bool Open(const string& filename, const string& indexFilename = ""); + bool Jump(int refID, int position); + bool Open(const std::string& filename, + const std::string& indexFilename, + const bool lookForIndex, + const bool preferStandardIndex); bool Rewind(void); bool SetRegion(const BamRegion& region); @@ -94,7 +97,7 @@ struct BamReader::BamReaderPrivate { int GetReferenceID(const string& refName) const; // index operations - bool CreateIndex(bool useDefaultIndex); + bool CreateIndex(bool useStandardIndex); // ------------------------------- // internal methods @@ -118,7 +121,7 @@ struct BamReader::BamReaderPrivate { // clear out inernal index data structure void ClearIndex(void); // loads index from BAM index file - bool LoadIndex(void); + bool LoadIndex(const bool lookForIndex, const bool preferStandardIndex); }; // ----------------------------------------------------- @@ -137,15 
+140,23 @@ BamReader::~BamReader(void) { // file operations void BamReader::Close(void) { d->Close(); } +bool BamReader::IsIndexLoaded(void) const { return d->IsIndexLoaded; } bool BamReader::IsOpen(void) const { return d->mBGZF.IsOpen; } -bool BamReader::Jump(int refID, int position) { +bool BamReader::Jump(int refID, int position) +{ d->Region.LeftRefID = refID; d->Region.LeftPosition = position; d->IsLeftBoundSpecified = true; d->IsRightBoundSpecified = false; return d->Jump(refID, position); } -bool BamReader::Open(const string& filename, const string& indexFilename) { return d->Open(filename, indexFilename); } +bool BamReader::Open(const std::string& filename, + const std::string& indexFilename, + const bool lookForIndex, + const bool preferStandardIndex) +{ + return d->Open(filename, indexFilename, lookForIndex, preferStandardIndex); +} bool BamReader::Rewind(void) { return d->Rewind(); } bool BamReader::SetRegion(const BamRegion& region) { return d->SetRegion(region); } bool BamReader::SetRegion(const int& leftRefID, const int& leftBound, const int& rightRefID, const int& rightBound) { @@ -164,7 +175,7 @@ int BamReader::GetReferenceID(const string& refName) const { return d->GetRefere const std::string BamReader::GetFilename(void) const { return d->Filename; } // index operations -bool BamReader::CreateIndex(bool useDefaultIndex) { return d->CreateIndex(useDefaultIndex); } +bool BamReader::CreateIndex(bool useStandardIndex) { return d->CreateIndex(useStandardIndex); } // ----------------------------------------------------- // BamReaderPrivate implementation @@ -196,7 +207,6 @@ bool BamReader::BamReaderPrivate::BuildCharData(BamAlignment& bAlignment) { // calculate character lengths/offsets const unsigned int dataLength = bAlignment.SupportData.BlockLength - BAM_CORE_SIZE; - const unsigned int cigarDataOffset = bAlignment.SupportData.QueryNameLength; const unsigned int seqDataOffset = bAlignment.SupportData.QueryNameLength + 
(bAlignment.SupportData.NumCigarOperations * 4); const unsigned int qualDataOffset = seqDataOffset + (bAlignment.SupportData.QuerySequenceLength+1)/2; const unsigned int tagDataOffset = qualDataOffset + bAlignment.SupportData.QuerySequenceLength; @@ -204,32 +214,13 @@ bool BamReader::BamReaderPrivate::BuildCharData(BamAlignment& bAlignment) { // set up char buffers const char* allCharData = bAlignment.SupportData.AllCharData.data(); - uint32_t* cigarData = (uint32_t*)(allCharData + cigarDataOffset); const char* seqData = ((const char*)allCharData) + seqDataOffset; const char* qualData = ((const char*)allCharData) + qualDataOffset; char* tagData = ((char*)allCharData) + tagDataOffset; - // store alignment name (depends on null char as terminator) + // store alignment name (relies on null char in name as terminator) bAlignment.Name.assign((const char*)(allCharData)); - - // save CigarOps - CigarOp op; - bAlignment.CigarData.clear(); - bAlignment.CigarData.reserve(bAlignment.SupportData.NumCigarOperations); - for (unsigned int i = 0; i < bAlignment.SupportData.NumCigarOperations; ++i) { - - // swap if necessary - if ( IsBigEndian ) { SwapEndian_32(cigarData[i]); } - - // build CigarOp structure - op.Length = (cigarData[i] >> BAM_CIGAR_SHIFT); - op.Type = CIGAR_LOOKUP[ (cigarData[i] & BAM_CIGAR_MASK) ]; - // save CigarOp - bAlignment.CigarData.push_back(op); - } - - // save query sequence bAlignment.QueryBases.clear(); bAlignment.QueryBases.reserve(bAlignment.SupportData.QuerySequenceLength); @@ -361,6 +352,7 @@ bool BamReader::BamReaderPrivate::BuildCharData(BamAlignment& bAlignment) { void BamReader::BamReaderPrivate::ClearIndex(void) { delete NewIndex; NewIndex = 0; + IsIndexLoaded = false; } // closes the BAM file @@ -381,24 +373,30 @@ void BamReader::BamReaderPrivate::Close(void) { IsRegionSpecified = false; } -// create BAM index from BAM file (keep structure in memory) and write to default index output file -bool BamReader::BamReaderPrivate::CreateIndex(bool 
useDefaultIndex) { +// creates index for BAM file, saves to file +// default behavior is to create the BAM standard index (".bai") +// set flag to false to create the BamTools-specific index (".bti") +bool BamReader::BamReaderPrivate::CreateIndex(bool useStandardIndex) { // clear out prior index data ClearIndex(); - // create default index - if ( useDefaultIndex ) - NewIndex = new BamDefaultIndex(&mBGZF, Parent, IsBigEndian); + // create index based on type requested + if ( useStandardIndex ) + NewIndex = new BamStandardIndex(&mBGZF, Parent, IsBigEndian); // create BamTools 'custom' index else NewIndex = new BamToolsIndex(&mBGZF, Parent, IsBigEndian); + // build new index bool ok = true; ok &= NewIndex->Build(); + IsIndexLoaded = ok; + + // attempt to save index data to file ok &= NewIndex->Write(Filename); - // return success/fail + // return success/fail of both building & writing index return ok; } @@ -433,16 +431,14 @@ bool BamReader::BamReaderPrivate::GetNextAlignmentCore(BamAlignment& bAlignment) BamReader::BamReaderPrivate::RegionState state = IsOverlap(bAlignment); // if alignment lies after region, return false - if ( state == AFTER_REGION ) - return false; + if ( state == AFTER_REGION ) return false; while ( state != WITHIN_REGION ) { // if no valid alignment available (likely EOF) return failure if ( !LoadNextAlignment(bAlignment) ) return false; // if alignment lies after region, return false (no available read within region) state = IsOverlap(bAlignment); - if ( state == AFTER_REGION) return false; - + if ( state == AFTER_REGION ) return false; } // return success (alignment found that overlaps region) @@ -461,9 +457,8 @@ int BamReader::BamReaderPrivate::GetReferenceID(const string& refName) const { vector refNames; RefVector::const_iterator refIter = References.begin(); RefVector::const_iterator refEnd = References.end(); - for ( ; refIter != refEnd; ++refIter) { + for ( ; refIter != refEnd; ++refIter) refNames.push_back( (*refIter).RefName ); - } // 
return 'index-of' refName ( if not found, returns refNames.size() ) return distance(refNames.begin(), find(refNames.begin(), refNames.end(), refName)); @@ -516,7 +511,7 @@ bool BamReader::BamReaderPrivate::Jump(int refID, int position) { // ----------------------------------------------------------------------- // check for existing index - if ( NewIndex == 0 ) return false; + if ( !IsIndexLoaded || NewIndex == 0 ) return false; // see if reference has alignments if ( !NewIndex->HasAlignments(refID) ) return false; // make sure position is valid @@ -567,7 +562,7 @@ void BamReader::BamReaderPrivate::LoadHeaderData(void) { // get BAM header text length mBGZF.Read(buffer, 4); unsigned int headerTextLength = BgzfData::UnpackUnsignedInt(buffer); - if ( IsBigEndian ) { SwapEndian_32(headerTextLength); } + if ( IsBigEndian ) SwapEndian_32(headerTextLength); // get BAM header text char* headerText = (char*)calloc(headerTextLength + 1, 1); @@ -578,36 +573,38 @@ void BamReader::BamReaderPrivate::LoadHeaderData(void) { free(headerText); } -// load existing index data from BAM index file (".bai"), return success/fail -bool BamReader::BamReaderPrivate::LoadIndex(void) { +// load existing index data from BAM index file (".bti" OR ".bai"), return success/fail +bool BamReader::BamReaderPrivate::LoadIndex(const bool lookForIndex, const bool preferStandardIndex) { // clear out any existing index data ClearIndex(); - // skip if index file empty - if ( IndexFilename.empty() ) - return false; - - // check supplied filename for index type - size_t defaultExtensionFound = IndexFilename.find(".bai"); - size_t customExtensionFound = IndexFilename.find(".bti"); - - // if SAM/BAM default (".bai") - if ( defaultExtensionFound != string::npos ) - NewIndex = new BamDefaultIndex(&mBGZF, Parent, IsBigEndian); - - // if BamTools custom index (".bti") - else if ( customExtensionFound != string::npos ) - NewIndex = new BamToolsIndex(&mBGZF, Parent, IsBigEndian); + // if no index filename provided, 
we need to look for available index files + if ( IndexFilename.empty() ) { + + // attempt to load BamIndex based on current Filename provided & preferStandardIndex flag + const BamIndex::PreferredIndexType type = (preferStandardIndex ? BamIndex::STANDARD : BamIndex::BAMTOOLS); + NewIndex = BamIndex::FromBamFilename(Filename, &mBGZF, Parent, IsBigEndian, type); + + // if null, return failure + if ( NewIndex == 0 ) return false; + + // generate proper IndexFilename based on type of index created + IndexFilename = Filename + NewIndex->Extension(); + } - // else unknown else { - fprintf(stderr, "ERROR: Unknown index file extension.\n"); - return false; + // attempt to load BamIndex based on IndexFilename provided by client + NewIndex = BamIndex::FromIndexFilename(IndexFilename, &mBGZF, Parent, IsBigEndian); + + // if null, return failure + if ( NewIndex == 0 ) return false; } - // return success of loading index data - return NewIndex->Load(IndexFilename); + // an index file was found + // return success of loading the index data from file + IsIndexLoaded = NewIndex->Load(IndexFilename); + return IsIndexLoaded; } // populates BamAlignment with alignment data under file pointer, returns success/fail @@ -618,16 +615,15 @@ bool BamReader::BamReaderPrivate::LoadNextAlignment(BamAlignment& bAlignment) { mBGZF.Read(buffer, 4); bAlignment.SupportData.BlockLength = BgzfData::UnpackUnsignedInt(buffer); if ( IsBigEndian ) { SwapEndian_32(bAlignment.SupportData.BlockLength); } - if ( bAlignment.SupportData.BlockLength == 0 ) { return false; } + if ( bAlignment.SupportData.BlockLength == 0 ) return false; // read in core alignment data, make sure the right size of data was read char x[BAM_CORE_SIZE]; - if ( mBGZF.Read(x, BAM_CORE_SIZE) != BAM_CORE_SIZE ) { return false; } + if ( mBGZF.Read(x, BAM_CORE_SIZE) != BAM_CORE_SIZE ) return false; if ( IsBigEndian ) { - for ( int i = 0; i < BAM_CORE_SIZE; i+=sizeof(uint32_t) ) { - SwapEndian_32p(&x[i]); - } + for ( int i = 0; i < 
BAM_CORE_SIZE; i+=sizeof(uint32_t) ) + SwapEndian_32p(&x[i]); } // set BamAlignment 'core' and 'support' data @@ -663,6 +659,27 @@ bool BamReader::BamReaderPrivate::LoadNextAlignment(BamAlignment& bAlignment) { // set success flag readCharDataOK = true; + + // save CIGAR ops + // need to calculate this here so that BamAlignment::GetEndPosition() performs correctly, + // even when BamReader::GetNextAlignmentCore() is called + const unsigned int cigarDataOffset = bAlignment.SupportData.QueryNameLength; + uint32_t* cigarData = (uint32_t*)(allCharData + cigarDataOffset); + CigarOp op; + bAlignment.CigarData.clear(); + bAlignment.CigarData.reserve(bAlignment.SupportData.NumCigarOperations); + for (unsigned int i = 0; i < bAlignment.SupportData.NumCigarOperations; ++i) { + + // swap if necessary + if ( IsBigEndian ) SwapEndian_32(cigarData[i]); + + // build CigarOp structure + op.Length = (cigarData[i] >> BAM_CIGAR_SHIFT); + op.Type = CIGAR_LOOKUP[ (cigarData[i] & BAM_CIGAR_MASK) ]; + + // save CigarOp + bAlignment.CigarData.push_back(op); + } } free(allCharData); @@ -676,8 +693,8 @@ void BamReader::BamReaderPrivate::LoadReferenceData(void) { char buffer[4]; mBGZF.Read(buffer, 4); unsigned int numberRefSeqs = BgzfData::UnpackUnsignedInt(buffer); - if ( IsBigEndian ) { SwapEndian_32(numberRefSeqs); } - if (numberRefSeqs == 0) { return; } + if ( IsBigEndian ) SwapEndian_32(numberRefSeqs); + if ( numberRefSeqs == 0 ) return; References.reserve((int)numberRefSeqs); // iterate over all references in header @@ -686,14 +703,14 @@ void BamReader::BamReaderPrivate::LoadReferenceData(void) { // get length of reference name mBGZF.Read(buffer, 4); unsigned int refNameLength = BgzfData::UnpackUnsignedInt(buffer); - if ( IsBigEndian ) { SwapEndian_32(refNameLength); } + if ( IsBigEndian ) SwapEndian_32(refNameLength); char* refName = (char*)calloc(refNameLength, 1); // get reference name and reference sequence length mBGZF.Read(refName, refNameLength); mBGZF.Read(buffer, 4); int 
refLength = BgzfData::UnpackSignedInt(buffer); - if ( IsBigEndian ) { SwapEndian_32(refLength); } + if ( IsBigEndian ) SwapEndian_32(refLength); // store data for reference RefData aReference; @@ -707,14 +724,14 @@ void BamReader::BamReaderPrivate::LoadReferenceData(void) { } // opens BAM file (and index) -bool BamReader::BamReaderPrivate::Open(const string& filename, const string& indexFilename) { +bool BamReader::BamReaderPrivate::Open(const string& filename, const string& indexFilename, const bool lookForIndex, const bool preferStandardIndex) { + // store filenames Filename = filename; IndexFilename = indexFilename; // open the BGZF file for reading, return false on failure - if ( !mBGZF.Open(filename, "rb") ) - return false; + if ( !mBGZF.Open(filename, "rb") ) return false; // retrieve header text & reference data LoadHeaderData(); @@ -723,12 +740,20 @@ bool BamReader::BamReaderPrivate::Open(const string& filename, const string& ind // store file offset of first alignment AlignmentsBeginOffset = mBGZF.Tell(); - // open index file & load index data (if exists) - if ( !IndexFilename.empty() ) - LoadIndex(); + // if no index filename provided + if ( IndexFilename.empty() ) { + + // client did not specify that index SHOULD be found + // useful for cases where sequential access is all that is required + if ( !lookForIndex ) return true; + + // otherwise, look for index file, return success/fail + return LoadIndex(lookForIndex, preferStandardIndex) ; + } - // return success - return true; + // client supplied an index filename + // attempt to load index data, return success/fail + return LoadIndex(lookForIndex, preferStandardIndex); } // returns BAM file pointer to beginning of alignment data @@ -763,10 +788,8 @@ bool BamReader::BamReaderPrivate::SetRegion(const BamRegion& region) { Region = region; // set flags - if ( region.LeftRefID >= 0 && region.LeftPosition >= 0 ) - IsLeftBoundSpecified = true; - if ( region.RightRefID >= 0 && region.RightPosition >= 0 ) - 
IsRightBoundSpecified = true; + if ( region.LeftRefID >= 0 && region.LeftPosition >= 0 ) IsLeftBoundSpecified = true; + if ( region.RightRefID >= 0 && region.RightPosition >= 0 ) IsRightBoundSpecified = true; // attempt jump to beginning of region, return success/fail of Jump() return Jump( Region.LeftRefID, Region.LeftPosition ); diff --git a/src/api/BamReader.h b/src/api/BamReader.h index c93987b..6e863b6 100644 --- a/src/api/BamReader.h +++ b/src/api/BamReader.h @@ -3,7 +3,7 @@ // Marth Lab, Department of Biology, Boston College // All rights reserved. // --------------------------------------------------------------------------- -// Last modified: 9 July 2010 (DB) +// Last modified: 3 September 2010 (DB) // --------------------------------------------------------------------------- // Uses BGZF routines were adapted from the bgzf.c code developed at the Broad // Institute. @@ -38,12 +38,19 @@ class BamReader { // close BAM file void Close(void); + // returns whether index data is loaded (i.e. 
reader is able to Jump() or not) + bool IsIndexLoaded(void) const; // returns whether reader is open for reading or not bool IsOpen(void) const; // performs random-access jump to reference, position bool Jump(int refID, int position = 0); // opens BAM file (and optional BAM index file, if provided) - bool Open(const std::string& filename, const std::string& indexFilename = ""); + // @lookForIndex - if no indexFilename provided, look for an existing index file + // @preferStandardIndex - if true, give priority in index file searching to standard BAM index + bool Open(const std::string& filename, + const std::string& indexFilename = "", + const bool lookForIndex = false, + const bool preferStandardIndex = false); // returns file pointer to beginning of alignments bool Rewind(void); // sets a region of interest (with left & right bound reference/position) @@ -84,8 +91,10 @@ class BamReader { // BAM index operations // ---------------------- - // creates index for BAM file, saves to file (default = bamFilename + ".bai") - bool CreateIndex(bool useDefaultIndex = true); + // creates index for BAM file, saves to file + // default behavior is to create the BAM standard index (".bai") + // set flag to false to create the BamTools-specific index (".bti") + bool CreateIndex(bool useStandardIndex = true); // private implementation private: diff --git a/src/third_party/Makefile b/src/third_party/Makefile new file mode 100644 index 0000000..8f475a2 --- /dev/null +++ b/src/third_party/Makefile @@ -0,0 +1,28 @@ +# ========================== +# BamTools Makefile +# (c) 2010 Derek Barnett +# +# src/third_party +# ========================== + +OBJ_DIR = ../../obj +BIN_DIR = ../../bin +SRC_DIR = + +# define source subdirectories +SUBDIRS = jsoncpp + +all: + @for dir in $(SUBDIRS); do \ + echo "- Building in $$dir"; \ + $(MAKE) --no-print-directory -C $$dir; \ + echo ""; \ + done + +.PHONY: all + +clean: + @echo "Cleaning up." 
+ @rm -f $(OBJ_DIR)/* $(BIN_DIR)/* + +.PHONY: clean diff --git a/src/third_party/jsoncpp/LICENSE b/src/third_party/jsoncpp/LICENSE new file mode 100644 index 0000000..ca2bfe1 --- /dev/null +++ b/src/third_party/jsoncpp/LICENSE @@ -0,0 +1,55 @@ +The JsonCpp library's source code, including accompanying documentation, +tests and demonstration applications, are licensed under the following +conditions... + +The author (Baptiste Lepilleur) explicitly disclaims copyright in all +jurisdictions which recognize such a disclaimer. In such jurisdictions, +this software is released into the Public Domain. + +In jurisdictions which do not recognize Public Domain property (e.g. Germany as of +2010), this software is Copyright (c) 2007-2010 by Baptiste Lepilleur, and is +released under the terms of the MIT License (see below). + +In jurisdictions which recognize Public Domain property, the user of this +software may choose to accept it either as 1) Public Domain, 2) under the +conditions of the MIT License (see below), or 3) under the terms of dual +Public Domain/MIT License conditions described here, as they choose. 
+ +The MIT License is about as close to Public Domain as a license can get, and is +described in clear, concise terms at: + + http://en.wikipedia.org/wiki/MIT_License + +The full text of the MIT License follows: + +======================================================================== +Copyright (c) 2007-2010 Baptiste Lepilleur + +Permission is hereby granted, free of charge, to any person +obtaining a copy of this software and associated documentation +files (the "Software"), to deal in the Software without +restriction, including without limitation the rights to use, copy, +modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +======================================================================== +(END LICENSE TEXT) + +The MIT license is compatible with both the GPL and commercial +software, affording one all of the rights of Public Domain with the +minor nuisance of being required to keep the above copyright notice +and license text in the source code. Note also that by accepting the +Public Domain "license" you can re-license your copy using whatever +license you like. 
diff --git a/src/third_party/jsoncpp/Makefile b/src/third_party/jsoncpp/Makefile new file mode 100644 index 0000000..fa25563 --- /dev/null +++ b/src/third_party/jsoncpp/Makefile @@ -0,0 +1,32 @@ +# ========================== +# BamTools Makefile +# (c) 2010 Derek Barnett +# +# src/third_party/jsoncpp +# ========================== + +OBJ_DIR = ../../../obj +BIN_DIR = ../../../bin + +# ---------------------------------- +# define our source and object files +# ---------------------------------- +SOURCES = json_reader.cpp \ + json_value.cpp \ + json_writer.cpp +OBJECTS= $(SOURCES:.cpp=.o) +BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS)) + +all: $(BUILT_OBJECTS) + +$(BUILT_OBJECTS): $(SOURCES) + @echo " * compiling" $(*F).cpp + @$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES) + +.PHONY: all + +clean: + @echo "Cleaning up." + @rm -f $(OBJ_DIR)/* $(BIN_DIR)/* + +.PHONY: clean diff --git a/src/third_party/jsoncpp/json.h b/src/third_party/jsoncpp/json.h new file mode 100644 index 0000000..cac0a9b --- /dev/null +++ b/src/third_party/jsoncpp/json.h @@ -0,0 +1,15 @@ +// Copyright 2007-2010 Baptiste Lepilleur +// Distributed under MIT license, or public domain if desired and +// recognized in your jurisdiction. +// See file LICENSE for detail or copy at http://jsoncpp.sourceforge.net/LICENSE + +#ifndef JSON_JSON_H_INCLUDED +# define JSON_JSON_H_INCLUDED + +//# include "autolink.h" +# include "json_value.h" +# include "json_reader.h" +# include "json_writer.h" +# include "json_features.h" + +#endif // JSON_JSON_H_INCLUDED diff --git a/src/third_party/jsoncpp/json_batchallocator.h b/src/third_party/jsoncpp/json_batchallocator.h new file mode 100644 index 0000000..173e2ed --- /dev/null +++ b/src/third_party/jsoncpp/json_batchallocator.h @@ -0,0 +1,130 @@ +// Copyright 2007-2010 Baptiste Lepilleur +// Distributed under MIT license, or public domain if desired and +// recognized in your jurisdiction. 
+// See file LICENSE for detail or copy at http://jsoncpp.sourceforge.net/LICENSE + +#ifndef JSONCPP_BATCHALLOCATOR_H_INCLUDED +# define JSONCPP_BATCHALLOCATOR_H_INCLUDED + +# include +# include + +# ifndef JSONCPP_DOC_EXCLUDE_IMPLEMENTATION + +namespace Json { + +/* Fast memory allocator. + * + * This memory allocator allocates memory for a batch of object (specified by + * the page size, the number of object in each page). + * + * It does not allow the destruction of a single object. All the allocated objects + * can be destroyed at once. The memory can be either released or reused for future + * allocation. + * + * The in-place new operator must be used to construct the object using the pointer + * returned by allocate. + */ +template +class BatchAllocator +{ +public: + typedef AllocatedType Type; + + BatchAllocator( unsigned int objectsPerPage = 255 ) + : freeHead_( 0 ) + , objectsPerPage_( objectsPerPage ) + { +// printf( "Size: %d => %s\n", sizeof(AllocatedType), typeid(AllocatedType).name() ); + assert( sizeof(AllocatedType) * objectPerAllocation >= sizeof(AllocatedType *) ); // We must be able to store a slist in the object free space. + assert( objectsPerPage >= 16 ); + batches_ = allocateBatch( 0 ); // allocated a dummy page + currentBatch_ = batches_; + } + + ~BatchAllocator() + { + for ( BatchInfo *batch = batches_; batch; ) + { + BatchInfo *nextBatch = batch->next_; + free( batch ); + batch = nextBatch; + } + } + + /// allocate space for an array of objectPerAllocation object. + /// @warning it is the responsability of the caller to call objects constructors. + AllocatedType *allocate() + { + if ( freeHead_ ) // returns node from free list. 
+ { + AllocatedType *object = freeHead_; + freeHead_ = *(AllocatedType **)object; + return object; + } + if ( currentBatch_->used_ == currentBatch_->end_ ) + { + currentBatch_ = currentBatch_->next_; + while ( currentBatch_ && currentBatch_->used_ == currentBatch_->end_ ) + currentBatch_ = currentBatch_->next_; + + if ( !currentBatch_ ) // no free batch found, allocate a new one + { + currentBatch_ = allocateBatch( objectsPerPage_ ); + currentBatch_->next_ = batches_; // insert at the head of the list + batches_ = currentBatch_; + } + } + AllocatedType *allocated = currentBatch_->used_; + currentBatch_->used_ += objectPerAllocation; + return allocated; + } + + /// Release the object. + /// @warning it is the responsibility of the caller to actually destruct the object. + void release( AllocatedType *object ) + { + assert( object != 0 ); + *(AllocatedType **)object = freeHead_; + freeHead_ = object; + } + +private: + struct BatchInfo + { + BatchInfo *next_; + AllocatedType *used_; + AllocatedType *end_; + AllocatedType buffer_[objectPerAllocation]; + }; + + // disabled copy constructor and assignment operator. 
+ BatchAllocator( const BatchAllocator & ); + void operator =( const BatchAllocator &); + + static BatchInfo *allocateBatch( unsigned int objectsPerPage ) + { + const unsigned int mallocSize = sizeof(BatchInfo) - sizeof(AllocatedType)* objectPerAllocation + + sizeof(AllocatedType) * objectPerAllocation * objectsPerPage; + BatchInfo *batch = static_cast<BatchInfo*>( malloc( mallocSize ) ); + batch->next_ = 0; + batch->used_ = batch->buffer_; + batch->end_ = batch->buffer_ + objectsPerPage * objectPerAllocation; // allocate() advances used_ by objectPerAllocation objects, so end_ must be counted in objects as well + return batch; + } + + BatchInfo *batches_; + BatchInfo *currentBatch_; + /// Head of a single linked list within the allocated space of freed objects + AllocatedType *freeHead_; + unsigned int objectsPerPage_; +}; + + +} // namespace Json + +# endif // ifndef JSONCPP_DOC_EXCLUDE_IMPLEMENTATION + +#endif // JSONCPP_BATCHALLOCATOR_H_INCLUDED + diff --git a/src/third_party/jsoncpp/json_config.h b/src/third_party/jsoncpp/json_config.h new file mode 100644 index 0000000..3fe08f2 --- /dev/null +++ b/src/third_party/jsoncpp/json_config.h @@ -0,0 +1,76 @@ +// Copyright 2007-2010 Baptiste Lepilleur +// Distributed under MIT license, or public domain if desired and +// recognized in your jurisdiction. +// See file LICENSE for detail or copy at http://jsoncpp.sourceforge.net/LICENSE + +#ifndef JSON_CONFIG_H_INCLUDED +# define JSON_CONFIG_H_INCLUDED + +/// If defined, indicates that json library is embedded in CppTL library. +//# define JSON_IN_CPPTL 1 + +/// If defined, indicates that json may leverage CppTL library +//# define JSON_USE_CPPTL 1 +/// If defined, indicates that cpptl vector based map should be used instead of std::map +/// as Value container. +//# define JSON_USE_CPPTL_SMALLMAP 1 +/// If defined, indicates that Json specific container should be used +/// (hash table & simple deque container with customizable allocator). +/// THIS FEATURE IS STILL EXPERIMENTAL! 
+//# define JSON_VALUE_USE_INTERNAL_MAP 1 +/// Force usage of standard new/malloc based allocator instead of memory pool based allocator. +/// The memory pools allocator used optimization (initializing Value and ValueInternalLink +/// as if it was a POD) that may cause some validation tool to report errors. +/// Only has effects if JSON_VALUE_USE_INTERNAL_MAP is defined. +//# define JSON_USE_SIMPLE_INTERNAL_ALLOCATOR 1 + +/// If defined, indicates that Json use exception to report invalid type manipulation +/// instead of C assert macro. +# define JSON_USE_EXCEPTION 1 + +# ifdef JSON_IN_CPPTL +# include +# ifndef JSON_USE_CPPTL +# define JSON_USE_CPPTL 1 +# endif +# endif + +# ifdef JSON_IN_CPPTL +# define JSON_API CPPTL_API +# elif defined(JSON_DLL_BUILD) +# define JSON_API __declspec(dllexport) +# elif defined(JSON_DLL) +# define JSON_API __declspec(dllimport) +# else +# define JSON_API +# endif + +// If JSON_NO_INT64 is defined, then Json only support C++ "int" type for integer +// Storages. +// #define JSON_NO_INT64 1 + +#if defined(_MSC_VER) && _MSC_VER <= 1200 // MSVC 6 +// Microsoft Visual Studio 6 only support conversion from __int64 to double +// (no conversion from unsigned __int64). 
+#define JSON_USE_INT64_DOUBLE_CONVERSION 1 +#endif // if defined(_MSC_VER) && _MSC_VER < 1200 // MSVC 6 + + +namespace Json { +# if defined(JSON_NO_INT64) + typedef int Int; + typedef unsigned int UInt; +# else // if defined(JSON_NO_INT64) + // For Microsoft Visual use specific types as long long is not supported +# if defined(_MSC_VER) // Microsoft Visual Studio + typedef __int64 Int; + typedef unsigned __int64 UInt; +# else // if defined(_MSC_VER) // Other platforms, use long long + typedef long long int Int; + typedef unsigned long long int UInt; +# endif // if defined(_MSC_VER) +# endif // if defined(JSON_NO_INT64) +} // end namespace Json + + +#endif // JSON_CONFIG_H_INCLUDED diff --git a/src/third_party/jsoncpp/json_features.h b/src/third_party/jsoncpp/json_features.h new file mode 100644 index 0000000..05521dc --- /dev/null +++ b/src/third_party/jsoncpp/json_features.h @@ -0,0 +1,47 @@ +// Copyright 2007-2010 Baptiste Lepilleur +// Distributed under MIT license, or public domain if desired and +// recognized in your jurisdiction. +// See file LICENSE for detail or copy at http://jsoncpp.sourceforge.net/LICENSE + +#ifndef CPPTL_JSON_FEATURES_H_INCLUDED +# define CPPTL_JSON_FEATURES_H_INCLUDED + +# include "json_forwards.h" + +namespace Json { + + /** \brief Configuration passed to reader and writer. + * This configuration object can be used to force the Reader or Writer + * to behave in a standard conforming way. + */ + class JSON_API Features + { + public: + /** \brief A configuration that allows all features and assumes all strings are UTF-8. + * - C & C++ comments are allowed + * - Root object can be any JSON value + * - Assumes Value strings are encoded in UTF-8 + */ + static Features all(); + + /** \brief A configuration that is strictly compatible with the JSON specification. + * - Comments are forbidden. + * - Root object must be either an array or an object value. 
+ * - Assumes Value strings are encoded in UTF-8 + */ + static Features strictMode(); + + /** \brief Initialize the configuration like JsonConfig::allFeatures; + */ + Features(); + + /// \c true if comments are allowed. Default: \c true. + bool allowComments_; + + /// \c true if root must be either an array or an object value. Default: \c false. + bool strictRoot_; + }; + +} // namespace Json + +#endif // CPPTL_JSON_FEATURES_H_INCLUDED diff --git a/src/third_party/jsoncpp/json_forwards.h b/src/third_party/jsoncpp/json_forwards.h new file mode 100644 index 0000000..792a87a --- /dev/null +++ b/src/third_party/jsoncpp/json_forwards.h @@ -0,0 +1,42 @@ +// Copyright 2007-2010 Baptiste Lepilleur +// Distributed under MIT license, or public domain if desired and +// recognized in your jurisdiction. +// See file LICENSE for detail or copy at http://jsoncpp.sourceforge.net/LICENSE + +#ifndef JSON_FORWARDS_H_INCLUDED +# define JSON_FORWARDS_H_INCLUDED + +# include "json_config.h" + +namespace Json { + + // writer.h + class FastWriter; + class StyledWriter; + + // reader.h + class Reader; + + // features.h + class Features; + + // value.h + typedef unsigned int ArrayIndex; + class StaticString; + class Path; + class PathArgument; + class Value; + class ValueIteratorBase; + class ValueIterator; + class ValueConstIterator; +#ifdef JSON_VALUE_USE_INTERNAL_MAP + class ValueMapAllocator; + class ValueInternalLink; + class ValueInternalArray; + class ValueInternalMap; +#endif // #ifdef JSON_VALUE_USE_INTERNAL_MAP + +} // namespace Json + + +#endif // JSON_FORWARDS_H_INCLUDED diff --git a/src/third_party/jsoncpp/json_internalarray.inl b/src/third_party/jsoncpp/json_internalarray.inl new file mode 100644 index 0000000..66d838e --- /dev/null +++ b/src/third_party/jsoncpp/json_internalarray.inl @@ -0,0 +1,453 @@ +// Copyright 2007-2010 Baptiste Lepilleur +// Distributed under MIT license, or public domain if desired and +// recognized in your jurisdiction. 
+// See file LICENSE for detail or copy at http://jsoncpp.sourceforge.net/LICENSE + +// included by json_value.cpp +// everything is within Json namespace + +// ////////////////////////////////////////////////////////////////// +// ////////////////////////////////////////////////////////////////// +// ////////////////////////////////////////////////////////////////// +// class ValueInternalArray +// ////////////////////////////////////////////////////////////////// +// ////////////////////////////////////////////////////////////////// +// ////////////////////////////////////////////////////////////////// + +ValueArrayAllocator::~ValueArrayAllocator() +{ +} + +// ////////////////////////////////////////////////////////////////// +// class DefaultValueArrayAllocator +// ////////////////////////////////////////////////////////////////// +#ifdef JSON_USE_SIMPLE_INTERNAL_ALLOCATOR +class DefaultValueArrayAllocator : public ValueArrayAllocator +{ +public: // overridden from ValueArrayAllocator + virtual ~DefaultValueArrayAllocator() + { + } + + virtual ValueInternalArray *newArray() + { + return new ValueInternalArray(); + } + + virtual ValueInternalArray *newArrayCopy( const ValueInternalArray &other ) + { + return new ValueInternalArray( other ); + } + + virtual void destructArray( ValueInternalArray *array ) + { + delete array; + } + + virtual void reallocateArrayPageIndex( Value **&indexes, + ValueInternalArray::PageIndex &indexCount, + ValueInternalArray::PageIndex minNewIndexCount ) + { + ValueInternalArray::PageIndex newIndexCount = (indexCount*3)/2 + 1; + if ( minNewIndexCount > newIndexCount ) + newIndexCount = minNewIndexCount; + void *newIndexes = realloc( indexes, sizeof(Value*) * newIndexCount ); + if ( !newIndexes ) + throw std::bad_alloc(); + indexCount = newIndexCount; + indexes = static_cast( newIndexes ); + } + virtual void releaseArrayPageIndex( Value **indexes, + ValueInternalArray::PageIndex indexCount ) + { + if ( indexes ) + free( indexes ); + } + 
+ virtual Value *allocateArrayPage() + { + return static_cast( malloc( sizeof(Value) * ValueInternalArray::itemsPerPage ) ); + } + + virtual void releaseArrayPage( Value *value ) + { + if ( value ) + free( value ); + } +}; + +#else // #ifdef JSON_USE_SIMPLE_INTERNAL_ALLOCATOR +/// @todo make this thread-safe (lock when accessign batch allocator) +class DefaultValueArrayAllocator : public ValueArrayAllocator +{ +public: // overridden from ValueArrayAllocator + virtual ~DefaultValueArrayAllocator() + { + } + + virtual ValueInternalArray *newArray() + { + ValueInternalArray *array = arraysAllocator_.allocate(); + new (array) ValueInternalArray(); // placement new + return array; + } + + virtual ValueInternalArray *newArrayCopy( const ValueInternalArray &other ) + { + ValueInternalArray *array = arraysAllocator_.allocate(); + new (array) ValueInternalArray( other ); // placement new + return array; + } + + virtual void destructArray( ValueInternalArray *array ) + { + if ( array ) + { + array->~ValueInternalArray(); + arraysAllocator_.release( array ); + } + } + + virtual void reallocateArrayPageIndex( Value **&indexes, + ValueInternalArray::PageIndex &indexCount, + ValueInternalArray::PageIndex minNewIndexCount ) + { + ValueInternalArray::PageIndex newIndexCount = (indexCount*3)/2 + 1; + if ( minNewIndexCount > newIndexCount ) + newIndexCount = minNewIndexCount; + void *newIndexes = realloc( indexes, sizeof(Value*) * newIndexCount ); + if ( !newIndexes ) + throw std::bad_alloc(); + indexCount = newIndexCount; + indexes = static_cast( newIndexes ); + } + virtual void releaseArrayPageIndex( Value **indexes, + ValueInternalArray::PageIndex indexCount ) + { + if ( indexes ) + free( indexes ); + } + + virtual Value *allocateArrayPage() + { + return static_cast( pagesAllocator_.allocate() ); + } + + virtual void releaseArrayPage( Value *value ) + { + if ( value ) + pagesAllocator_.release( value ); + } +private: + BatchAllocator arraysAllocator_; + BatchAllocator 
pagesAllocator_; +}; +#endif // #ifdef JSON_USE_SIMPLE_INTERNAL_ALLOCATOR + +static ValueArrayAllocator *&arrayAllocator() +{ + static DefaultValueArrayAllocator defaultAllocator; + static ValueArrayAllocator *arrayAllocator = &defaultAllocator; + return arrayAllocator; +} + +static struct DummyArrayAllocatorInitializer { + DummyArrayAllocatorInitializer() + { + arrayAllocator(); // ensure arrayAllocator() statics are initialized before main(). + } +} dummyArrayAllocatorInitializer; + +// ////////////////////////////////////////////////////////////////// +// class ValueInternalArray +// ////////////////////////////////////////////////////////////////// +bool +ValueInternalArray::equals( const IteratorState &x, + const IteratorState &other ) +{ + return x.array_ == other.array_ + && x.currentItemIndex_ == other.currentItemIndex_ + && x.currentPageIndex_ == other.currentPageIndex_; +} + + +void +ValueInternalArray::increment( IteratorState &it ) +{ + JSON_ASSERT_MESSAGE( it.array_ && + (it.currentPageIndex_ - it.array_->pages_)*itemsPerPage + it.currentItemIndex_ + != it.array_->size_, + "ValueInternalArray::increment(): moving iterator beyond end" ); + ++(it.currentItemIndex_); + if ( it.currentItemIndex_ == itemsPerPage ) + { + it.currentItemIndex_ = 0; + ++(it.currentPageIndex_); + } +} + + +void +ValueInternalArray::decrement( IteratorState &it ) +{ + JSON_ASSERT_MESSAGE( it.array_ && !( it.currentPageIndex_ == it.array_->pages_ + && it.currentItemIndex_ == 0 ), // stepping back is only legal when the iterator is NOT already at the first element + "ValueInternalArray::decrement(): moving iterator beyond end" ); + if ( it.currentItemIndex_ == 0 ) + { + it.currentItemIndex_ = itemsPerPage-1; + --(it.currentPageIndex_); + } + else + { + --(it.currentItemIndex_); + } +} + + +Value & +ValueInternalArray::unsafeDereference( const IteratorState &it ) +{ + return (*(it.currentPageIndex_))[it.currentItemIndex_]; +} + + +Value & +ValueInternalArray::dereference( const IteratorState &it ) +{ + JSON_ASSERT_MESSAGE( it.array_ && + (it.currentPageIndex_ - 
it.array_->pages_)*itemsPerPage + it.currentItemIndex_ + < it.array_->size_, + "ValueInternalArray::dereference(): dereferencing invalid iterator" ); + return unsafeDereference( it ); +} + +void +ValueInternalArray::makeBeginIterator( IteratorState &it ) const +{ + it.array_ = const_cast<ValueInternalArray *>( this ); + it.currentItemIndex_ = 0; + it.currentPageIndex_ = pages_; +} + + +void +ValueInternalArray::makeIterator( IteratorState &it, ArrayIndex index ) const +{ + it.array_ = const_cast<ValueInternalArray *>( this ); + it.currentItemIndex_ = index % itemsPerPage; + it.currentPageIndex_ = pages_ + index / itemsPerPage; +} + + +void +ValueInternalArray::makeEndIterator( IteratorState &it ) const +{ + makeIterator( it, size_ ); +} + + +ValueInternalArray::ValueInternalArray() + : pages_( 0 ) + , size_( 0 ) + , pageCount_( 0 ) +{ +} + + +ValueInternalArray::ValueInternalArray( const ValueInternalArray &other ) + : pages_( 0 ) + , pageCount_( 0 ) + , size_( other.size_ ) +{ + PageIndex minNewPages = (other.size_ + itemsPerPage - 1) / itemsPerPage; // round up: a partially filled last page still needs a slot in the page index + arrayAllocator()->reallocateArrayPageIndex( pages_, pageCount_, minNewPages ); + JSON_ASSERT_MESSAGE( pageCount_ >= minNewPages, + "ValueInternalArray::reserve(): bad reallocation" ); + IteratorState itOther; + other.makeBeginIterator( itOther ); + Value *value = 0; // start of the page currently being filled + for ( ArrayIndex index = 0; index < size_; ++index, increment(itOther) ) + { + if ( index % itemsPerPage == 0 ) + { + PageIndex pageIndex = index / itemsPerPage; + value = arrayAllocator()->allocateArrayPage(); + pages_[pageIndex] = value; + } + new (value + index % itemsPerPage) Value( dereference( itOther ) ); // copy-construct into this item's own slot of the page, not always slot 0 + } +} + + +ValueInternalArray & +ValueInternalArray::operator =( const ValueInternalArray &other ) +{ + ValueInternalArray temp( other ); + swap( temp ); + return *this; +} + + +ValueInternalArray::~ValueInternalArray() +{ + // destroy all constructed items + IteratorState it; + IteratorState itEnd; + makeBeginIterator( it); + makeEndIterator( itEnd ); + for ( ; !equals(it,itEnd); increment(it) ) + { + Value *value = 
&dereference(it); + value->~Value(); + } + // release all pages + PageIndex lastPageIndex = (size_ + itemsPerPage - 1) / itemsPerPage; // round up so a partially filled last page is released too + for ( PageIndex pageIndex = 0; pageIndex < lastPageIndex; ++pageIndex ) + arrayAllocator()->releaseArrayPage( pages_[pageIndex] ); + // release pages index + arrayAllocator()->releaseArrayPageIndex( pages_, pageCount_ ); +} + + +void +ValueInternalArray::swap( ValueInternalArray &other ) +{ + Value **tempPages = pages_; + pages_ = other.pages_; + other.pages_ = tempPages; + ArrayIndex tempSize = size_; + size_ = other.size_; + other.size_ = tempSize; + PageIndex tempPageCount = pageCount_; + pageCount_ = other.pageCount_; + other.pageCount_ = tempPageCount; +} + +void +ValueInternalArray::clear() +{ + ValueInternalArray dummy; + swap( dummy ); +} + + +void +ValueInternalArray::resize( ArrayIndex newSize ) +{ + if ( newSize == 0 ) + clear(); + else if ( newSize < size_ ) + { + IteratorState it; + IteratorState itEnd; + makeIterator( it, newSize ); + makeIterator( itEnd, size_ ); + for ( ; !equals(it,itEnd); increment(it) ) + { + Value *value = &dereference(it); + value->~Value(); + } + PageIndex pageIndex = (newSize + itemsPerPage - 1) / itemsPerPage; + PageIndex lastPageIndex = (size_ + itemsPerPage - 1) / itemsPerPage; // round up: the old last page may be only partially filled + for ( ; pageIndex < lastPageIndex; ++pageIndex ) + arrayAllocator()->releaseArrayPage( pages_[pageIndex] ); + size_ = newSize; + } + else if ( newSize > size_ ) + resolveReference( newSize - 1 ); // making index newSize-1 valid sets size_ to exactly newSize +} + + +void +ValueInternalArray::makeIndexValid( ArrayIndex index ) +{ + // Need to enlarge page index ? + if ( index >= pageCount_ * itemsPerPage ) + { + PageIndex minNewPages = index / itemsPerPage + 1; // number of pages required to hold item 'index' + arrayAllocator()->reallocateArrayPageIndex( pages_, pageCount_, minNewPages ); + JSON_ASSERT_MESSAGE( pageCount_ >= minNewPages, "ValueInternalArray::reserve(): bad reallocation" ); + } + + // Need to allocate new pages ? + ArrayIndex nextPageIndex = + (size_ % itemsPerPage) != 0 ? 
size_ - (size_%itemsPerPage) + itemsPerPage + : size_; + if ( nextPageIndex <= index ) + { + PageIndex pageIndex = nextPageIndex / itemsPerPage; + PageIndex pageToAllocate = (index - nextPageIndex) / itemsPerPage + 1; + for ( ; pageToAllocate-- > 0; ++pageIndex ) + pages_[pageIndex] = arrayAllocator()->allocateArrayPage(); + } + + // Initialize all new entries + IteratorState it; + IteratorState itEnd; + makeIterator( it, size_ ); + size_ = index + 1; + makeIterator( itEnd, size_ ); + for ( ; !equals(it,itEnd); increment(it) ) + { + Value *value = &dereference(it); + new (value) Value(); // Construct a default value using placement new + } +} + +Value & +ValueInternalArray::resolveReference( ArrayIndex index ) +{ + if ( index >= size_ ) + makeIndexValid( index ); + return pages_[index/itemsPerPage][index%itemsPerPage]; +} + +Value * +ValueInternalArray::find( ArrayIndex index ) const +{ + if ( index >= size_ ) + return 0; + return &(pages_[index/itemsPerPage][index%itemsPerPage]); +} + +ValueInternalArray::ArrayIndex +ValueInternalArray::size() const +{ + return size_; +} + +int +ValueInternalArray::distance( const IteratorState &x, const IteratorState &y ) +{ + return indexOf(y) - indexOf(x); +} + + +ValueInternalArray::ArrayIndex +ValueInternalArray::indexOf( const IteratorState &iterator ) +{ + if ( !iterator.array_ ) + return ArrayIndex(-1); + return ArrayIndex( + (iterator.currentPageIndex_ - iterator.array_->pages_) * itemsPerPage + + iterator.currentItemIndex_ ); +} + + +int +ValueInternalArray::compare( const ValueInternalArray &other ) const +{ + int sizeDiff( size_ - other.size_ ); + if ( sizeDiff != 0 ) + return sizeDiff; + + for ( ArrayIndex index =0; index < size_; ++index ) + { + int diff = pages_[index/itemsPerPage][index%itemsPerPage].compare( + other.pages_[index/itemsPerPage][index%itemsPerPage] ); + if ( diff != 0 ) + return diff; + } + return 0; +} diff --git a/src/third_party/jsoncpp/json_internalmap.inl 
b/src/third_party/jsoncpp/json_internalmap.inl new file mode 100644 index 0000000..d0dd62a --- /dev/null +++ b/src/third_party/jsoncpp/json_internalmap.inl @@ -0,0 +1,612 @@ +// Copyright 2007-2010 Baptiste Lepilleur +// Distributed under MIT license, or public domain if desired and +// recognized in your jurisdiction. +// See file LICENSE for detail or copy at http://jsoncpp.sourceforge.net/LICENSE + +// included by json_value.cpp +// everything is within Json namespace + +// ////////////////////////////////////////////////////////////////// +// ////////////////////////////////////////////////////////////////// +// ////////////////////////////////////////////////////////////////// +// class ValueInternalMap +// ////////////////////////////////////////////////////////////////// +// ////////////////////////////////////////////////////////////////// +// ////////////////////////////////////////////////////////////////// + +/** \internal MUST be safely initialized using memset( this, 0, sizeof(ValueInternalLink) ); + * This optimization is used by the fast allocator. 
+ */ +ValueInternalLink::ValueInternalLink() + : previous_( 0 ) + , next_( 0 ) +{ +} + +ValueInternalLink::~ValueInternalLink() +{ + for ( int index =0; index < itemPerLink; ++index ) + { + if ( !items_[index].isItemAvailable() ) + { + if ( !items_[index].isMemberNameStatic() ) + free( keys_[index] ); + } + else + break; + } +} + + + +ValueMapAllocator::~ValueMapAllocator() +{ +} + +#ifdef JSON_USE_SIMPLE_INTERNAL_ALLOCATOR +class DefaultValueMapAllocator : public ValueMapAllocator +{ +public: // overridden from ValueMapAllocator + virtual ValueInternalMap *newMap() + { + return new ValueInternalMap(); + } + + virtual ValueInternalMap *newMapCopy( const ValueInternalMap &other ) + { + return new ValueInternalMap( other ); + } + + virtual void destructMap( ValueInternalMap *map ) + { + delete map; + } + + virtual ValueInternalLink *allocateMapBuckets( unsigned int size ) + { + return new ValueInternalLink[size]; + } + + virtual void releaseMapBuckets( ValueInternalLink *links ) + { + delete [] links; + } + + virtual ValueInternalLink *allocateMapLink() + { + return new ValueInternalLink(); + } + + virtual void releaseMapLink( ValueInternalLink *link ) + { + delete link; + } +}; +#else +/// @todo make this thread-safe (lock when accessign batch allocator) +class DefaultValueMapAllocator : public ValueMapAllocator +{ +public: // overridden from ValueMapAllocator + virtual ValueInternalMap *newMap() + { + ValueInternalMap *map = mapsAllocator_.allocate(); + new (map) ValueInternalMap(); // placement new + return map; + } + + virtual ValueInternalMap *newMapCopy( const ValueInternalMap &other ) + { + ValueInternalMap *map = mapsAllocator_.allocate(); + new (map) ValueInternalMap( other ); // placement new + return map; + } + + virtual void destructMap( ValueInternalMap *map ) + { + if ( map ) + { + map->~ValueInternalMap(); + mapsAllocator_.release( map ); + } + } + + virtual ValueInternalLink *allocateMapBuckets( unsigned int size ) + { + return new 
ValueInternalLink[size]; + } + + virtual void releaseMapBuckets( ValueInternalLink *links ) + { + delete [] links; + } + + virtual ValueInternalLink *allocateMapLink() + { + ValueInternalLink *link = linksAllocator_.allocate(); + memset( link, 0, sizeof(ValueInternalLink) ); + return link; + } + + virtual void releaseMapLink( ValueInternalLink *link ) + { + link->~ValueInternalLink(); + linksAllocator_.release( link ); + } +private: + BatchAllocator mapsAllocator_; + BatchAllocator linksAllocator_; +}; +#endif + +static ValueMapAllocator *&mapAllocator() +{ + static DefaultValueMapAllocator defaultAllocator; + static ValueMapAllocator *mapAllocator = &defaultAllocator; + return mapAllocator; +} + +static struct DummyMapAllocatorInitializer { + DummyMapAllocatorInitializer() + { + mapAllocator(); // ensure mapAllocator() statics are initialized before main(). + } +} dummyMapAllocatorInitializer; + + + +// h(K) = value * K >> w ; with w = 32 & K prime w.r.t. 2^32. + +/* +use linked list hash map. +buckets array is a container. +linked list element contains 6 key/values. 
(memory = (16+4) * 6 + 4 = 124) +value have extra state: valid, available, deleted +*/ + + +ValueInternalMap::ValueInternalMap() + : buckets_( 0 ) + , tailLink_( 0 ) + , bucketsSize_( 0 ) + , itemCount_( 0 ) +{ +} + + +ValueInternalMap::ValueInternalMap( const ValueInternalMap &other ) + : buckets_( 0 ) + , tailLink_( 0 ) + , bucketsSize_( 0 ) + , itemCount_( 0 ) +{ + reserve( other.itemCount_ ); + IteratorState it; + IteratorState itEnd; + other.makeBeginIterator( it ); + other.makeEndIterator( itEnd ); + for ( ; !equals(it,itEnd); increment(it) ) + { + bool isStatic; + const char *memberName = key( it, isStatic ); + const Value &aValue = value( it ); + resolveReference(memberName, isStatic) = aValue; + } +} + + +ValueInternalMap & +ValueInternalMap::operator =( const ValueInternalMap &other ) +{ + ValueInternalMap dummy( other ); + swap( dummy ); + return *this; +} + + +ValueInternalMap::~ValueInternalMap() +{ + if ( buckets_ ) + { + for ( BucketIndex bucketIndex =0; bucketIndex < bucketsSize_; ++bucketIndex ) + { + ValueInternalLink *link = buckets_[bucketIndex].next_; + while ( link ) + { + ValueInternalLink *linkToRelease = link; + link = link->next_; + mapAllocator()->releaseMapLink( linkToRelease ); + } + } + mapAllocator()->releaseMapBuckets( buckets_ ); + } +} + + +void +ValueInternalMap::swap( ValueInternalMap &other ) +{ + ValueInternalLink *tempBuckets = buckets_; + buckets_ = other.buckets_; + other.buckets_ = tempBuckets; + ValueInternalLink *tempTailLink = tailLink_; + tailLink_ = other.tailLink_; + other.tailLink_ = tempTailLink; + BucketIndex tempBucketsSize = bucketsSize_; + bucketsSize_ = other.bucketsSize_; + other.bucketsSize_ = tempBucketsSize; + BucketIndex tempItemCount = itemCount_; + itemCount_ = other.itemCount_; + other.itemCount_ = tempItemCount; +} + + +void +ValueInternalMap::clear() +{ + ValueInternalMap dummy; + swap( dummy ); +} + + +ValueInternalMap::BucketIndex +ValueInternalMap::size() const +{ + return itemCount_; +} + +bool 
+ValueInternalMap::reserveDelta( BucketIndex growth )
+{
+   // Make room for 'growth' items on top of the current item count.
+   return reserve( itemCount_ + growth );
+}
+
+/// Ensures the bucket array exists once at least one item is requested.
+/// Only a single bucket is ever allocated (the ideal-bucket-count
+/// computation below is commented out) and the function always returns true.
+bool 
+ValueInternalMap::reserve( BucketIndex newItemCount )
+{
+   if ( !buckets_  &&  newItemCount > 0 )
+   {
+      buckets_ = mapAllocator()->allocateMapBuckets( 1 );
+      bucketsSize_ = 1;
+      tailLink_ = &buckets_[0];
+   }
+//   BucketIndex idealBucketCount = (newItemCount + ValueInternalLink::itemPerLink) / ValueInternalLink::itemPerLink;
+   return true;
+}
+
+
+/// Looks up \a key in its bucket.
+/// \return pointer to the stored value, or 0 if the key is not present.
+/// The scan of a bucket stops at the first slot flagged 'available'.
+const Value *
+ValueInternalMap::find( const char *key ) const
+{
+   if ( !bucketsSize_ )
+      return 0;
+   HashKey hashedKey = hash( key );
+   BucketIndex bucketIndex = hashedKey % bucketsSize_;
+   for ( const ValueInternalLink *current = &buckets_[bucketIndex]; 
+         current != 0; 
+         current = current->next_ )
+   {
+      for ( BucketIndex index=0; index < ValueInternalLink::itemPerLink; ++index )
+      {
+         if ( current->items_[index].isItemAvailable() )
+            return 0;
+         if ( strcmp( key, current->keys_[index] ) == 0 )
+            return &current->items_[index];
+      }
+   }
+   return 0;
+}
+
+
+/// Non-const overload: forwards to the const lookup and casts the result back.
+Value *
+ValueInternalMap::find( const char *key )
+{
+   const ValueInternalMap *constThis = this;
+   return const_cast<Value *>( constThis->find( key ) );
+}
+
+
+/// Returns the value stored under \a key, creating a new item when the key
+/// is not found. \a isStatic is forwarded to the newly created item.
+Value &
+ValueInternalMap::resolveReference( const char *key,
+                                    bool isStatic )
+{
+   HashKey hashedKey = hash( key );
+   if ( bucketsSize_ )
+   {
+      BucketIndex bucketIndex = hashedKey % bucketsSize_;
+      BucketIndex index;
+      for ( ValueInternalLink *current = &buckets_[bucketIndex]; 
+            current != 0; 
+            current = current->next_ )
+      {
+         for ( index=0; index < ValueInternalLink::itemPerLink; ++index )
+         {
+            // First free slot in the chain: the key is absent, insert it here.
+            if ( current->items_[index].isItemAvailable() )
+               return setNewItem( key, isStatic, current, index );
+            if ( strcmp( key, current->keys_[index] ) == 0 )
+               return current->items_[index];
+         }
+      }
+   }
+
+   // No bucket yet, or every page of the bucket is full: grow, then append.
+   reserveDelta( 1 );
+   return unsafeAdd( key, isStatic, hashedKey );
+}
+
+
+void 
+ValueInternalMap::remove( const char *key )
+{
+   HashKey hashedKey = hash( key );
+   if (
!bucketsSize_ )
+      return;
+   BucketIndex bucketIndex = hashedKey % bucketsSize_;
+   for ( ValueInternalLink *link = &buckets_[bucketIndex]; 
+         link != 0; 
+         link = link->next_ )
+   {
+      BucketIndex index;
+      for ( index =0; index < ValueInternalLink::itemPerLink; ++index )
+      {
+         if ( link->items_[index].isItemAvailable() )
+            return;
+         if ( strcmp( key, link->keys_[index] ) == 0 )
+         {
+            doActualRemove( link, index, bucketIndex );
+            return;
+         }
+      }
+   }
+}
+
+/// Removes the item stored at slot \a index of page \a link, which belongs
+/// to bucket \a bucketIndex.
+/// NOTE(review): only the Value objects are swapped below; the keys_[]
+/// entries are neither moved nor freed in this function -- confirm whether
+/// that is handled elsewhere.
+void 
+ValueInternalMap::doActualRemove( ValueInternalLink *link, 
+                                  BucketIndex index,
+                                  BucketIndex bucketIndex )
+{
+   // find last item of the bucket and swap it with the 'removed' one.
+   // set removed items flags to 'available'.
+   // if last page only contains 'available' items, then desallocate it (it's empty)
+   // The last page must be looked up by *bucket* index; 'index' is the item
+   // slot inside 'link' and was wrongly passed here before.
+   ValueInternalLink *&lastLink = getLastLinkInBucket( bucketIndex );
+   BucketIndex lastItemIndex = 1; // a link can never be empty, so start at 1
+   for ( ;   
+         lastItemIndex < ValueInternalLink::itemPerLink; 
+         ++lastItemIndex ) // may be optimized with dicotomic search
+   {
+      if ( lastLink->items_[lastItemIndex].isItemAvailable() )
+         break;
+   }
+   
+   BucketIndex lastUsedIndex = lastItemIndex - 1;
+   Value *valueToDelete = &link->items_[index];
+   Value *valueToPreserve = &lastLink->items_[lastUsedIndex];
+   if ( valueToDelete != valueToPreserve )
+      valueToDelete->swap( *valueToPreserve );
+   if ( lastUsedIndex == 0 )  // page is now empty
+   {  // remove it from bucket linked list and delete it.
+      ValueInternalLink *linkPreviousToLast = lastLink->previous_;
+      if ( linkPreviousToLast != 0 )   // can not deleted bucket link.
+      {
+         mapAllocator()->releaseMapLink( lastLink );
+         linkPreviousToLast->next_ = 0;
+         lastLink = linkPreviousToLast;
+      }
+   }
+   else
+   {
+      Value dummy;
+      valueToPreserve->swap( dummy ); // restore deleted to default Value.
+      valueToPreserve->setItemUsed( false );
+   }
+   --itemCount_;
+}
+
+
+/// Returns a reference to the pointer designating the last page of bucket
+/// \a bucketIndex: tailLink_ for the last bucket, otherwise the previous_
+/// field of the next bucket's head (initialised to the bucket head on first use).
+ValueInternalLink *&
+ValueInternalMap::getLastLinkInBucket( BucketIndex bucketIndex )
+{
+   if ( bucketIndex == bucketsSize_ - 1 )
+      return tailLink_;
+   ValueInternalLink *&previous = buckets_[bucketIndex+1].previous_;
+   if ( !previous )
+      previous = &buckets_[bucketIndex];
+   return previous;
+}
+
+
+/// Stores a copy of \a key in slot \a index of \a link, flags the slot as
+/// used and returns the slot's value.
+Value &
+ValueInternalMap::setNewItem( const char *key, 
+                              bool isStatic,
+                              ValueInternalLink *link, 
+                              BucketIndex index )
+{
+   char *duplicatedKey = makeMemberName( key );
+   ++itemCount_;
+   link->keys_[index] = duplicatedKey;
+   link->items_[index].setItemUsed();
+   link->items_[index].setMemberNameIsStatic( isStatic );
+   return link->items_[index]; // items already default constructed.
+}
+
+
+/// Appends \a key to the last page of its bucket without checking whether
+/// the key already exists; a new page is allocated when the last one is full.
+/// Requires at least one bucket to be allocated.
+Value &
+ValueInternalMap::unsafeAdd( const char *key, 
+                             bool isStatic, 
+                             HashKey hashedKey )
+{
+   JSON_ASSERT_MESSAGE( bucketsSize_ > 0, "ValueInternalMap::unsafeAdd(): internal logic error." );
+   BucketIndex bucketIndex = hashedKey % bucketsSize_;
+   ValueInternalLink *&previousLink = getLastLinkInBucket( bucketIndex );
+   ValueInternalLink *link = previousLink;
+   BucketIndex index;
+   for ( index =0; index < ValueInternalLink::itemPerLink; ++index )
+   {
+      if ( link->items_[index].isItemAvailable() )
+         break;
+   }
+   if ( index == ValueInternalLink::itemPerLink ) // need to add a new page
+   {
+      // NOTE(review): newLink->previous_ is not assigned here -- confirm
+      // whether doActualRemove() can ever release such a page.
+      ValueInternalLink *newLink = mapAllocator()->allocateMapLink();
+      index = 0;
+      link->next_ = newLink;
+      previousLink = newLink;
+      link = newLink;
+   }
+   return setNewItem( key, isStatic, link, index );
+}
+
+
+/// Hash of a NUL-terminated key: sum of each character multiplied by 37.
+ValueInternalMap::HashKey 
+ValueInternalMap::hash( const char *key ) const
+{
+   HashKey hash = 0;
+   while ( *key )
+      hash += *key++ * 37;
+   return hash;
+}
+
+
+int 
+ValueInternalMap::compare( const ValueInternalMap &other ) const
+{
+   int sizeDiff( itemCount_ - other.itemCount_ );
+   if ( sizeDiff != 0 )
+      return sizeDiff;
+   // Strict order guaranty is required. Compare all keys FIRST, then compare values.
+   IteratorState it;
+   IteratorState itEnd;
+   makeBeginIterator( it );
+   makeEndIterator( itEnd );
+   for ( ; !equals(it,itEnd); increment(it) )
+   {
+      if ( !other.find( key( it ) ) )
+         return 1;
+   }
+
+   // All keys are equals, let's compare values
+   makeBeginIterator( it );
+   for ( ; !equals(it,itEnd); increment(it) )
+   {
+      const Value *otherValue = other.find( key( it ) );
+      int valueDiff = value(it).compare( *otherValue );
+      if ( valueDiff != 0 )
+         return valueDiff;
+   }
+   return 0;
+}
+
+
+/// Positions \a it on slot 0 of the first bucket (link_ is 0 when no bucket
+/// has been allocated).
+void 
+ValueInternalMap::makeBeginIterator( IteratorState &it ) const
+{
+   it.map_ = const_cast<ValueInternalMap *>( this );
+   it.bucketIndex_ = 0;
+   it.itemIndex_ = 0;
+   it.link_ = buckets_;
+}
+
+
+/// Positions \a it one past the last bucket, with a null link.
+void 
+ValueInternalMap::makeEndIterator( IteratorState &it ) const
+{
+   it.map_ = const_cast<ValueInternalMap *>( this );
+   it.bucketIndex_ = bucketsSize_;
+   it.itemIndex_ = 0;
+   it.link_ = 0;
+}
+
+
+/// Two iterator states are equal when map, bucket, page and slot all match.
+bool 
+ValueInternalMap::equals( const IteratorState &x, const IteratorState &other )
+{
+   return x.map_ == other.map_  
+          &&  x.bucketIndex_ == other.bucketIndex_  
+          &&  x.link_ == other.link_
+          &&  x.itemIndex_ == other.itemIndex_;
+}
+
+
+/// Moves \a iterator to slot 0 of the next bucket, or to the end state when
+/// the last bucket has been passed.
+void 
+ValueInternalMap::incrementBucket( IteratorState &iterator )
+{
+   ++iterator.bucketIndex_;
+   JSON_ASSERT_MESSAGE( iterator.bucketIndex_ <= iterator.map_->bucketsSize_,
+      "ValueInternalMap::increment(): attempting to iterate beyond end." );
+   if ( iterator.bucketIndex_ == iterator.map_->bucketsSize_ )
+      iterator.link_ = 0;
+   else
+      iterator.link_ = &(iterator.map_->buckets_[iterator.bucketIndex_]);
+   iterator.itemIndex_ = 0;
+}
+
+
+/// Advances \a iterator to the next used slot: next slot of the page, then
+/// first slot of the next page, then first slot of the next bucket.
+void 
+ValueInternalMap::increment( IteratorState &iterator )
+{
+   JSON_ASSERT_MESSAGE( iterator.map_, "Attempting to iterator using invalid iterator." );
+   ++iterator.itemIndex_;
+   if ( iterator.itemIndex_ == ValueInternalLink::itemPerLink )
+   {
+      JSON_ASSERT_MESSAGE( iterator.link_ != 0,
+         "ValueInternalMap::increment(): attempting to iterate beyond end." );
+      iterator.link_ = iterator.link_->next_;
+      // Restart at the first slot of the new page; without this the index
+      // stayed at itemPerLink and the next access ran past items_[].
+      iterator.itemIndex_ = 0;
+      if ( iterator.link_ == 0 )
+         incrementBucket( iterator );
+   }
+   else if ( iterator.link_->items_[iterator.itemIndex_].isItemAvailable() )
+   {
+      incrementBucket( iterator );
+   }
+}
+
+
+/// Steps \a iterator one slot back.
+/// NOTE(review): when called on an end iterator (link_ == 0) the first
+/// branch dereferences link_ -- confirm callers never do that.
+void 
+ValueInternalMap::decrement( IteratorState &iterator )
+{
+   if ( iterator.itemIndex_ == 0 )
+   {
+      JSON_ASSERT_MESSAGE( iterator.map_, "Attempting to iterate using invalid iterator." );
+      if ( iterator.link_ == &iterator.map_->buckets_[iterator.bucketIndex_] )
+      {
+         JSON_ASSERT_MESSAGE( iterator.bucketIndex_ > 0, "Attempting to iterate beyond beginning." );
+         --(iterator.bucketIndex_);
+      }
+      iterator.link_ = iterator.link_->previous_;
+      iterator.itemIndex_ = ValueInternalLink::itemPerLink - 1;
+   }
+   else
+   {
+      // Inside a page: just step to the previous slot (this case used to be
+      // a silent no-op).
+      --iterator.itemIndex_;
+   }
+}
+
+
+/// Returns the key of the slot designated by \a iterator.
+const char *
+ValueInternalMap::key( const IteratorState &iterator )
+{
+   JSON_ASSERT_MESSAGE( iterator.link_, "Attempting to iterate using invalid iterator." );
+   return iterator.link_->keys_[iterator.itemIndex_];
+}
+
+/// Same as key( iterator ), additionally reporting the slot's
+/// member-name-static flag through \a isStatic.
+const char *
+ValueInternalMap::key( const IteratorState &iterator, bool &isStatic )
+{
+   JSON_ASSERT_MESSAGE( iterator.link_, "Attempting to iterate using invalid iterator." );
+   isStatic = iterator.link_->items_[iterator.itemIndex_].isMemberNameStatic();
+   return iterator.link_->keys_[iterator.itemIndex_];
+}
+
+
+Value &
+ValueInternalMap::value( const IteratorState &iterator )
+{
+   JSON_ASSERT_MESSAGE( iterator.link_, "Attempting to iterate using invalid iterator."
);
+   return iterator.link_->items_[iterator.itemIndex_];
+}
+
+
+/// Counts how many increment() steps lead from \a x to \a y.
+/// \a y must be reachable from \a x by incrementing.
+int 
+ValueInternalMap::distance( const IteratorState &x, const IteratorState &y )
+{
+   int offset = 0;
+   IteratorState it = x;
+   while ( !equals( it, y ) )
+   {
+      increment( it );
+      ++offset;   // was never incremented: the function always returned 0
+   }
+   return offset;
+}
diff --git a/src/third_party/jsoncpp/json_reader.cpp b/src/third_party/jsoncpp/json_reader.cpp
new file mode 100644
index 0000000..07561f2
--- /dev/null
+++ b/src/third_party/jsoncpp/json_reader.cpp
@@ -0,0 +1,870 @@
+// Copyright 2007-2010 Baptiste Lepilleur
+// Distributed under MIT license, or public domain if desired and
+// recognized in your jurisdiction.
+// See file LICENSE for detail or copy at http://jsoncpp.sourceforge.net/LICENSE
+
+#include "json_reader.h"
+#include "json_value.h"
+#include "json_tool.h"
+#include <utility>
+#include <cstdio>
+#include <cassert>
+#include <cstring>
+#include <iostream>
+#include <stdexcept>
+
+#if _MSC_VER >= 1400 // VC++ 8.0
+#pragma warning( disable : 4996 )   // disable warning about strdup being deprecated.
+#endif
+
+namespace Json {
+
+// Implementation of class Features
+// ////////////////////////////////
+
+Features::Features()
+   : allowComments_( true )
+   , strictRoot_( false )
+{
+}
+
+
+Features 
+Features::all()
+{
+   return Features();
+}
+
+
+Features 
+Features::strictMode()
+{
+   Features features;
+   features.allowComments_ = false;
+   features.strictRoot_ = true;
+   return features;
+}
+
+// Implementation of class Reader
+// ////////////////////////////////
+
+
+static inline bool 
+in( Reader::Char c, Reader::Char c1, Reader::Char c2, Reader::Char c3, Reader::Char c4 )
+{
+   return c == c1  ||  c == c2  ||  c == c3  ||  c == c4;
+}
+
+static inline bool 
+in( Reader::Char c, Reader::Char c1, Reader::Char c2, Reader::Char c3, Reader::Char c4, Reader::Char c5 )
+{
+   return c == c1  ||  c == c2  ||  c == c3  ||  c == c4  ||  c == c5;
+}
+
+
+static bool 
+containsNewLine( Reader::Location begin, 
+                 Reader::Location end )
+{
+   for ( ;begin < end; ++begin )
+      if ( *begin == '\n'  ||  *begin == '\r' )
+         return true;
+   return false;
+}
+
+
+// Class Reader +// ////////////////////////////////////////////////////////////////// + +Reader::Reader() + : features_( Features::all() ) +{ +} + + +Reader::Reader( const Features &features ) + : features_( features ) +{ +} + + +bool +Reader::parse( const std::string &document, + Value &root, + bool collectComments ) +{ + document_ = document; + const char *begin = document_.c_str(); + const char *end = begin + document_.length(); + return parse( begin, end, root, collectComments ); +} + + +bool +Reader::parse( std::istream& sin, + Value &root, + bool collectComments ) +{ + //std::istream_iterator begin(sin); + //std::istream_iterator end; + // Those would allow streamed input from a file, if parse() were a + // template function. + + // Since std::string is reference-counted, this at least does not + // create an extra copy. + std::string doc; + std::getline(sin, doc, (char)EOF); + return parse( doc, root, collectComments ); +} + +bool +Reader::parse( const char *beginDoc, const char *endDoc, + Value &root, + bool collectComments ) +{ + if ( !features_.allowComments_ ) + { + collectComments = false; + } + + begin_ = beginDoc; + end_ = endDoc; + collectComments_ = collectComments; + current_ = begin_; + lastValueEnd_ = 0; + lastValue_ = 0; + commentsBefore_ = ""; + errors_.clear(); + while ( !nodes_.empty() ) + nodes_.pop(); + nodes_.push( &root ); + + bool successful = readValue(); + Token token; + skipCommentTokens( token ); + if ( collectComments_ && !commentsBefore_.empty() ) + root.setComment( commentsBefore_, commentAfter ); + if ( features_.strictRoot_ ) + { + if ( !root.isArray() && !root.isObject() ) + { + // Set error location to start of doc, ideally should be first token found in doc + token.type_ = tokenError; + token.start_ = beginDoc; + token.end_ = endDoc; + addError( "A valid JSON document must be either an array or an object value.", + token ); + return false; + } + } + return successful; +} + + +bool +Reader::readValue() +{ + Token token; + 
skipCommentTokens( token ); + bool successful = true; + + if ( collectComments_ && !commentsBefore_.empty() ) + { + currentValue().setComment( commentsBefore_, commentBefore ); + commentsBefore_ = ""; + } + + + switch ( token.type_ ) + { + case tokenObjectBegin: + successful = readObject( token ); + break; + case tokenArrayBegin: + successful = readArray( token ); + break; + case tokenNumber: + successful = decodeNumber( token ); + break; + case tokenString: + successful = decodeString( token ); + break; + case tokenTrue: + currentValue() = true; + break; + case tokenFalse: + currentValue() = false; + break; + case tokenNull: + currentValue() = Value(); + break; + default: + return addError( "Syntax error: value, object or array expected.", token ); + } + + if ( collectComments_ ) + { + lastValueEnd_ = current_; + lastValue_ = ¤tValue(); + } + + return successful; +} + + +void +Reader::skipCommentTokens( Token &token ) +{ + if ( features_.allowComments_ ) + { + do + { + readToken( token ); + } + while ( token.type_ == tokenComment ); + } + else + { + readToken( token ); + } +} + + +bool +Reader::expectToken( TokenType type, Token &token, const char *message ) +{ + readToken( token ); + if ( token.type_ != type ) + return addError( message, token ); + return true; +} + + +bool +Reader::readToken( Token &token ) +{ + skipSpaces(); + token.start_ = current_; + Char c = getNextChar(); + bool ok = true; + switch ( c ) + { + case '{': + token.type_ = tokenObjectBegin; + break; + case '}': + token.type_ = tokenObjectEnd; + break; + case '[': + token.type_ = tokenArrayBegin; + break; + case ']': + token.type_ = tokenArrayEnd; + break; + case '"': + token.type_ = tokenString; + ok = readString(); + break; + case '/': + token.type_ = tokenComment; + ok = readComment(); + break; + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case '-': + token.type_ = tokenNumber; + readNumber(); + break; + case 't': + 
token.type_ = tokenTrue; + ok = match( "rue", 3 ); + break; + case 'f': + token.type_ = tokenFalse; + ok = match( "alse", 4 ); + break; + case 'n': + token.type_ = tokenNull; + ok = match( "ull", 3 ); + break; + case ',': + token.type_ = tokenArraySeparator; + break; + case ':': + token.type_ = tokenMemberSeparator; + break; + case 0: + token.type_ = tokenEndOfStream; + break; + default: + ok = false; + break; + } + if ( !ok ) + token.type_ = tokenError; + token.end_ = current_; + return true; +} + + +void +Reader::skipSpaces() +{ + while ( current_ != end_ ) + { + Char c = *current_; + if ( c == ' ' || c == '\t' || c == '\r' || c == '\n' ) + ++current_; + else + break; + } +} + + +bool +Reader::match( Location pattern, + int patternLength ) +{ + if ( end_ - current_ < patternLength ) + return false; + int index = patternLength; + while ( index-- ) + if ( current_[index] != pattern[index] ) + return false; + current_ += patternLength; + return true; +} + + +bool +Reader::readComment() +{ + Location commentBegin = current_ - 1; + Char c = getNextChar(); + bool successful = false; + if ( c == '*' ) + successful = readCStyleComment(); + else if ( c == '/' ) + successful = readCppStyleComment(); + if ( !successful ) + return false; + + if ( collectComments_ ) + { + CommentPlacement placement = commentBefore; + if ( lastValueEnd_ && !containsNewLine( lastValueEnd_, commentBegin ) ) + { + if ( c != '*' || !containsNewLine( commentBegin, current_ ) ) + placement = commentAfterOnSameLine; + } + + addComment( commentBegin, current_, placement ); + } + return true; +} + + +void +Reader::addComment( Location begin, + Location end, + CommentPlacement placement ) +{ + assert( collectComments_ ); + if ( placement == commentAfterOnSameLine ) + { + assert( lastValue_ != 0 ); + lastValue_->setComment( std::string( begin, end ), placement ); + } + else + { + if ( !commentsBefore_.empty() ) + commentsBefore_ += "\n"; + commentsBefore_ += std::string( begin, end ); + } +} + + +bool 
+Reader::readCStyleComment()
+{
+   // Consume characters up to the '*' of the closing "*/"; the '/' itself is
+   // consumed by the final getNextChar(). Returns false when input ends first.
+   while ( current_ != end_ )
+   {
+      Char c = getNextChar();
+      // getNextChar() may have consumed the last character: check the bound
+      // before peeking at *current_.
+      if ( c == '*'  &&  current_ != end_  &&  *current_ == '/' )
+         break;
+   }
+   return getNextChar() == '/';
+}
+
+
+/// Consumes a "//" comment up to and including the first '\r' or '\n', or
+/// to the end of input. Always succeeds.
+bool 
+Reader::readCppStyleComment()
+{
+   while ( current_ != end_ )
+   {
+      Char c = getNextChar();
+      if (  c == '\r'  ||  c == '\n' )
+         break;
+   }
+   return true;
+}
+
+
+/// Advances past every following character that is a digit or one of
+/// '.', 'e', 'E', '+', '-'. No validation is done here.
+void 
+Reader::readNumber()
+{
+   while ( current_ != end_ )
+   {
+      if ( !(*current_ >= '0'  &&  *current_ <= '9')  &&
+           !in( *current_, '.', 'e', 'E', '+', '-' ) )
+         break;
+      ++current_;
+   }
+}
+
+/// Consumes the remainder of a string token, skipping the character that
+/// follows each backslash.
+/// \return true if the closing '"' was found before the end of input.
+bool
+Reader::readString()
+{
+   Char c = 0;
+   while ( current_ != end_ )
+   {
+      c = getNextChar();
+      if ( c == '\\' )
+         getNextChar();
+      else if ( c == '"' )
+         break;
+   }
+   return c == '"';
+}
+
+
+bool 
+Reader::readObject( Token &tokenStart )
+{
+   Token tokenName;
+   std::string name;
+   currentValue() = Value( objectValue );
+   while ( readToken( tokenName ) )
+   {
+      bool initialTokenOk = true;
+      while ( tokenName.type_ == tokenComment  &&  initialTokenOk )
+         initialTokenOk = readToken( tokenName );
+      if  ( !initialTokenOk )
+         break;
+      if ( tokenName.type_ == tokenObjectEnd  &&  name.empty() )  // empty object
+         return true;
+      if ( tokenName.type_ != tokenString )
+         break;
+      
+      name = "";
+      if ( !decodeString( tokenName, name ) )
+         return recoverFromError( tokenObjectEnd );
+
+      Token colon;
+      if ( !readToken( colon ) ||  colon.type_ != tokenMemberSeparator )
+      {
+         return addErrorAndRecover( "Missing ':' after object member name", 
+                                    colon, 
+                                    tokenObjectEnd );
+      }
+      Value &value = currentValue()[ name ];
+      nodes_.push( &value );
+      bool ok = readValue();
+      nodes_.pop();
+      if ( !ok ) // error already set
+         return recoverFromError( tokenObjectEnd );
+
+      Token comma;
+      if ( !readToken( comma )
+            ||  ( comma.type_ != tokenObjectEnd  &&  
+                  comma.type_ != tokenArraySeparator &&
+                  comma.type_ != tokenComment ) )
+      {
+         return addErrorAndRecover( "Missing ',' or '}' in object declaration", 
+                                    comma, 
+                                    tokenObjectEnd );
+      }
+      bool finalizeTokenOk = true;
+      while (
comma.type_ == tokenComment &&
+              finalizeTokenOk )
+         finalizeTokenOk = readToken( comma );
+      if ( comma.type_ == tokenObjectEnd )
+         return true;
+   }
+   return addErrorAndRecover( "Missing '}' or object member name", 
+                              tokenName, 
+                              tokenObjectEnd );
+}
+
+
+/// Parses the elements of an array whose '[' has already been consumed and
+/// stores them into the current value.
+/// \return true on success; on error an entry is added to the error list,
+/// tokens are skipped up to the matching ']' and false is returned.
+bool 
+Reader::readArray( Token &tokenStart )
+{
+   currentValue() = Value( arrayValue );
+   skipSpaces();
+   // skipSpaces() may stop at end of input: do not peek past it.
+   if ( current_ != end_  &&  *current_ == ']' ) // empty array
+   {
+      Token endArray;
+      readToken( endArray );
+      return true;
+   }
+   int index = 0;
+   while ( true )
+   {
+      Value &value = currentValue()[ index++ ];
+      nodes_.push( &value );
+      bool ok = readValue();
+      nodes_.pop();
+      if ( !ok ) // error already set
+         return recoverFromError( tokenArrayEnd );
+
+      Token token;
+      // Accept Comment after last item in the array.
+      ok = readToken( token );
+      while ( token.type_ == tokenComment  &&  ok )
+      {
+         ok = readToken( token );
+      }
+      // After an element only ',' or ']' is legal. The previous test used
+      // '==' on both sides of '&&', which can never be true, so any token
+      // was accepted as a separator.
+      bool badTokenType = ( token.type_ != tokenArraySeparator  &&  
+                            token.type_ != tokenArrayEnd );
+      if ( !ok  ||  badTokenType )
+      {
+         return addErrorAndRecover( "Missing ',' or ']' in array declaration", 
+                                    token, 
+                                    tokenArrayEnd );
+      }
+      if ( token.type_ == tokenArrayEnd )
+         break;
+   }
+   return true;
+}
+
+
+bool 
+Reader::decodeNumber( Token &token )
+{
+   bool isDouble = false;
+   for ( Location inspect = token.start_; inspect != token.end_; ++inspect )
+   {
+      isDouble = isDouble  
+                 ||  in( *inspect, '.', 'e', 'E', '+' )  
+                 ||  ( *inspect == '-'  &&  inspect != token.start_ );
+   }
+   if ( isDouble )
+      return decodeDouble( token );
+   // Attempts to parse the number as an integer. If the number is
+   // larger than the maximum supported value of an integer then
+   // we decode the number as a double.
+   Location current = token.start_;
+   bool isNegative = *current == '-';
+   if ( isNegative )
+      ++current;
+   Value::UInt maxIntegerValue = isNegative ?
Value::UInt(-Value::minInt) 
+                                            : Value::maxUInt;
+   Value::UInt threshold = maxIntegerValue / 10;
+   Value::UInt lastDigitThreshold = maxIntegerValue % 10;
+   assert( lastDigitThreshold >=0  &&  lastDigitThreshold <= 9 );
+   Value::UInt value = 0;
+   while ( current < token.end_ )
+   {
+      Char c = *current++;
+      if ( c < '0'  ||  c > '9' )
+         return addError( "'" + std::string( token.start_, token.end_ ) + "' is not a number.", token );
+      Value::UInt digit(c - '0');
+      if ( value >= threshold )
+      {
+         // If the current digit is not the last one, or if it is
+         // greater than the last digit of the maximum integer value,
+         // the parse the number as a double.
+         if ( current != token.end_  ||  digit > lastDigitThreshold )
+         {
+            return decodeDouble( token );
+         }
+      }
+      value = value * 10 + digit;
+   }
+   if ( isNegative )
+      currentValue() = -Value::Int( value );
+   else if ( value <= Value::UInt(Value::maxInt) )
+      currentValue() = Value::Int( value );
+   else
+      currentValue() = value;
+   return true;
+}
+
+
+/// Converts the text of \a token to a double with sscanf("%lf") and stores
+/// it in the current value.
+/// \return true on success; otherwise records a "is not a number" error and
+/// returns false.
+bool 
+Reader::decodeDouble( Token &token )
+{
+   double value = 0;
+   const int bufferSize = 32;
+   int count;
+   int length = int(token.end_ - token.start_);
+   if ( length <= bufferSize )
+   {
+      // One extra slot for the terminating NUL: a token of exactly
+      // bufferSize characters used to write one byte past the array.
+      Char buffer[bufferSize + 1];
+      memcpy( buffer, token.start_, length );
+      buffer[length] = 0;
+      count = sscanf( buffer, "%lf", &value );
+   }
+   else
+   {
+      // Long tokens go through a heap-allocated, NUL-terminated copy.
+      std::string buffer( token.start_, token.end_ );
+      count = sscanf( buffer.c_str(), "%lf", &value );
+   }
+
+   if ( count != 1 )
+      return addError( "'" + std::string( token.start_, token.end_ ) + "' is not a number.", token );
+   currentValue() = value;
+   return true;
+}
+
+
+/// Decodes the string token and assigns the result to the current value.
+bool 
+Reader::decodeString( Token &token )
+{
+   std::string decoded;
+   if ( !decodeString( token, decoded ) )
+      return false;
+   currentValue() = decoded;
+   return true;
+}
+
+
+bool 
+Reader::decodeString( Token &token, std::string &decoded )
+{
+   decoded.reserve( token.end_ - token.start_ - 2 );
+   Location current = token.start_ + 1; // skip '"'
+   Location end = token.end_ - 1;      // do not
include '"'
+   while ( current != end )
+   {
+      Char c = *current++;
+      if ( c == '"' )
+         break;
+      else if ( c == '\\' )
+      {
+         if ( current == end )
+            return addError( "Empty escape sequence in string", token, current );
+         Char escape = *current++;
+         switch ( escape )
+         {
+         case '"': decoded += '"'; break;
+         case '/': decoded += '/'; break;
+         case '\\': decoded += '\\'; break;
+         case 'b': decoded += '\b'; break;
+         case 'f': decoded += '\f'; break;
+         case 'n': decoded += '\n'; break;
+         case 'r': decoded += '\r'; break;
+         case 't': decoded += '\t'; break;
+         case 'u':
+            {
+               unsigned int unicode;
+               if ( !decodeUnicodeCodePoint( token, current, end, unicode ) )
+                  return false;
+               decoded += codePointToUTF8(unicode);
+            }
+            break;
+         default:
+            return addError( "Bad escape sequence in string", token, current );
+         }
+      }
+      else
+      {
+         decoded += c;
+      }
+   }
+   return true;
+}
+
+/// Decodes the four hex digits following "\u" and, when they form a high
+/// surrogate (0xD800-0xDBFF), the "\uXXXX" that must follow, combining both
+/// into one code point. \a current is advanced past what was consumed.
+/// NOTE(review): the second half is not checked to lie in 0xDC00-0xDFFF.
+bool
+Reader::decodeUnicodeCodePoint( Token &token, 
+                                     Location &current, 
+                                     Location end, 
+                                     unsigned int &unicode )
+{
+
+   if ( !decodeUnicodeEscapeSequence( token, current, end, unicode ) )
+      return false;
+   if (unicode >= 0xD800 && unicode <= 0xDBFF)
+   {
+      // surrogate pairs
+      if (end - current < 6)
+         return addError( "additional six characters expected to parse unicode surrogate pair.", token, current );
+      unsigned int surrogatePair;
+      if (*(current++) == '\\' && *(current++)== 'u')
+      {
+         if (decodeUnicodeEscapeSequence( token, current, end, surrogatePair ))
+         {
+            unicode = 0x10000 + ((unicode & 0x3FF) << 10) + (surrogatePair & 0x3FF);
+         } 
+         else
+            return false;
+      } 
+      else
+         return addError( "expecting another \\u token to begin the second half of a unicode surrogate pair", token, current );
+   }
+   return true;
+}
+
+/// Reads exactly four hexadecimal digits at \a current into \a unicode,
+/// advancing \a current. Records an error and returns false when fewer than
+/// four characters remain or a non-hex character is met.
+bool 
+Reader::decodeUnicodeEscapeSequence( Token &token, 
+                                     Location &current, 
+                                     Location end, 
+                                     unsigned int &unicode )
+{
+   if ( end - current < 4 )
+      return addError( "Bad unicode escape sequence in string: four digits expected.", token, current );
+   unicode = 0;
+   for ( int index =0; index < 4; ++index )
+   {
+      Char c = *current++;
+      unicode *= 16;
+      if ( c >= '0'  &&  c <= '9' )
+         unicode += c - '0';
+      else if ( c >= 'a'  &&  c <= 'f' )
+         unicode += c - 'a' + 10;
+      else if ( c >= 'A'  &&  c <= 'F' )
+         unicode += c - 'A' + 10;
+      else
+         return addError( "Bad unicode escape sequence in string: hexadecimal digit expected.", token, current );
+   }
+   return true;
+}
+
+
+/// Appends an error entry (token, message, optional extra location) to the
+/// error list. Always returns false so callers can 'return addError(...)'.
+bool 
+Reader::addError( const std::string &message, 
+                  Token &token,
+                  Location extra )
+{
+   ErrorInfo info;
+   info.token_ = token;
+   info.message_ = message;
+   info.extra_ = extra;
+   errors_.push_back( info );
+   return false;
+}
+
+
+/// Skips tokens until \a skipUntilToken or end of stream is read, then drops
+/// any error recorded while skipping. Always returns false.
+bool 
+Reader::recoverFromError( TokenType skipUntilToken )
+{
+   int errorCount = int(errors_.size());
+   Token skip;
+   while ( true )
+   {
+      if ( !readToken(skip) )
+         errors_.resize( errorCount ); // discard errors caused by recovery
+      if ( skip.type_ == skipUntilToken  ||  skip.type_ == tokenEndOfStream )
+         break;
+   }
+   errors_.resize( errorCount );
+   return false;
+}
+
+
+/// Records the error, then resynchronises on \a skipUntilToken.
+bool 
+Reader::addErrorAndRecover( const std::string &message, 
+                            Token &token,
+                            TokenType skipUntilToken )
+{
+   addError( message, token );
+   return recoverFromError( skipUntilToken );
+}
+
+
+/// Value on top of the node stack, i.e. the one currently being filled.
+Value &
+Reader::currentValue()
+{
+   return *(nodes_.top());
+}
+
+
+/// Returns the next character and advances, or 0 at end of input.
+Reader::Char 
+Reader::getNextChar()
+{
+   if ( current_ == end_ )
+      return 0;
+   return *current_++;
+}
+
+
+/// Computes the 1-based line and column of \a location by scanning the
+/// document from its beginning; "\r\n", "\r" and "\n" each end one line.
+void 
+Reader::getLocationLineAndColumn( Location location,
+                                  int &line,
+                                  int &column ) const
+{
+   Location current = begin_;
+   Location lastLineStart = current;
+   line = 0;
+   while ( current < location  &&  current != end_ )
+   {
+      Char c = *current++;
+      if ( c == '\r' )
+      {
+         // The '\r' may be the last character: check the bound before
+         // looking ahead for a following '\n'.
+         if ( current != end_  &&  *current == '\n' )
+            ++current;
+         lastLineStart = current;
+         ++line;
+      }
+      else if ( c == '\n' )
+      {
+         lastLineStart = current;
+         ++line;
+      }
+   }
+   // column & line start at 1
+   column = int(location - lastLineStart) + 1;
+   ++line;
+}
+
+
+std::string
+Reader::getLocationLineAndColumn( Location location ) const
+{
+   int line, column;
+   getLocationLineAndColumn( location, line,
column ); + char buffer[18+16+16+1]; + sprintf( buffer, "Line %d, Column %d", line, column ); + return buffer; +} + + +std::string +Reader::getFormatedErrorMessages() const +{ + std::string formattedMessage; + for ( Errors::const_iterator itError = errors_.begin(); + itError != errors_.end(); + ++itError ) + { + const ErrorInfo &error = *itError; + formattedMessage += "* " + getLocationLineAndColumn( error.token_.start_ ) + "\n"; + formattedMessage += " " + error.message_ + "\n"; + if ( error.extra_ ) + formattedMessage += "See " + getLocationLineAndColumn( error.extra_ ) + " for detail.\n"; + } + return formattedMessage; +} + + +std::istream& operator>>( std::istream &sin, Value &root ) +{ + Json::Reader reader; + bool ok = reader.parse(sin, root, true); + //JSON_ASSERT( ok ); + if (!ok) throw std::runtime_error(reader.getFormatedErrorMessages()); + return sin; +} + + +} // namespace Json diff --git a/src/third_party/jsoncpp/json_reader.h b/src/third_party/jsoncpp/json_reader.h new file mode 100644 index 0000000..4caa102 --- /dev/null +++ b/src/third_party/jsoncpp/json_reader.h @@ -0,0 +1,201 @@ +// Copyright 2007-2010 Baptiste Lepilleur +// Distributed under MIT license, or public domain if desired and +// recognized in your jurisdiction. +// See file LICENSE for detail or copy at http://jsoncpp.sourceforge.net/LICENSE + +#ifndef CPPTL_JSON_READER_H_INCLUDED +# define CPPTL_JSON_READER_H_INCLUDED + +# include "json_features.h" +# include "json_value.h" +# include +# include +# include +# include + +namespace Json { + + /** \brief Unserialize a JSON document into a Value. + * + */ + class JSON_API Reader + { + public: + typedef char Char; + typedef const Char *Location; + + /** \brief Constructs a Reader allowing all features + * for parsing. + */ + Reader(); + + /** \brief Constructs a Reader allowing the specified feature set + * for parsing. + */ + Reader( const Features &features ); + + /** \brief Read a Value from a JSON document. 
+ * \param document UTF-8 encoded string containing the document to read. + * \param root [out] Contains the root value of the document if it was + * successfully parsed. + * \param collectComments \c true to collect comment and allow writing them back during + * serialization, \c false to discard comments. + * This parameter is ignored if Features::allowComments_ + * is \c false. + * \return \c true if the document was successfully parsed, \c false if an error occurred. + */ + bool parse( const std::string &document, + Value &root, + bool collectComments = true ); + + /** \brief Read a Value from a JSON document. + * \param document UTF-8 encoded string containing the document to read. + * \param root [out] Contains the root value of the document if it was + * successfully parsed. + * \param collectComments \c true to collect comment and allow writing them back during + * serialization, \c false to discard comments. + * This parameter is ignored if Features::allowComments_ + * is \c false. + * \return \c true if the document was successfully parsed, \c false if an error occurred. + */ + bool parse( const char *beginDoc, const char *endDoc, + Value &root, + bool collectComments = true ); + + /// \brief Parse from input stream. + /// \see Json::operator>>(std::istream&, Json::Value&). + bool parse( std::istream &is, + Value &root, + bool collectComments = true ); + + /** \brief Returns a user friendly string that list errors in the parsed document. + * \return Formatted error message with the list of errors with their location in + * the parsed document. An empty string is returned if no error occurred + * during parsing. 
+ */ + std::string getFormatedErrorMessages() const; + + private: + enum TokenType + { + tokenEndOfStream = 0, + tokenObjectBegin, + tokenObjectEnd, + tokenArrayBegin, + tokenArrayEnd, + tokenString, + tokenNumber, + tokenTrue, + tokenFalse, + tokenNull, + tokenArraySeparator, + tokenMemberSeparator, + tokenComment, + tokenError + }; + + class Token + { + public: + TokenType type_; + Location start_; + Location end_; + }; + + class ErrorInfo + { + public: + Token token_; + std::string message_; + Location extra_; + }; + + typedef std::deque Errors; + + bool expectToken( TokenType type, Token &token, const char *message ); + bool readToken( Token &token ); + void skipSpaces(); + bool match( Location pattern, + int patternLength ); + bool readComment(); + bool readCStyleComment(); + bool readCppStyleComment(); + bool readString(); + void readNumber(); + bool readValue(); + bool readObject( Token &token ); + bool readArray( Token &token ); + bool decodeNumber( Token &token ); + bool decodeString( Token &token ); + bool decodeString( Token &token, std::string &decoded ); + bool decodeDouble( Token &token ); + bool decodeUnicodeCodePoint( Token &token, + Location ¤t, + Location end, + unsigned int &unicode ); + bool decodeUnicodeEscapeSequence( Token &token, + Location ¤t, + Location end, + unsigned int &unicode ); + bool addError( const std::string &message, + Token &token, + Location extra = 0 ); + bool recoverFromError( TokenType skipUntilToken ); + bool addErrorAndRecover( const std::string &message, + Token &token, + TokenType skipUntilToken ); + void skipUntilSpace(); + Value ¤tValue(); + Char getNextChar(); + void getLocationLineAndColumn( Location location, + int &line, + int &column ) const; + std::string getLocationLineAndColumn( Location location ) const; + void addComment( Location begin, + Location end, + CommentPlacement placement ); + void skipCommentTokens( Token &token ); + + typedef std::stack Nodes; + Nodes nodes_; + Errors errors_; + std::string 
document_; + Location begin_; + Location end_; + Location current_; + Location lastValueEnd_; + Value *lastValue_; + std::string commentsBefore_; + Features features_; + bool collectComments_; + }; + + /** \brief Read from 'sin' into 'root'. + + Always keep comments from the input JSON. + + This can be used to read a file into a particular sub-object. + For example: + \code + Json::Value root; + cin >> root["dir"]["file"]; + cout << root; + \endcode + Result: + \verbatim + { + "dir": { + "file": { + // The input stream JSON would be nested here. + } + } + } + \endverbatim + \throw std::exception on parse error. + \see Json::operator<<() + */ + std::istream& operator>>( std::istream&, Value& ); + +} // namespace Json + +#endif // CPPTL_JSON_READER_H_INCLUDED diff --git a/src/third_party/jsoncpp/json_tool.h b/src/third_party/jsoncpp/json_tool.h new file mode 100644 index 0000000..c20639d --- /dev/null +++ b/src/third_party/jsoncpp/json_tool.h @@ -0,0 +1,93 @@ +// Copyright 2007-2010 Baptiste Lepilleur +// Distributed under MIT license, or public domain if desired and +// recognized in your jurisdiction. +// See file LICENSE for detail or copy at http://jsoncpp.sourceforge.net/LICENSE + +#ifndef LIB_JSONCPP_JSON_TOOL_H_INCLUDED +# define LIB_JSONCPP_JSON_TOOL_H_INCLUDED + +/* This header provides common string manipulation support, such as UTF-8, + * portable conversion from/to string... + * + * It is an internal header that must not be exposed. + */ + +namespace Json { + +/// Converts a unicode code-point to UTF-8. 
+static inline std::string +codePointToUTF8(unsigned int cp) +{ + std::string result; + + // based on description from http://en.wikipedia.org/wiki/UTF-8 + + if (cp <= 0x7f) + { + result.resize(1); + result[0] = static_cast<char>(cp); + } + else if (cp <= 0x7FF) + { + result.resize(2); + result[1] = static_cast<char>(0x80 | (0x3f & cp)); + result[0] = static_cast<char>(0xC0 | (0x1f & (cp >> 6))); + } + else if (cp <= 0xFFFF) + { + result.resize(3); + result[2] = static_cast<char>(0x80 | (0x3f & cp)); + result[1] = 0x80 | static_cast<char>((0x3f & (cp >> 6))); + result[0] = 0xE0 | static_cast<char>((0xf & (cp >> 12))); + } + else if (cp <= 0x10FFFF) + { + result.resize(4); + result[3] = static_cast<char>(0x80 | (0x3f & cp)); + result[2] = static_cast<char>(0x80 | (0x3f & (cp >> 6))); + result[1] = static_cast<char>(0x80 | (0x3f & (cp >> 12))); + result[0] = static_cast<char>(0xF0 | (0x7 & (cp >> 18))); + } + + return result; // left empty when cp is above 0x10FFFF +} + + +/// Returns true if ch is a control character (in range [1,0x1F]; 0 is excluded by the ch > 0 test). +static inline bool +isControlCharacter(char ch) +{ + return ch > 0 && ch <= 0x1F; +} + + +enum { + /// Constant that specifies the size of the buffer that must be passed to uintToString. + uintToStringBufferSize = 3*sizeof(UInt)+1 +}; + +// Defines a char buffer for use with uintToString(). +typedef char UIntToStringBuffer[uintToStringBufferSize]; + + +/** Converts an unsigned integer to string. + * @param value Unsigned integer to convert to string + * @param current Input/Output string buffer. + * Must have at least uintToStringBufferSize chars free.
+ */ +static inline void +uintToString( UInt value, + char *&current ) +{ + *--current = 0; + do + { + *--current = char(value % 10) + '0'; + value /= 10; + } + while ( value != 0 ); +} + +} // namespace Json { + +#endif // LIB_JSONCPP_JSON_TOOL_H_INCLUDED diff --git a/src/third_party/jsoncpp/json_value.cpp b/src/third_party/jsoncpp/json_value.cpp new file mode 100644 index 0000000..69d0c78 --- /dev/null +++ b/src/third_party/jsoncpp/json_value.cpp @@ -0,0 +1,1701 @@ +// Copyright 2007-2010 Baptiste Lepilleur +// Distributed under MIT license, or public domain if desired and +// recognized in your jurisdiction. +// See file LICENSE for detail or copy at http://jsoncpp.sourceforge.net/LICENSE + +#include <iostream> +#include "json_value.h" +#include "json_writer.h" +#include <utility> +#include <stdexcept> +#include <cstring> +#include <cassert> +#ifdef JSON_USE_CPPTL +# include <cpptl/conststring.h> +#endif +#include <cstddef> // size_t +#ifndef JSON_USE_SIMPLE_INTERNAL_ALLOCATOR +# include "json_batchallocator.h" +#endif // #ifndef JSON_USE_SIMPLE_INTERNAL_ALLOCATOR + +#define JSON_ASSERT_UNREACHABLE assert( false ) +#define JSON_ASSERT( condition ) assert( condition ); // @todo <= change this into an exception throw +#define JSON_ASSERT_MESSAGE( condition, message ) if (!( condition )) throw std::runtime_error( message ); + +namespace Json { + +const Value Value::null; +const Int Value::minInt = Int( ~(UInt(-1)/2) ); +const Int Value::maxInt = Int( UInt(-1)/2 ); +const UInt Value::maxUInt = UInt(-1); + +/// Unknown size marker +enum { unknown = (unsigned)-1 }; + + +/** Duplicates the specified string value. + * @param value Pointer to the string to duplicate. Must be zero-terminated if + * length is "unknown". + * @param length Length of the value. if equals to unknown, then it will be + * computed using strlen(value). + * @return Pointer on the duplicate instance of string.
+ */ +static inline char * +duplicateStringValue( const char *value, + unsigned int length = unknown ) +{ + if ( length == unknown ) + length = (unsigned int)strlen(value); + char *newString = static_cast<char *>( malloc( length + 1 ) ); + memcpy( newString, value, length ); + newString[length] = 0; // the copy is always zero-terminated + return newString; +} + + +/** Free the string duplicated by duplicateStringValue(). + */ +static inline void +releaseStringValue( char *value ) +{ + if ( value ) + free( value ); +} + + + +// ////////////////////////////////////////////////////////////////// +// ////////////////////////////////////////////////////////////////// +// ////////////////////////////////////////////////////////////////// +// ValueInternals... +// ////////////////////////////////////////////////////////////////// +// ////////////////////////////////////////////////////////////////// +// ////////////////////////////////////////////////////////////////// +#ifdef JSON_VALUE_USE_INTERNAL_MAP +# include "json_internalarray.inl" +# include "json_internalmap.inl" +#endif // JSON_VALUE_USE_INTERNAL_MAP + +# include "json_valueiterator.inl" + + +// ////////////////////////////////////////////////////////////////// +// ////////////////////////////////////////////////////////////////// +// ////////////////////////////////////////////////////////////////// +// class Value::CommentInfo +// ////////////////////////////////////////////////////////////////// +// ////////////////////////////////////////////////////////////////// +// ////////////////////////////////////////////////////////////////// + + +Value::CommentInfo::CommentInfo() + : comment_( 0 ) +{ +} + +Value::CommentInfo::~CommentInfo() +{ + if ( comment_ ) + releaseStringValue( comment_ ); +} + + +void +Value::CommentInfo::setComment( const char *text ) +{ + if ( comment_ ) + releaseStringValue( comment_ ); + JSON_ASSERT( text ); + JSON_ASSERT_MESSAGE( text[0]=='\0' || text[0]=='/', "Comments must start with /"); + // It seems that /**/ style comments are
acceptable as well. + comment_ = duplicateStringValue( text ); +} + + +// ////////////////////////////////////////////////////////////////// +// ////////////////////////////////////////////////////////////////// +// ////////////////////////////////////////////////////////////////// +// class Value::CZString +// ////////////////////////////////////////////////////////////////// +// ////////////////////////////////////////////////////////////////// +// ////////////////////////////////////////////////////////////////// +# ifndef JSON_VALUE_USE_INTERNAL_MAP + +// Notes: index_ indicates if the string was allocated when +// a string is stored. + +Value::CZString::CZString( ArrayIndex index ) + : cstr_( 0 ) + , index_( index ) +{ +} + +Value::CZString::CZString( const char *cstr, DuplicationPolicy allocate ) + : cstr_( allocate == duplicate ? duplicateStringValue(cstr) + : cstr ) + , index_( allocate ) +{ +} + +Value::CZString::CZString( const CZString &other ) +: cstr_( other.index_ != noDuplication && other.cstr_ != 0 + ? duplicateStringValue( other.cstr_ ) + : other.cstr_ ) + , index_( other.cstr_ ? (other.index_ == noDuplication ? 
noDuplication : duplicate) + : other.index_ ) +{ +} + +Value::CZString::~CZString() +{ + if ( cstr_ && index_ == duplicate ) + releaseStringValue( const_cast<char *>( cstr_ ) ); // freed only when index_ is duplicate +} + +void +Value::CZString::swap( CZString &other ) +{ + std::swap( cstr_, other.cstr_ ); + std::swap( index_, other.index_ ); +} + +Value::CZString & +Value::CZString::operator =( const CZString &other ) +{ + CZString temp( other ); + swap( temp ); + return *this; +} + +bool +Value::CZString::operator<( const CZString &other ) const +{ + if ( cstr_ ) + return strcmp( cstr_, other.cstr_ ) < 0; + return index_ < other.index_; +} + +bool +Value::CZString::operator==( const CZString &other ) const +{ + if ( cstr_ ) + return strcmp( cstr_, other.cstr_ ) == 0; + return index_ == other.index_; +} + + +ArrayIndex +Value::CZString::index() const +{ + return index_; +} + + +const char * +Value::CZString::c_str() const +{ + return cstr_; +} + +bool +Value::CZString::isStaticString() const +{ + return index_ == noDuplication; +} + +#endif // ifndef JSON_VALUE_USE_INTERNAL_MAP + + +// ////////////////////////////////////////////////////////////////// +// ////////////////////////////////////////////////////////////////// +// ////////////////////////////////////////////////////////////////// +// class Value::Value +// ////////////////////////////////////////////////////////////////// +// ////////////////////////////////////////////////////////////////// +// ////////////////////////////////////////////////////////////////// + +/*! \internal Default constructor initialization must be equivalent to: + * memset( this, 0, sizeof(Value) ) + * This optimization is used in ValueInternalMap fast allocator.
+ */ +Value::Value( ValueType type ) + : type_( type ) + , allocated_( 0 ) + , comments_( 0 ) +# ifdef JSON_VALUE_USE_INTERNAL_MAP + , itemIsUsed_( 0 ) +#endif +{ + switch ( type ) + { + case nullValue: + break; + case intValue: + case uintValue: + value_.int_ = 0; + break; + case realValue: + value_.real_ = 0.0; + break; + case stringValue: + value_.string_ = 0; + break; +#ifndef JSON_VALUE_USE_INTERNAL_MAP + case arrayValue: + case objectValue: + value_.map_ = new ObjectValues(); + break; +#else + case arrayValue: + value_.array_ = arrayAllocator()->newArray(); + break; + case objectValue: + value_.map_ = mapAllocator()->newMap(); + break; +#endif + case booleanValue: + value_.bool_ = false; + break; + default: + JSON_ASSERT_UNREACHABLE; + } +} + + +#if !defined(JSON_NO_INT64) +Value::Value( ArrayIndex value ) + : type_( uintValue ) + , comments_( 0 ) +# ifdef JSON_VALUE_USE_INTERNAL_MAP + , itemIsUsed_( 0 ) +#endif +{ + value_.uint_ = value; +} + +Value::Value( int value ) + : type_( intValue ) + , comments_( 0 ) +# ifdef JSON_VALUE_USE_INTERNAL_MAP + , itemIsUsed_( 0 ) +#endif +{ + value_.int_ = value; +} + +#endif // if !defined(JSON_NO_INT64) + + +Value::Value( Int value ) + : type_( intValue ) + , comments_( 0 ) +# ifdef JSON_VALUE_USE_INTERNAL_MAP + , itemIsUsed_( 0 ) +#endif +{ + value_.int_ = value; +} + + +Value::Value( UInt value ) + : type_( uintValue ) + , comments_( 0 ) +# ifdef JSON_VALUE_USE_INTERNAL_MAP + , itemIsUsed_( 0 ) +#endif +{ + value_.uint_ = value; +} + +Value::Value( double value ) + : type_( realValue ) + , comments_( 0 ) +# ifdef JSON_VALUE_USE_INTERNAL_MAP + , itemIsUsed_( 0 ) +#endif +{ + value_.real_ = value; +} + +Value::Value( const char *value ) + : type_( stringValue ) + , allocated_( true ) + , comments_( 0 ) +# ifdef JSON_VALUE_USE_INTERNAL_MAP + , itemIsUsed_( 0 ) +#endif +{ + value_.string_ = duplicateStringValue( value ); +} + + +Value::Value( const char *beginValue, + const char *endValue ) + : type_( stringValue ) + , 
allocated_( true ) + , comments_( 0 ) +# ifdef JSON_VALUE_USE_INTERNAL_MAP + , itemIsUsed_( 0 ) +#endif +{ + value_.string_ = duplicateStringValue( beginValue, + (unsigned int)(endValue - beginValue) ); +} + + +Value::Value( const std::string &value ) + : type_( stringValue ) + , allocated_( true ) + , comments_( 0 ) +# ifdef JSON_VALUE_USE_INTERNAL_MAP + , itemIsUsed_( 0 ) +#endif +{ + value_.string_ = duplicateStringValue( value.c_str(), + (unsigned int)value.length() ); + +} + +Value::Value( const StaticString &value ) + : type_( stringValue ) + , allocated_( false ) + , comments_( 0 ) +# ifdef JSON_VALUE_USE_INTERNAL_MAP + , itemIsUsed_( 0 ) +#endif +{ + value_.string_ = const_cast<char *>( value.c_str() ); // pointer is stored as-is, no copy is made (allocated_ is false) +} + + +# ifdef JSON_USE_CPPTL +Value::Value( const CppTL::ConstString &value ) + : type_( stringValue ) + , allocated_( true ) + , comments_( 0 ) +# ifdef JSON_VALUE_USE_INTERNAL_MAP + , itemIsUsed_( 0 ) +#endif +{ + value_.string_ = duplicateStringValue( value, value.length() ); +} +# endif + +Value::Value( bool value ) + : type_( booleanValue ) + , comments_( 0 ) +# ifdef JSON_VALUE_USE_INTERNAL_MAP + , itemIsUsed_( 0 ) +#endif +{ + value_.bool_ = value; +} + + +Value::Value( const Value &other ) + : type_( other.type_ ) + , comments_( 0 ) +# ifdef JSON_VALUE_USE_INTERNAL_MAP + , itemIsUsed_( 0 ) +#endif +{ + switch ( type_ ) + { + case nullValue: + case intValue: + case uintValue: + case realValue: + case booleanValue: + value_ = other.value_; + break; + case stringValue: + if ( other.value_.string_ ) + { + value_.string_ = duplicateStringValue( other.value_.string_ ); + allocated_ = true; + } + else + value_.string_ = 0; + break; +#ifndef JSON_VALUE_USE_INTERNAL_MAP + case arrayValue: + case objectValue: + value_.map_ = new ObjectValues( *other.value_.map_ ); + break; +#else + case arrayValue: + value_.array_ = arrayAllocator()->newArrayCopy( *other.value_.array_ ); + break; + case objectValue: + value_.map_ = mapAllocator()->newMapCopy( *other.value_.map_ ); +
break; +#endif + default: + JSON_ASSERT_UNREACHABLE; + } + if ( other.comments_ ) + { + comments_ = new CommentInfo[numberOfCommentPlacement]; + for ( int comment =0; comment < numberOfCommentPlacement; ++comment ) + { + const CommentInfo &otherComment = other.comments_[comment]; + if ( otherComment.comment_ ) + comments_[comment].setComment( otherComment.comment_ ); + } + } +} + + +Value::~Value() +{ + switch ( type_ ) + { + case nullValue: + case intValue: + case uintValue: + case realValue: + case booleanValue: + break; + case stringValue: + if ( allocated_ ) + releaseStringValue( value_.string_ ); + break; +#ifndef JSON_VALUE_USE_INTERNAL_MAP + case arrayValue: + case objectValue: + delete value_.map_; + break; +#else + case arrayValue: + arrayAllocator()->destructArray( value_.array_ ); + break; + case objectValue: + mapAllocator()->destructMap( value_.map_ ); + break; +#endif + default: + JSON_ASSERT_UNREACHABLE; + } + + if ( comments_ ) + delete[] comments_; +} + +Value & +Value::operator=( const Value &other ) +{ + Value temp( other ); + swap( temp ); + return *this; +} + +void +Value::swap( Value &other ) +{ + ValueType temp = type_; + type_ = other.type_; + other.type_ = temp; + std::swap( value_, other.value_ ); + int temp2 = allocated_; + allocated_ = other.allocated_; + other.allocated_ = temp2; +} + +ValueType +Value::type() const +{ + return type_; +} + + +int +Value::compare( const Value &other ) +{ + /* + int typeDelta = other.type_ - type_; + switch ( type_ ) + { + case nullValue: + + return other.type_ == type_; + case intValue: + if ( other.type_.isNumeric() + case uintValue: + case realValue: + case booleanValue: + break; + case stringValue, + break; + case arrayValue: + delete value_.array_; + break; + case objectValue: + delete value_.map_; + default: + JSON_ASSERT_UNREACHABLE; + } + */ + return 0; // unreachable +} + +bool +Value::operator <( const Value &other ) const +{ + int typeDelta = type_ - other.type_; + if ( typeDelta ) + return 
typeDelta < 0 ? true : false; + switch ( type_ ) + { + case nullValue: + return false; + case intValue: + return value_.int_ < other.value_.int_; + case uintValue: + return value_.uint_ < other.value_.uint_; + case realValue: + return value_.real_ < other.value_.real_; + case booleanValue: + return value_.bool_ < other.value_.bool_; + case stringValue: + return ( value_.string_ == 0 && other.value_.string_ ) + || ( other.value_.string_ + && value_.string_ + && strcmp( value_.string_, other.value_.string_ ) < 0 ); +#ifndef JSON_VALUE_USE_INTERNAL_MAP + case arrayValue: + case objectValue: + { + int delta = int( value_.map_->size() - other.value_.map_->size() ); + if ( delta ) + return delta < 0; + return (*value_.map_) < (*other.value_.map_); + } +#else + case arrayValue: + return value_.array_->compare( *(other.value_.array_) ) < 0; + case objectValue: + return value_.map_->compare( *(other.value_.map_) ) < 0; +#endif + default: + JSON_ASSERT_UNREACHABLE; + } + return 0; // unreachable +} + +bool +Value::operator <=( const Value &other ) const +{ + return !(other < *this); // a <= b is !(b < a); the former !(other > *this) evaluated a >= b +} + +bool +Value::operator >=( const Value &other ) const +{ + return !(*this < other); +} + +bool +Value::operator >( const Value &other ) const +{ + return other < *this; +} + +bool +Value::operator ==( const Value &other ) const +{ + //if ( type_ != other.type_ ) + // GCC 2.95.3 says: + // attempt to take address of bit-field structure member `Json::Value::type_' + // Beats me, but a temp solves the problem.
+ int temp = other.type_; + if ( type_ != temp ) + return false; + switch ( type_ ) + { + case nullValue: + return true; + case intValue: + return value_.int_ == other.value_.int_; + case uintValue: + return value_.uint_ == other.value_.uint_; + case realValue: + return value_.real_ == other.value_.real_; + case booleanValue: + return value_.bool_ == other.value_.bool_; + case stringValue: + return ( value_.string_ == other.value_.string_ ) + || ( other.value_.string_ + && value_.string_ + && strcmp( value_.string_, other.value_.string_ ) == 0 ); +#ifndef JSON_VALUE_USE_INTERNAL_MAP + case arrayValue: + case objectValue: + return value_.map_->size() == other.value_.map_->size() + && (*value_.map_) == (*other.value_.map_); +#else + case arrayValue: + return value_.array_->compare( *(other.value_.array_) ) == 0; + case objectValue: + return value_.map_->compare( *(other.value_.map_) ) == 0; +#endif + default: + JSON_ASSERT_UNREACHABLE; + } + return 0; // unreachable +} + +bool +Value::operator !=( const Value &other ) const +{ + return !( *this == other ); +} + +const char * +Value::asCString() const +{ + JSON_ASSERT( type_ == stringValue ); + return value_.string_; +} + + +std::string +Value::asString() const +{ + switch ( type_ ) + { + case nullValue: + return ""; + case stringValue: + return value_.string_ ? value_.string_ : ""; + case booleanValue: + return value_.bool_ ?
"true" : "false"; + case intValue: + case uintValue: + case realValue: + case arrayValue: + case objectValue: + JSON_ASSERT_MESSAGE( false, "Type is not convertible to string" ); + default: + JSON_ASSERT_UNREACHABLE; + } + return ""; // unreachable +} + +# ifdef JSON_USE_CPPTL +CppTL::ConstString +Value::asConstString() const +{ + return CppTL::ConstString( asString().c_str() ); +} +# endif + +Value::Int +Value::asInt() const +{ + switch ( type_ ) + { + case nullValue: + return 0; + case intValue: + return value_.int_; + case uintValue: + JSON_ASSERT_MESSAGE( value_.uint_ <= (unsigned)maxInt, "integer out of signed integer range" ); // maxInt itself fits; same bound as isConvertibleTo() + return value_.uint_; + case realValue: + JSON_ASSERT_MESSAGE( value_.real_ >= minInt && value_.real_ <= maxInt, "Real out of signed integer range" ); + return Int( value_.real_ ); + case booleanValue: + return value_.bool_ ? 1 : 0; + case stringValue: + case arrayValue: + case objectValue: + JSON_ASSERT_MESSAGE( false, "Type is not convertible to int" ); + default: + JSON_ASSERT_UNREACHABLE; + } + return 0; // unreachable; +} + +Value::UInt +Value::asUInt() const +{ + switch ( type_ ) + { + case nullValue: + return 0; + case intValue: + JSON_ASSERT_MESSAGE( value_.int_ >= 0, "Negative integer can not be converted to unsigned integer" ); + return value_.int_; + case uintValue: + return value_.uint_; + case realValue: + JSON_ASSERT_MESSAGE( value_.real_ >= 0 && value_.real_ <= maxUInt, "Real out of unsigned integer range" ); + return UInt( value_.real_ ); + case booleanValue: + return value_.bool_ ?
1 : 0; + case stringValue: + case arrayValue: + case objectValue: + JSON_ASSERT_MESSAGE( false, "Type is not convertible to uint" ); + default: + JSON_ASSERT_UNREACHABLE; + } + return 0; // unreachable; +} + +double +Value::asDouble() const +{ + switch ( type_ ) + { + case nullValue: + return 0.0; + case intValue: + return static_cast<double>( value_.int_ ); + case uintValue: +#if !defined(JSON_USE_INT64_DOUBLE_CONVERSION) + return static_cast<double>( value_.uint_ ); +#else // if !defined(JSON_USE_INT64_DOUBLE_CONVERSION) + return static_cast<double>( Int(value_.uint_/2) ) * 2 + Int(value_.uint_ & 1); // halve, convert as signed, double, then add back the low bit +#endif // if !defined(JSON_USE_INT64_DOUBLE_CONVERSION) + case realValue: + return value_.real_; + case booleanValue: + return value_.bool_ ? 1.0 : 0.0; + case stringValue: + case arrayValue: + case objectValue: + JSON_ASSERT_MESSAGE( false, "Type is not convertible to double" ); + default: + JSON_ASSERT_UNREACHABLE; + } + return 0; // unreachable; +} + +bool +Value::asBool() const +{ + switch ( type_ ) + { + case nullValue: + return false; + case intValue: + case uintValue: + return value_.int_ != 0; + case realValue: + return value_.real_ != 0.0; + case booleanValue: + return value_.bool_; + case stringValue: + return value_.string_ && value_.string_[0] != 0; + case arrayValue: + case objectValue: + return value_.map_->size() != 0; + default: + JSON_ASSERT_UNREACHABLE; + } + return false; // unreachable; +} + + +bool +Value::isConvertibleTo( ValueType other ) const +{ + switch ( type_ ) + { + case nullValue: + return true; + case intValue: + return ( other == nullValue && value_.int_ == 0 ) + || other == intValue + || ( other == uintValue && value_.int_ >= 0 ) + || other == realValue + || other == stringValue + || other == booleanValue; + case uintValue: + return ( other == nullValue && value_.uint_ == 0 ) + || ( other == intValue && value_.uint_ <= (unsigned)maxInt ) + || other == uintValue + || other == realValue + || other == stringValue + || other == booleanValue; + case realValue: +
return ( other == nullValue && value_.real_ == 0.0 ) + || ( other == intValue && value_.real_ >= minInt && value_.real_ <= maxInt ) + || ( other == uintValue && value_.real_ >= 0 && value_.real_ <= maxUInt ) + || other == realValue + || other == stringValue + || other == booleanValue; + case booleanValue: + return ( other == nullValue && value_.bool_ == false ) + || other == intValue + || other == uintValue + || other == realValue + || other == stringValue + || other == booleanValue; + case stringValue: + return other == stringValue + || ( other == nullValue && (!value_.string_ || value_.string_[0] == 0) ); + case arrayValue: + return other == arrayValue + || ( other == nullValue && value_.map_->size() == 0 ); + case objectValue: + return other == objectValue + || ( other == nullValue && value_.map_->size() == 0 ); + default: + JSON_ASSERT_UNREACHABLE; + } + return false; // unreachable; +} + + +/// Number of values in array or object +ArrayIndex +Value::size() const +{ + switch ( type_ ) + { + case nullValue: + case intValue: + case uintValue: + case realValue: + case booleanValue: + case stringValue: + return 0; +#ifndef JSON_VALUE_USE_INTERNAL_MAP + case arrayValue: // size of the array is highest index + 1 + if ( !value_.map_->empty() ) + { + ObjectValues::const_iterator itLast = value_.map_->end(); + --itLast; + return (*itLast).first.index()+1; + } + return 0; + case objectValue: + return ArrayIndex( value_.map_->size() ); +#else + case arrayValue: + return Int( value_.array_->size() ); + case objectValue: + return Int( value_.map_->size() ); +#endif + default: + JSON_ASSERT_UNREACHABLE; + } + return 0; // unreachable; +} + + +bool +Value::empty() const +{ + if ( isNull() || isArray() || isObject() ) + return size() == 0u; + else + return false; +} + + +bool +Value::operator!() const +{ + return isNull(); +} + + +void +Value::clear() +{ + JSON_ASSERT( type_ == nullValue || type_ == arrayValue || type_ == objectValue ); + + switch ( type_ ) + { +#ifndef 
JSON_VALUE_USE_INTERNAL_MAP + case arrayValue: + case objectValue: + value_.map_->clear(); + break; +#else + case arrayValue: + value_.array_->clear(); + break; + case objectValue: + value_.map_->clear(); + break; +#endif + default: + break; + } +} + +void +Value::resize( ArrayIndex newSize ) +{ + JSON_ASSERT( type_ == nullValue || type_ == arrayValue ); + if ( type_ == nullValue ) + *this = Value( arrayValue ); +#ifndef JSON_VALUE_USE_INTERNAL_MAP + ArrayIndex oldSize = size(); + if ( newSize == 0 ) + clear(); + else if ( newSize > oldSize ) + (*this)[ newSize - 1 ]; + else + { + for ( ArrayIndex index = newSize; index < oldSize; ++index ) + { + value_.map_->erase( index ); + } + assert( size() == newSize ); + } +#else + value_.array_->resize( newSize ); +#endif +} + + +Value & +Value::operator[]( ArrayIndex index ) +{ + JSON_ASSERT( type_ == nullValue || type_ == arrayValue ); + if ( type_ == nullValue ) + *this = Value( arrayValue ); +#ifndef JSON_VALUE_USE_INTERNAL_MAP + CZString key( index ); + ObjectValues::iterator it = value_.map_->lower_bound( key ); + if ( it != value_.map_->end() && (*it).first == key ) + return (*it).second; + + ObjectValues::value_type defaultValue( key, null ); + it = value_.map_->insert( it, defaultValue ); + return (*it).second; +#else + return value_.array_->resolveReference( index ); +#endif +} + + +const Value & +Value::operator[]( ArrayIndex index ) const +{ + JSON_ASSERT( type_ == nullValue || type_ == arrayValue ); + if ( type_ == nullValue ) + return null; +#ifndef JSON_VALUE_USE_INTERNAL_MAP + CZString key( index ); + ObjectValues::const_iterator it = value_.map_->find( key ); + if ( it == value_.map_->end() ) + return null; + return (*it).second; +#else + Value *value = value_.array_->find( index ); + return value ? 
*value : null; +#endif +} + + +Value & +Value::operator[]( const char *key ) +{ + return resolveReference( key, false ); +} + + +Value & +Value::resolveReference( const char *key, + bool isStatic ) +{ + JSON_ASSERT( type_ == nullValue || type_ == objectValue ); + if ( type_ == nullValue ) + *this = Value( objectValue ); +#ifndef JSON_VALUE_USE_INTERNAL_MAP + CZString actualKey( key, isStatic ? CZString::noDuplication + : CZString::duplicateOnCopy ); + ObjectValues::iterator it = value_.map_->lower_bound( actualKey ); + if ( it != value_.map_->end() && (*it).first == actualKey ) + return (*it).second; + + ObjectValues::value_type defaultValue( actualKey, null ); + it = value_.map_->insert( it, defaultValue ); + Value &value = (*it).second; + return value; +#else + return value_.map_->resolveReference( key, isStatic ); +#endif +} + + +Value +Value::get( ArrayIndex index, + const Value &defaultValue ) const +{ + const Value *value = &((*this)[index]); + return value == &null ? defaultValue : *value; +} + + +bool +Value::isValidIndex( ArrayIndex index ) const +{ + return index < size(); +} + + + +const Value & +Value::operator[]( const char *key ) const +{ + JSON_ASSERT( type_ == nullValue || type_ == objectValue ); + if ( type_ == nullValue ) + return null; +#ifndef JSON_VALUE_USE_INTERNAL_MAP + CZString actualKey( key, CZString::noDuplication ); + ObjectValues::const_iterator it = value_.map_->find( actualKey ); + if ( it == value_.map_->end() ) + return null; + return (*it).second; +#else + const Value *value = value_.map_->find( key ); + return value ? 
*value : null; +#endif +} + + +Value & +Value::operator[]( const std::string &key ) +{ + return (*this)[ key.c_str() ]; +} + + +const Value & +Value::operator[]( const std::string &key ) const +{ + return (*this)[ key.c_str() ]; +} + +Value & +Value::operator[]( const StaticString &key ) +{ + return resolveReference( key, true ); +} + + +# ifdef JSON_USE_CPPTL +Value & +Value::operator[]( const CppTL::ConstString &key ) +{ + return (*this)[ key.c_str() ]; +} + + +const Value & +Value::operator[]( const CppTL::ConstString &key ) const +{ + return (*this)[ key.c_str() ]; +} +# endif + + +Value & +Value::append( const Value &value ) +{ + return (*this)[size()] = value; +} + + +Value +Value::get( const char *key, + const Value &defaultValue ) const +{ + const Value *value = &((*this)[key]); + return value == &null ? defaultValue : *value; +} + + +Value +Value::get( const std::string &key, + const Value &defaultValue ) const +{ + return get( key.c_str(), defaultValue ); +} + +Value +Value::removeMember( const char* key ) +{ + JSON_ASSERT( type_ == nullValue || type_ == objectValue ); + if ( type_ == nullValue ) + return null; +#ifndef JSON_VALUE_USE_INTERNAL_MAP + CZString actualKey( key, CZString::noDuplication ); + ObjectValues::iterator it = value_.map_->find( actualKey ); + if ( it == value_.map_->end() ) + return null; + Value old(it->second); + value_.map_->erase(it); + return old; +#else + Value *value = value_.map_->find( key ); + if (value){ + Value old(*value); + value_.map_->remove( key ); // map_ is a pointer, as in the find() call above + return old; + } else { + return null; + } +#endif +} + +Value +Value::removeMember( const std::string &key ) +{ + return removeMember( key.c_str() ); +} + +# ifdef JSON_USE_CPPTL +Value +Value::get( const CppTL::ConstString &key, + const Value &defaultValue ) const +{ + return get( key.c_str(), defaultValue ); +} +# endif + +bool +Value::isMember( const char *key ) const +{ + const Value *value = &((*this)[key]); + return value != &null; +} + + +bool +Value::isMember(
const std::string &key ) const +{ + return isMember( key.c_str() ); +} + + +# ifdef JSON_USE_CPPTL +bool +Value::isMember( const CppTL::ConstString &key ) const +{ + return isMember( key.c_str() ); +} +#endif + +Value::Members +Value::getMemberNames() const +{ + JSON_ASSERT( type_ == nullValue || type_ == objectValue ); + if ( type_ == nullValue ) + return Value::Members(); + Members members; + members.reserve( value_.map_->size() ); +#ifndef JSON_VALUE_USE_INTERNAL_MAP + ObjectValues::const_iterator it = value_.map_->begin(); + ObjectValues::const_iterator itEnd = value_.map_->end(); + for ( ; it != itEnd; ++it ) + members.push_back( std::string( (*it).first.c_str() ) ); +#else + ValueInternalMap::IteratorState it; + ValueInternalMap::IteratorState itEnd; + value_.map_->makeBeginIterator( it ); + value_.map_->makeEndIterator( itEnd ); + for ( ; !ValueInternalMap::equals( it, itEnd ); ValueInternalMap::increment(it) ) + members.push_back( std::string( ValueInternalMap::key( it ) ) ); +#endif + return members; +} +// +//# ifdef JSON_USE_CPPTL +//EnumMemberNames +//Value::enumMemberNames() const +//{ +// if ( type_ == objectValue ) +// { +// return CppTL::Enum::any( CppTL::Enum::transform( +// CppTL::Enum::keys( *(value_.map_), CppTL::Type() ), +// MemberNamesTransform() ) ); +// } +// return EnumMemberNames(); +//} +// +// +//EnumValues +//Value::enumValues() const +//{ +// if ( type_ == objectValue || type_ == arrayValue ) +// return CppTL::Enum::anyValues( *(value_.map_), +// CppTL::Type() ); +// return EnumValues(); +//} +// +//# endif + + +bool +Value::isNull() const +{ + return type_ == nullValue; +} + + +bool +Value::isBool() const +{ + return type_ == booleanValue; +} + + +bool +Value::isInt() const +{ + return type_ == intValue; +} + + +bool +Value::isUInt() const +{ + return type_ == uintValue; +} + + +bool +Value::isIntegral() const +{ + return type_ == intValue + || type_ == uintValue + || type_ == booleanValue; +} + + +bool +Value::isDouble() const +{ + 
return type_ == realValue; +} + + +bool +Value::isNumeric() const +{ + return isIntegral() || isDouble(); +} + + +bool +Value::isString() const +{ + return type_ == stringValue; +} + + +bool +Value::isArray() const +{ + return type_ == nullValue || type_ == arrayValue; +} + + +bool +Value::isObject() const +{ + return type_ == nullValue || type_ == objectValue; +} + + +void +Value::setComment( const char *comment, + CommentPlacement placement ) +{ + if ( !comments_ ) + comments_ = new CommentInfo[numberOfCommentPlacement]; + comments_[placement].setComment( comment ); +} + + +void +Value::setComment( const std::string &comment, + CommentPlacement placement ) +{ + setComment( comment.c_str(), placement ); +} + + +bool +Value::hasComment( CommentPlacement placement ) const +{ + return comments_ != 0 && comments_[placement].comment_ != 0; +} + +std::string +Value::getComment( CommentPlacement placement ) const +{ + if ( hasComment(placement) ) + return comments_[placement].comment_; + return ""; +} + + +std::string +Value::toStyledString() const +{ + StyledWriter writer; + return writer.write( *this ); +} + + +Value::const_iterator +Value::begin() const +{ + switch ( type_ ) + { +#ifdef JSON_VALUE_USE_INTERNAL_MAP + case arrayValue: + if ( value_.array_ ) + { + ValueInternalArray::IteratorState it; + value_.array_->makeBeginIterator( it ); + return const_iterator( it ); + } + break; + case objectValue: + if ( value_.map_ ) + { + ValueInternalMap::IteratorState it; + value_.map_->makeBeginIterator( it ); + return const_iterator( it ); + } + break; +#else + case arrayValue: + case objectValue: + if ( value_.map_ ) + return const_iterator( value_.map_->begin() ); + break; +#endif + default: + break; + } + return const_iterator(); +} + +Value::const_iterator +Value::end() const +{ + switch ( type_ ) + { +#ifdef JSON_VALUE_USE_INTERNAL_MAP + case arrayValue: + if ( value_.array_ ) + { + ValueInternalArray::IteratorState it; + value_.array_->makeEndIterator( it ); + return 
const_iterator( it ); + } + break; + case objectValue: + if ( value_.map_ ) + { + ValueInternalMap::IteratorState it; + value_.map_->makeEndIterator( it ); + return const_iterator( it ); + } + break; +#else + case arrayValue: + case objectValue: + if ( value_.map_ ) + return const_iterator( value_.map_->end() ); + break; +#endif + default: + break; + } + return const_iterator(); +} + + +Value::iterator +Value::begin() +{ + switch ( type_ ) + { +#ifdef JSON_VALUE_USE_INTERNAL_MAP + case arrayValue: + if ( value_.array_ ) + { + ValueInternalArray::IteratorState it; + value_.array_->makeBeginIterator( it ); + return iterator( it ); + } + break; + case objectValue: + if ( value_.map_ ) + { + ValueInternalMap::IteratorState it; + value_.map_->makeBeginIterator( it ); + return iterator( it ); + } + break; +#else + case arrayValue: + case objectValue: + if ( value_.map_ ) + return iterator( value_.map_->begin() ); + break; +#endif + default: + break; + } + return iterator(); +} + +Value::iterator +Value::end() +{ + switch ( type_ ) + { +#ifdef JSON_VALUE_USE_INTERNAL_MAP + case arrayValue: + if ( value_.array_ ) + { + ValueInternalArray::IteratorState it; + value_.array_->makeEndIterator( it ); + return iterator( it ); + } + break; + case objectValue: + if ( value_.map_ ) + { + ValueInternalMap::IteratorState it; + value_.map_->makeEndIterator( it ); + return iterator( it ); + } + break; +#else + case arrayValue: + case objectValue: + if ( value_.map_ ) + return iterator( value_.map_->end() ); + break; +#endif + default: + break; + } + return iterator(); +} + + +// class PathArgument +// ////////////////////////////////////////////////////////////////// + +PathArgument::PathArgument() + : kind_( kindNone ) +{ +} + + +PathArgument::PathArgument( ArrayIndex index ) + : index_( index ) + , kind_( kindIndex ) +{ +} + + +PathArgument::PathArgument( const char *key ) + : key_( key ) + , kind_( kindKey ) +{ +} + + +PathArgument::PathArgument( const std::string &key ) + : key_( 
key.c_str() ) + , kind_( kindKey ) +{ +} + +// class Path +// ////////////////////////////////////////////////////////////////// + +Path::Path( const std::string &path, + const PathArgument &a1, + const PathArgument &a2, + const PathArgument &a3, + const PathArgument &a4, + const PathArgument &a5 ) +{ + InArgs in; + in.push_back( &a1 ); + in.push_back( &a2 ); + in.push_back( &a3 ); + in.push_back( &a4 ); + in.push_back( &a5 ); + makePath( path, in ); +} + + +void +Path::makePath( const std::string &path, + const InArgs &in ) +{ + const char *current = path.c_str(); + const char *end = current + path.length(); + InArgs::const_iterator itInArg = in.begin(); + while ( current != end ) + { + if ( *current == '[' ) + { + ++current; + if ( *current == '%' ) + addPathInArg( path, in, itInArg, PathArgument::kindIndex ); + else + { + ArrayIndex index = 0; + for ( ; current != end && *current >= '0' && *current <= '9'; ++current ) + index = index * 10 + ArrayIndex(*current - '0'); + args_.push_back( index ); + } + if ( current == end || *current++ != ']' ) + invalidPath( path, int(current - path.c_str()) ); + } + else if ( *current == '%' ) + { + addPathInArg( path, in, itInArg, PathArgument::kindKey ); + ++current; + } + else if ( *current == '.' ) + { + ++current; + } + else + { + const char *beginName = current; + while ( current != end && !strchr( "[.", *current ) ) + ++current; + args_.push_back( std::string( beginName, current ) ); + } + } +} + + +void +Path::addPathInArg( const std::string &path, + const InArgs &in, + InArgs::const_iterator &itInArg, + PathArgument::Kind kind ) +{ + if ( itInArg == in.end() ) + { + // Error: missing argument %d + } + else if ( (*itInArg)->kind_ != kind ) + { + // Error: bad argument type + } + else + { + args_.push_back( **itInArg ); + } +} + + +void +Path::invalidPath( const std::string &path, + int location ) +{ + // Error: invalid path. 
+} + + +const Value & +Path::resolve( const Value &root ) const +{ + const Value *node = &root; + for ( Args::const_iterator it = args_.begin(); it != args_.end(); ++it ) + { + const PathArgument &arg = *it; + if ( arg.kind_ == PathArgument::kindIndex ) + { + if ( !node->isArray() || node->isValidIndex( arg.index_ ) ) + { + // Error: unable to resolve path (array value expected at position... + } + node = &((*node)[arg.index_]); + } + else if ( arg.kind_ == PathArgument::kindKey ) + { + if ( !node->isObject() ) + { + // Error: unable to resolve path (object value expected at position...) + } + node = &((*node)[arg.key_]); + if ( node == &Value::null ) + { + // Error: unable to resolve path (object has no member named '' at position...) + } + } + } + return *node; +} + + +Value +Path::resolve( const Value &root, + const Value &defaultValue ) const +{ + const Value *node = &root; + for ( Args::const_iterator it = args_.begin(); it != args_.end(); ++it ) + { + const PathArgument &arg = *it; + if ( arg.kind_ == PathArgument::kindIndex ) + { + if ( !node->isArray() || node->isValidIndex( arg.index_ ) ) + return defaultValue; + node = &((*node)[arg.index_]); + } + else if ( arg.kind_ == PathArgument::kindKey ) + { + if ( !node->isObject() ) + return defaultValue; + node = &((*node)[arg.key_]); + if ( node == &Value::null ) + return defaultValue; + } + } + return *node; +} + + +Value & +Path::make( Value &root ) const +{ + Value *node = &root; + for ( Args::const_iterator it = args_.begin(); it != args_.end(); ++it ) + { + const PathArgument &arg = *it; + if ( arg.kind_ == PathArgument::kindIndex ) + { + if ( !node->isArray() ) + { + // Error: node is not an array at position ... + } + node = &((*node)[arg.index_]); + } + else if ( arg.kind_ == PathArgument::kindKey ) + { + if ( !node->isObject() ) + { + // Error: node is not an object at position... 
+ } + node = &((*node)[arg.key_]); + } + } + return *node; +} + + +} // namespace Json diff --git a/src/third_party/jsoncpp/json_value.h b/src/third_party/jsoncpp/json_value.h new file mode 100644 index 0000000..5c99471 --- /dev/null +++ b/src/third_party/jsoncpp/json_value.h @@ -0,0 +1,1059 @@ +// Copyright 2007-2010 Baptiste Lepilleur +// Distributed under MIT license, or public domain if desired and +// recognized in your jurisdiction. +// See file LICENSE for detail or copy at http://jsoncpp.sourceforge.net/LICENSE + +#ifndef CPPTL_JSON_H_INCLUDED +# define CPPTL_JSON_H_INCLUDED + +# include "json_forwards.h" +# include <string> +# include <vector> + +# ifndef JSON_USE_CPPTL_SMALLMAP +# include <map> +# else +# include <cpptl/smallmap.h> +# endif +# ifdef JSON_USE_CPPTL +# include <cpptl/forwards.h> +# endif + +/** \brief JSON (JavaScript Object Notation). + */ +namespace Json { + + /** \brief Type of the value held by a Value object. + */ + enum ValueType + { + nullValue = 0, ///< 'null' value + intValue, ///< signed integer value + uintValue, ///< unsigned integer value + realValue, ///< double value + stringValue, ///< UTF-8 string value + booleanValue, ///< bool value + arrayValue, ///< array value (ordered list) + objectValue ///< object value (collection of name/value pairs). + }; + + /** \brief Position of a comment relative to the value it is attached to. + */ + enum CommentPlacement + { + commentBefore = 0, ///< a comment placed on the line before a value + commentAfterOnSameLine, ///< a comment just after a value on the same line + commentAfter, ///< a comment on the line after a value (only makes sense for root value) + numberOfCommentPlacement ///< number of placements above; not a placement itself + }; + +//# ifdef JSON_USE_CPPTL +// typedef CppTL::AnyEnumerator EnumMemberNames; +// typedef CppTL::AnyEnumerator EnumValues; +//# endif + + /** \brief Lightweight wrapper to tag static string. + * + * Value constructor and objectValue member assignment take advantage of the + * StaticString and avoid the cost of string duplication when storing the + * string or the member name. 
+ * + * Example of usage: + * \code + * Json::Value aValue( StaticString("some text") ); + * Json::Value object; + * static const StaticString code("code"); + * object[code] = 1234; + * \endcode + */ + class JSON_API StaticString + { + public: + /// Stores the pointer \a czstring as-is; the characters are not copied. + explicit StaticString( const char *czstring ) + : str_( czstring ) + { + } + + /// Implicit conversion to the wrapped C string pointer. + operator const char *() const + { + return str_; + } + + /// Returns the wrapped C string pointer. + const char *c_str() const + { + return str_; + } + + private: + const char *str_; + }; + + /** \brief Represents a JSON value. + * + * This class is a discriminated union wrapper that can represent a: + * - signed integer [range: Value::minInt - Value::maxInt] + * - unsigned integer (range: 0 - Value::maxUInt) + * - double + * - UTF-8 string + * - boolean + * - 'null' + * - an ordered list of Value + * - collection of name/value pairs (javascript object) + * + * The type of the held value is represented by a #ValueType and + * can be obtained using type(). + * + * Values of an #objectValue or #arrayValue can be accessed using operator[]() methods. + * Non const methods will automatically create a #nullValue element + * if it does not exist. + * The sequence of an #arrayValue will be automatically resized and initialized + * with #nullValue. resize() can be used to enlarge or truncate an #arrayValue. + * + * The get() methods can be used to obtain a default value in the case the required element + * does not exist. + * + * It is possible to iterate over the list of a #objectValue values using + * the getMemberNames() method. 
+ */ + class JSON_API Value + { + friend class ValueIteratorBase; +# ifdef JSON_VALUE_USE_INTERNAL_MAP + friend class ValueInternalLink; + friend class ValueInternalMap; +# endif + public: + typedef std::vector Members; + typedef ValueIterator iterator; + typedef ValueConstIterator const_iterator; + typedef Json::UInt UInt; + typedef Json::Int Int; + typedef Json::ArrayIndex ArrayIndex; + + static const Value null; + static const Int minInt; + static const Int maxInt; + static const UInt maxUInt; + + private: +#ifndef JSONCPP_DOC_EXCLUDE_IMPLEMENTATION +# ifndef JSON_VALUE_USE_INTERNAL_MAP + class CZString + { + public: + enum DuplicationPolicy + { + noDuplication = 0, + duplicate, + duplicateOnCopy + }; + CZString( ArrayIndex index ); + CZString( const char *cstr, DuplicationPolicy allocate ); + CZString( const CZString &other ); + ~CZString(); + CZString &operator =( const CZString &other ); + bool operator<( const CZString &other ) const; + bool operator==( const CZString &other ) const; + ArrayIndex index() const; + const char *c_str() const; + bool isStaticString() const; + private: + void swap( CZString &other ); + const char *cstr_; + ArrayIndex index_; + }; + + public: +# ifndef JSON_USE_CPPTL_SMALLMAP + typedef std::map ObjectValues; +# else + typedef CppTL::SmallMap ObjectValues; +# endif // ifndef JSON_USE_CPPTL_SMALLMAP +# endif // ifndef JSON_VALUE_USE_INTERNAL_MAP +#endif // ifndef JSONCPP_DOC_EXCLUDE_IMPLEMENTATION + + public: + /** \brief Create a default Value of the given type. + + This is a very useful constructor. + To create an empty array, pass arrayValue. + To create an empty object, pass objectValue. + Another Value can then be set to this one by assignment. + This is useful since clear() and resize() will not alter types. 
+ + Examples: + \code + Json::Value null_value; // null + Json::Value arr_value(Json::arrayValue); // [] + Json::Value obj_value(Json::objectValue); // {} + \endcode + */ + Value( ValueType type = nullValue ); +#if !defined(JSON_NO_INT64) + Value( int value ); + Value( ArrayIndex value ); +#endif // if !defined(JSON_NO_INT64) + Value( Int value ); + Value( UInt value ); + Value( double value ); + Value( const char *value ); + Value( const char *beginValue, const char *endValue ); + /** \brief Constructs a value from a static string. + + * Like other value string constructor but do not duplicate the string for + * internal storage. The given string must remain alive after the call to this + * constructor. + * Example of usage: + * \code + * Json::Value aValue( StaticString("some text") ); + * \endcode + */ + Value( const StaticString &value ); + Value( const std::string &value ); +# ifdef JSON_USE_CPPTL + Value( const CppTL::ConstString &value ); +# endif + Value( bool value ); + Value( const Value &other ); + ~Value(); + + Value &operator=( const Value &other ); + /// Swap values. + /// \note Currently, comments are intentionally not swapped, for + /// both logic and efficiency. 
+ void swap( Value &other ); + + ValueType type() const; + + bool operator <( const Value &other ) const; + bool operator <=( const Value &other ) const; + bool operator >=( const Value &other ) const; + bool operator >( const Value &other ) const; + + bool operator ==( const Value &other ) const; + bool operator !=( const Value &other ) const; + + int compare( const Value &other ); + + const char *asCString() const; + std::string asString() const; +# ifdef JSON_USE_CPPTL + CppTL::ConstString asConstString() const; +# endif + Int asInt() const; + UInt asUInt() const; + double asDouble() const; + bool asBool() const; + + bool isNull() const; + bool isBool() const; + bool isInt() const; + bool isUInt() const; + bool isIntegral() const; + bool isDouble() const; + bool isNumeric() const; + bool isString() const; + bool isArray() const; + bool isObject() const; + + bool isConvertibleTo( ValueType other ) const; + + /// Number of values in array or object + ArrayIndex size() const; + + /// \brief Return true if empty array, empty object, or null; + /// otherwise, false. + bool empty() const; + + /// Return isNull() + bool operator!() const; + + /// Remove all object members and array elements. + /// \pre type() is arrayValue, objectValue, or nullValue + /// \post type() is unchanged + void clear(); + + /// Resize the array to size elements. + /// New elements are initialized to null. + /// May only be called on nullValue or arrayValue. + /// \pre type() is arrayValue or nullValue + /// \post type() is arrayValue + void resize( ArrayIndex size ); + + /// Access an array element (zero based index ). + /// If the array contains less than index element, then null value are inserted + /// in the array so that its size is index+1. + /// (You may need to say 'value[0u]' to get your compiler to distinguish + /// this from the operator[] which takes a string.) 
+ Value &operator[]( ArrayIndex index ); + /// Access an array element (zero based index ) + /// (You may need to say 'value[0u]' to get your compiler to distinguish + /// this from the operator[] which takes a string.) + const Value &operator[]( ArrayIndex index ) const; + /// If the array contains at least index+1 elements, returns the element value, + /// otherwise returns defaultValue. + Value get( ArrayIndex index, + const Value &defaultValue ) const; + /// Return true if index < size(). + bool isValidIndex( ArrayIndex index ) const; + /// \brief Append value to array at the end. + /// + /// Equivalent to jsonvalue[jsonvalue.size()] = value; + Value &append( const Value &value ); + + /// Access an object value by name, create a null member if it does not exist. + Value &operator[]( const char *key ); + /// Access an object value by name, returns null if there is no member with that name. + const Value &operator[]( const char *key ) const; + /// Access an object value by name, create a null member if it does not exist. + Value &operator[]( const std::string &key ); + /// Access an object value by name, returns null if there is no member with that name. + const Value &operator[]( const std::string &key ) const; + /** \brief Access an object value by name, create a null member if it does not exist. + + * If the object as no entry for that name, then the member name used to store + * the new entry is not duplicated. + * Example of use: + * \code + * Json::Value object; + * static const StaticString code("code"); + * object[code] = 1234; + * \endcode + */ + Value &operator[]( const StaticString &key ); +# ifdef JSON_USE_CPPTL + /// Access an object value by name, create a null member if it does not exist. + Value &operator[]( const CppTL::ConstString &key ); + /// Access an object value by name, returns null if there is no member with that name. 
+ const Value &operator[]( const CppTL::ConstString &key ) const; +# endif + /// Return the member named key if it exist, defaultValue otherwise. + Value get( const char *key, + const Value &defaultValue ) const; + /// Return the member named key if it exist, defaultValue otherwise. + Value get( const std::string &key, + const Value &defaultValue ) const; +# ifdef JSON_USE_CPPTL + /// Return the member named key if it exist, defaultValue otherwise. + Value get( const CppTL::ConstString &key, + const Value &defaultValue ) const; +# endif + /// \brief Remove and return the named member. + /// + /// Do nothing if it did not exist. + /// \return the removed Value, or null. + /// \pre type() is objectValue or nullValue + /// \post type() is unchanged + Value removeMember( const char* key ); + /// Same as removeMember(const char*) + Value removeMember( const std::string &key ); + + /// Return true if the object has a member named key. + bool isMember( const char *key ) const; + /// Return true if the object has a member named key. + bool isMember( const std::string &key ) const; +# ifdef JSON_USE_CPPTL + /// Return true if the object has a member named key. + bool isMember( const CppTL::ConstString &key ) const; +# endif + + /// \brief Return a list of the member names. + /// + /// If null, return an empty list. + /// \pre type() is objectValue or nullValue + /// \post if type() was nullValue, it remains nullValue + Members getMemberNames() const; + +//# ifdef JSON_USE_CPPTL +// EnumMemberNames enumMemberNames() const; +// EnumValues enumValues() const; +//# endif + + /// Comments must be //... or /* ... */ + void setComment( const char *comment, + CommentPlacement placement ); + /// Comments must be //... or /* ... */ + void setComment( const std::string &comment, + CommentPlacement placement ); + bool hasComment( CommentPlacement placement ) const; + /// Include delimiters and embedded newlines. 
+ std::string getComment( CommentPlacement placement ) const; + + std::string toStyledString() const; + + const_iterator begin() const; + const_iterator end() const; + + iterator begin(); + iterator end(); + + private: + Value &resolveReference( const char *key, + bool isStatic ); + +# ifdef JSON_VALUE_USE_INTERNAL_MAP + /// True when this slot has not been marked as used by setItemUsed(). + inline bool isItemAvailable() const + { + return itemIsUsed_ == 0; + } + + /// Marks this slot as used (default) or as available again. + inline void setItemUsed( bool isUsed = true ) + { + itemIsUsed_ = isUsed ? 1 : 0; + } + + /// True when setMemberNameIsStatic( true ) flagged the member name as static. + /// Compared against zero because a signed 1-bit field cannot hold +1. + inline bool isMemberNameStatic() const + { + return memberNameIsStatic_ != 0; + } + + /// Records whether the member name of this slot is a static string. + inline void setMemberNameIsStatic( bool isStatic ) + { + memberNameIsStatic_ = isStatic ? 1 : 0; + } +# endif // # ifdef JSON_VALUE_USE_INTERNAL_MAP + + private: + struct CommentInfo + { + CommentInfo(); + ~CommentInfo(); + + void setComment( const char *text ); + + char *comment_; + }; + + //struct MemberNamesTransform + //{ + // typedef const char *result_type; + // const char *operator()( const CZString &name ) const + // { + // return name.c_str(); + // } + //}; + + union ValueHolder + { + Int int_; + UInt uint_; + double real_; + bool bool_; + char *string_; +# ifdef JSON_VALUE_USE_INTERNAL_MAP + ValueInternalArray *array_; + ValueInternalMap *map_; +#else + ObjectValues *map_; +# endif + } value_; + ValueType type_ : 8; + int allocated_ : 1; // Notes: if declared as bool, bitfield is useless. +# ifdef JSON_VALUE_USE_INTERNAL_MAP + unsigned int itemIsUsed_ : 1; // used by the ValueInternalMap container. + int memberNameIsStatic_ : 1; // used by the ValueInternalMap container. +# endif + CommentInfo *comments_; + }; + + + /** \brief Experimental and untested: represents an element of the "path" to access a node. 
+ */ + class PathArgument + { + public: + friend class Path; + + PathArgument(); + PathArgument( ArrayIndex index ); + PathArgument( const char *key ); + PathArgument( const std::string &key ); + + private: + enum Kind + { + kindNone = 0, + kindIndex, + kindKey + }; + std::string key_; + ArrayIndex index_; + Kind kind_; + }; + + /** \brief Experimental and untested: represents a "path" to access a node. + * + * Syntax: + * - "." => root node + * - ".[n]" => elements at index 'n' of root node (an array value) + * - ".name" => member named 'name' of root node (an object value) + * - ".name1.name2.name3" + * - ".[0][1][2].name1[3]" + * - ".%" => member name is provided as parameter + * - ".[%]" => index is provided as parameter + */ + class Path + { + public: + Path( const std::string &path, + const PathArgument &a1 = PathArgument(), + const PathArgument &a2 = PathArgument(), + const PathArgument &a3 = PathArgument(), + const PathArgument &a4 = PathArgument(), + const PathArgument &a5 = PathArgument() ); + + const Value &resolve( const Value &root ) const; + Value resolve( const Value &root, + const Value &defaultValue ) const; + /// Creates the "path" to access the specified node and returns a reference on the node. + Value &make( Value &root ) const; + + private: + /// Pointers to the constructor's placeholder arguments (not owned). + typedef std::vector<const PathArgument *> InArgs; + /// Parsed path elements, in path order. + typedef std::vector<PathArgument> Args; + + void makePath( const std::string &path, + const InArgs &in ); + void addPathInArg( const std::string &path, + const InArgs &in, + InArgs::const_iterator &itInArg, + PathArgument::Kind kind ); + void invalidPath( const std::string &path, + int location ); + + Args args_; + }; + + + +#ifdef JSON_VALUE_USE_INTERNAL_MAP + /** \brief Allocator to customize Value internal map. + * Below is an example of a simple implementation (default implementation actually + * use memory pool for speed). 
+ * \code + class DefaultValueMapAllocator : public ValueMapAllocator + { + public: // overridden from ValueMapAllocator + virtual ValueInternalMap *newMap() + { + return new ValueInternalMap(); + } + + virtual ValueInternalMap *newMapCopy( const ValueInternalMap &other ) + { + return new ValueInternalMap( other ); + } + + virtual void destructMap( ValueInternalMap *map ) + { + delete map; + } + + virtual ValueInternalLink *allocateMapBuckets( unsigned int size ) + { + return new ValueInternalLink[size]; + } + + virtual void releaseMapBuckets( ValueInternalLink *links ) + { + delete [] links; + } + + virtual ValueInternalLink *allocateMapLink() + { + return new ValueInternalLink(); + } + + virtual void releaseMapLink( ValueInternalLink *link ) + { + delete link; + } + }; + * \endcode + */ + class JSON_API ValueMapAllocator + { + public: + virtual ~ValueMapAllocator(); + virtual ValueInternalMap *newMap() = 0; + virtual ValueInternalMap *newMapCopy( const ValueInternalMap &other ) = 0; + virtual void destructMap( ValueInternalMap *map ) = 0; + virtual ValueInternalLink *allocateMapBuckets( unsigned int size ) = 0; + virtual void releaseMapBuckets( ValueInternalLink *links ) = 0; + virtual ValueInternalLink *allocateMapLink() = 0; + virtual void releaseMapLink( ValueInternalLink *link ) = 0; + }; + + /** \brief ValueInternalMap hash-map bucket chain link (for internal use only). + * \internal previous_ & next_ allows for bidirectional traversal. + */ + class JSON_API ValueInternalLink + { + public: + enum { itemPerLink = 6 }; // sizeof(ValueInternalLink) = 128 on 32 bits architecture. + enum InternalFlags { + flagAvailable = 0, + flagUsed = 1 + }; + + ValueInternalLink(); + + ~ValueInternalLink(); + + Value items_[itemPerLink]; + char *keys_[itemPerLink]; + ValueInternalLink *previous_; + ValueInternalLink *next_; + }; + + + /** \brief A linked page based hash-table implementation used internally by Value. 
+ * \internal ValueInternalMap is a tradional bucket based hash-table, with a linked + * list in each bucket to handle collision. There is an addional twist in that + * each node of the collision linked list is a page containing a fixed amount of + * value. This provides a better compromise between memory usage and speed. + * + * Each bucket is made up of a chained list of ValueInternalLink. The last + * link of a given bucket can be found in the 'previous_' field of the following bucket. + * The last link of the last bucket is stored in tailLink_ as it has no following bucket. + * Only the last link of a bucket may contains 'available' item. The last link always + * contains at least one element unless is it the bucket one very first link. + */ + class JSON_API ValueInternalMap + { + friend class ValueIteratorBase; + friend class Value; + public: + typedef unsigned int HashKey; + typedef unsigned int BucketIndex; + +# ifndef JSONCPP_DOC_EXCLUDE_IMPLEMENTATION + struct IteratorState + { + IteratorState() + : map_(0) + , link_(0) + , itemIndex_(0) + , bucketIndex_(0) + { + } + ValueInternalMap *map_; + ValueInternalLink *link_; + BucketIndex itemIndex_; + BucketIndex bucketIndex_; + }; +# endif // ifndef JSONCPP_DOC_EXCLUDE_IMPLEMENTATION + + ValueInternalMap(); + ValueInternalMap( const ValueInternalMap &other ); + ValueInternalMap &operator =( const ValueInternalMap &other ); + ~ValueInternalMap(); + + void swap( ValueInternalMap &other ); + + BucketIndex size() const; + + void clear(); + + bool reserveDelta( BucketIndex growth ); + + bool reserve( BucketIndex newItemCount ); + + const Value *find( const char *key ) const; + + Value *find( const char *key ); + + Value &resolveReference( const char *key, + bool isStatic ); + + void remove( const char *key ); + + void doActualRemove( ValueInternalLink *link, + BucketIndex index, + BucketIndex bucketIndex ); + + ValueInternalLink *&getLastLinkInBucket( BucketIndex bucketIndex ); + + Value &setNewItem( const char 
*key, + bool isStatic, + ValueInternalLink *link, + BucketIndex index ); + + Value &unsafeAdd( const char *key, + bool isStatic, + HashKey hashedKey ); + + HashKey hash( const char *key ) const; + + int compare( const ValueInternalMap &other ) const; + + private: + void makeBeginIterator( IteratorState &it ) const; + void makeEndIterator( IteratorState &it ) const; + static bool equals( const IteratorState &x, const IteratorState &other ); + static void increment( IteratorState &iterator ); + static void incrementBucket( IteratorState &iterator ); + static void decrement( IteratorState &iterator ); + static const char *key( const IteratorState &iterator ); + static const char *key( const IteratorState &iterator, bool &isStatic ); + static Value &value( const IteratorState &iterator ); + static int distance( const IteratorState &x, const IteratorState &y ); + + private: + ValueInternalLink *buckets_; + ValueInternalLink *tailLink_; + BucketIndex bucketsSize_; + BucketIndex itemCount_; + }; + + /** \brief A simplified deque implementation used internally by Value. + * \internal + * It is based on a list of fixed "page", each page contains a fixed number of items. + * Instead of using a linked-list, a array of pointer is used for fast item look-up. + * Look-up for an element is as follow: + * - compute page index: pageIndex = itemIndex / itemsPerPage + * - look-up item in page: pages_[pageIndex][itemIndex % itemsPerPage] + * + * Insertion is amortized constant time (only the array containing the index of pointers + * need to be reallocated when items are appended). + */ + class JSON_API ValueInternalArray + { + friend class Value; + friend class ValueIteratorBase; + public: + enum { itemsPerPage = 8 }; // should be a power of 2 for fast divide and modulo. 
+ typedef Value::ArrayIndex ArrayIndex; + typedef unsigned int PageIndex; + +# ifndef JSONCPP_DOC_EXCLUDE_IMPLEMENTATION + struct IteratorState // Must be a POD + { + IteratorState() + : array_(0) + , currentPageIndex_(0) + , currentItemIndex_(0) + { + } + ValueInternalArray *array_; + Value **currentPageIndex_; + unsigned int currentItemIndex_; + }; +# endif // ifndef JSONCPP_DOC_EXCLUDE_IMPLEMENTATION + + ValueInternalArray(); + ValueInternalArray( const ValueInternalArray &other ); + ValueInternalArray &operator =( const ValueInternalArray &other ); + ~ValueInternalArray(); + void swap( ValueInternalArray &other ); + + void clear(); + void resize( ArrayIndex newSize ); + + Value &resolveReference( ArrayIndex index ); + + Value *find( ArrayIndex index ) const; + + ArrayIndex size() const; + + int compare( const ValueInternalArray &other ) const; + + private: + static bool equals( const IteratorState &x, const IteratorState &other ); + static void increment( IteratorState &iterator ); + static void decrement( IteratorState &iterator ); + static Value &dereference( const IteratorState &iterator ); + static Value &unsafeDereference( const IteratorState &iterator ); + static int distance( const IteratorState &x, const IteratorState &y ); + static ArrayIndex indexOf( const IteratorState &iterator ); + void makeBeginIterator( IteratorState &it ) const; + void makeEndIterator( IteratorState &it ) const; + void makeIterator( IteratorState &it, ArrayIndex index ) const; + + void makeIndexValid( ArrayIndex index ); + + Value **pages_; + ArrayIndex size_; + PageIndex pageCount_; + }; + + /** \brief Experimental: do not use. Allocator to customize Value internal array. + * Below is an example of a simple implementation (actual implementation use + * memory pool). 
+ \code +class DefaultValueArrayAllocator : public ValueArrayAllocator +{ +public: // overridden from ValueArrayAllocator + virtual ~DefaultValueArrayAllocator() + { + } + + virtual ValueInternalArray *newArray() + { + return new ValueInternalArray(); + } + + virtual ValueInternalArray *newArrayCopy( const ValueInternalArray &other ) + { + return new ValueInternalArray( other ); + } + + virtual void destruct( ValueInternalArray *array ) + { + delete array; + } + + virtual void reallocateArrayPageIndex( Value **&indexes, + ValueInternalArray::PageIndex &indexCount, + ValueInternalArray::PageIndex minNewIndexCount ) + { + ValueInternalArray::PageIndex newIndexCount = (indexCount*3)/2 + 1; + if ( minNewIndexCount > newIndexCount ) + newIndexCount = minNewIndexCount; + void *newIndexes = realloc( indexes, sizeof(Value*) * newIndexCount ); + if ( !newIndexes ) + throw std::bad_alloc(); + indexCount = newIndexCount; + indexes = static_cast( newIndexes ); + } + virtual void releaseArrayPageIndex( Value **indexes, + ValueInternalArray::PageIndex indexCount ) + { + if ( indexes ) + free( indexes ); + } + + virtual Value *allocateArrayPage() + { + return static_cast( malloc( sizeof(Value) * ValueInternalArray::itemsPerPage ) ); + } + + virtual void releaseArrayPage( Value *value ) + { + if ( value ) + free( value ); + } +}; + \endcode + */ + class JSON_API ValueArrayAllocator + { + public: + virtual ~ValueArrayAllocator(); + virtual ValueInternalArray *newArray() = 0; + virtual ValueInternalArray *newArrayCopy( const ValueInternalArray &other ) = 0; + virtual void destructArray( ValueInternalArray *array ) = 0; + /** \brief Reallocate array page index. + * Reallocates an array of pointer on each page. + * \param indexes [input] pointer on the current index. May be \c NULL. + * [output] pointer on the new index of at least + * \a minNewIndexCount pages. + * \param indexCount [input] current number of pages in the index. 
+ * [output] number of page the reallocated index can handle. + * \b MUST be >= \a minNewIndexCount. + * \param minNewIndexCount Minimum number of page the new index must be able to + * handle. + */ + virtual void reallocateArrayPageIndex( Value **&indexes, + ValueInternalArray::PageIndex &indexCount, + ValueInternalArray::PageIndex minNewIndexCount ) = 0; + virtual void releaseArrayPageIndex( Value **indexes, + ValueInternalArray::PageIndex indexCount ) = 0; + virtual Value *allocateArrayPage() = 0; + virtual void releaseArrayPage( Value *value ) = 0; + }; +#endif // #ifdef JSON_VALUE_USE_INTERNAL_MAP + + + /** \brief base class for Value iterators. + * + */ + class ValueIteratorBase + { + public: + typedef unsigned int size_t; + typedef int difference_type; + typedef ValueIteratorBase SelfType; + + ValueIteratorBase(); +#ifndef JSON_VALUE_USE_INTERNAL_MAP + explicit ValueIteratorBase( const Value::ObjectValues::iterator ¤t ); +#else + ValueIteratorBase( const ValueInternalArray::IteratorState &state ); + ValueIteratorBase( const ValueInternalMap::IteratorState &state ); +#endif + + bool operator ==( const SelfType &other ) const + { + return isEqual( other ); + } + + bool operator !=( const SelfType &other ) const + { + return !isEqual( other ); + } + + difference_type operator -( const SelfType &other ) const + { + return computeDistance( other ); + } + + /// Return either the index or the member name of the referenced value as a Value. + Value key() const; + + /// Return the index of the referenced Value. -1 if it is not an arrayValue. + UInt index() const; + + /// Return the member name of the referenced Value. "" if it is not an objectValue. 
+ const char *memberName() const; + + protected: + Value &deref() const; + + void increment(); + + void decrement(); + + difference_type computeDistance( const SelfType &other ) const; + + bool isEqual( const SelfType &other ) const; + + void copy( const SelfType &other ); + + private: +#ifndef JSON_VALUE_USE_INTERNAL_MAP + Value::ObjectValues::iterator current_; + // Indicates that iterator is for a null value. + bool isNull_; +#else + union + { + ValueInternalArray::IteratorState array_; + ValueInternalMap::IteratorState map_; + } iterator_; + bool isArray_; +#endif + }; + + /** \brief const iterator for object and array value. + * + */ + class ValueConstIterator : public ValueIteratorBase + { + friend class Value; + public: + typedef unsigned int size_t; + typedef int difference_type; + typedef const Value &reference; + typedef const Value *pointer; + typedef ValueConstIterator SelfType; + + ValueConstIterator(); + private: + /*! \internal Use by Value to create an iterator. + */ +#ifndef JSON_VALUE_USE_INTERNAL_MAP + explicit ValueConstIterator( const Value::ObjectValues::iterator ¤t ); +#else + ValueConstIterator( const ValueInternalArray::IteratorState &state ); + ValueConstIterator( const ValueInternalMap::IteratorState &state ); +#endif + public: + SelfType &operator =( const ValueIteratorBase &other ); + + SelfType operator++( int ) + { + SelfType temp( *this ); + ++*this; + return temp; + } + + SelfType operator--( int ) + { + SelfType temp( *this ); + --*this; + return temp; + } + + SelfType &operator--() + { + decrement(); + return *this; + } + + SelfType &operator++() + { + increment(); + return *this; + } + + reference operator *() const + { + return deref(); + } + }; + + + /** \brief Iterator for object and array value. 
+ */ + class ValueIterator : public ValueIteratorBase + { + friend class Value; + public: + typedef unsigned int size_t; + typedef int difference_type; + typedef Value &reference; + typedef Value *pointer; + typedef ValueIterator SelfType; + + ValueIterator(); + ValueIterator( const ValueConstIterator &other ); + ValueIterator( const ValueIterator &other ); + private: + /*! \internal Use by Value to create an iterator. + */ +#ifndef JSON_VALUE_USE_INTERNAL_MAP + explicit ValueIterator( const Value::ObjectValues::iterator ¤t ); +#else + ValueIterator( const ValueInternalArray::IteratorState &state ); + ValueIterator( const ValueInternalMap::IteratorState &state ); +#endif + public: + + SelfType &operator =( const SelfType &other ); + + SelfType operator++( int ) + { + SelfType temp( *this ); + ++*this; + return temp; + } + + SelfType operator--( int ) + { + SelfType temp( *this ); + --*this; + return temp; + } + + SelfType &operator--() + { + decrement(); + return *this; + } + + SelfType &operator++() + { + increment(); + return *this; + } + + reference operator *() const + { + return deref(); + } + }; + + +} // namespace Json + + +#endif // CPPTL_JSON_H_INCLUDED diff --git a/src/third_party/jsoncpp/json_valueiterator.inl b/src/third_party/jsoncpp/json_valueiterator.inl new file mode 100644 index 0000000..bd7c8d2 --- /dev/null +++ b/src/third_party/jsoncpp/json_valueiterator.inl @@ -0,0 +1,297 @@ +// Copyright 2007-2010 Baptiste Lepilleur +// Distributed under MIT license, or public domain if desired and +// recognized in your jurisdiction. 
+// See file LICENSE for detail or copy at http://jsoncpp.sourceforge.net/LICENSE + +// included by json_value.cpp +// everything is within Json namespace + + +// ////////////////////////////////////////////////////////////////// +// ////////////////////////////////////////////////////////////////// +// ////////////////////////////////////////////////////////////////// +// class ValueIteratorBase +// ////////////////////////////////////////////////////////////////// +// ////////////////////////////////////////////////////////////////// +// ////////////////////////////////////////////////////////////////// + +ValueIteratorBase::ValueIteratorBase() +#ifndef JSON_VALUE_USE_INTERNAL_MAP + : current_() + , isNull_( true ) +{ +} +#else + : isArray_( true ) + , isNull_( true ) +{ + iterator_.array_ = ValueInternalArray::IteratorState(); +} +#endif + + +#ifndef JSON_VALUE_USE_INTERNAL_MAP +ValueIteratorBase::ValueIteratorBase( const Value::ObjectValues::iterator ¤t ) + : current_( current ) + , isNull_( false ) +{ +} +#else +ValueIteratorBase::ValueIteratorBase( const ValueInternalArray::IteratorState &state ) + : isArray_( true ) +{ + iterator_.array_ = state; +} + + +ValueIteratorBase::ValueIteratorBase( const ValueInternalMap::IteratorState &state ) + : isArray_( false ) +{ + iterator_.map_ = state; +} +#endif + +Value & +ValueIteratorBase::deref() const +{ +#ifndef JSON_VALUE_USE_INTERNAL_MAP + return current_->second; +#else + if ( isArray_ ) + return ValueInternalArray::dereference( iterator_.array_ ); + return ValueInternalMap::value( iterator_.map_ ); +#endif +} + + +void +ValueIteratorBase::increment() +{ +#ifndef JSON_VALUE_USE_INTERNAL_MAP + ++current_; +#else + if ( isArray_ ) + ValueInternalArray::increment( iterator_.array_ ); + ValueInternalMap::increment( iterator_.map_ ); +#endif +} + + +void +ValueIteratorBase::decrement() +{ +#ifndef JSON_VALUE_USE_INTERNAL_MAP + --current_; +#else + if ( isArray_ ) + ValueInternalArray::decrement( iterator_.array_ ); + 
ValueInternalMap::decrement( iterator_.map_ ); +#endif +} + + +ValueIteratorBase::difference_type +ValueIteratorBase::computeDistance( const SelfType &other ) const +{ +#ifndef JSON_VALUE_USE_INTERNAL_MAP +# ifdef JSON_USE_CPPTL_SMALLMAP + return current_ - other.current_; +# else + // Iterator for null value are initialized using the default + // constructor, which initialize current_ to the default + // std::map::iterator. As begin() and end() are two instance + // of the default std::map::iterator, they can not be compared. + // To allow this, we handle this comparison specifically. + if ( isNull_ && other.isNull_ ) + { + return 0; + } + + + // Usage of std::distance is not portable (does not compile with Sun Studio 12 RogueWave STL, + // which is the one used by default). + // Using a portable hand-made version for non random iterator instead: + // return difference_type( std::distance( current_, other.current_ ) ); + difference_type myDistance = 0; + for ( Value::ObjectValues::iterator it = current_; it != other.current_; ++it ) + { + ++myDistance; + } + return myDistance; +# endif +#else + if ( isArray_ ) + return ValueInternalArray::distance( iterator_.array_, other.iterator_.array_ ); + return ValueInternalMap::distance( iterator_.map_, other.iterator_.map_ ); +#endif +} + + +bool +ValueIteratorBase::isEqual( const SelfType &other ) const +{ +#ifndef JSON_VALUE_USE_INTERNAL_MAP + if ( isNull_ ) + { + return other.isNull_; + } + return current_ == other.current_; +#else + if ( isArray_ ) + return ValueInternalArray::equals( iterator_.array_, other.iterator_.array_ ); + return ValueInternalMap::equals( iterator_.map_, other.iterator_.map_ ); +#endif +} + + +void +ValueIteratorBase::copy( const SelfType &other ) +{ +#ifndef JSON_VALUE_USE_INTERNAL_MAP + current_ = other.current_; +#else + if ( isArray_ ) + iterator_.array_ = other.iterator_.array_; + iterator_.map_ = other.iterator_.map_; +#endif +} + + +Value +ValueIteratorBase::key() const +{ +#ifndef 
JSON_VALUE_USE_INTERNAL_MAP + const Value::CZString czstring = (*current_).first; + if ( czstring.c_str() ) + { + if ( czstring.isStaticString() ) + return Value( StaticString( czstring.c_str() ) ); + return Value( czstring.c_str() ); + } + return Value( czstring.index() ); +#else + if ( isArray_ ) + return Value( ValueInternalArray::indexOf( iterator_.array_ ) ); + bool isStatic; + const char *memberName = ValueInternalMap::key( iterator_.map_, isStatic ); + if ( isStatic ) + return Value( StaticString( memberName ) ); + return Value( memberName ); +#endif +} + + +UInt +ValueIteratorBase::index() const +{ +#ifndef JSON_VALUE_USE_INTERNAL_MAP + const Value::CZString czstring = (*current_).first; + if ( !czstring.c_str() ) + return czstring.index(); + return Value::UInt( -1 ); +#else + if ( isArray_ ) + return Value::UInt( ValueInternalArray::indexOf( iterator_.array_ ) ); + return Value::UInt( -1 ); +#endif +} + + +const char * +ValueIteratorBase::memberName() const +{ +#ifndef JSON_VALUE_USE_INTERNAL_MAP + const char *name = (*current_).first.c_str(); + return name ? 
name : ""; +#else + if ( !isArray_ ) + return ValueInternalMap::key( iterator_.map_ ); + return ""; +#endif +} + + +// ////////////////////////////////////////////////////////////////// +// ////////////////////////////////////////////////////////////////// +// ////////////////////////////////////////////////////////////////// +// class ValueConstIterator +// ////////////////////////////////////////////////////////////////// +// ////////////////////////////////////////////////////////////////// +// ////////////////////////////////////////////////////////////////// + +ValueConstIterator::ValueConstIterator() +{ +} + + +#ifndef JSON_VALUE_USE_INTERNAL_MAP +ValueConstIterator::ValueConstIterator( const Value::ObjectValues::iterator ¤t ) + : ValueIteratorBase( current ) +{ +} +#else +ValueConstIterator::ValueConstIterator( const ValueInternalArray::IteratorState &state ) + : ValueIteratorBase( state ) +{ +} + +ValueConstIterator::ValueConstIterator( const ValueInternalMap::IteratorState &state ) + : ValueIteratorBase( state ) +{ +} +#endif + +ValueConstIterator & +ValueConstIterator::operator =( const ValueIteratorBase &other ) +{ + copy( other ); + return *this; +} + + +// ////////////////////////////////////////////////////////////////// +// ////////////////////////////////////////////////////////////////// +// ////////////////////////////////////////////////////////////////// +// class ValueIterator +// ////////////////////////////////////////////////////////////////// +// ////////////////////////////////////////////////////////////////// +// ////////////////////////////////////////////////////////////////// + +ValueIterator::ValueIterator() +{ +} + + +#ifndef JSON_VALUE_USE_INTERNAL_MAP +ValueIterator::ValueIterator( const Value::ObjectValues::iterator ¤t ) + : ValueIteratorBase( current ) +{ +} +#else +ValueIterator::ValueIterator( const ValueInternalArray::IteratorState &state ) + : ValueIteratorBase( state ) +{ +} + +ValueIterator::ValueIterator( const 
ValueInternalMap::IteratorState &state ) + : ValueIteratorBase( state ) +{ +} +#endif + +ValueIterator::ValueIterator( const ValueConstIterator &other ) + : ValueIteratorBase( other ) +{ +} + +ValueIterator::ValueIterator( const ValueIterator &other ) + : ValueIteratorBase( other ) +{ +} + +ValueIterator & +ValueIterator::operator =( const SelfType &other ) +{ + copy( other ); + return *this; +} diff --git a/src/third_party/jsoncpp/json_writer.cpp b/src/third_party/jsoncpp/json_writer.cpp new file mode 100644 index 0000000..4e45496 --- /dev/null +++ b/src/third_party/jsoncpp/json_writer.cpp @@ -0,0 +1,819 @@ +// Copyright 2007-2010 Baptiste Lepilleur +// Distributed under MIT license, or public domain if desired and +// recognized in your jurisdiction. +// See file LICENSE for detail or copy at http://jsoncpp.sourceforge.net/LICENSE + +#include "json_writer.h" +#include "json_tool.h" +#include +#include +#include +#include +#include +#include +#include + +#if _MSC_VER >= 1400 // VC++ 8.0 +#pragma warning( disable : 4996 ) // disable warning about strdup being deprecated. +#endif + +namespace Json { + +static bool containsControlCharacter( const char* str ) +{ + while ( *str ) + { + if ( isControlCharacter( *(str++) ) ) + return true; + } + return false; +} + +std::string valueToString( Int value ) +{ + UIntToStringBuffer buffer; + char *current = buffer + sizeof(buffer); + bool isNegative = value < 0; + if ( isNegative ) + value = -value; + uintToString( UInt(value), current ); + if ( isNegative ) + *--current = '-'; + assert( current >= buffer ); + return current; +} + + +std::string valueToString( UInt value ) +{ + UIntToStringBuffer buffer; + char *current = buffer + sizeof(buffer); + uintToString( value, current ); + assert( current >= buffer ); + return current; +} + +std::string valueToString( double value ) +{ + char buffer[32]; +#if defined(_MSC_VER) && defined(__STDC_SECURE_LIB__) // Use secure version with visual studio 2005 to avoid warning. 
+ sprintf_s(buffer, sizeof(buffer), "%#.16g", value); +#else + sprintf(buffer, "%#.16g", value); +#endif + char* ch = buffer + strlen(buffer) - 1; + if (*ch != '0') return buffer; // nothing to truncate, so save time + while(ch > buffer && *ch == '0'){ + --ch; + } + char* last_nonzero = ch; + while(ch >= buffer){ + switch(*ch){ + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + --ch; + continue; + case '.': + // Truncate zeroes to save bytes in output, but keep one. + *(last_nonzero+2) = '\0'; + return buffer; + default: + return buffer; + } + } + return buffer; +} + + +std::string valueToString( bool value ) +{ + return value ? "true" : "false"; +} + +std::string valueToQuotedString( const char *value ) +{ + // Not sure how to handle unicode... + if (strpbrk(value, "\"\\\b\f\n\r\t") == NULL && !containsControlCharacter( value )) + return std::string("\"") + value + "\""; + // We have to walk value and escape any special characters. + // Appending to std::string is not efficient, but this should be rare. + // (Note: forward slashes are *not* rare, but I am not escaping them.) + std::string::size_type maxsize = strlen(value)*2 + 3; // allescaped+quotes+NULL + std::string result; + result.reserve(maxsize); // to avoid lots of mallocs + result += "\""; + for (const char* c=value; *c != 0; ++c) + { + switch(*c) + { + case '\"': + result += "\\\""; + break; + case '\\': + result += "\\\\"; + break; + case '\b': + result += "\\b"; + break; + case '\f': + result += "\\f"; + break; + case '\n': + result += "\\n"; + break; + case '\r': + result += "\\r"; + break; + case '\t': + result += "\\t"; + break; + //case '/': + // Even though \/ is considered a legal escape in JSON, a bare + // slash is also legal, so I see no reason to escape it. + // (I hope I am not misunderstanding something. 
+ // blep notes: actually escaping \/ may be useful in javascript to avoid (*c); + result += oss.str(); + } + else + { + result += *c; + } + break; + } + } + result += "\""; + return result; +} + +// Class Writer +// ////////////////////////////////////////////////////////////////// +Writer::~Writer() +{ +} + + +// Class FastWriter +// ////////////////////////////////////////////////////////////////// + +FastWriter::FastWriter() + : yamlCompatiblityEnabled_( false ) +{ +} + + +void +FastWriter::enableYAMLCompatibility() +{ + yamlCompatiblityEnabled_ = true; +} + + +std::string +FastWriter::write( const Value &root ) +{ + document_ = ""; + writeValue( root ); + document_ += "\n"; + return document_; +} + + +void +FastWriter::writeValue( const Value &value ) +{ + switch ( value.type() ) + { + case nullValue: + document_ += "null"; + break; + case intValue: + document_ += valueToString( value.asInt() ); + break; + case uintValue: + document_ += valueToString( value.asUInt() ); + break; + case realValue: + document_ += valueToString( value.asDouble() ); + break; + case stringValue: + document_ += valueToQuotedString( value.asCString() ); + break; + case booleanValue: + document_ += valueToString( value.asBool() ); + break; + case arrayValue: + { + document_ += "["; + int size = value.size(); + for ( int index =0; index < size; ++index ) + { + if ( index > 0 ) + document_ += ","; + writeValue( value[index] ); + } + document_ += "]"; + } + break; + case objectValue: + { + Value::Members members( value.getMemberNames() ); + document_ += "{"; + for ( Value::Members::iterator it = members.begin(); + it != members.end(); + ++it ) + { + const std::string &name = *it; + if ( it != members.begin() ) + document_ += ","; + document_ += valueToQuotedString( name.c_str() ); + document_ += yamlCompatiblityEnabled_ ? 
": " + : ":"; + writeValue( value[name] ); + } + document_ += "}"; + } + break; + } +} + + +// Class StyledWriter +// ////////////////////////////////////////////////////////////////// + +StyledWriter::StyledWriter() + : rightMargin_( 74 ) + , indentSize_( 3 ) +{ +} + + +std::string +StyledWriter::write( const Value &root ) +{ + document_ = ""; + addChildValues_ = false; + indentString_ = ""; + writeCommentBeforeValue( root ); + writeValue( root ); + writeCommentAfterValueOnSameLine( root ); + document_ += "\n"; + return document_; +} + + +void +StyledWriter::writeValue( const Value &value ) +{ + switch ( value.type() ) + { + case nullValue: + pushValue( "null" ); + break; + case intValue: + pushValue( valueToString( value.asInt() ) ); + break; + case uintValue: + pushValue( valueToString( value.asUInt() ) ); + break; + case realValue: + pushValue( valueToString( value.asDouble() ) ); + break; + case stringValue: + pushValue( valueToQuotedString( value.asCString() ) ); + break; + case booleanValue: + pushValue( valueToString( value.asBool() ) ); + break; + case arrayValue: + writeArrayValue( value); + break; + case objectValue: + { + Value::Members members( value.getMemberNames() ); + if ( members.empty() ) + pushValue( "{}" ); + else + { + writeWithIndent( "{" ); + indent(); + Value::Members::iterator it = members.begin(); + while ( true ) + { + const std::string &name = *it; + const Value &childValue = value[name]; + writeCommentBeforeValue( childValue ); + writeWithIndent( valueToQuotedString( name.c_str() ) ); + document_ += " : "; + writeValue( childValue ); + if ( ++it == members.end() ) + { + writeCommentAfterValueOnSameLine( childValue ); + break; + } + document_ += ","; + writeCommentAfterValueOnSameLine( childValue ); + } + unindent(); + writeWithIndent( "}" ); + } + } + break; + } +} + + +void +StyledWriter::writeArrayValue( const Value &value ) +{ + unsigned size = value.size(); + if ( size == 0 ) + pushValue( "[]" ); + else + { + bool isArrayMultiLine 
= isMultineArray( value ); + if ( isArrayMultiLine ) + { + writeWithIndent( "[" ); + indent(); + bool hasChildValue = !childValues_.empty(); + unsigned index =0; + while ( true ) + { + const Value &childValue = value[index]; + writeCommentBeforeValue( childValue ); + if ( hasChildValue ) + writeWithIndent( childValues_[index] ); + else + { + writeIndent(); + writeValue( childValue ); + } + if ( ++index == size ) + { + writeCommentAfterValueOnSameLine( childValue ); + break; + } + document_ += ","; + writeCommentAfterValueOnSameLine( childValue ); + } + unindent(); + writeWithIndent( "]" ); + } + else // output on a single line + { + assert( childValues_.size() == size ); + document_ += "[ "; + for ( unsigned index =0; index < size; ++index ) + { + if ( index > 0 ) + document_ += ", "; + document_ += childValues_[index]; + } + document_ += " ]"; + } + } +} + + +bool +StyledWriter::isMultineArray( const Value &value ) +{ + int size = value.size(); + bool isMultiLine = size*3 >= rightMargin_ ; + childValues_.clear(); + for ( int index =0; index < size && !isMultiLine; ++index ) + { + const Value &childValue = value[index]; + isMultiLine = isMultiLine || + ( (childValue.isArray() || childValue.isObject()) && + childValue.size() > 0 ); + } + if ( !isMultiLine ) // check if line length > max line length + { + childValues_.reserve( size ); + addChildValues_ = true; + int lineLength = 4 + (size-1)*2; // '[ ' + ', '*n + ' ]' + for ( int index =0; index < size && !isMultiLine; ++index ) + { + writeValue( value[index] ); + lineLength += int( childValues_[index].length() ); + isMultiLine = isMultiLine && hasCommentForValue( value[index] ); + } + addChildValues_ = false; + isMultiLine = isMultiLine || lineLength >= rightMargin_; + } + return isMultiLine; +} + + +void +StyledWriter::pushValue( const std::string &value ) +{ + if ( addChildValues_ ) + childValues_.push_back( value ); + else + document_ += value; +} + + +void +StyledWriter::writeIndent() +{ + if ( 
!document_.empty() ) + { + char last = document_[document_.length()-1]; + if ( last == ' ' ) // already indented + return; + if ( last != '\n' ) // Comments may add new-line + document_ += '\n'; + } + document_ += indentString_; +} + + +void +StyledWriter::writeWithIndent( const std::string &value ) +{ + writeIndent(); + document_ += value; +} + + +void +StyledWriter::indent() +{ + indentString_ += std::string( indentSize_, ' ' ); +} + + +void +StyledWriter::unindent() +{ + assert( int(indentString_.size()) >= indentSize_ ); + indentString_.resize( indentString_.size() - indentSize_ ); +} + + +void +StyledWriter::writeCommentBeforeValue( const Value &root ) +{ + if ( !root.hasComment( commentBefore ) ) + return; + document_ += normalizeEOL( root.getComment( commentBefore ) ); + document_ += "\n"; +} + + +void +StyledWriter::writeCommentAfterValueOnSameLine( const Value &root ) +{ + if ( root.hasComment( commentAfterOnSameLine ) ) + document_ += " " + normalizeEOL( root.getComment( commentAfterOnSameLine ) ); + + if ( root.hasComment( commentAfter ) ) + { + document_ += "\n"; + document_ += normalizeEOL( root.getComment( commentAfter ) ); + document_ += "\n"; + } +} + + +bool +StyledWriter::hasCommentForValue( const Value &value ) +{ + return value.hasComment( commentBefore ) + || value.hasComment( commentAfterOnSameLine ) + || value.hasComment( commentAfter ); +} + + +std::string +StyledWriter::normalizeEOL( const std::string &text ) +{ + std::string normalized; + normalized.reserve( text.length() ); + const char *begin = text.c_str(); + const char *end = begin + text.length(); + const char *current = begin; + while ( current != end ) + { + char c = *current++; + if ( c == '\r' ) // mac or dos EOL + { + if ( *current == '\n' ) // convert dos EOL + ++current; + normalized += '\n'; + } + else // handle unix EOL & other char + normalized += c; + } + return normalized; +} + + +// Class StyledStreamWriter +// 
////////////////////////////////////////////////////////////////// + +StyledStreamWriter::StyledStreamWriter( std::string indentation ) + : document_(NULL) + , rightMargin_( 74 ) + , indentation_( indentation ) +{ +} + + +void +StyledStreamWriter::write( std::ostream &out, const Value &root ) +{ + document_ = &out; + addChildValues_ = false; + indentString_ = ""; + writeCommentBeforeValue( root ); + writeValue( root ); + writeCommentAfterValueOnSameLine( root ); + *document_ << "\n"; + document_ = NULL; // Forget the stream, for safety. +} + + +void +StyledStreamWriter::writeValue( const Value &value ) +{ + switch ( value.type() ) + { + case nullValue: + pushValue( "null" ); + break; + case intValue: + pushValue( valueToString( value.asInt() ) ); + break; + case uintValue: + pushValue( valueToString( value.asUInt() ) ); + break; + case realValue: + pushValue( valueToString( value.asDouble() ) ); + break; + case stringValue: + pushValue( valueToQuotedString( value.asCString() ) ); + break; + case booleanValue: + pushValue( valueToString( value.asBool() ) ); + break; + case arrayValue: + writeArrayValue( value); + break; + case objectValue: + { + Value::Members members( value.getMemberNames() ); + if ( members.empty() ) + pushValue( "{}" ); + else + { + writeWithIndent( "{" ); + indent(); + Value::Members::iterator it = members.begin(); + while ( true ) + { + const std::string &name = *it; + const Value &childValue = value[name]; + writeCommentBeforeValue( childValue ); + writeWithIndent( valueToQuotedString( name.c_str() ) ); + *document_ << " : "; + writeValue( childValue ); + if ( ++it == members.end() ) + { + writeCommentAfterValueOnSameLine( childValue ); + break; + } + *document_ << ","; + writeCommentAfterValueOnSameLine( childValue ); + } + unindent(); + writeWithIndent( "}" ); + } + } + break; + } +} + + +void +StyledStreamWriter::writeArrayValue( const Value &value ) +{ + unsigned size = value.size(); + if ( size == 0 ) + pushValue( "[]" ); + else + { + 
bool isArrayMultiLine = isMultineArray( value ); + if ( isArrayMultiLine ) + { + writeWithIndent( "[" ); + indent(); + bool hasChildValue = !childValues_.empty(); + unsigned index =0; + while ( true ) + { + const Value &childValue = value[index]; + writeCommentBeforeValue( childValue ); + if ( hasChildValue ) + writeWithIndent( childValues_[index] ); + else + { + writeIndent(); + writeValue( childValue ); + } + if ( ++index == size ) + { + writeCommentAfterValueOnSameLine( childValue ); + break; + } + *document_ << ","; + writeCommentAfterValueOnSameLine( childValue ); + } + unindent(); + writeWithIndent( "]" ); + } + else // output on a single line + { + assert( childValues_.size() == size ); + *document_ << "[ "; + for ( unsigned index =0; index < size; ++index ) + { + if ( index > 0 ) + *document_ << ", "; + *document_ << childValues_[index]; + } + *document_ << " ]"; + } + } +} + + +bool +StyledStreamWriter::isMultineArray( const Value &value ) +{ + int size = value.size(); + bool isMultiLine = size*3 >= rightMargin_ ; + childValues_.clear(); + for ( int index =0; index < size && !isMultiLine; ++index ) + { + const Value &childValue = value[index]; + isMultiLine = isMultiLine || + ( (childValue.isArray() || childValue.isObject()) && + childValue.size() > 0 ); + } + if ( !isMultiLine ) // check if line length > max line length + { + childValues_.reserve( size ); + addChildValues_ = true; + int lineLength = 4 + (size-1)*2; // '[ ' + ', '*n + ' ]' + for ( int index =0; index < size && !isMultiLine; ++index ) + { + writeValue( value[index] ); + lineLength += int( childValues_[index].length() ); + isMultiLine = isMultiLine && hasCommentForValue( value[index] ); + } + addChildValues_ = false; + isMultiLine = isMultiLine || lineLength >= rightMargin_; + } + return isMultiLine; +} + + +void +StyledStreamWriter::pushValue( const std::string &value ) +{ + if ( addChildValues_ ) + childValues_.push_back( value ); + else + *document_ << value; +} + + +void 
+StyledStreamWriter::writeIndent() +{ + /* + Some comments in this method would have been nice. ;-) + + if ( !document_.empty() ) + { + char last = document_[document_.length()-1]; + if ( last == ' ' ) // already indented + return; + if ( last != '\n' ) // Comments may add new-line + *document_ << '\n'; + } + */ + *document_ << '\n' << indentString_; +} + + +void +StyledStreamWriter::writeWithIndent( const std::string &value ) +{ + writeIndent(); + *document_ << value; +} + + +void +StyledStreamWriter::indent() +{ + indentString_ += indentation_; +} + + +void +StyledStreamWriter::unindent() +{ + assert( indentString_.size() >= indentation_.size() ); + indentString_.resize( indentString_.size() - indentation_.size() ); +} + + +void +StyledStreamWriter::writeCommentBeforeValue( const Value &root ) +{ + if ( !root.hasComment( commentBefore ) ) + return; + *document_ << normalizeEOL( root.getComment( commentBefore ) ); + *document_ << "\n"; +} + + +void +StyledStreamWriter::writeCommentAfterValueOnSameLine( const Value &root ) +{ + if ( root.hasComment( commentAfterOnSameLine ) ) + *document_ << " " + normalizeEOL( root.getComment( commentAfterOnSameLine ) ); + + if ( root.hasComment( commentAfter ) ) + { + *document_ << "\n"; + *document_ << normalizeEOL( root.getComment( commentAfter ) ); + *document_ << "\n"; + } +} + + +bool +StyledStreamWriter::hasCommentForValue( const Value &value ) +{ + return value.hasComment( commentBefore ) + || value.hasComment( commentAfterOnSameLine ) + || value.hasComment( commentAfter ); +} + + +std::string +StyledStreamWriter::normalizeEOL( const std::string &text ) +{ + std::string normalized; + normalized.reserve( text.length() ); + const char *begin = text.c_str(); + const char *end = begin + text.length(); + const char *current = begin; + while ( current != end ) + { + char c = *current++; + if ( c == '\r' ) // mac or dos EOL + { + if ( *current == '\n' ) // convert dos EOL + ++current; + normalized += '\n'; + } + else // handle 
unix EOL & other char + normalized += c; + } + return normalized; +} + + +std::ostream& operator<<( std::ostream &sout, const Value &root ) +{ + Json::StyledStreamWriter writer; + writer.write(sout, root); + return sout; +} + + +} // namespace Json diff --git a/src/third_party/jsoncpp/json_writer.h b/src/third_party/jsoncpp/json_writer.h new file mode 100644 index 0000000..f01194d --- /dev/null +++ b/src/third_party/jsoncpp/json_writer.h @@ -0,0 +1,179 @@ +// Copyright 2007-2010 Baptiste Lepilleur +// Distributed under MIT license, or public domain if desired and +// recognized in your jurisdiction. +// See file LICENSE for detail or copy at http://jsoncpp.sourceforge.net/LICENSE + +#ifndef JSON_WRITER_H_INCLUDED +# define JSON_WRITER_H_INCLUDED + +# include "json_value.h" +# include +# include +# include + +namespace Json { + + class Value; + + /** \brief Abstract class for writers. + */ + class JSON_API Writer + { + public: + virtual ~Writer(); + + virtual std::string write( const Value &root ) = 0; + }; + + /** \brief Outputs a Value in JSON format without formatting (not human friendly). + * + * The JSON document is written in a single line. It is not intended for 'human' consumption, + * but may be usefull to support feature such as RPC where bandwith is limited. + * \sa Reader, Value + */ + class JSON_API FastWriter : public Writer + { + public: + FastWriter(); + virtual ~FastWriter(){} + + void enableYAMLCompatibility(); + + public: // overridden from Writer + virtual std::string write( const Value &root ); + + private: + void writeValue( const Value &value ); + + std::string document_; + bool yamlCompatiblityEnabled_; + }; + + /** \brief Writes a Value in JSON format in a human friendly way. + * + * The rules for line break and indent are as follow: + * - Object value: + * - if empty then print {} without indent and line break + * - if not empty the print '{', line break & indent, print one value per line + * and then unindent and line break and print '}'. 
+ * - Array value: + * - if empty then print [] without indent and line break + * - if the array contains no object value, empty array or some other value types, + * and all the values fit on one lines, then print the array on a single line. + * - otherwise, it the values do not fit on one line, or the array contains + * object or non empty array, then print one value per line. + * + * If the Value have comments then they are outputed according to their #CommentPlacement. + * + * \sa Reader, Value, Value::setComment() + */ + class JSON_API StyledWriter: public Writer + { + public: + StyledWriter(); + virtual ~StyledWriter(){} + + public: // overridden from Writer + /** \brief Serialize a Value in JSON format. + * \param root Value to serialize. + * \return String containing the JSON document that represents the root value. + */ + virtual std::string write( const Value &root ); + + private: + void writeValue( const Value &value ); + void writeArrayValue( const Value &value ); + bool isMultineArray( const Value &value ); + void pushValue( const std::string &value ); + void writeIndent(); + void writeWithIndent( const std::string &value ); + void indent(); + void unindent(); + void writeCommentBeforeValue( const Value &root ); + void writeCommentAfterValueOnSameLine( const Value &root ); + bool hasCommentForValue( const Value &value ); + static std::string normalizeEOL( const std::string &text ); + + typedef std::vector ChildValues; + + ChildValues childValues_; + std::string document_; + std::string indentString_; + int rightMargin_; + int indentSize_; + bool addChildValues_; + }; + + /** \brief Writes a Value in JSON format in a human friendly way, + to a stream rather than to a string. + * + * The rules for line break and indent are as follow: + * - Object value: + * - if empty then print {} without indent and line break + * - if not empty the print '{', line break & indent, print one value per line + * and then unindent and line break and print '}'. 
+ * - Array value: + * - if empty then print [] without indent and line break + * - if the array contains no object value, empty array or some other value types, + * and all the values fit on one lines, then print the array on a single line. + * - otherwise, it the values do not fit on one line, or the array contains + * object or non empty array, then print one value per line. + * + * If the Value have comments then they are outputed according to their #CommentPlacement. + * + * \param indentation Each level will be indented by this amount extra. + * \sa Reader, Value, Value::setComment() + */ + class JSON_API StyledStreamWriter + { + public: + StyledStreamWriter( std::string indentation="\t" ); + ~StyledStreamWriter(){} + + public: + /** \brief Serialize a Value in JSON format. + * \param out Stream to write to. (Can be ostringstream, e.g.) + * \param root Value to serialize. + * \note There is no point in deriving from Writer, since write() should not return a value. + */ + void write( std::ostream &out, const Value &root ); + + private: + void writeValue( const Value &value ); + void writeArrayValue( const Value &value ); + bool isMultineArray( const Value &value ); + void pushValue( const std::string &value ); + void writeIndent(); + void writeWithIndent( const std::string &value ); + void indent(); + void unindent(); + void writeCommentBeforeValue( const Value &root ); + void writeCommentAfterValueOnSameLine( const Value &root ); + bool hasCommentForValue( const Value &value ); + static std::string normalizeEOL( const std::string &text ); + + typedef std::vector ChildValues; + + ChildValues childValues_; + std::ostream* document_; + std::string indentString_; + int rightMargin_; + std::string indentation_; + bool addChildValues_; + }; + + std::string JSON_API valueToString( Int value ); + std::string JSON_API valueToString( UInt value ); + std::string JSON_API valueToString( double value ); + std::string JSON_API valueToString( bool value ); + std::string 
JSON_API valueToQuotedString( const char *value ); + + /// \brief Output using the StyledStreamWriter. + /// \see Json::operator>>() + std::ostream& operator<<( std::ostream&, const Value &root ); + +} // namespace Json + + + +#endif // JSON_WRITER_H_INCLUDED diff --git a/src/toolkit/Makefile b/src/toolkit/Makefile index 4fcf723..077c6cc 100644 --- a/src/toolkit/Makefile +++ b/src/toolkit/Makefile @@ -9,8 +9,9 @@ API_DIR = ../api UTILS_DIR = ../utils OBJ_DIR = ../../obj BIN_DIR = ../../bin +THIRD_PARTY_DIR = ../third_party -INCLUDES = -I$(API_DIR)/ -I$(UTILS_DIR) +INCLUDES = -I$(API_DIR)/ -I$(UTILS_DIR) -I$(THIRD_PARTY_DIR) # ---------------------------------- # define our source and object files diff --git a/src/toolkit/bamtools.cpp b/src/toolkit/bamtools.cpp index f75b93d..20b8cf1 100644 --- a/src/toolkit/bamtools.cpp +++ b/src/toolkit/bamtools.cpp @@ -77,16 +77,16 @@ int Help(int argc, char* argv[]) { cerr << "usage: bamtools [--help] COMMAND [ARGS]" << endl; cerr << endl; cerr << "Available bamtools commands:" << endl; - cerr << "\tconvert Converts between BAM and a number of other formats" << endl; - cerr << "\tcount Prints number of alignments in BAM file" << endl; - cerr << "\tcoverage Prints coverage statistics from the input BAM file" << endl; - cerr << "\tfilter Filters BAM file(s) by user-specified criteria" << endl; - cerr << "\theader Prints BAM header information" << endl; - cerr << "\tindex Generates index for BAM file" << endl; - cerr << "\tmerge Merge multiple BAM files into single file" << endl; - cerr << "\trandom Grab a random subset of alignments" << endl; - cerr << "\tsort Sorts the BAM file according to some criteria" << endl; - cerr << "\tstats Prints general alignment statistics" << endl; + cerr << "\tconvert Converts between BAM and a number of other formats" << endl; + cerr << "\tcount Prints number of alignments in BAM file(s)" << endl; + cerr << "\tcoverage Prints coverage statistics from the input BAM file" << endl; + cerr << "\tfilter 
Filters BAM file(s) by user-specified criteria" << endl; + cerr << "\theader Prints BAM header information" << endl; + cerr << "\tindex Generates index for BAM file" << endl; + cerr << "\tmerge Merge multiple BAM files into single file" << endl; + cerr << "\trandom Select random alignments from existing BAM file(s)" << endl; + cerr << "\tsort Sorts the BAM file according to some criteria" << endl; + cerr << "\tstats Prints some basic statistics from input BAM file(s)" << endl; cerr << endl; cerr << "See 'bamtools help COMMAND' for more information on a specific command." << endl; cerr << endl; diff --git a/src/toolkit/bamtools_count.cpp b/src/toolkit/bamtools_count.cpp index 4bd7c82..20ed3ae 100644 --- a/src/toolkit/bamtools_count.cpp +++ b/src/toolkit/bamtools_count.cpp @@ -3,16 +3,12 @@ // Marth Lab, Department of Biology, Boston College // All rights reserved. // --------------------------------------------------------------------------- -// Last modified: 2 June 2010 +// Last modified: 3 September 2010 // --------------------------------------------------------------------------- -// Prints alignment count for BAM file -// -// ** Expand to multiple?? -// +// Prints alignment count for BAM file(s) // *************************************************************************** #include -#include #include #include @@ -53,15 +49,14 @@ CountTool::CountTool(void) , m_settings(new CountSettings) { // set program details - Options::SetProgramInfo("bamtools count", "prints alignment counts for a BAM file", "-in [-region ]"); + Options::SetProgramInfo("bamtools count", "prints alignment counts for a BAM file", "[-in -in ...] 
[-region ]"); // set up options OptionGroup* IO_Opts = Options::CreateOptionGroup("Input & Output"); - Options::AddValueOption("-in", "BAM filename", "the input BAM file(s)", "", m_settings->HasInput, m_settings->InputFiles, IO_Opts); - //Options::AddValueOption("-index", "BAM index filename", "the BAM index file", "", m_settings->HasBamIndexFilename, m_settings->BamIndexFilename, IO_Opts); + Options::AddValueOption("-in", "BAM filename", "the input BAM file(s)", "", m_settings->HasInput, m_settings->InputFiles, IO_Opts, Options::StandardIn()); OptionGroup* FilterOpts = Options::CreateOptionGroup("Filters"); - Options::AddValueOption("-region", "REGION", "genomic region. Index file is recommended for better performance, and is read automatically if it exists as .bai or .bti. See \'bamtools help index\' for more details on creating one", "", m_settings->HasRegion, m_settings->Region, FilterOpts); + Options::AddValueOption("-region", "REGION", "genomic region. Index file is required and is read automatically if it exists as .bai or .bti. 
See \'bamtools help index\' for more details on creating one", "", m_settings->HasRegion, m_settings->Region, FilterOpts); } CountTool::~CountTool(void) { @@ -79,114 +74,82 @@ int CountTool::Run(int argc, char* argv[]) { // parse command line arguments Options::Parse(argc, argv, 1); + // if no '-in' args supplied, default to stdin if ( !m_settings->HasInput ) m_settings->InputFiles.push_back(Options::StandardIn()); + // open reader without index BamMultiReader reader; reader.Open(m_settings->InputFiles, false, true); // alignment counter + BamAlignment al; int alignmentCount(0); - // set up error handling - ostringstream errorStream(""); - bool foundError(false); - // if no region specified, count entire file if ( !m_settings->HasRegion ) { - BamAlignment al; while ( reader.GetNextAlignmentCore(al) ) ++alignmentCount; } - // more complicated - region specified + // otherwise attempt to use region as constraint else { + // if region string parses OK BamRegion region; if ( Utilities::ParseRegionString(m_settings->Region, reader, region) ) { - // check if there are index files *.bai/*.bti corresponding to the input files - bool hasDefaultIndex = false; - bool hasBamtoolsIndex = false; - bool hasNoIndex = false; - int defaultIndexCount = 0; - int bamtoolsIndexCount = 0; - for (vector::const_iterator f = m_settings->InputFiles.begin(); f != m_settings->InputFiles.end(); ++f) { - - if ( Utilities::FileExists(*f + ".bai") ) { - hasDefaultIndex = true; - ++defaultIndexCount; - } - - if ( Utilities::FileExists(*f + ".bti") ) { - hasBamtoolsIndex = true; - ++bamtoolsIndexCount; - } - - if ( !hasDefaultIndex && !hasBamtoolsIndex ) { - hasNoIndex = true; - cerr << "*WARNING - could not find index file for " << *f - << ", parsing whole file(s) to get alignment counts for target region" - << " (could be slow)" << endl; - break; - } - } + // attempt to re-open reader with index files + reader.Close(); + bool openedOK = reader.Open(m_settings->InputFiles, true, true ); - // 
determine if index file types are heterogeneous - bool hasDifferentIndexTypes = false; - if ( defaultIndexCount > 0 && bamtoolsIndexCount > 0 ) { - hasDifferentIndexTypes = true; - cerr << "*WARNING - different index file formats found" - << ", parsing whole file(s) to get alignment counts for target region" - << " (could be slow)" << endl; + // if error + if ( !openedOK ) { + cerr << "ERROR: Could not open input BAM file(s)... Aborting." << endl; + return 1; } - // if any input file has no index, or if input files use different index formats - // can't use BamMultiReader to jump directly (**for now**) - if ( hasNoIndex || hasDifferentIndexTypes ) { - - // read through sequentially, counting all overlapping reads - BamAlignment al; + // if index data available, we can use SetRegion + if ( reader.IsIndexLoaded() ) { + + // attempt to use SetRegion(), if failed report error + if ( !reader.SetRegion(region.LeftRefID, region.LeftPosition, region.RightRefID, region.RightPosition) ) { + cerr << "ERROR: Region requested, but could not set BamReader region to REGION: " << m_settings->Region << " Aborting." << endl; + reader.Close(); + return 1; + } + + // everything checks out, just iterate through specified region, counting alignments + while ( reader.GetNextAlignmentCore(al) ) + ++alignmentCount; + } + + // no index data available, we have to iterate through until we + // find overlapping alignments + else { while( reader.GetNextAlignmentCore(al) ) { if ( (al.RefID >= region.LeftRefID) && ( (al.Position + al.Length) >= region.LeftPosition ) && - (al.RefID <= region.RightRefID) && ( al.Position <= region.RightPosition) ) + (al.RefID <= region.RightRefID) && ( al.Position <= region.RightPosition) ) { ++alignmentCount; } } } - - // has index file for each input file (and same format) - else { - - // this is kind of a hack...? 
- BamMultiReader reader; - reader.Open(m_settings->InputFiles, true, true, hasDefaultIndex ); - - if ( !reader.SetRegion(region.LeftRefID, region.LeftPosition, region.RightRefID, region.RightPosition) ) { - foundError = true; - errorStream << "Could not set BamReader region to REGION: " << m_settings->Region << endl; - } else { - BamAlignment al; - while ( reader.GetNextAlignmentCore(al) ) - ++alignmentCount; - } - } - - } else { - foundError = true; - errorStream << "Could not parse REGION: " << m_settings->Region << endl; - errorStream << "Be sure REGION is in valid format (see README) and that coordinates are valid for selected references" << endl; + } + + // error parsing REGION string + else { + cerr << "ERROR: Could not parse REGION - " << m_settings->Region << endl; + cerr << "Be sure REGION is in valid format (see README) and that coordinates are valid for selected references" << endl; + reader.Close(); + return 1; } } - - // print errors OR results - if ( foundError ) - cerr << errorStream.str() << endl; - else - cout << alignmentCount << endl; + + // print results + cout << alignmentCount << endl; // clean & exit reader.Close(); - return (int)foundError; + return 0; } diff --git a/src/toolkit/bamtools_filter.cpp b/src/toolkit/bamtools_filter.cpp index 2249022..5c8f338 100644 --- a/src/toolkit/bamtools_filter.cpp +++ b/src/toolkit/bamtools_filter.cpp @@ -3,42 +3,209 @@ // Marth Lab, Department of Biology, Boston College // All rights reserved. // --------------------------------------------------------------------------- -// Last modified: 1 June 2010 +// Last modified: 31 August 2010 // --------------------------------------------------------------------------- // Filters a single BAM file (or filters multiple BAM files and merges) // according to some user-specified criteria. 
// *************************************************************************** +// std includes +#include #include +#include #include #include +// BamTools includes #include "bamtools_filter.h" +#include "bamtools_filter_engine.h" #include "bamtools_options.h" +#include "bamtools_utilities.h" #include "BamReader.h" #include "BamMultiReader.h" +#include "BamWriter.h" + +//JsonCPP includes +#include "jsoncpp/json.h" using namespace std; using namespace BamTools; +using namespace Json; + +namespace BamTools { + +// ------------------------------- +// string literal constants + +// property names +const string ALIGNMENTFLAG_PROPERTY = "alignmentFlag"; +const string INSERTSIZE_PROPERTY = "insertSize"; +const string ISDUPLICATE_PROPERTY = "isDuplicate"; +const string ISFAILEDQC_PROPERTY = "isFailedQC"; +const string ISFIRSTMATE_PROPERTY = "isFirstMate"; +const string ISMAPPED_PROPERTY = "isMapped"; +const string ISMATEMAPPED_PROPERTY = "isMateMapped"; +const string ISMATEREVERSESTRAND_PROPERTY = "isMateReverseStrand"; +const string ISPAIRED_PROPERTY = "isPaired"; +const string ISPRIMARYALIGNMENT_PROPERTY = "isPrimaryAlignment"; +const string ISPROPERPAIR_PROPERTY = "isProperPair"; +const string ISREVERSESTRAND_PROPERTY = "isReverseStrand"; +const string ISSECONDMATE_PROPERTY = "isSecondMate"; +const string MAPQUALITY_PROPERTY = "mapQuality"; +const string MATEPOSITION_PROPERTY = "matePosition"; +const string MATEREFERENCE_PROPERTY = "mateReference"; +const string NAME_PROPERTY = "name"; +const string POSITION_PROPERTY = "position"; +const string QUERYBASES_PROPERTY = "queryBases"; +const string REFERENCE_PROPERTY = "reference"; + +// boolalpha +const string TRUE_STR = "true"; +const string FALSE_STR = "false"; + +} // namespace BamTools + +// --------------------------------------------- +// FilterToolPrivate declaration + +class FilterTool::FilterToolPrivate { + + // ctor & dtor + public: + FilterToolPrivate(FilterTool::FilterSettings* settings); + 
~FilterToolPrivate(void); + + // 'public' interface + public: + bool Run(void); + + // internal methods + private: + bool AddPropertyTokensToFilter(const string& filterName, const map& propertyTokens); + bool CheckAlignment(const BamAlignment& al); + const string GetScriptContents(void); + void InitProperties(void); + bool ParseCommandLine(void); + bool ParseFilterObject(const string& filterName, const Json::Value& filterObject); + bool ParseScript(void); + bool SetupFilters(void); + + // data members + private: + vector m_propertyNames; + FilterTool::FilterSettings* m_settings; + RefVector m_references; +}; // --------------------------------------------- // FilterSettings implementation struct FilterTool::FilterSettings { + // ---------------------------------- + // IO opts + // flags bool HasInputBamFilename; bool HasOutputBamFilename; - + bool HasRegion; + bool HasScriptFilename; + bool IsForceCompression; + // filenames vector InputFiles; string OutputFilename; + string Region; + string ScriptFilename; + + // ----------------------------------- + // General filter opts + + // flags + bool HasAlignmentFlagFilter; + bool HasInsertSizeFilter; + bool HasMapQualityFilter; + bool HasNameFilter; + bool HasQueryBasesFilter; +// bool HasTagFilters; + + // filters + string AlignmentFlagFilter; + string InsertSizeFilter; + string NameFilter; + string MapQualityFilter; + string QueryBasesFilter; + +// vector TagFilters; + + // ----------------------------------- + // AlignmentFlag filter opts + + // flags + bool HasIsDuplicateFilter; + bool HasIsFailedQCFilter; + bool HasIsFirstMateFilter; + bool HasIsMappedFilter; + bool HasIsMateMappedFilter; + bool HasIsMateReverseStrandFilter; + bool HasIsPairedFilter; + bool HasIsPrimaryAlignmentFilter; + bool HasIsProperPairFilter; + bool HasIsReverseStrandFilter; + bool HasIsSecondMateFilter; + + // filters + string IsDuplicateFilter; + string IsFailedQCFilter; + string IsFirstMateFilter; + string IsMappedFilter; + string 
IsMateMappedFilter; + string IsMateReverseStrandFilter; + string IsPairedFilter; + string IsPrimaryAlignmentFilter; + string IsProperPairFilter; + string IsReverseStrandFilter; + string IsSecondMateFilter; + + // --------------------------------- // constructor + FilterSettings(void) : HasInputBamFilename(false) , HasOutputBamFilename(false) + , HasRegion(false) + , HasScriptFilename(false) + , IsForceCompression(false) , OutputFilename(Options::StandardOut()) + , HasAlignmentFlagFilter(false) + , HasInsertSizeFilter(false) + , HasMapQualityFilter(false) + , HasNameFilter(false) + , HasQueryBasesFilter(false) +// , HasTagFilters(false) + , HasIsDuplicateFilter(false) + , HasIsFailedQCFilter(false) + , HasIsFirstMateFilter(false) + , HasIsMappedFilter(false) + , HasIsMateMappedFilter(false) + , HasIsMateReverseStrandFilter(false) + , HasIsPairedFilter(false) + , HasIsPrimaryAlignmentFilter(false) + , HasIsProperPairFilter(false) + , HasIsReverseStrandFilter(false) + , HasIsSecondMateFilter(false) + , IsDuplicateFilter(TRUE_STR) + , IsFailedQCFilter(TRUE_STR) + , IsFirstMateFilter(TRUE_STR) + , IsMappedFilter(TRUE_STR) + , IsMateMappedFilter(TRUE_STR) + , IsMateReverseStrandFilter(TRUE_STR) + , IsPairedFilter(TRUE_STR) + , IsPrimaryAlignmentFilter(TRUE_STR) + , IsProperPairFilter(TRUE_STR) + , IsReverseStrandFilter(TRUE_STR) + , IsSecondMateFilter(TRUE_STR) { } }; @@ -48,19 +215,47 @@ struct FilterTool::FilterSettings { FilterTool::FilterTool(void) : AbstractTool() , m_settings(new FilterSettings) + , m_impl(0) { // set program details - Options::SetProgramInfo("bamtools filter", "filters BAM file(s)", "-in [-in ... ] -out "); + Options::SetProgramInfo("bamtools filter", "filters BAM file(s)", "-in [-in ... 
] -out -region [ [-script HasInputBamFilename, m_settings->InputFiles, IO_Opts, Options::StandardIn()); - Options::AddValueOption("-out", "BAM filename", "the output BAM file", "", m_settings->HasOutputBamFilename, m_settings->OutputFilename, IO_Opts, Options::StandardOut()); + Options::AddValueOption("-in", "BAM filename", "the input BAM file(s)", "", m_settings->HasInputBamFilename, m_settings->InputFiles, IO_Opts, Options::StandardIn()); + Options::AddValueOption("-out", "BAM filename", "the output BAM file", "", m_settings->HasOutputBamFilename, m_settings->OutputFilename, IO_Opts, Options::StandardOut()); + Options::AddValueOption("-region", "REGION", "only read data from this genomic region (see README for more details)", "", m_settings->HasRegion, m_settings->Region, IO_Opts); + Options::AddValueOption("-script", "filename", "the filter script file (see README for more details)", "", m_settings->HasScriptFilename, m_settings->ScriptFilename, IO_Opts); + Options::AddOption("-forceCompression", "if results are sent to stdout (like when piping to another tool), default behavior is to leave output uncompressed. 
Use this flag to override and force compression", m_settings->IsForceCompression, IO_Opts); + + OptionGroup* FilterOpts = Options::CreateOptionGroup("General Filters"); + Options::AddValueOption("-alignmentFlag", "int", "keep reads with this *exact* alignment flag (for more detailed queries, see below)", "", m_settings->HasAlignmentFlagFilter, m_settings->AlignmentFlagFilter, FilterOpts); + Options::AddValueOption("-insertSize", "int", "keep reads with insert size that mathces pattern", "", m_settings->HasInsertSizeFilter, m_settings->InsertSizeFilter, FilterOpts); + Options::AddValueOption("-mapQuality", "[0-255]", "keep reads with map quality that matches pattern", "", m_settings->HasMapQualityFilter, m_settings->MapQualityFilter, FilterOpts); + Options::AddValueOption("-name", "string", "keep reads with name that matches pattern", "", m_settings->HasNameFilter, m_settings->NameFilter, FilterOpts); + Options::AddValueOption("-queryBases", "string", "keep reads with motif that mathces pattern", "", m_settings->HasQueryBasesFilter, m_settings->QueryBasesFilter, FilterOpts); + +// Options::AddValueOption("-tag", "TAG:VALUE", "keep reads with this key=>value pair. 
If multiple tags are given, reads must match all", "", m_settings->HasTagFilters, m_settings->TagFilters, FilterOpts); + + OptionGroup* AlignmentFlagOpts = Options::CreateOptionGroup("Alignment Flag Filters"); + Options::AddValueOption("-isDuplicate", "true/false", "keep only alignments that are marked as duplicate?", "", m_settings->HasIsDuplicateFilter, m_settings->IsDuplicateFilter, AlignmentFlagOpts, TRUE_STR); + Options::AddValueOption("-isFailedQC", "true/false", "keep only alignments that failed QC?", "", m_settings->HasIsFailedQCFilter, m_settings->IsFailedQCFilter, AlignmentFlagOpts, TRUE_STR); + Options::AddValueOption("-isFirstMate", "true/false", "keep only alignments marked as first mate?", "", m_settings->HasIsFirstMateFilter, m_settings->IsFirstMateFilter, AlignmentFlagOpts, TRUE_STR); + Options::AddValueOption("-isMapped", "true/false", "keep only alignments that were mapped?", "", m_settings->HasIsMappedFilter, m_settings->IsMappedFilter, AlignmentFlagOpts, TRUE_STR); + Options::AddValueOption("-isMateMapped", "true/false", "keep only alignments with mates that mapped", "", m_settings->HasIsMateMappedFilter, m_settings->IsMateMappedFilter, AlignmentFlagOpts, TRUE_STR); + Options::AddValueOption("-isMateReverseStrand", "true/false", "keep only alignments with mate on reverese strand?", "", m_settings->HasIsMateReverseStrandFilter, m_settings->IsMateReverseStrandFilter, AlignmentFlagOpts, TRUE_STR); + Options::AddValueOption("-isPaired", "true/false", "keep only alignments that were sequenced as paired?","", m_settings->HasIsPairedFilter, m_settings->IsPairedFilter, AlignmentFlagOpts, TRUE_STR); + Options::AddValueOption("-isPrimaryAlignment", "true/false", "keep only alignments marked as primary?", "", m_settings->HasIsPrimaryAlignmentFilter, m_settings->IsPrimaryAlignmentFilter, AlignmentFlagOpts, TRUE_STR); + Options::AddValueOption("-isProperPair", "true/false", "keep only alignments that passed PE resolution?", "", 
m_settings->HasIsProperPairFilter, m_settings->IsProperPairFilter, AlignmentFlagOpts, TRUE_STR); + Options::AddValueOption("-isReverseStrand", "true/false", "keep only alignments on reverse strand?", "", m_settings->HasIsReverseStrandFilter, m_settings->IsReverseStrandFilter, AlignmentFlagOpts, TRUE_STR); + Options::AddValueOption("-isSecondMate", "true/false", "keep only alignments marked as second mate?", "", m_settings->HasIsSecondMateFilter, m_settings->IsSecondMateFilter, AlignmentFlagOpts, TRUE_STR); } FilterTool::~FilterTool(void) { delete m_settings; m_settings = 0; + + delete m_impl; + m_impl = 0; } int FilterTool::Help(void) { @@ -72,18 +267,455 @@ int FilterTool::Run(int argc, char* argv[]) { // parse command line arguments Options::Parse(argc, argv, 1); + + // run internal FilterTool implementation, return success/fail + m_impl = new FilterToolPrivate(m_settings); + + if ( m_impl->Run() ) return 0; + else return 1; +} + +// --------------------------------------------- +// FilterToolPrivate implementation + +// constructor +FilterTool::FilterToolPrivate::FilterToolPrivate(FilterTool::FilterSettings* settings) + : m_settings(settings) +{ } + +// destructor +FilterTool::FilterToolPrivate::~FilterToolPrivate(void) { } + +bool FilterTool::FilterToolPrivate::AddPropertyTokensToFilter(const string& filterName, const map& propertyTokens) { + + + // dummy temp values for token parsing + bool boolValue; + int32_t int32Value; + uint16_t uint16Value; + uint32_t uint32Value; + string stringValue; + PropertyFilterValue::ValueCompareType type; + + // iterate over property token map + map::const_iterator mapIter = propertyTokens.begin(); + map::const_iterator mapEnd = propertyTokens.end(); + for ( ; mapIter != mapEnd; ++mapIter ) { + + const string& propertyName = (*mapIter).first; + const string& token = (*mapIter).second; + + // ------------------------------ + // convert token to value & compare type + // then add to filter engine + + // bool conversion + if ( 
propertyName == ISDUPLICATE_PROPERTY || + propertyName == ISFAILEDQC_PROPERTY || + propertyName == ISFIRSTMATE_PROPERTY || + propertyName == ISMAPPED_PROPERTY || + propertyName == ISMATEMAPPED_PROPERTY || + propertyName == ISMATEREVERSESTRAND_PROPERTY || + propertyName == ISPAIRED_PROPERTY || + propertyName == ISPRIMARYALIGNMENT_PROPERTY || + propertyName == ISPROPERPAIR_PROPERTY || + propertyName == ISREVERSESTRAND_PROPERTY || + propertyName == ISSECONDMATE_PROPERTY + ) + { + FilterEngine::parseToken(token, boolValue, type); + FilterEngine::setProperty(filterName, propertyName, boolValue, type); + } + + // int32_t conversion + else if ( propertyName == INSERTSIZE_PROPERTY || + propertyName == MATEPOSITION_PROPERTY || + propertyName == POSITION_PROPERTY + ) + { + FilterEngine::parseToken(token, int32Value, type); + FilterEngine::setProperty(filterName, propertyName, int32Value, type); + } + + // uint16_t conversion + else if ( propertyName == MAPQUALITY_PROPERTY ) + { + FilterEngine::parseToken(token, uint16Value, type); + FilterEngine::setProperty(filterName, propertyName, uint16Value, type); + } + + // uint32_t conversion + else if ( propertyName == ALIGNMENTFLAG_PROPERTY ) + { + FilterEngine::parseToken(token, uint32Value, type); + FilterEngine::setProperty(filterName, propertyName, uint32Value, type); + } + + // string conversion + else if ( propertyName == MATEREFERENCE_PROPERTY || + propertyName == NAME_PROPERTY || + propertyName == QUERYBASES_PROPERTY || + propertyName == REFERENCE_PROPERTY + ) + { + FilterEngine::parseToken(token, stringValue, type); + FilterEngine::setProperty(filterName, propertyName, stringValue, type); + } + + // else unknown property + else { + cerr << "Unknown property: " << propertyName << "!" 
<< endl; + return false; + } + } + return true; +} + +bool FilterTool::FilterToolPrivate::CheckAlignment(const BamAlignment& al) { + + bool keepAlignment = true; + + // only consider properties that are actually enabled + // iterate over these enabled properties + const vector enabledProperties = FilterEngine::enabledPropertyNames(); + vector::const_iterator propIter = enabledProperties.begin(); + vector::const_iterator propEnd = enabledProperties.end(); + for ( ; propIter != propEnd; ++propIter ) { + + // check alignment data field depending on propertyName + const string& propertyName = (*propIter); + if ( propertyName == ALIGNMENTFLAG_PROPERTY ) keepAlignment &= FilterEngine::check(ALIGNMENTFLAG_PROPERTY, al.AlignmentFlag); + else if ( propertyName == INSERTSIZE_PROPERTY ) keepAlignment &= FilterEngine::check(INSERTSIZE_PROPERTY, al.InsertSize); + else if ( propertyName == ISDUPLICATE_PROPERTY ) keepAlignment &= FilterEngine::check(ISDUPLICATE_PROPERTY, al.IsDuplicate()); + else if ( propertyName == ISFAILEDQC_PROPERTY ) keepAlignment &= FilterEngine::check(ISFAILEDQC_PROPERTY, al.IsFailedQC()); + else if ( propertyName == ISFIRSTMATE_PROPERTY ) keepAlignment &= FilterEngine::check(ISFIRSTMATE_PROPERTY, al.IsFirstMate()); + else if ( propertyName == ISMAPPED_PROPERTY ) keepAlignment &= FilterEngine::check(ISMAPPED_PROPERTY, al.IsMapped()); + else if ( propertyName == ISMATEMAPPED_PROPERTY ) keepAlignment &= FilterEngine::check(ISMATEMAPPED_PROPERTY, al.IsMateMapped()); + else if ( propertyName == ISMATEREVERSESTRAND_PROPERTY ) keepAlignment &= FilterEngine::check(ISMATEREVERSESTRAND_PROPERTY, al.IsMateReverseStrand()); + else if ( propertyName == ISPAIRED_PROPERTY ) keepAlignment &= FilterEngine::check(ISPAIRED_PROPERTY, al.IsPaired()); + else if ( propertyName == ISPRIMARYALIGNMENT_PROPERTY ) keepAlignment &= FilterEngine::check(ISPRIMARYALIGNMENT_PROPERTY, al.IsPrimaryAlignment()); + else if ( propertyName == ISPROPERPAIR_PROPERTY ) keepAlignment &= 
FilterEngine::check(ISPROPERPAIR_PROPERTY, al.IsProperPair()); + else if ( propertyName == ISREVERSESTRAND_PROPERTY ) keepAlignment &= FilterEngine::check(ISREVERSESTRAND_PROPERTY, al.IsReverseStrand()); + else if ( propertyName == ISSECONDMATE_PROPERTY ) keepAlignment &= FilterEngine::check(ISSECONDMATE_PROPERTY, al.IsSecondMate()); + else if ( propertyName == MAPQUALITY_PROPERTY ) keepAlignment &= FilterEngine::check(MAPQUALITY_PROPERTY, al.MapQuality); + else if ( propertyName == MATEPOSITION_PROPERTY ) keepAlignment &= ( al.IsPaired() && al.IsMateMapped() && FilterEngine::check(MATEPOSITION_PROPERTY, al.MateRefID) ); + else if ( propertyName == MATEREFERENCE_PROPERTY ) { + if ( !al.IsPaired() || !al.IsMateMapped() ) return false; + BAMTOOLS_ASSERT_MESSAGE( (al.MateRefID>=0 && (al.MateRefID<(int)m_references.size())), "Invalid MateRefID"); + const string& refName = m_references.at(al.MateRefID).RefName; + keepAlignment &= FilterEngine::check(MATEREFERENCE_PROPERTY, refName); + } + else if ( propertyName == NAME_PROPERTY ) keepAlignment &= FilterEngine::check(NAME_PROPERTY, al.Name); + else if ( propertyName == POSITION_PROPERTY ) keepAlignment &= FilterEngine::check(POSITION_PROPERTY, al.Position); + else if ( propertyName == QUERYBASES_PROPERTY ) keepAlignment &= FilterEngine::check(QUERYBASES_PROPERTY, al.QueryBases); + else if ( propertyName == REFERENCE_PROPERTY ) { + BAMTOOLS_ASSERT_MESSAGE( (al.RefID>=0 && (al.RefID<(int)m_references.size())), "Invalid RefID"); + const string& refName = m_references.at(al.RefID).RefName; + keepAlignment &= FilterEngine::check(REFERENCE_PROPERTY, refName); + } + else BAMTOOLS_ASSERT_MESSAGE( false, "Unknown property"); + + // if alignment fails at ANY point, just quit and return false + if ( !keepAlignment ) return false; + } + // return success (should still be true at this point) + return keepAlignment; +} + +const string FilterTool::FilterToolPrivate::GetScriptContents(void) { + + // open script for reading + FILE* 
inFile = fopen(m_settings->ScriptFilename.c_str(), "rb"); + if ( !inFile ) { + cerr << "FilterTool error: Could not open script: " << m_settings->ScriptFilename << " for reading" << endl; + return false; + } + + // read in entire script contents + char buffer[1024]; + ostringstream docStream(""); + while ( true ) { + + // peek ahead, make sure there is data available + char ch = fgetc(inFile); + ungetc(ch, inFile); + if( feof(inFile) ) break; + + // read next block of data + if ( fgets(buffer, 1024, inFile) == 0 ) { + cerr << "FilterTool error : could not read from script" << endl; + return false; + } + + docStream << buffer; + } + + // close script file + fclose(inFile); + + // import buffer contents to document, return + string document = docStream.str(); + return document; +} + +void FilterTool::FilterToolPrivate::InitProperties(void) { + + // store property names in vector + m_propertyNames.push_back(ALIGNMENTFLAG_PROPERTY); + m_propertyNames.push_back(INSERTSIZE_PROPERTY); + m_propertyNames.push_back(ISDUPLICATE_PROPERTY); + m_propertyNames.push_back(ISFAILEDQC_PROPERTY); + m_propertyNames.push_back(ISFIRSTMATE_PROPERTY); + m_propertyNames.push_back(ISMAPPED_PROPERTY); + m_propertyNames.push_back(ISMATEMAPPED_PROPERTY); + m_propertyNames.push_back(ISMATEREVERSESTRAND_PROPERTY); + m_propertyNames.push_back(ISPAIRED_PROPERTY); + m_propertyNames.push_back(ISPRIMARYALIGNMENT_PROPERTY); + m_propertyNames.push_back(ISPROPERPAIR_PROPERTY); + m_propertyNames.push_back(ISREVERSESTRAND_PROPERTY); + m_propertyNames.push_back(ISSECONDMATE_PROPERTY); + m_propertyNames.push_back(MAPQUALITY_PROPERTY); + m_propertyNames.push_back(MATEPOSITION_PROPERTY); + m_propertyNames.push_back(MATEREFERENCE_PROPERTY); + m_propertyNames.push_back(NAME_PROPERTY); + m_propertyNames.push_back(POSITION_PROPERTY); + m_propertyNames.push_back(QUERYBASES_PROPERTY); + m_propertyNames.push_back(REFERENCE_PROPERTY); + + // add vector contents to FilterEngine + vector::const_iterator propertyNameIter 
= m_propertyNames.begin(); + vector::const_iterator propertyNameEnd = m_propertyNames.end(); + for ( ; propertyNameIter != propertyNameEnd; ++propertyNameIter ) + FilterEngine::addProperty((*propertyNameIter)); +} + +bool FilterTool::FilterToolPrivate::ParseCommandLine(void) { + + // add a rule set to filter engine + const string CMD = "COMMAND_LINE"; + FilterEngine::addFilter(CMD); + + // map property names to command line args + map propertyTokens; + if ( m_settings->HasAlignmentFlagFilter ) propertyTokens.insert( make_pair(ALIGNMENTFLAG_PROPERTY, m_settings->AlignmentFlagFilter) ); + if ( m_settings->HasInsertSizeFilter ) propertyTokens.insert( make_pair(INSERTSIZE_PROPERTY, m_settings->InsertSizeFilter) ); + if ( m_settings->HasIsDuplicateFilter ) propertyTokens.insert( make_pair(ISDUPLICATE_PROPERTY, m_settings->IsDuplicateFilter) ); + if ( m_settings->HasIsFailedQCFilter ) propertyTokens.insert( make_pair(ISFAILEDQC_PROPERTY, m_settings->IsFailedQCFilter) ); + if ( m_settings->HasIsFirstMateFilter ) propertyTokens.insert( make_pair(ISFIRSTMATE_PROPERTY, m_settings->IsFirstMateFilter) ); + if ( m_settings->HasIsMappedFilter ) propertyTokens.insert( make_pair(ISMAPPED_PROPERTY, m_settings->IsMappedFilter) ); + if ( m_settings->HasIsMateMappedFilter ) propertyTokens.insert( make_pair(ISMATEMAPPED_PROPERTY, m_settings->IsMateMappedFilter) ); + if ( m_settings->HasIsMateReverseStrandFilter ) propertyTokens.insert( make_pair(ISMATEREVERSESTRAND_PROPERTY, m_settings->IsMateReverseStrandFilter) ); + if ( m_settings->HasIsPairedFilter ) propertyTokens.insert( make_pair(ISPAIRED_PROPERTY, m_settings->IsPairedFilter) ); + if ( m_settings->HasIsPrimaryAlignmentFilter ) propertyTokens.insert( make_pair(ISPRIMARYALIGNMENT_PROPERTY, m_settings->IsPrimaryAlignmentFilter) ); + if ( m_settings->HasIsProperPairFilter ) propertyTokens.insert( make_pair(ISPROPERPAIR_PROPERTY, m_settings->IsProperPairFilter) ); + if ( m_settings->HasIsReverseStrandFilter ) propertyTokens.insert( 
make_pair(ISREVERSESTRAND_PROPERTY, m_settings->IsReverseStrandFilter) ); + if ( m_settings->HasIsSecondMateFilter ) propertyTokens.insert( make_pair(ISSECONDMATE_PROPERTY, m_settings->IsSecondMateFilter) ); + if ( m_settings->HasMapQualityFilter ) propertyTokens.insert( make_pair(MAPQUALITY_PROPERTY, m_settings->MapQualityFilter) ); + if ( m_settings->HasNameFilter ) propertyTokens.insert( make_pair(NAME_PROPERTY, m_settings->NameFilter) ); + if ( m_settings->HasQueryBasesFilter ) propertyTokens.insert( make_pair(QUERYBASES_PROPERTY, m_settings->QueryBasesFilter) ); + + // send add these properties to filter set "COMMAND_LINE" + return AddPropertyTokensToFilter(CMD, propertyTokens); +} + +bool FilterTool::FilterToolPrivate::ParseFilterObject(const string& filterName, const Json::Value& filterObject) { + + // filter object parsing variables + Json::Value null(Json::nullValue); + Json::Value propertyValue; + + // store results + map propertyTokens; + + // iterate over known properties + vector::const_iterator propertyNameIter = m_propertyNames.begin(); + vector::const_iterator propertyNameEnd = m_propertyNames.end(); + for ( ; propertyNameIter != propertyNameEnd; ++propertyNameIter ) { + const string& propertyName = (*propertyNameIter); + + // if property defined in filter, add to token list + propertyValue = filterObject.get(propertyName, null); + if ( propertyValue != null ) + propertyTokens.insert( make_pair(propertyName, propertyValue.asString()) ); + } + + // add this filter to engin + FilterEngine::addFilter(filterName); + + // add token list to this filter + return AddPropertyTokensToFilter(filterName, propertyTokens); +} + +bool FilterTool::FilterToolPrivate::ParseScript(void) { + + // read in script contents from file + const string document = GetScriptContents(); + + // set up JsonCPP reader and attempt to parse script + Json::Value root; + Json::Reader reader; + if ( !reader.parse(document, root) ) { + // use built-in error reporting mechanism to alert 
user what was wrong with the script + cerr << "Failed to parse configuration\n" << reader.getFormatedErrorMessages(); + return false; + } + + // initialize return status + bool success = true; + + // see if root object contains multiple filters + const Json::Value filters = root["filters"]; + if ( !filters.isNull() ) { + + // iterate over any filters found + int filterIndex = 0; + Json::Value::const_iterator filtersIter = filters.begin(); + Json::Value::const_iterator filtersEnd = filters.end(); + for ( ; filtersIter != filtersEnd; ++filtersIter, ++filterIndex ) { + Json::Value filter = (*filtersIter); + + // convert filter index to string + string filterName; + + // if id tag supplied + const Json::Value id = filter["id"]; + if ( !id.isNull() ) { + filterName = id.asString(); + } + + // use array index + else { + stringstream convert; + convert << filterIndex; + filterName = convert.str(); + } + + // create & parse filter + success &= ParseFilterObject(filterName, filter); + } + + // see if user defined "rule" for these filters + const Json::Value rule = root["rule"]; + if ( !rule.isNull() ) { + cout << "found rule: " << rule.asString() << endl; + } else { + cout << "no rule found!" 
<< endl; + } + + return success; + } + + // otherwise, root is the only filter (just contains properties) + // create & parse filter named "ROOT" + else success = ParseFilterObject("ROOT", root); + + // return success/failure + return success; +} + + +bool FilterTool::FilterToolPrivate::Run(void) { + // set to default input if none provided if ( !m_settings->HasInputBamFilename ) m_settings->InputFiles.push_back(Options::StandardIn()); - // open files + // initialize defined properties & user-specified filters + // quit if failed + if ( !SetupFilters() ) return 1; + + // open reader without index BamMultiReader reader; - reader.Open(m_settings->InputFiles, false); - - // do filtering + reader.Open(m_settings->InputFiles, false, true); + const string headerText = reader.GetHeaderText(); + m_references = reader.GetReferenceData(); + // open writer + BamWriter writer; + bool writeUncompressed = ( m_settings->OutputFilename == Options::StandardOut() && !m_settings->IsForceCompression ); + writer.Open(m_settings->OutputFilename, headerText, m_references, writeUncompressed); + + BamAlignment al; + + // if no region specified, filter entire file + if ( !m_settings->HasRegion ) { + while ( reader.GetNextAlignment(al) ) { + if ( CheckAlignment(al) ) + writer.SaveAlignment(al); + } + } + + // otherwise attempt to use region as constraint + else { + + // if region string parses OK + BamRegion region; + if ( Utilities::ParseRegionString(m_settings->Region, reader, region) ) { + + // attempt to re-open reader with index files + reader.Close(); + bool openedOK = reader.Open(m_settings->InputFiles, true, true ); + + // if error + if ( !openedOK ) { + cerr << "ERROR: Could not open input BAM file(s)... Aborting." 
<< endl; + return 1; + } + + // if index data available, we can use SetRegion + if ( reader.IsIndexLoaded() ) { + + // attempt to use SetRegion(), if failed report error + if ( !reader.SetRegion(region.LeftRefID, region.LeftPosition, region.RightRefID, region.RightPosition) ) { + cerr << "ERROR: Region requested, but could not set BamReader region to REGION: " << m_settings->Region << " Aborting." << endl; + reader.Close(); + return 1; + } + + // everything checks out, just iterate through specified region, filtering alignments + while ( reader.GetNextAlignmentCore(al) ) + if ( CheckAlignment(al) ) + writer.SaveAlignment(al); + } + + // no index data available, we have to iterate through until we + // find overlapping alignments + else { + while( reader.GetNextAlignmentCore(al) ) { + if ( (al.RefID >= region.LeftRefID) && ( (al.Position + al.Length) >= region.LeftPosition ) && + (al.RefID <= region.RightRefID) && ( al.Position <= region.RightPosition) ) + { + if ( CheckAlignment(al) ) + writer.SaveAlignment(al); + } + } + } + } + + // error parsing REGION string + else { + cerr << "ERROR: Could not parse REGION - " << m_settings->Region << endl; + cerr << "Be sure REGION is in valid format (see README) and that coordinates are valid for selected references" << endl; + reader.Close(); + return 1; + } + } + // clean up & exit reader.Close(); + writer.Close(); return 0; -} \ No newline at end of file +} + +bool FilterTool::FilterToolPrivate::SetupFilters(void) { + + // add known properties to FilterEngine + InitProperties(); + + // parse script for filter rules, if given + if ( m_settings->HasScriptFilename ) return ParseScript(); + + // otherwise check command line for filters + else return ParseCommandLine(); +} diff --git a/src/toolkit/bamtools_filter.h b/src/toolkit/bamtools_filter.h index fe8728b..2abb0e7 100644 --- a/src/toolkit/bamtools_filter.h +++ b/src/toolkit/bamtools_filter.h @@ -3,7 +3,7 @@ // Marth Lab, Department of Biology, Boston College // All rights 
reserved. // --------------------------------------------------------------------------- -// Last modified: 2 June 2010 +// Last modified: 28 August 2010 // --------------------------------------------------------------------------- // Filters a single BAM file (or filters multiple BAM files and merges) // according to some user-specified criteria. @@ -29,6 +29,9 @@ class FilterTool : public AbstractTool { private: struct FilterSettings; FilterSettings* m_settings; + + struct FilterToolPrivate; + FilterToolPrivate* m_impl; }; } // namespace BamTools diff --git a/src/toolkit/bamtools_index.cpp b/src/toolkit/bamtools_index.cpp index 41dd5c7..b1e4bc3 100644 --- a/src/toolkit/bamtools_index.cpp +++ b/src/toolkit/bamtools_index.cpp @@ -3,7 +3,7 @@ // Marth Lab, Department of Biology, Boston College // All rights reserved. // --------------------------------------------------------------------------- -// Last modified: 7 July 2010 +// Last modified: 2 September 2010 // --------------------------------------------------------------------------- // Creates a BAM index (".bai") file for the provided BAM file. 
// *************************************************************************** @@ -51,7 +51,7 @@ IndexTool::IndexTool(void) // set up options OptionGroup* IO_Opts = Options::CreateOptionGroup("Input & Output"); Options::AddValueOption("-in", "BAM filename", "the input BAM file", "", m_settings->HasInputBamFilename, m_settings->InputBamFilename, IO_Opts, Options::StandardIn()); - Options::AddOption("-bti", "use (non-standard) BamTools indexing scheme", m_settings->IsUsingBamtoolsIndex, IO_Opts); + Options::AddOption("-bti", "create (non-standard) BamTools index file", m_settings->IsUsingBamtoolsIndex, IO_Opts); } IndexTool::~IndexTool(void) { diff --git a/src/toolkit/bamtools_merge.cpp b/src/toolkit/bamtools_merge.cpp index dcea172..3d2d902 100644 --- a/src/toolkit/bamtools_merge.cpp +++ b/src/toolkit/bamtools_merge.cpp @@ -3,12 +3,9 @@ // Marth Lab, Department of Biology, Boston College // All rights reserved. // --------------------------------------------------------------------------- -// Last modified: 2 June 2010 +// Last modified: 7 September 2010 // --------------------------------------------------------------------------- // Merges multiple BAM files into one. -// -// ** Provide selectable region? 
eg chr2:10000..20000 -// // *************************************************************************** #include @@ -32,20 +29,22 @@ struct MergeTool::MergeSettings { // flags bool HasInputBamFilename; bool HasOutputBamFilename; -// bool HasRegion; + bool IsForceCompression; + bool HasRegion; // filenames vector InputFiles; // other parameters string OutputFilename; -// string Region; + string Region; // constructor MergeSettings(void) : HasInputBamFilename(false) , HasOutputBamFilename(false) -// , HasRegion(false) + , IsForceCompression(false) + , HasRegion(false) , OutputFilename(Options::StandardOut()) { } }; @@ -64,9 +63,10 @@ MergeTool::MergeTool(void) OptionGroup* IO_Opts = Options::CreateOptionGroup("Input & Output"); Options::AddValueOption("-in", "BAM filename", "the input BAM file(s)", "", m_settings->HasInputBamFilename, m_settings->InputFiles, IO_Opts); Options::AddValueOption("-out", "BAM filename", "the output BAM file", "", m_settings->HasOutputBamFilename, m_settings->OutputFilename, IO_Opts); + Options::AddOption("-forceCompression", "if results are sent to stdout (like when piping to another tool), default behavior is to leave output uncompressed. Use this flag to override and force compression", m_settings->IsForceCompression, IO_Opts); -// OptionGroup* FilterOpts = Options::CreateOptionGroup("Filters"); -// Options::AddValueOption("-region", "REGION", "genomic region. See README for more details", "", m_settings->HasRegion, m_settings->Region, FilterOpts); + OptionGroup* FilterOpts = Options::CreateOptionGroup("Filters"); + Options::AddValueOption("-region", "REGION", "genomic region. 
See README for more details", "", m_settings->HasRegion, m_settings->Region, FilterOpts); } MergeTool::~MergeTool(void) { @@ -85,24 +85,91 @@ int MergeTool::Run(int argc, char* argv[]) { Options::Parse(argc, argv, 1); // set to default input if none provided - if ( !m_settings->HasInputBamFilename ) m_settings->InputFiles.push_back(Options::StandardIn()); + if ( !m_settings->HasInputBamFilename ) + m_settings->InputFiles.push_back(Options::StandardIn()); - // opens the BAM files without checking for indexes + // opens the BAM files (by default without checking for indexes) BamMultiReader reader; - reader.Open(m_settings->InputFiles, false, true); - + if ( !reader.Open(m_settings->InputFiles, false, true) ) { + cerr << "ERROR: Could not open input BAM file(s)... Aborting." << endl; + return 1; + } + // retrieve header & reference dictionary info std::string mergedHeader = reader.GetHeaderText(); RefVector references = reader.GetReferenceData(); - // open BamWriter + // open writer BamWriter writer; - writer.Open(m_settings->OutputFilename, mergedHeader, references); + bool writeUncompressed = ( m_settings->OutputFilename == Options::StandardOut() && !m_settings->IsForceCompression ); + if ( !writer.Open(m_settings->OutputFilename, mergedHeader, references, writeUncompressed) ) { + cerr << "ERROR: Could not open BAM file " << m_settings->OutputFilename << " for writing... Aborting." 
<< endl; + reader.Close(); + return 1; + } + + // if no region specified, store entire contents of file(s) + if ( !m_settings->HasRegion ) { + BamAlignment al; + while ( reader.GetNextAlignmentCore(al) ) + writer.SaveAlignment(al); + } + + // otherwise attempt to use region as constraint + else { + + // if region string parses OK + BamRegion region; + if ( Utilities::ParseRegionString(m_settings->Region, reader, region) ) { - // store alignments to output file - BamAlignment bAlignment; - while (reader.GetNextAlignmentCore(bAlignment)) { - writer.SaveAlignment(bAlignment); + // attempt to re-open reader with index files + reader.Close(); + bool openedOK = reader.Open(m_settings->InputFiles, true, true ); + + // if error + if ( !openedOK ) { + cerr << "ERROR: Could not open input BAM file(s)... Aborting." << endl; + return 1; + } + + // if index data available, we can use SetRegion + if ( reader.IsIndexLoaded() ) { + + // attempt to use SetRegion(), if failed report error + if ( !reader.SetRegion(region.LeftRefID, region.LeftPosition, region.RightRefID, region.RightPosition) ) { + cerr << "ERROR: Region requested, but could not set BamReader region to REGION: " << m_settings->Region << " Aborting." 
<< endl; + reader.Close(); + return 1; + } + + // everything checks out, just iterate through specified region, storing alignments + BamAlignment al; + while ( reader.GetNextAlignmentCore(al) ) + writer.SaveAlignment(al); + } + + // no index data available, we have to iterate through until we + // find overlapping alignments + else { + BamAlignment al; + while ( reader.GetNextAlignmentCore(al) ) { + if ( (al.RefID >= region.LeftRefID) && ( (al.Position + al.Length) >= region.LeftPosition ) && + (al.RefID <= region.RightRefID) && ( al.Position <= region.RightPosition) ) + { + writer.SaveAlignment(al); + } + } + } + } + + // error parsing REGION string + else { + cerr << "ERROR: Could not parse REGION - " << m_settings->Region << endl; + cerr << "Be sure REGION is in valid format (see README) and that coordinates are valid for selected references" << endl; + reader.Close(); + writer.Close(); + return 1; + } } // clean & exit diff --git a/src/toolkit/bamtools_random.cpp b/src/toolkit/bamtools_random.cpp index 89ca92b..fe8914f 100644 --- a/src/toolkit/bamtools_random.cpp +++ b/src/toolkit/bamtools_random.cpp @@ -3,7 +3,7 @@ // Marth Lab, Department of Biology, Boston College // All rights reserved. // --------------------------------------------------------------------------- -// Last modified: 20 July 2010 (DB) +// Last modified: 3 September 2010 (DB) // --------------------------------------------------------------------------- // Grab a random subset of alignments. 
// *************************************************************************** @@ -23,8 +23,14 @@ using namespace BamTools; namespace BamTools { - // define constants - const unsigned int RANDOM_MAX_ALIGNMENT_COUNT = 10000; +// define constants +const unsigned int RANDOM_MAX_ALIGNMENT_COUNT = 10000; + +// utility methods for RandomTool +const int getRandomInt(const int& lowerBound, const int& upperBound) { + const int range = (upperBound - lowerBound) + 1; + return ( lowerBound + (int)(range * (double)rand()/((double)RAND_MAX + 1)) ); +} } // namespace BamTools @@ -38,6 +44,7 @@ struct RandomTool::RandomSettings { bool HasInput; bool HasOutput; bool HasRegion; + bool IsForceCompression; // parameters unsigned int AlignmentCount; @@ -51,7 +58,9 @@ struct RandomTool::RandomSettings { , HasInput(false) , HasOutput(false) , HasRegion(false) + , IsForceCompression(false) , AlignmentCount(RANDOM_MAX_ALIGNMENT_COUNT) + , OutputFilename(Options::StandardOut()) { } }; @@ -63,15 +72,16 @@ RandomTool::RandomTool(void) , m_settings(new RandomSettings) { // set program details - Options::SetProgramInfo("bamtools random", "grab a random subset of alignments", "[-in -in ...] [-out ] [-region ]"); + Options::SetProgramInfo("bamtools random", "grab a random subset of alignments", "[-in -in ...] [-out ] [-forceCompression] [-n] [-region ]"); // set up options OptionGroup* IO_Opts = Options::CreateOptionGroup("Input & Output"); Options::AddValueOption("-in", "BAM filename", "the input BAM file", "", m_settings->HasInput, m_settings->InputFiles, IO_Opts, Options::StandardIn()); Options::AddValueOption("-out", "BAM filename", "the output BAM file", "", m_settings->HasOutput, m_settings->OutputFilename, IO_Opts, Options::StandardOut()); + Options::AddOption("-forceCompression", "if results are sent to stdout (like when piping to another tool), default behavior is to leave output uncompressed. 
Use this flag to override and force compression", m_settings->IsForceCompression, IO_Opts); OptionGroup* FilterOpts = Options::CreateOptionGroup("Filters"); - Options::AddValueOption("-n", "count", "number of alignments to grab. Note - no duplicate checking is performed (currently)", "", m_settings->HasAlignmentCount, m_settings->AlignmentCount, FilterOpts, RANDOM_MAX_ALIGNMENT_COUNT); + Options::AddValueOption("-n", "count", "number of alignments to grab. Note - no duplicate checking is performed", "", m_settings->HasAlignmentCount, m_settings->AlignmentCount, FilterOpts, RANDOM_MAX_ALIGNMENT_COUNT); Options::AddValueOption("-region", "REGION", "limit source of random alignment subset to a particular genomic region. Index file is recommended for better performance, and is read automatically if it exists as .bai or .bti. See \'bamtools help index\' for more details on creating one", "", m_settings->HasRegion, m_settings->Region, FilterOpts); } @@ -87,41 +97,59 @@ int RandomTool::Help(void) { int RandomTool::Run(int argc, char* argv[]) { - // TODO: Handle BAM input WITHOUT index files. - // parse command line arguments Options::Parse(argc, argv, 1); - // set to default input if none provided + // set to default stdin if no input files provided if ( !m_settings->HasInput ) m_settings->InputFiles.push_back(Options::StandardIn()); - // open our BAM reader + // open our reader BamMultiReader reader; - reader.Open(m_settings->InputFiles); - string headerText = reader.GetHeaderText(); - RefVector references = reader.GetReferenceData(); - - // check that reference data is available, used for generating random jumps - if ( references.empty() ) { - cerr << "No reference data available... quitting." << endl; + if ( !reader.Open(m_settings->InputFiles) ) { + cerr << "ERROR: Could not open input BAM file(s)." 
<< endl; + return 1; + } + + // make sure index data is available + if ( !reader.IsIndexLoaded() ) { + cerr << "ERROR: Could not load index data for all input BAM file(s)." << endl; + cerr << "\'bamtools random\' requires valid index files to provide efficient performance." << endl; reader.Close(); return 1; + } - - // see if user specified a REGION - BamRegion region; - if ( m_settings->HasRegion ) { - if ( Utilities::ParseRegionString(m_settings->Region, reader, region) ) - reader.SetRegion(region); + + // get BamReader metadata + const string headerText = reader.GetHeaderText(); + const RefVector references = reader.GetReferenceData(); + if ( references.empty() ) { + cerr << "ERROR: No reference data available - required to perform random access throughtout input file(s)." << endl; + reader.Close(); + return 1; } - // open out BAM writer + // open our writer BamWriter writer; - writer.Open(m_settings->OutputFilename, headerText, references); - + bool writeUncompressed = ( m_settings->OutputFilename == Options::StandardOut() && !m_settings->IsForceCompression ); + if ( !writer.Open(m_settings->OutputFilename, headerText, references, writeUncompressed) ) { + cerr << "ERROR: Could not open BamWriter." 
<< endl; + reader.Close(); + return 1; + } + + // if user specified a REGION constraint, attempt to parse REGION string + BamRegion region; + if ( m_settings->HasRegion && !Utilities::ParseRegionString(m_settings->Region, reader, region) ) { + cerr << "ERROR: Could not parse REGION: " << m_settings->Region << endl; + cerr << "Be sure REGION is in valid format (see README) and that coordinates are valid for selected references" << endl; + reader.Close(); + writer.Close(); + return 1; + } + // seed our random number generator - srand (time(NULL) ); + srand( time(NULL) ); // grab random alignments BamAlignment al; @@ -131,37 +159,31 @@ int RandomTool::Run(int argc, char* argv[]) { int randomRefId = 0; int randomPosition = 0; - // use REGION constraints to generate random refId & position + // use REGION constraints to select random refId & position if ( m_settings->HasRegion ) { - int lowestRefId = region.LeftRefID; - int highestRefId = region.RightRefID; - int rangeRefId = (highestRefId - lowestRefId) + 1; - randomRefId = lowestRefId + (int)(rangeRefId * (double)(rand()/((double)RAND_MAX + 1))); + // select a random refId + randomRefId = getRandomInt(region.LeftRefID, region.RightRefID); - int lowestPosition = ( (randomRefId == region.LeftRefID) ? region.LeftPosition : 0 ); - int highestPosition = ( (randomRefId == region.RightRefID) ? region.RightPosition : references.at(randomRefId).RefLength - 1 ); - int rangePosition = (highestPosition - lowestPosition) + 1; - randomPosition = lowestPosition + (int)(rangePosition * (double)(rand()/((double)RAND_MAX + 1))); + // select a random position based on randomRefId + const int lowerBoundPosition = ( (randomRefId == region.LeftRefID) ? region.LeftPosition : 0 ); + const int upperBoundPosition = ( (randomRefId == region.RightRefID) ? 
region.RightPosition : (references.at(randomRefId).RefLength - 1) ); + randomPosition = getRandomInt(lowerBoundPosition, upperBoundPosition); } - // otherwise generate 'normal' random refId & position + // otherwise select from all possible random refId & position else { - // generate random refId - int lowestRefId = 0; - int highestRefId = references.size() - 1; - int rangeRefId = (highestRefId - lowestRefId) + 1; - randomRefId = lowestRefId + (int)(rangeRefId * (double)(rand()/((double)RAND_MAX + 1))); + // select random refId + randomRefId = getRandomInt(0, (int)references.size() - 1); - // generate random position - int lowestPosition = 0; - int highestPosition = references.at(randomRefId).RefLength - 1; - int rangePosition = (highestPosition - lowestPosition) + 1; - randomPosition = lowestPosition + (int)(rangePosition * (double)(rand()/((double)RAND_MAX + 1))); + // select random position based on randomRefId + const int lowerBoundPosition = 0; + const int upperBoundPosition = references.at(randomRefId).RefLength - 1; + randomPosition = getRandomInt(lowerBoundPosition, upperBoundPosition); } - // if jump & read successful, save alignment + // if jump & read successful, save first alignment that overlaps random refId & position if ( reader.Jump(randomRefId, randomPosition) ) { while ( reader.GetNextAlignmentCore(al) ) { if ( al.RefID == randomRefId && al.Position >= randomPosition ) { @@ -173,7 +195,7 @@ int RandomTool::Run(int argc, char* argv[]) { } } - // close reader & writer + // cleanup & exit reader.Close(); writer.Close(); return 0; diff --git a/src/utils/Makefile b/src/utils/Makefile index 8a68984..7e13cdd 100644 --- a/src/utils/Makefile +++ b/src/utils/Makefile @@ -15,6 +15,7 @@ INCLUDES = -I$(API_DIR)/ # define our source and object files # ---------------------------------- SOURCES = bamtools_fasta.cpp \ + bamtools_filter_engine.cpp \ bamtools_options.cpp \ bamtools_pileup.cpp \ bamtools_utilities.cpp diff --git 
a/src/utils/bamtools_filter_engine.cpp b/src/utils/bamtools_filter_engine.cpp new file mode 100644 index 0000000..3ad9430 --- /dev/null +++ b/src/utils/bamtools_filter_engine.cpp @@ -0,0 +1,102 @@ +// *************************************************************************** +// bamtools_filter_engine.cpp (c) 2010 Derek Barnett, Erik Garrison +// Marth Lab, Department of Biology, Boston College +// All rights reserved. +// --------------------------------------------------------------------------- +// Last modified: 30 August 2010 +// --------------------------------------------------------------------------- +// +// *************************************************************************** + +#include +#include "bamtools_filter_engine.h" +#include "BamAux.h" +using namespace std; +using namespace BamTools; + +// --------------------------------------------------------- +// FilterValue implementation + +// checks a string query against filter (value, compare type) +bool PropertyFilterValue::check(const string& query) const { + + // ensure filter value & query are same type + if ( !Value.is_type() ) { + cerr << "Cannot compare different types!" 
<< endl; + return false; + } + + // localize string version of our filter value + const string& valueString = Value.get(); + + // string matching based on our filter type + switch ( Type ) { + case ( PropertyFilterValue::CONTAINS) : return ( query.find(valueString) != string::npos ); + case ( PropertyFilterValue::ENDS_WITH) : return ( query.find(valueString) == (query.length() - valueString.length()) ); + case ( PropertyFilterValue::EXACT) : return ( query == valueString ); + case ( PropertyFilterValue::GREATER_THAN) : return ( query > valueString ); + case ( PropertyFilterValue::GREATER_THAN_EQUAL) : return ( query >= valueString ); + case ( PropertyFilterValue::LESS_THAN) : return ( query < valueString ); + case ( PropertyFilterValue::LESS_THAN_EQUAL) : return ( query <= valueString ); + case ( PropertyFilterValue::NOT) : return ( query != valueString ); + case ( PropertyFilterValue::STARTS_WITH) : return ( query.find(valueString) == 0 ); + default : BAMTOOLS_ASSERT_UNREACHABLE; + } + return false; +} + +// --------------------------------------------------------- +// FilterEngine implementation + +// static FilterEngine data members +FilterMap FilterEngine::m_filters; +vector FilterEngine::m_properties; + +// creates a new filter set, returns true if created, false if error or already exists +bool FilterEngine::addFilter(const string& filterName) { + return (m_filters.insert(make_pair(filterName, PropertyFilter()))).second; +} + +// return list of current filter names +const vector FilterEngine::filterNames(void) { + vector names; + names.reserve(m_filters.size()); + FilterMap::const_iterator mapIter = m_filters.begin(); + FilterMap::const_iterator mapEnd = m_filters.end(); + for ( ; mapIter != mapEnd; ++mapIter ) + names.push_back( (*mapIter).first ); + return names; +} + +// add a new known property (& type) to engine +bool FilterEngine::addProperty(const string& propertyName) { + const vector propertyNames = allPropertyNames(); + bool found = binary_search( 
propertyNames.begin(), propertyNames.end(), propertyName ); + if ( found ) return false; + m_properties.push_back( Property(propertyName) ); + sort( m_properties.begin(), m_properties.end() ); + return true; +} + + +// returns list of all properties known by FilterEngine ( any created using addProperty() ) +const vector FilterEngine::allPropertyNames(void) { + vector names; + names.reserve(m_properties.size()); + vector::const_iterator propIter = m_properties.begin(); + vector::const_iterator propEnd = m_properties.end(); + for ( ; propIter != propEnd; ++propIter ) + names.push_back( (*propIter).Name ); + return names; +} + +// returns list of property names that are 'enabled' ( only those touched by setProperty() ) +const vector FilterEngine::enabledPropertyNames(void) { + vector names; + names.reserve(m_properties.size()); + vector::const_iterator propIter = m_properties.begin(); + vector::const_iterator propEnd = m_properties.end(); + for ( ; propIter != propEnd; ++propIter ) + if ( (*propIter).IsEnabled ) names.push_back( (*propIter).Name ); + return names; +} diff --git a/src/utils/bamtools_filter_engine.h b/src/utils/bamtools_filter_engine.h new file mode 100644 index 0000000..2297aea --- /dev/null +++ b/src/utils/bamtools_filter_engine.h @@ -0,0 +1,432 @@ +// *************************************************************************** +// bamtools_filter_engine.h (c) 2010 Derek Barnett, Erik Garrison +// Marth Lab, Department of Biology, Boston College +// All rights reserved. 
+// --------------------------------------------------------------------------- +// Last modified: 30 August 2010 +// --------------------------------------------------------------------------- +// +// *************************************************************************** + +#ifndef BAMTOOLS_FILTER_ENGINE_H +#define BAMTOOLS_FILTER_ENGINE_H + +#include +#include +#include +#include +#include +#include +#include "bamtools_utilities.h" +#include "bamtools_variant.h" + +namespace BamTools { + +struct PropertyFilterValue { + + // define valid ValueCompareTypes + enum ValueCompareType { CONTAINS = 0 + , ENDS_WITH + , EXACT + , GREATER_THAN + , GREATER_THAN_EQUAL + , LESS_THAN + , LESS_THAN_EQUAL + , NOT + , STARTS_WITH + }; + + // ctor + PropertyFilterValue(const Variant& value = Variant(), + const ValueCompareType& type = PropertyFilterValue::EXACT) + : Value(value) + , Type(type) + { } + + // filter check methods + template + bool check(const T& query) const; + bool check(const std::string& query) const; + + // data members + Variant Value; + ValueCompareType Type; +}; + +inline +const std::string toString(const PropertyFilterValue::ValueCompareType& type) { + + switch ( type ) { + case ( PropertyFilterValue::CONTAINS ) : return std::string( "CONTAINS"); + case ( PropertyFilterValue::ENDS_WITH ) : return std::string( "ENDS_WITH"); + case ( PropertyFilterValue::EXACT ) : return std::string( "EXACT"); + case ( PropertyFilterValue::GREATER_THAN ) : return std::string( "GREATER_THAN"); + case ( PropertyFilterValue::GREATER_THAN_EQUAL ) : return std::string( "GREATER_THAN_EQUAL"); + case ( PropertyFilterValue::LESS_THAN ) : return std::string( "LESS_THAN"); + case ( PropertyFilterValue::LESS_THAN_EQUAL ) : return std::string( "LESS_THAN_EQUAL"); + case ( PropertyFilterValue::NOT ) : return std::string( "NOT"); + case ( PropertyFilterValue::STARTS_WITH ) : return std::string( "STARTS_WITH"); + default : BAMTOOLS_ASSERT_UNREACHABLE; + } + return std::string(); +} + +// 
property name => property filter value +// ('name' => ('SSR', STARTS_WITH), 'mapQuality' => (50, GREATER_THAN_EQUAL), etc...) +typedef std::map PropertyMap; + +struct PropertyFilter { + + // will be used more later + // if we implement a compound 'rules' system - i.e. "(filter1 AND filter2) OR filter 3" + enum FilterCompareType { AND = 0 + , EXACT + , NOT + , OR + }; + + // data members + PropertyMap Properties; + FilterCompareType Type; + + // ctor + PropertyFilter(void) : Type( PropertyFilter::EXACT ) { } + + // filter check methods + template + bool check(const std::string& propertyName, const T& query) const; +}; + +// filter name => properties +// ('filter1' => properties1, 'filter2' => properties2, etc...) +typedef std::map FilterMap; + +// used to store properties known to engine & keep track of enabled state +struct Property { + std::string Name; + bool IsEnabled; + Property(const std::string& name, bool isEnabled = false) + : Name(name) + , IsEnabled(isEnabled) + { } +}; + +inline bool operator< (const Property& lhs, const Property& rhs) { return lhs.Name < rhs.Name; } +inline bool operator== (const Property& lhs, const Property& rhs) { return lhs.Name == rhs.Name; } + +class FilterEngine { + + // 'filter set' methods + public: + // creates a new filter set, returns true if created, false if error or already exists + static bool addFilter(const std::string& filterName); + + // return list of current filter names + static const std::vector filterNames(void); + + // 'property' methods + public: + + // add a new known property (& type) to engine + static bool addProperty(const std::string& propertyName); + + // sets property filter (value, type) for propertyName, on a particular filter set + // setProperty("filter1", "mapQuality", 50, GREATER_THAN_EQUAL) + template + static bool setProperty(const std::string& filterName, + const std::string& propertyName, + const T& value, + const PropertyFilterValue::ValueCompareType& type = PropertyFilterValue::EXACT); + + 
// returns list of all properties known by FilterEngine ( any created using addProperty() ) + static const std::vector allPropertyNames(void); + + // returns list of property names that are 'enabled' ( only those touched by setProperty() ) + static const std::vector enabledPropertyNames(void); + + // token parsing (for property filter generation) + public: + template + static bool parseToken(const std::string& token, T& value, PropertyFilterValue::ValueCompareType& type); + + // query evaluation + public: + // returns true if query passes all filters on 'propertyName' + template + static bool check(const std::string& propertyName, const T& query); + + // data members + private: + // all 'filter sets' + static FilterMap m_filters; + + // all known properties + static std::vector m_properties; + + // token-parsing constants + static const int NOT_CHAR = (int)'!'; + static const int EQUAL_CHAR = (int)'='; + static const int GREATER_THAN_CHAR = (int)'>'; + static const int LESS_THAN_CHAR = (int)'<'; + static const int WILDCARD_CHAR = (int)'*'; +}; + +// ------------------------------------------------------------------- +// template methods + +// checks a query against a filter (value, compare type) +template +bool PropertyFilterValue::check(const T& query) const { + + // ensure filter value & query are same type + if ( !Value.is_type() ) { + std::cerr << "Cannot compare different types!" << std::endl; + return false; + } + + // string matching + if ( Value.is_type() ) { + std::cerr << "Cannot compare different types - query is a string!" 
<< std::endl; + return false; + } + + // numeric matching based on our filter type + switch ( Type ) { + case ( PropertyFilterValue::EXACT) : return ( query == Value.get() ); + case ( PropertyFilterValue::GREATER_THAN) : return ( query > Value.get() ); + case ( PropertyFilterValue::GREATER_THAN_EQUAL) : return ( query >= Value.get() ); + case ( PropertyFilterValue::LESS_THAN) : return ( query < Value.get() ); + case ( PropertyFilterValue::LESS_THAN_EQUAL) : return ( query <= Value.get() ); + case ( PropertyFilterValue::NOT) : return ( query != Value.get() ); + default : BAMTOOLS_ASSERT_UNREACHABLE; + } + return false; +} + +template +bool PropertyFilter::check(const std::string& propertyName, const T& query) const { + + // if propertyName found for this filter, + PropertyMap::const_iterator propIter = Properties.find(propertyName); + if ( propIter != Properties.end() ) { + const PropertyFilterValue& filterValue = (*propIter).second; + + // check + switch ( Type ) { + case ( PropertyFilter::EXACT ) : return filterValue.check(query); + case ( PropertyFilter::NOT ) : return !filterValue.check(query); + case ( PropertyFilter::AND ) : + case ( PropertyFilter::OR ) : BAMTOOLS_ASSERT_MESSAGE(false, "Cannot use a binary compare operator on 1 value"); + default : BAMTOOLS_ASSERT_UNREACHABLE; + } + return false; // unreachable + } + + // property unknown to this filter + else return true; +} + +template +bool FilterEngine::parseToken(const std::string& token, T& value, PropertyFilterValue::ValueCompareType& type) { + + // skip if token is empty + if ( token.empty() ) return false; + + // will store token after special chars are removed + std::string strippedToken; + + // if only single character + if ( token.length() == 1 ) { + strippedToken = token; + type = PropertyFilterValue::EXACT; + } + + // more than one character, check for special chars + else { + const int firstChar = (int)token.at(0); + + switch ( (int)firstChar ) { + + case ( (int)FilterEngine::NOT_CHAR ) : + + 
strippedToken = token.substr(1); + type = PropertyFilterValue::NOT; + + break; + + case ( (int)FilterEngine::GREATER_THAN_CHAR ) : + + // check for '>=' case + if ( token.at(1) == FilterEngine::EQUAL_CHAR ) { + if ( token.length() == 2 ) return false; + strippedToken = token.substr(2); + type = PropertyFilterValue::GREATER_THAN_EQUAL; + } + + // otherwise only '>' + else { + strippedToken = token.substr(1); + type = PropertyFilterValue::GREATER_THAN; + } + + break; + + case ( (int)FilterEngine::LESS_THAN_CHAR ) : + + // check for '<=' case + if ( token.at(1) == FilterEngine::EQUAL_CHAR ) { + if ( token.length() == 2 ) return false; + strippedToken = token.substr(2); + type = PropertyFilterValue::LESS_THAN_EQUAL; + } + + // otherwise only '<' + else { + strippedToken = token.substr(1); + type = PropertyFilterValue::LESS_THAN; + } + + break; + + case ( (int)FilterEngine::WILDCARD_CHAR ) : + + // check for *str* case (CONTAINS) + if ( token.at( token.length() - 1 ) == FilterEngine::WILDCARD_CHAR ) { + if ( token.length() == 2 ) return false; + strippedToken = token.substr(1, token.length() - 2); + type = PropertyFilterValue::CONTAINS; + } + + // otherwise *str case (ENDS_WITH) + else { + strippedToken = token.substr(1); + type = PropertyFilterValue::ENDS_WITH; + } + + break; + + + default : + + // check for str* case (STARTS_WITH) + if ( token.at( token.length() - 1 ) == FilterEngine::WILDCARD_CHAR ) { + if ( token.length() == 2 ) return false; + strippedToken = token.substr(0, token.length() - 1); + type = PropertyFilterValue::STARTS_WITH; + } + + // otherwise EXACT + else { + strippedToken = token; + type = PropertyFilterValue::EXACT; + } + + break; + } + } + + // convert stripped token to value + std::stringstream stream(strippedToken); + if ( strippedToken == "true" || strippedToken == "false" ) + stream >> std::boolalpha >> value; + else + stream >> value; + + // check for valid CompareType on type T + Variant variantCheck = value; + + // if T is not string AND 
CompareType is for string values, return false + if ( !variantCheck.is_type() ) { + if ( type == PropertyFilterValue::CONTAINS || + type == PropertyFilterValue::ENDS_WITH || + type == PropertyFilterValue::STARTS_WITH ) + + return false; + } + + // return success + return true; +} + +// sets property filter (value, type) for propertyName, on a particular filter set +// setProperty("filter1", "mapQuality", 50, GREATER_THAN_EQUAL) +template +bool FilterEngine::setProperty(const std::string& filterName, + const std::string& propertyName, + const T& value, + const PropertyFilterValue::ValueCompareType& type) +{ + // lookup filter by name, return false if not found + FilterMap::iterator filterIter = m_filters.find(filterName); + if ( filterIter == m_filters.end() ) return false; + + // lookup property for filter, add new PropertyFilterValue if not found, modify if already exists + PropertyFilter& filter = (*filterIter).second; + PropertyMap::iterator propertyIter = filter.Properties.find(propertyName); + + bool success; + + // property not found for this filter, create new entry + if ( propertyIter == filter.Properties.end() ) + success = (filter.Properties.insert(std::make_pair(propertyName, PropertyFilterValue(value, type)))).second; + + // property already exists, modify + else { + PropertyFilterValue& filterValue = (*propertyIter).second; + filterValue.Value = value; + filterValue.Type = type; + success = true; + } + + // if error so far, return false + if ( !success ) return false; + + // -------------------------------------------- + // otherwise, set Property.IsEnabled to true + + // lookup property + std::vector::iterator knownPropertyIter = std::find( m_properties.begin(), m_properties.end(), propertyName); + + // if not found, create a new (enabled) entry (& re-sort list) + if ( knownPropertyIter == m_properties.end() ) { + m_properties.push_back( Property(propertyName, true) ); + std::sort( m_properties.begin(), m_properties.end() ); + } + + // property 
already known, set as enabled + else + (*knownPropertyIter).IsEnabled = true; + + // return success + return true; +} + +// returns false if query does not pass any filters on 'propertyName' +// returns true if property unknown (i.e. nothing has been set for this property... so query is considered to pass filter) +template +bool FilterEngine::check(const std::string& propertyName, const T& query) { + + // check enabled properties list + // return true if no properties enabled at all OR if property is unknown to FilterEngine + const std::vector enabledProperties = enabledPropertyNames(); + if ( enabledProperties.empty() ) return true; + const bool found = std::binary_search( enabledProperties.begin(), enabledProperties.end(), propertyName ); + if ( !found ) return true; + + // iterate over all filters in FilterEngine + FilterMap::const_iterator filterIter = m_filters.begin(); + FilterMap::const_iterator filterEnd = m_filters.end(); + for ( ; filterIter != filterEnd; ++filterIter ) { + + // check query against this filter + const PropertyFilter& filter = (*filterIter).second; + if ( filter.check(propertyName, query) ) return true; + } + + // query passes none of the filters with current property enabled + return false; +} + +} // namespace BamTools + +#endif // BAMTOOLS_FILTER_ENGINE_H \ No newline at end of file diff --git a/src/utils/bamtools_options.cpp b/src/utils/bamtools_options.cpp index 931fbd8..71aa2eb 100644 --- a/src/utils/bamtools_options.cpp +++ b/src/utils/bamtools_options.cpp @@ -3,7 +3,7 @@ // Marth Lab, Department of Biology, Boston College // All rights reserved. 
// --------------------------------------------------------------------------- -// Last modified: 2 June 2010 +// Last modified: 7 September 2010 // --------------------------------------------------------------------------- // Parses command line arguments and creates a help menu // --------------------------------------------------------------------------- @@ -11,8 +11,7 @@ // The Mosaik suite's command line parser class: COptions // (c) 2006 - 2009 Michael Strömberg // Marth Lab, Department of Biology, Boston College -// Dual licenced under the GNU General Public License 2.0+ license or as -// a commercial license with the Marth Lab. +// Re-licensed under MIT License with author's permission. // // * Modified slightly to fit BamTools, otherwise code is same. // * (BamTools namespace, added stdin/stdout) (DB) diff --git a/src/utils/bamtools_options.h b/src/utils/bamtools_options.h index 54c4764..668ac52 100644 --- a/src/utils/bamtools_options.h +++ b/src/utils/bamtools_options.h @@ -3,7 +3,7 @@ // Marth Lab, Department of Biology, Boston College // All rights reserved. // --------------------------------------------------------------------------- -// Last modified: 1 June 2010 +// Last modified: 7 September 2010 // --------------------------------------------------------------------------- // Parses command line arguments and creates a help menu // --------------------------------------------------------------------------- @@ -11,10 +11,10 @@ // The Mosaik suite's command line parser class: COptions // (c) 2006 - 2009 Michael Strömberg // Marth Lab, Department of Biology, Boston College -// Dual licenced under the GNU General Public License 2.0+ license or as -// a commercial license with the Marth Lab. +// Re-licensed under MIT License with author's permission. // -// * Modified to fit BamTools code-style, otherwise code is same. (DB) +// * Modified slightly to fit BamTools, otherwise code is same.
+// * (BamTools namespace, added stdin/stdout) (DB) // *************************************************************************** #ifndef BAMTOOLS_OPTIONS_H diff --git a/src/utils/bamtools_utilities.cpp b/src/utils/bamtools_utilities.cpp index 7772587..b39e60d 100644 --- a/src/utils/bamtools_utilities.cpp +++ b/src/utils/bamtools_utilities.cpp @@ -3,20 +3,26 @@ // Marth Lab, Department of Biology, Boston College // All rights reserved. // --------------------------------------------------------------------------- -// Last modified: 2 June 2010 +// Last modified: 3 September 2010 // --------------------------------------------------------------------------- // Provides general utilities used by BamTools sub-tools. // *************************************************************************** #include -#include +#include +#include #include "bamtools_utilities.h" #include "BamReader.h" #include "BamMultiReader.h" - using namespace std; using namespace BamTools; +// check if a file exists +bool Utilities::FileExists(const std::string& filename) { + ifstream f(filename.c_str(), ifstream::in); + return !f.fail(); +} + // Parses a region string, does validation (valid ID's, positions), stores in Region struct // Returns success (true/false) bool Utilities::ParseRegionString(const std::string& regionString, const BamReader& reader, BamRegion& region) { @@ -232,10 +238,3 @@ bool Utilities::ParseRegionString(const std::string& regionString, const BamMult return true; } - -bool Utilities::FileExists(const std::string& filename) { - - struct stat fileInfo; - return stat(filename.c_str(), &fileInfo) == 0; - -} diff --git a/src/utils/bamtools_utilities.h b/src/utils/bamtools_utilities.h index 4f6928b..f72897a 100644 --- a/src/utils/bamtools_utilities.h +++ b/src/utils/bamtools_utilities.h @@ -3,7 +3,7 @@ // Marth Lab, Department of Biology, Boston College // All rights reserved. 
// --------------------------------------------------------------------------- -// Last modified: 2 June 2010 +// Last modified: 30 August 2010 // --------------------------------------------------------------------------- // Provides general utilities used by BamTools sub-tools. // *************************************************************************** @@ -11,9 +11,14 @@ #ifndef BAMTOOLS_UTILITIES_H #define BAMTOOLS_UTILITIES_H +#include +#include #include #include "BamAux.h" +#define BAMTOOLS_ASSERT_UNREACHABLE assert( false ) +#define BAMTOOLS_ASSERT_MESSAGE( condition, message ) if (!( condition )) throw std::runtime_error( message ); + namespace BamTools { class BamReader; @@ -21,15 +26,17 @@ class BamMultiReader; class Utilities { - public: + public: + // check if a file exists + static bool FileExists(const std::string& fname); + // Parses a region string, uses reader to do validation (valid ID's, positions), stores in Region struct // Returns success (true/false) static bool ParseRegionString(const std::string& regionString, const BamReader& reader, BamRegion& region); // Same as above, but accepts a BamMultiReader static bool ParseRegionString(const std::string& regionString, const BamMultiReader& reader, BamRegion& region); - // check if a file exists - static bool FileExists(const std::string& fname); + }; } // namespace BamTools diff --git a/src/utils/bamtools_variant.h b/src/utils/bamtools_variant.h index 4927de9..93ef8ab 100644 --- a/src/utils/bamtools_variant.h +++ b/src/utils/bamtools_variant.h @@ -27,34 +27,35 @@ namespace BamTools { class Variant { public: - Variant(void) : data (NULL) { } + Variant(void) : data(NULL) { } Variant(const Variant& other) { - if(other.data != NULL) + if ( other.data != NULL ) other.data->AddRef(); data = other.data; } ~Variant(void) { - if(data != NULL) data->Release(); + if ( data != NULL ) + data->Release(); } // NOTE: This code takes care of self-assignment. // DO NOT CHANGE THE ORDER of the statements. 
- Variant& operator=(const Variant& rhs) { - if(rhs.data != NULL) + Variant& operator= (const Variant& rhs) { + if ( rhs.data != NULL ) rhs.data->AddRef(); - if(data != NULL) + if ( data != NULL ) data->Release(); data = rhs.data; - return * this; + return *this; } // This member template constructor allows you to // instance a variant_t object with a value of any type. template - Variant(T v) - : data(new Impl(v)) + Variant(T v) + : data(new Impl(v)) { data->AddRef(); } @@ -88,13 +89,13 @@ class Variant { private: struct ImplBase { - ImplBase() : refs(0) {} - virtual ~ImplBase() {} + ImplBase() : refs(0) { } + virtual ~ImplBase(void) { } - void AddRef(void) { refs ++; } + void AddRef(void) { ++refs; } void Release(void) { --refs; - if(refs == 0) delete this; + if ( refs == 0 ) delete this; } size_t refs; @@ -102,7 +103,7 @@ class Variant { template struct Impl : ImplBase { - Impl(T v) : data (v) { } + Impl(T v) : data(v) { } ~Impl(void) { } T data; }; @@ -113,9 +114,9 @@ class Variant { static Impl* CastFromBase(ImplBase* v) { // This upcast will fail if T is other than the T used // with the constructor of variant_t. - Impl* p = dynamic_cast*> (v); - if (p == NULL) - throw std::invalid_argument(typeid(T).name()+std::string(" is not a valid type")); + Impl* p = dynamic_cast< Impl* > (v); + if ( p == NULL ) + throw std::invalid_argument( typeid(T).name() + std::string(" is not a valid type") ); return p; }