1 // Copyright 2007-2010 Baptiste Lepilleur
2 // Distributed under MIT license, or public domain if desired and
3 // recognized in your jurisdiction.
4 // See file LICENSE for detail or copy at http://jsoncpp.sourceforge.net/LICENSE
6 #include "json_reader.h"
7 #include "json_value.h"
16 #if _MSC_VER >= 1400 // VC++ 8.0
17 #pragma warning( disable : 4996 ) // disable warning about strdup being deprecated.
22 // Implementation of class Features
23 // ////////////////////////////////
// Member initializers of a constructor whose header is not visible here
// (presumably the default Features constructor -- confirm).
// Defaults: comments are allowed, and the strict-root requirement is off.
26 : allowComments_( true )
27 , strictRoot_( false )
// Builds a feature set for strict parsing: comments are disallowed and the
// strict-root requirement is enabled.
40 Features::strictMode()
43 features.allowComments_ = false;
44 features.strictRoot_ = true;
48 // Implementation of class Reader
49 // ////////////////////////////////
// Tells whether character c equals any of the four candidates c1..c4.
53 in( Reader::Char c, Reader::Char c1, Reader::Char c2, Reader::Char c3, Reader::Char c4 )
55 return c == c1 || c == c2 || c == c3 || c == c4;
// Five-candidate overload: tells whether character c equals any of c1..c5.
59 in( Reader::Char c, Reader::Char c1, Reader::Char c2, Reader::Char c3, Reader::Char c4, Reader::Char c5 )
61 return c == c1 || c == c2 || c == c3 || c == c4 || c == c5;
// Scans the half-open character range [begin, end) looking for a line break;
// both '\n' and '\r' count as one.
66 containsNewLine( Reader::Location begin,
67 Reader::Location end )
69 for ( ;begin < end; ++begin )
70 if ( *begin == '\n' || *begin == '\r' )
77 // //////////////////////////////////////////////////////////////////
// Initializer of a constructor whose header is not visible here (presumably
// the default Reader constructor -- confirm): the reader starts with the
// feature set returned by Features::all().
80 : features_( Features::all() )
// Constructs a reader that uses the caller-supplied feature set.
85 Reader::Reader( const Features &features )
86 : features_( features )
// Parses a document supplied as a std::string.
// The begin/end pointers are taken from the member document_ (presumably a
// copy of `document` made on a line not shown here -- confirm), so they stay
// valid for as long as the reader keeps that member unchanged.
// The work is delegated to the pointer-range overload of parse().
92 Reader::parse( const std::string &document,
94 bool collectComments )
97 const char *begin = document_.c_str();
98 const char *end = begin + document_.length();
99 return parse( begin, end, root, collectComments );
// Parses a document read from an input stream.
// The stream content is read into the string `doc` and handed to the
// std::string overload of parse().
104 Reader::parse( std::istream& sin,
106 bool collectComments )
108 //std::istream_iterator<char> begin(sin);
109 //std::istream_iterator<char> end;
110 // Those would allow streamed input from a file, if parse() were a
111 // template function.
113 // Where std::string is reference-counted (copy-on-write implementations),
114 // this at least does not create an extra copy.
// std::getline is used with (char)EOF as the delimiter, so reading stops at
// end of stream or at the first byte equal to (char)EOF.
// NOTE(review): EOF is typically -1, i.e. byte 0xFF after the cast; input
// containing that byte would be truncated there -- confirm this is acceptable.
116 std::getline(sin, doc, (char)EOF);
117 return parse( doc, root, collectComments );
// Parses the document delimited by [beginDoc, endDoc) into `root`.
// Comment collection is forced off when the feature set disallows comments.
121 Reader::parse( const char *beginDoc, const char *endDoc,
123 bool collectComments )
125 if ( !features_.allowComments_ )
127 collectComments = false;
132 collectComments_ = collectComments;
136 commentsBefore_ = "";
// The node stack is reset so that `root` is the only (top) node while the
// top-level value is read.
138 while ( !nodes_.empty() )
140 nodes_.push( &root );
142 bool successful = readValue();
// Consume any comments that follow the top-level value; collected comment
// text that is still pending is attached to the root as a trailing comment.
144 skipCommentTokens( token );
145 if ( collectComments_ && !commentsBefore_.empty() )
146 root.setComment( commentsBefore_, commentAfter );
// With strictRoot_ set, a root that is neither an array nor an object is
// reported as an error.
147 if ( features_.strictRoot_ )
149 if ( !root.isArray() && !root.isObject() )
151 // Set error location to start of doc, ideally should be first token found in doc
152 token.type_ = tokenError;
153 token.start_ = beginDoc;
155 addError( "A valid JSON document must be either an array or an object value.",
168 skipCommentTokens( token );
169 bool successful = true;
171 if ( collectComments_ && !commentsBefore_.empty() )
173 currentValue().setComment( commentsBefore_, commentBefore );
174 commentsBefore_ = "";
178 switch ( token.type_ )
180 case tokenObjectBegin:
181 successful = readObject( token );
183 case tokenArrayBegin:
184 successful = readArray( token );
187 successful = decodeNumber( token );
190 successful = decodeString( token );
193 currentValue() = true;
196 currentValue() = false;
199 currentValue() = Value();
202 return addError( "Syntax error: value, object or array expected.", token );
205 if ( collectComments_ )
207 lastValueEnd_ = current_;
208 lastValue_ = ¤tValue();
// Fetches the next token into `token`. When the feature set allows comments,
// tokens are read repeatedly until one that is not a comment is found, so the
// caller never sees tokenComment in that mode.
216 Reader::skipCommentTokens( Token &token )
218 if ( features_.allowComments_ )
224 while ( token.type_ == tokenComment );
// Checks that `token` has the expected type; on a mismatch, records `message`
// as an error located at that token and returns addError()'s result.
// (The statement that fills `token` is on a line not shown here.)
234 Reader::expectToken( TokenType type, Token &token, const char *message )
237 if ( token.type_ != type )
238 return addError( message, token );
// Scans one token from the input.
// token.start_ is set to the position before the first character is consumed
// and token.end_ to the position after scanning; token.type_ is chosen from
// the first character (the individual case labels are on lines not shown).
244 Reader::readToken( Token &token )
247 token.start_ = current_;
248 Char c = getNextChar();
253 token.type_ = tokenObjectBegin;
256 token.type_ = tokenObjectEnd;
259 token.type_ = tokenArrayBegin;
262 token.type_ = tokenArrayEnd;
265 token.type_ = tokenString;
269 token.type_ = tokenComment;
283 token.type_ = tokenNumber;
// The first letter of each literal has already been consumed, so only the
// remaining characters of "true", "false" and "null" are matched.
287 token.type_ = tokenTrue;
288 ok = match( "rue", 3 );
291 token.type_ = tokenFalse;
292 ok = match( "alse", 4 );
295 token.type_ = tokenNull;
296 ok = match( "ull", 3 );
299 token.type_ = tokenArraySeparator;
302 token.type_ = tokenMemberSeparator;
305 token.type_ = tokenEndOfStream;
// Presumably executed only when `ok` is false (the guarding condition is on a
// line not shown): a failed scan is reported as tokenError.
312 token.type_ = tokenError;
313 token.end_ = current_;
// Body fragment of a whitespace-skipping helper whose header is not visible
// here. The loop runs until the end of input; space, tab, carriage return and
// line feed are the characters treated as whitespace.
321 while ( current_ != end_ )
324 if ( c == ' ' || c == '\t' || c == '\r' || c == '\n' )
// Compares the next patternLength input characters with `pattern`.
// The comparison is only attempted when at least patternLength characters
// remain before end_; the cursor is advanced past the pattern once the
// comparison has gone through.
333 Reader::match( Location pattern,
336 if ( end_ - current_ < patternLength )
338 int index = patternLength;
340 if ( current_[index] != pattern[index] )
342 current_ += patternLength;
// Reads a comment and, when comments are being collected, records it.
348 Reader::readComment()
// The comment text starts one character before the cursor, i.e. at the
// character already consumed by the caller.
350 Location commentBegin = current_ - 1;
// The character after it selects the comment flavour handled below.
351 Char c = getNextChar();
352 bool successful = false;
354 successful = readCStyleComment();
356 successful = readCppStyleComment();
360 if ( collectComments_ )
// Default placement is "before the next value". The comment is instead
// attached to the previous value as a same-line trailing comment when no line
// break separates them and the comment itself is either not a '*'-style
// comment or does not span a line break.
362 CommentPlacement placement = commentBefore;
363 if ( lastValueEnd_ && !containsNewLine( lastValueEnd_, commentBegin ) )
365 if ( c != '*' || !containsNewLine( commentBegin, current_ ) )
366 placement = commentAfterOnSameLine;
369 addComment( commentBegin, current_, placement );
// Records the comment text [begin, end) according to `placement`.
// Must only be called while comments are being collected (asserted).
376 Reader::addComment( Location begin,
378 CommentPlacement placement )
380 assert( collectComments_ );
// A same-line trailing comment is attached directly to the last value read,
// which must exist (asserted).
381 if ( placement == commentAfterOnSameLine )
383 assert( lastValue_ != 0 );
384 lastValue_->setComment( std::string( begin, end ), placement );
// Otherwise the text is appended to the pending "before" comments, separated
// from earlier pending text by a newline.
388 if ( !commentsBefore_.empty() )
389 commentsBefore_ += "\n";
390 commentsBefore_ += std::string( begin, end );
396 Reader::readCStyleComment()
398 while ( current_ != end_ )
400 Char c = getNextChar();
401 if ( c == '*' && *current_ == '/' )
404 return getNextChar() == '/';
// Consumes the remainder of a C++-style (single-line) comment: characters are
// read until the end of input, and a carriage return or line feed is the
// character that marks the end of the comment.
409 Reader::readCppStyleComment()
411 while ( current_ != end_ )
413 Char c = getNextChar();
414 if ( c == '\r' || c == '\n' )
// Body fragment of a number-scanning helper whose header is not visible here.
// The test below identifies a character that cannot be part of a number
// token: anything that is neither a decimal digit nor one of '.', 'e', 'E',
// '+', '-'. No check of the number's overall syntax is visible at this point.
424 while ( current_ != end_ )
426 if ( !(*current_ >= '0' && *current_ <= '9') &&
427 !in( *current_, '.', 'e', 'E', '+', '-' ) )
// Loop header of a scanning helper whose header and body are not visible here
// (presumably the string scanner -- confirm); it iterates until end of input.
437 while ( current_ != end_ )
// Reads the members of an object into the current value, which is first reset
// to an empty object. Each iteration handles one "name : value" member.
450 Reader::readObject( Token &tokenStart )
454 currentValue() = Value( objectValue );
455 while ( readToken( tokenName ) )
// Skip comment tokens that precede the member name.
457 bool initialTokenOk = true;
458 while ( tokenName.type_ == tokenComment && initialTokenOk )
459 initialTokenOk = readToken( tokenName );
460 if ( !initialTokenOk )
// A closing brace is only accepted here while no member name has been read,
// i.e. for an empty object.
462 if ( tokenName.type_ == tokenObjectEnd && name.empty() ) // empty object
464 if ( tokenName.type_ != tokenString )
468 if ( !decodeString( tokenName, name ) )
469 return recoverFromError( tokenObjectEnd );
472 if ( !readToken( colon ) || colon.type_ != tokenMemberSeparator )
474 return addErrorAndRecover( "Missing ':' after object member name",
// The member's value node is created (or fetched) by name and pushed on the
// node stack so that readValue() fills it in.
478 Value &value = currentValue()[ name ];
479 nodes_.push( &value );
480 bool ok = readValue();
482 if ( !ok ) // error already set
483 return recoverFromError( tokenObjectEnd );
// After a member, only a closing brace, a separator or a comment is accepted.
486 if ( !readToken( comma )
487 || ( comma.type_ != tokenObjectEnd &&
488 comma.type_ != tokenArraySeparator &&
489 comma.type_ != tokenComment ) )
491 return addErrorAndRecover( "Missing ',' or '}' in object declaration",
// Skip comment tokens that follow the member before testing for the end of
// the object.
495 bool finalizeTokenOk = true;
496 while ( comma.type_ == tokenComment &&
498 finalizeTokenOk = readToken( comma );
499 if ( comma.type_ == tokenObjectEnd )
502 return addErrorAndRecover( "Missing '}' or object member name",
509 Reader::readArray( Token &tokenStart )
511 currentValue() = Value( arrayValue );
513 if ( *current_ == ']' ) // empty array
516 readToken( endArray );
522 Value &value = currentValue()[ index++ ];
523 nodes_.push( &value );
524 bool ok = readValue();
526 if ( !ok ) // error already set
527 return recoverFromError( tokenArrayEnd );
530 // Accept Comment after last item in the array.
531 ok = readToken( token );
532 while ( token.type_ == tokenComment && ok )
534 ok = readToken( token );
536 bool badTokenType = ( token.type_ == tokenArraySeparator &&
537 token.type_ == tokenArrayEnd );
538 if ( !ok || badTokenType )
540 return addErrorAndRecover( "Missing ',' or ']' in array declaration",
544 if ( token.type_ == tokenArrayEnd )
552 Reader::decodeNumber( Token &token )
554 bool isDouble = false;
555 for ( Location inspect = token.start_; inspect != token.end_; ++inspect )
558 || in( *inspect, '.', 'e', 'E', '+' )
559 || ( *inspect == '-' && inspect != token.start_ );
562 return decodeDouble( token );
563 // Attempts to parse the number as an integer. If the number is
564 // larger than the maximum supported value of an integer then
565 // we decode the number as a double.
566 Location current = token.start_;
567 bool isNegative = *current == '-';
570 Value::UInt maxIntegerValue = isNegative ? Value::UInt(-Value::minInt)
572 Value::UInt threshold = maxIntegerValue / 10;
573 Value::UInt lastDigitThreshold = maxIntegerValue % 10;
574 assert( lastDigitThreshold >=0 && lastDigitThreshold <= 9 );
575 Value::UInt value = 0;
576 while ( current < token.end_ )
579 if ( c < '0' || c > '9' )
580 return addError( "'" + std::string( token.start_, token.end_ ) + "' is not a number.", token );
581 Value::UInt digit(c - '0');
582 if ( value >= threshold )
584 // If the current digit is not the last one, or if it is
585 // greater than the last digit of the maximum integer value,
586 // the parse the number as a double.
587 if ( current != token.end_ || digit > lastDigitThreshold )
589 return decodeDouble( token );
592 value = value * 10 + digit;
595 currentValue() = -Value::Int( value );
596 else if ( value <= Value::UInt(Value::maxInt) )
597 currentValue() = Value::Int( value );
599 currentValue() = value;
605 Reader::decodeDouble( Token &token )
608 const int bufferSize = 32;
610 int length = int(token.end_ - token.start_);
611 if ( length <= bufferSize )
613 Char buffer[bufferSize];
614 memcpy( buffer, token.start_, length );
616 count = sscanf( buffer, "%lf", &value );
620 std::string buffer( token.start_, token.end_ );
621 count = sscanf( buffer.c_str(), "%lf", &value );
625 return addError( "'" + std::string( token.start_, token.end_ ) + "' is not a number.", token );
626 currentValue() = value;
// Decodes a string token and stores the decoded text in the current value.
// The two-argument overload does the decoding; its failure is handled on a
// line not shown here.
632 Reader::decodeString( Token &token )
635 if ( !decodeString( token, decoded ) )
637 currentValue() = decoded;
// Decodes the text of a string token into `decoded`, translating backslash
// escape sequences.
643 Reader::decodeString( Token &token, std::string &decoded )
// The token text includes the two enclosing quotes, hence the "- 2" and the
// adjusted begin/end positions.
645 decoded.reserve( token.end_ - token.start_ - 2 );
646 Location current = token.start_ + 1; // skip '"'
647 Location end = token.end_ - 1; // do not include '"'
648 while ( current != end )
653 else if ( c == '\\' )
// A backslash as the last character has nothing to escape.
655 if ( current == end )
656 return addError( "Empty escape sequence in string", token, current );
657 Char escape = *current++;
// Single-character escapes map to the character they denote.
660 case '"': decoded += '"'; break;
661 case '/': decoded += '/'; break;
662 case '\\': decoded += '\\'; break;
663 case 'b': decoded += '\b'; break;
664 case 'f': decoded += '\f'; break;
665 case 'n': decoded += '\n'; break;
666 case 'r': decoded += '\r'; break;
667 case 't': decoded += '\t'; break;
// A unicode escape is decoded to a code point and appended as UTF-8.
670 unsigned int unicode;
671 if ( !decodeUnicodeCodePoint( token, current, end, unicode ) )
673 decoded += codePointToUTF8(unicode);
677 return addError( "Bad escape sequence in string", token, current );
// Decodes one unicode escape into the code point `unicode`, combining a
// surrogate pair into a single code point when the first escape is in the
// range 0xD800-0xDBFF.
689 Reader::decodeUnicodeCodePoint( Token &token,
692 unsigned int &unicode )
695 if ( !decodeUnicodeEscapeSequence( token, current, end, unicode ) )
697 if (unicode >= 0xD800 && unicode <= 0xDBFF)
// The second half needs six more characters: a backslash, 'u' and four
// hexadecimal digits.
700 if (end - current < 6)
701 return addError( "additional six characters expected to parse unicode surrogate pair.", token, current );
702 unsigned int surrogatePair;
703 if (*(current++) == '\\' && *(current++)== 'u')
705 if (decodeUnicodeEscapeSequence( token, current, end, surrogatePair ))
// Each half contributes its low 10 bits; the result is offset by 0x10000.
// NOTE(review): no range check on the second half is visible here -- confirm
// whether values outside the low-surrogate range should be rejected.
707 unicode = 0x10000 + ((unicode & 0x3FF) << 10) + (surrogatePair & 0x3FF);
713 return addError( "expecting another \\u token to begin the second half of a unicode surrogate pair", token, current );
// Decodes four hexadecimal digits starting at `current` into `unicode`.
// An error is recorded when fewer than four characters remain or when a
// character is not a hexadecimal digit (both letter cases are accepted).
719 Reader::decodeUnicodeEscapeSequence( Token &token,
722 unsigned int &unicode )
724 if ( end - current < 4 )
725 return addError( "Bad unicode escape sequence in string: four digits expected.", token, current );
727 for ( int index =0; index < 4; ++index )
731 if ( c >= '0' && c <= '9' )
733 else if ( c >= 'a' && c <= 'f' )
734 unicode += c - 'a' + 10;
735 else if ( c >= 'A' && c <= 'F' )
736 unicode += c - 'A' + 10;
738 return addError( "Bad unicode escape sequence in string: hexadecimal digit expected.", token, current );
// Records an error: an ErrorInfo entry carrying `message` is appended to
// errors_. (The remaining parameters and fields are on lines not shown here.)
745 Reader::addError( const std::string &message,
751 info.message_ = message;
753 errors_.push_back( info );
// Skips tokens after an error until `skipUntilToken` or the end of the stream
// is reached.
// The number of errors on entry is remembered and errors_ is cut back to it,
// so that errors produced while skipping are not reported.
759 Reader::recoverFromError( TokenType skipUntilToken )
761 int errorCount = int(errors_.size());
765 if ( !readToken(skip) )
766 errors_.resize( errorCount ); // discard errors caused by recovery
767 if ( skip.type_ == skipUntilToken || skip.type_ == tokenEndOfStream )
770 errors_.resize( errorCount );
// Convenience helper: records `message` as an error at `token`, then skips
// ahead to `skipUntilToken` and returns recoverFromError()'s result.
776 Reader::addErrorAndRecover( const std::string &message,
778 TokenType skipUntilToken )
780 addError( message, token );
781 return recoverFromError( skipUntilToken );
// Returns the value currently being filled in: the node on top of the node
// stack. The stack must not be empty when this is called.
786 Reader::currentValue()
788 return *(nodes_.top());
// Fetches the next input character. The end of input is tested first; the
// values returned in each case are on lines not shown here.
793 Reader::getNextChar()
795 if ( current_ == end_ )
802 Reader::getLocationLineAndColumn( Location location,
806 Location current = begin_;
807 Location lastLineStart = current;
809 while ( current < location && current != end_ )
814 if ( *current == '\n' )
816 lastLineStart = current;
819 else if ( c == '\n' )
821 lastLineStart = current;
825 // column & line start at 1
826 column = int(location - lastLineStart) + 1;
// Formats the position of `location` as the text "Line <n>, Column <n>",
// using the line/column overload to compute the numbers.
832 Reader::getLocationLineAndColumn( Location location ) const
835 getLocationLineAndColumn( location, line, column );
// Buffer sized for the fixed text plus two integers and the terminator.
836 char buffer[18+16+16+1];
837 sprintf( buffer, "Line %d, Column %d", line, column );
// Builds a human-readable report of all recorded errors.
// For each error: a "* " line with the location of the offending token, a
// line with the message, and a "See ... for detail." line pointing at the
// extra location (the condition guarding that last line is not shown here).
843 Reader::getFormatedErrorMessages() const
845 std::string formattedMessage;
846 for ( Errors::const_iterator itError = errors_.begin();
847 itError != errors_.end();
850 const ErrorInfo &error = *itError;
851 formattedMessage += "* " + getLocationLineAndColumn( error.token_.start_ ) + "\n";
852 formattedMessage += " " + error.message_ + "\n";
854 formattedMessage += "See " + getLocationLineAndColumn( error.extra_ ) + " for detail.\n";
856 return formattedMessage;
// Stream extraction: parses the content of `sin` into `root`, with comment
// collection enabled (third argument of parse()).
// Throws std::runtime_error carrying the formatted error messages when
// parsing fails.
860 std::istream& operator>>( std::istream &sin, Value &root )
863 bool ok = reader.parse(sin, root, true);
865 if (!ok) throw std::runtime_error(reader.getFormatedErrorMessages());