1 /************************************************************************
\r
3 ** Copyright (C) 2010 Strahinja Markovic
\r
5 ** This file is part of FlightCrew.
\r
7 ** FlightCrew is free software: you can redistribute it and/or modify
\r
8 ** it under the terms of the GNU Lesser General Public License as published
\r
9 ** by the Free Software Foundation, either version 3 of the License, or
\r
10 ** (at your option) any later version.
\r
12 ** FlightCrew is distributed in the hope that it will be useful,
\r
13 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
\r
14 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
\r
15 ** GNU Lesser General Public License for more details.
\r
17 ** You should have received a copy of the GNU Lesser General Public License
\r
18 ** along with FlightCrew. If not, see <http://www.gnu.org/licenses/>.
\r
20 *************************************************************************/
\r
25 #include "UsesUnicode.h"
\r
26 #include "Misc/Utilities.h"
\r
// Number of leading characters of a file that are handed to the
// XML-declaration checks (the count passed to
// Util::GetFirstNumCharsFromFile in UsesUnicode::ValidateFile).
static const int NUM_PEEK_CHARS_FOR_XML_DECLARATION = 80;
\r
32 namespace FlightCrew
\r
35 std::vector< Result > UsesUnicode::ValidateFile( const fs::path &filepath )
\r
37 std::vector< Result > results;
\r
41 std::string inital_chars = Util::GetFirstNumCharsFromFile( filepath, NUM_PEEK_CHARS_FOR_XML_DECLARATION );
\r
43 if ( FileIsValidUtf8( filepath ) )
\r
45 if ( !FileDeclaresUtf8( inital_chars ) )
\r
47 results.push_back( Result( ERROR_XML_SPECIFIES_NEITHER_UTF8_NOR_UTF16 )
\r
49 .AddMessageArgument( GetDeclaredEncoding( inital_chars ) ) );
\r
52 // else everything ok
\r
57 if ( !inital_chars.empty() )
\r
61 if ( !FileDeclaresUtf16( inital_chars ) )
\r
63 results.push_back( Result( ERROR_XML_SPECIFIES_NEITHER_UTF8_NOR_UTF16 )
\r
65 .AddMessageArgument( GetDeclaredEncoding( inital_chars ) ) );
\r
68 // else everything ok
\r
73 results.push_back( Result( ERROR_XML_BYTESTREAM_NEITHER_UTF8_NOR_UTF16 )
\r
74 .SetErrorLine( 1 ) );
\r
81 catch ( ExceptionBase& )
\r
84 results.push_back( Result( UNABLE_TO_PERFORM_VALIDATION ) );
\r
87 return Util::AddPathToResults( results, filepath );
\r
91 bool UsesUnicode::FileIsValidUtf8( const fs::path &filepath )
\r
93 fs::ifstream file( filepath, std::ios::in | std::ios::binary );
\r
94 std::istreambuf_iterator<char> it( file.rdbuf() );
\r
95 std::istreambuf_iterator<char> eos;
\r
97 return utf8::is_valid( it, eos );
\r
101 bool UsesUnicode::FileDeclaresUtf8( const std::string &line )
\r
103 if ( HasXmlDeclaration( line ) )
\r
105 std::string encoding = boost::to_upper_copy( GetDeclaredEncoding( line ) );
\r
107 // Empty still counts as utf-8 as per spec
\r
108 if ( encoding.empty() || encoding == "UTF-8" )
\r
115 // No xml declaration means
\r
116 // UTF-8 according to the spec
\r
121 bool UsesUnicode::FileDeclaresUtf16( const std::string &line )
\r
123 if ( HasXmlDeclaration( line ) )
\r
125 if ( boost::to_upper_copy( GetDeclaredEncoding( line ) ) == "UTF-16" )
\r
136 bool UsesUnicode::HasXmlDeclaration( const std::string &line )
\r
138 return boost::contains( line, "<?" );
\r
142 std::string UsesUnicode::GetDeclaredEncoding( const std::string &line )
\r
144 boost::regex expression( "encoding\\s*=\\s*(?:\"|')([^\"']+)(?:\"|')" );
\r
145 boost::match_results< std::string::const_iterator > matches;
\r
147 // FIXME: return only when regex_search returns true
\r
148 boost::regex_search( line, matches, expression );
\r
149 return matches[ 1 ];
\r
152 } //namespace FlightCrew
\r