1 /************************************************************************
\r
3 ** Copyright (C) 2010 Strahinja Markovic
\r
5 ** This file is part of FlightCrew.
\r
7 ** FlightCrew is free software: you can redistribute it and/or modify
\r
8 ** it under the terms of the GNU Lesser General Public License as published
\r
9 ** by the Free Software Foundation, either version 3 of the License, or
\r
10 ** (at your option) any later version.
\r
12 ** FlightCrew is distributed in the hope that it will be useful,
\r
13 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
\r
14 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
\r
15 ** GNU Lesser General Public License for more details.
\r
17 ** You should have received a copy of the GNU Lesser General Public License
\r
18 ** along with FlightCrew. If not, see <http://www.gnu.org/licenses/>.
\r
20 *************************************************************************/
\r
25 #include <zipios++/zipextraction.h>
\r
26 #include "Misc/TempFolder.h"
\r
27 #include "Misc/Utilities.h"
\r
28 #include "Validators/Xml/WellFormedXml.h"
\r
29 #include <XmlUtils.h>
\r
30 #include <XercesInit.h>
\r
31 #include <FromXercesStringConverter.h>
\r
32 #include <ToXercesStringConverter.h>
\r
33 #include <xercesc/util/XMLUri.hpp>
\r
34 #include "flightcrew_p.h"
\r
35 #include "Validators/Ocf/ContainerSatisfiesSchema.h"
\r
36 #include "Validators/Ocf/EncryptionSatisfiesSchema.h"
\r
37 #include "Validators/Ocf/SignaturesSatisfiesSchema.h"
\r
38 #include "Validators/Ocf/ContainerListsOpf.h"
\r
39 #include "Validators/Ocf/ContainerListedOpfPresent.h"
\r
40 #include "Validators/Ocf/MimetypeBytesValid.h"
\r
41 #include "Validators/Xml/UsesUnicode.h"
\r
43 namespace FlightCrew
\r
// Namespace URI used when looking up the <rootfile> elements
// of META-INF/container.xml.
const std::string CONTAINER_XML_NAMESPACE( "urn:oasis:names:tc:opendocument:xmlns:container" );
\r
49 std::vector< Result > ValidateMetaInf( const fs::path &path_to_meta_inf )
\r
51 fs::path container_xml( path_to_meta_inf / "container.xml" );
\r
52 fs::path signatures_xml( path_to_meta_inf / "signatures.xml" );
\r
53 fs::path manifest_xml( path_to_meta_inf / "manifest.xml" );
\r
54 fs::path rights_xml( path_to_meta_inf / "rights.xml" );
\r
55 fs::path metadata_xml( path_to_meta_inf / "metadata.xml" );
\r
56 fs::path encryption_xml( path_to_meta_inf / "encryption.xml" );
\r
58 std::vector< Result > results;
\r
60 if ( fs::exists( container_xml ) )
\r
62 Util::Extend( results, ContainerSatisfiesSchema() .ValidateFile( container_xml ) );
\r
63 Util::Extend( results, ContainerListsOpf() .ValidateFile( container_xml ) );
\r
64 Util::Extend( results, ContainerListedOpfPresent().ValidateFile( container_xml ) );
\r
69 results.push_back( Result( ERROR_EPUB_NO_CONTAINER_XML ) );
\r
72 if ( fs::exists( encryption_xml ) )
\r
74 Util::Extend( results, EncryptionSatisfiesSchema().ValidateFile( encryption_xml ) );
\r
76 if ( fs::exists( signatures_xml ) )
\r
78 Util::Extend( results, SignaturesSatisfiesSchema().ValidateFile( signatures_xml ) );
\r
80 std::vector< fs::path > all_files;
\r
81 all_files.push_back( container_xml );
\r
82 all_files.push_back( signatures_xml );
\r
83 all_files.push_back( encryption_xml );
\r
84 all_files.push_back( manifest_xml );
\r
85 all_files.push_back( rights_xml );
\r
86 all_files.push_back( metadata_xml );
\r
88 foreach( fs::path file, all_files )
\r
90 if ( fs::exists( file ) )
\r
92 Util::Extend( results, UsesUnicode().ValidateFile( file ) );
\r
95 // i starts at 3 because we already (implicitly) checked well-formedness
\r
96 // for container.xml, signatures.xml and encryption.xml so
\r
97 // we don't want to check it again.
\r
98 for ( uint i = 3; i < all_files.size(); ++i )
\r
100 if ( fs::exists( all_files[ i ] ) )
\r
102 Util::Extend( results, WellFormedXml().ValidateFile( all_files[ i ] ) );
\r
105 // There are some possible duplicates
\r
106 Util::RemoveDuplicates( results );
\r
111 fs::path GetRelativePathToNcx( const xc::DOMDocument &opf )
\r
113 std::vector< xc::DOMElement* > items = xe::GetElementsByQName(
\r
114 opf, QName( "item", OPF_XML_NAMESPACE ) );
\r
116 foreach( xc::DOMElement* item, items )
\r
118 std::string href = fromX( item->getAttribute( toX( "href" ) ) );
\r
119 std::string media_type = fromX( item->getAttribute( toX( "media-type" ) ) );
\r
121 if ( xc::XMLUri::isValidURI( true, toX( href ) ) &&
\r
122 media_type == NCX_MIME )
\r
124 return Util::Utf8PathToBoostPath( Util::UrlDecode( href ) );
\r
132 std::vector< fs::path > GetRelativePathsToXhtmlDocuments( const xc::DOMDocument &opf )
\r
134 std::vector< xc::DOMElement* > items = xe::GetElementsByQName(
\r
135 opf, QName( "item", OPF_XML_NAMESPACE ) );
\r
137 std::vector< fs::path > paths;
\r
139 foreach( xc::DOMElement* item, items )
\r
141 std::string href = fromX( item->getAttribute( toX( "href" ) ) );
\r
142 std::string media_type = fromX( item->getAttribute( toX( "media-type" ) ) );
\r
144 if ( xc::XMLUri::isValidURI( true, toX( href ) ) &&
\r
145 ( media_type == XHTML_MIME || media_type == OEB_DOC_MIME ) )
\r
147 paths.push_back( Util::Utf8PathToBoostPath( Util::UrlDecode( href ) ) );
\r
155 std::vector< Result > DescendToOpf( const fs::path &path_to_opf )
\r
157 WellFormedXml wf_validator;
\r
159 // We can't continue if the OPF is not well-formed.
\r
160 // ValidateOpf will take care of returning any
\r
161 // validation results for the OPF
\r
162 if ( !wf_validator.ValidateFile( path_to_opf ).empty() )
\r
164 return std::vector< Result >();
\r
166 xc::DOMDocument& opf = wf_validator.GetDocument();
\r
167 std::vector< Result > results;
\r
169 fs::path opf_parent = path_to_opf.parent_path();
\r
170 fs::path rel_ncx_path = GetRelativePathToNcx( opf );
\r
171 fs::path full_ncx_path = opf_parent / GetRelativePathToNcx( opf );
\r
173 if ( !rel_ncx_path.empty() && fs::exists( full_ncx_path ) )
\r
175 Util::Extend( results, ValidateNcx( full_ncx_path ) );
\r
177 std::vector< fs::path > xhtml_paths = GetRelativePathsToXhtmlDocuments( opf );
\r
179 foreach( fs::path rel_xhtml_path, xhtml_paths )
\r
181 fs::path full_xhtml_path = opf_parent / rel_xhtml_path;
\r
183 if ( !rel_xhtml_path.empty() && fs::exists( full_xhtml_path ) )
\r
185 Util::Extend( results, ValidateXhtml( full_xhtml_path ) );
\r
192 fs::path GetRelativeOpfPath( const xc::DOMDocument &content_xml )
\r
194 std::vector< xc::DOMElement* > rootfiles = xe::GetElementsByQName(
\r
195 content_xml, QName( "rootfile", CONTAINER_XML_NAMESPACE ) );
\r
197 foreach( xc::DOMElement* rootfile, rootfiles )
\r
199 std::string full_path_attribute = fromX( rootfile->getAttribute( toX( "full-path" ) ) );
\r
200 std::string media_type = fromX( rootfile->getAttribute( toX( "media-type" ) ) );
\r
202 if ( media_type == OEBPS_MIME )
\r
204 return Util::Utf8PathToBoostPath( full_path_attribute );
\r
211 std::vector< Result > DescendToContentXml( const fs::path &path_to_content_xml )
\r
213 WellFormedXml wf_validator;
\r
215 // We can't continue if content.xml is not well-formed.
\r
216 // ValidateMetaInf will take care of returning any
\r
217 // validation results for content.xml
\r
218 if ( !wf_validator.ValidateFile( path_to_content_xml ).empty() )
\r
220 return std::vector< Result >();
\r
222 // The base path for the OPF is the publication root path
\r
223 fs::path root_path = path_to_content_xml.parent_path().parent_path();
\r
224 fs::path rel_opf_path = GetRelativeOpfPath( wf_validator.GetDocument() );
\r
225 fs::path full_opf_path = root_path / rel_opf_path;
\r
227 std::vector< Result > results;
\r
229 if ( !rel_opf_path.empty() && fs::exists( full_opf_path ) )
\r
231 Util::Extend( results, ValidateOpf( full_opf_path ) );
\r
232 Util::Extend( results, DescendToOpf( full_opf_path ) );
\r
238 void RemoveBasePathFromResultPaths( std::vector< Result > &results, const fs::path &basepath )
\r
240 std::string path_prefix = Util::BoostPathToUtf8Path( basepath );
\r
242 foreach( Result &result, results )
\r
244 std::string result_path = result.GetFilepath();
\r
246 if ( !result_path.empty() )
\r
248 std::string relative_path = boost::erase_first_copy( result_path, path_prefix );
\r
250 // We don't want it to look like an absolute path
\r
251 // because it's not.
\r
252 if ( boost::starts_with( relative_path, "/" ) )
\r
254 boost::erase_first( relative_path, "/" );
\r
256 result.SetFilepath( relative_path );
\r
262 void AddEpubFilenameToResultPaths( std::vector< Result > &results, const std::string &epub_name )
\r
264 foreach( Result &result, results )
\r
266 std::string result_path = result.GetFilepath();
\r
268 if ( !result_path.empty() )
\r
270 result.SetFilepath( epub_name + "/" + result_path );
\r
274 result.SetFilepath( epub_name );
\r
279 std::vector< Result > ValidateEpubRootFolder( const fs::path &root_folder_path )
\r
281 xe::XercesInit init;
\r
283 if ( !fs::exists( root_folder_path ) )
\r
285 boost_throw( FileDoesNotExistEx() << ei_FilePath( Util::BoostPathToUtf8Path( root_folder_path ) ) );
\r
287 std::vector< Result > results;
\r
288 Util::Extend( results, ValidateMetaInf( root_folder_path / "META-INF" ) );
\r
290 fs::path path_to_content_xml = root_folder_path / "META-INF/container.xml";
\r
292 if ( !fs::exists( path_to_content_xml ) )
\r
297 Util::Extend( results, DescendToContentXml( path_to_content_xml ) );
\r
299 RemoveBasePathFromResultPaths( results, root_folder_path );
\r
304 std::vector< Result > ValidateEpub( const fs::path &filepath )
\r
306 TempFolder temp_folder;
\r
308 std::vector< Result > results;
\r
312 zipios::ExtractZipToFolder( filepath, temp_folder.GetPath() );
\r
315 catch ( std::exception& exception )
\r
317 results.push_back( Result( ERROR_EPUB_NOT_VALID_ZIP_ARCHIVE )
\r
318 .SetCustomMessage( exception.what() ) );
\r
322 Util::Extend( results, MimetypeBytesValid().ValidateFile( filepath ) );
\r
323 RemoveBasePathFromResultPaths( results, temp_folder.GetPath() );
\r
325 Util::Extend( results, ValidateEpubRootFolder( temp_folder.GetPath() ) );
\r
326 AddEpubFilenameToResultPaths( results, Util::BoostPathToUtf8Path( filepath.filename() ) );
\r
330 } // namespace FlightCrew
\r