Skip to content
This repository has been archived by the owner on Aug 12, 2022. It is now read-only.

Commit

Permalink
Add support for old EPUB 2 metadata (dc-metadata and x-metadata tags)
Browse files Browse the repository at this point in the history
  • Loading branch information
mickael-menu committed Jul 31, 2020
1 parent 4eec6ac commit f744e6c
Show file tree
Hide file tree
Showing 4 changed files with 54 additions and 9 deletions.
22 changes: 15 additions & 7 deletions r2-streamer-swift/Parser/EPUB/EPUBMetadataParser.swift
Original file line number Diff line number Diff line change
Expand Up @@ -192,14 +192,14 @@ final class EPUBMetadataParser: Loggable {

/// Parse and return the Epub unique identifier.
/// https://github.com/readium/architecture/blob/master/streamer/parser/metadata.md#identifier
private lazy var uniqueIdentifier: String? = metadataElement?
.firstChild(xpath:"dc:identifier[@id=/opf:package/@unique-identifier]")?
.stringValue
private lazy var uniqueIdentifier: String? =
dcElement(tag: "identifier[@id=/opf:package/@unique-identifier]")?
.stringValue

/// https://github.com/readium/architecture/blob/master/streamer/parser/metadata.md#publication-date
private lazy var publishedDate = metadataElement?
.firstChild(xpath: "dc:date[not(@opf:event) or @opf:event='publication']")?
.stringValue.dateFromISO8601
private lazy var publishedDate =
dcElement(tag: "date[not(@opf:event) or @opf:event='publication']")?
.stringValue.dateFromISO8601

/// Parse the modifiedDate (date of last modification of the EPUB).
/// https://github.com/readium/architecture/blob/master/streamer/parser/metadata.md#modification-date
Expand All @@ -210,7 +210,7 @@ final class EPUBMetadataParser: Loggable {
.first
}
let epub2Date = {
self.metadataElement?.firstChild(xpath: "dc:date[@opf:event='modification']")?
self.dcElement(tag: "date[@opf:event='modification']")?
.stringValue.dateFromISO8601
}
return epub3Date() ?? epub2Date()
Expand Down Expand Up @@ -479,4 +479,12 @@ final class EPUBMetadataParser: Loggable {
}
}

/// Looks up the first `dc:` element matching `tag` within the `metadata` element.
///
/// Old EPUB 2 packages may nest their `dc:` tags under `metadata/dc-metadata`, so that container
/// is searched in addition to the direct children of `metadata`.
///
/// - Parameter tag: Element name without the `dc:` prefix. It is interpolated verbatim into the
///   XPath query, so it may carry a predicate (e.g. `date[@opf:event='modification']`).
/// - Returns: `nil` when `metadataElement` is `nil`; otherwise the result of `firstChild(xpath:)`
///   for the query.
private func dcElement(tag: String) -> XMLElement? {
    // `.` targets `metadata` itself, `opf:dc-metadata` the legacy EPUB 2 container.
    let query = "(.|opf:dc-metadata)/dc:\(tag)"
    guard let metadata = metadataElement else {
        return nil
    }
    return metadata.firstChild(xpath: query)
}

}
5 changes: 3 additions & 2 deletions r2-streamer-swift/Parser/EPUB/OPFMeta.swift
Original file line number Diff line number Diff line change
Expand Up @@ -152,9 +152,10 @@ struct OPFMetaList {
let prefixes = OPFVocabulary.prefixes(in: document)
document.definePrefix("opf", forNamespace: "http://www.idpf.org/2007/opf")
document.definePrefix("dc", forNamespace: "http://purl.org/dc/elements/1.1/")

// Parses `<meta>` and `<dc:x>` tags in order of appearance.
self.metas = document.xpath("/opf:package/opf:metadata/opf:meta|/opf:package/opf:metadata/dc:*")
let root = "/opf:package/opf:metadata"
self.metas = document.xpath("\(root)/opf:meta|\(root)/dc:*|\(root)/opf:dc-metadata/dc:*|\(root)/opf:x-metadata/opf:meta")
.compactMap { meta in
if meta.tag == "meta" {
// EPUB 3
Expand Down
11 changes: 11 additions & 0 deletions r2-streamer-swiftTests/Parser/EPUB/EPUBMetadataParserTests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,17 @@ class EPUBMetadataParserTests: XCTestCase {
))
}

/// Old EPUB 2 files sometimes nest the `dc:*` tags under `dc-metadata` and the `meta` tags under
/// `x-metadata`, instead of placing them directly under `metadata`.
/// See http://idpf.org/epub/20/spec/OPF_2.0_final_spec.html#Section2.2
func testParseUnderDCMetadataElement() throws {
    let metadata = try parseMetadata("dc-metadata")
    let expectedAuthors = [Contributor(name: "Lewis Carroll")]
    let expectedModified = "2012-04-02T12:47:00Z".dateFromISO8601

    // Values declared by `dc:*` tags nested under `dc-metadata` in the fixture.
    XCTAssertEqual(metadata.identifier, "urn:uuid:1a16ce38-82bd-4e9b-861e-773c2e787a50")
    XCTAssertEqual(metadata.title, "Alice's Adventures in Wonderland")
    XCTAssertEqual(metadata.authors, expectedAuthors)

    // Value declared by the `meta` tag nested under `x-metadata` in the fixture.
    XCTAssertEqual(metadata.modified, expectedModified)
}

func testParseMainTitle() throws {
let sut = try parseMetadata("title-main")
XCTAssertEqual(sut.title, "Main title takes precedence")
Expand Down
25 changes: 25 additions & 0 deletions r2-streamer-swiftTests/Samples/OPF/dc-metadata.opf
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
<?xml version="1.0"?>
<package xmlns="http://www.idpf.org/2007/opf" unique-identifier="bookid" version="2.0">
<!--
Old EPUB 2 files sometimes contain the `dc` tags under `dc-metadata` and the `meta` tags under `x-metadata`.
See http://idpf.org/epub/20/spec/OPF_2.0_final_spec.html#Section2.2
-->
<metadata>
<dc-metadata xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:dcterms="http://purl.org/dc/terms/"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<dc:title>Alice's Adventures in Wonderland</dc:title>
<dc:creator>Lewis Carroll</dc:creator>
<dc:identifier id="bookid">urn:uuid:1a16ce38-82bd-4e9b-861e-773c2e787a50</dc:identifier>
</dc-metadata>
<x-metadata>
<meta property="dcterms:modified">2012-04-02T12:47:00Z</meta>
</x-metadata>
</metadata>
<manifest>
<item id="titlepage" href="titlepage.xhtml" media-type="application/xhtml+xml" />
</manifest>
<spine>
<itemref idref="titlepage"/>
</spine>
</package>

0 comments on commit f744e6c

Please sign in to comment.