Skip to content

Commit

Permalink
Extract the table of contents from an LCP-protected PDF (#480)
Browse files Browse the repository at this point in the history
  • Loading branch information
mickael-menu authored Nov 18, 2024
1 parent 3ecc6fa commit 60aa620
Show file tree
Hide file tree
Showing 10 changed files with 165 additions and 22 deletions.
13 changes: 12 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,18 @@ All notable changes to this project will be documented in this file. Take a look

**Warning:** Features marked as *alpha* may change or be removed in a future release without notice. Use with caution.

<!-- ## [Unreleased] -->
## [Unreleased]

### Added

#### Shared

* `TableOfContentsService` can now be used to customize the computation of `publication.tableOfContents()`.

#### LCP

* The table of contents of an LCP-protected PDF is now extracted directly from the PDF if the `tableOfContents` property in `manifest.json` is empty.


## [3.0.0-alpha.3]

Expand Down
16 changes: 11 additions & 5 deletions Sources/Navigator/EPUB/EPUBNavigatorViewController.swift
Original file line number Diff line number Diff line change
Expand Up @@ -449,7 +449,9 @@ open class EPUBNavigatorViewController: UIViewController,
}

/// Mapping between reading order hrefs and the table of contents title.
private lazy var tableOfContentsTitleByHref: [AnyURL: String] = {
private lazy var tableOfContentsTitleByHref = memoize(computeTableOfContentsTitleByHref)

private func computeTableOfContentsTitleByHref() async -> [AnyURL: String] {
func fulfill(linkList: [Link]) -> [AnyURL: String] {
var result = [AnyURL: String]()

Expand All @@ -465,8 +467,12 @@ open class EPUBNavigatorViewController: UIViewController,
return result
}

return fulfill(linkList: publication.tableOfContents)
}()
guard let toc = try? await publication.tableOfContents().get() else {
return [:]
}

return fulfill(linkList: toc)
}

/// Goes to the next or previous page in the given scroll direction.
private func go(to direction: EPUBSpreadView.Direction, options: NavigatorGoOptions) async -> Bool {
Expand Down Expand Up @@ -642,8 +648,8 @@ open class EPUBNavigatorViewController: UIViewController,
{
// Gets the current locator from the positionList and fills in its missing data.
let positionIndex = Int(ceil(progression * Double(positionList.count - 1)))
return positionList[positionIndex].copy(
title: tableOfContentsTitleByHref[equivalent: href],
return await positionList[positionIndex].copy(
title: tableOfContentsTitleByHref()[equivalent: href],
locations: { $0.progression = progression }
)
} else {
Expand Down
4 changes: 1 addition & 3 deletions Sources/Shared/Publication/Publication.swift
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ import Foundation

/// Shared model for a Readium Publication.
public class Publication: Closeable, Loggable {
private var manifest: Manifest
public var manifest: Manifest
private let container: Container
private let services: [PublicationService]

Expand All @@ -20,8 +20,6 @@ public class Publication: Closeable, Loggable {
public var readingOrder: [Link] { manifest.readingOrder }
/// Identifies resources that are necessary for rendering the publication.
public var resources: [Link] { manifest.resources }
/// Identifies the collection that contains a table of contents.
public var tableOfContents: [Link] { manifest.tableOfContents }
public var subcollections: [String: [PublicationCollection]] { manifest.subcollections }

public init(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ public class StringSearchService: SearchService {
return []
}

let title = publication.tableOfContents.titleMatchingHREF(link.href)
let title = await publication.tableOfContents().getOrNil()?.titleMatchingHREF(link.href)
resourceLocator = resourceLocator.copy(
title: Optional(title ?? link.title)
)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
//
// Copyright 2024 Readium Foundation. All rights reserved.
// Use of this source code is governed by the BSD-style license
// available in the top-level LICENSE file of the project.
//

import Foundation

/// Closure creating a ``TableOfContentsService`` from a
/// `PublicationServiceContext`. The closure may return `nil`, in which case
/// no service instance is produced for that context.
public typealias TableOfContentsServiceFactory = (PublicationServiceContext) -> TableOfContentsService?

/// Returns or computes a table of contents for the publication.
public protocol TableOfContentsService: PublicationService {
/// Returns the table of contents as a list of `Link`, wrapped in a
/// `ReadResult` so that a failure can be reported to the caller.
func tableOfContents() async -> ReadResult<[Link]>
}

// MARK: Publication Helpers

public extension Publication {
    /// Resolves the table of contents of this publication.
    ///
    /// When a ``TableOfContentsService`` is attached to the publication, the
    /// request is forwarded to it. Otherwise, the `tableOfContents` declared
    /// in the manifest is returned as a success.
    func tableOfContents() async -> ReadResult<[Link]> {
        guard let service = findService(TableOfContentsService.self) else {
            // No dedicated service: fall back on the manifest's own value.
            return .success(manifest.tableOfContents)
        }

        return await service.tableOfContents()
    }
}

// MARK: PublicationServicesBuilder Helpers

public extension PublicationServicesBuilder {
    /// Registers `factory` as the factory for ``TableOfContentsService``.
    ///
    /// - Parameter factory: The factory to register. Passing `nil` removes
    ///   any factory currently registered for ``TableOfContentsService``.
    mutating func setTableOfContentsServiceFactory(_ factory: TableOfContentsServiceFactory?) {
        guard let newFactory = factory else {
            remove(TableOfContentsService.self)
            return
        }

        set(TableOfContentsService.self, newFactory)
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
//
// Copyright 2024 Readium Foundation. All rights reserved.
// Use of this source code is governed by the BSD-style license
// available in the top-level LICENSE file of the project.
//

import Foundation
import ReadiumInternal
import ReadiumShared

/// A ``TableOfContentsService`` for LCPDF packages.
///
/// When the `manifest.json` file declares no table of contents and the
/// reading order holds exactly one resource, the outline is read from that
/// PDF resource instead. In every other case, the table of contents declared
/// in the manifest is returned unchanged.
final class LCPDFTableOfContentsService: TableOfContentsService, PDFPublicationService, Loggable {
    /// Manifest of the publication this service is attached to.
    private let manifest: Manifest
    /// Container used to look up the PDF resource from its URL.
    private let container: Container
    /// Factory used to open the PDF resource.
    var pdfFactory: PDFDocumentFactory

    init(
        manifest: Manifest,
        container: Container,
        pdfFactory: PDFDocumentFactory
    ) {
        self.pdfFactory = pdfFactory
        self.container = container
        self.manifest = manifest
    }

    /// Creates a service factory building an ``LCPDFTableOfContentsService``
    /// from the manifest and container of the given context.
    ///
    /// - Parameter pdfFactory: Factory handed over to every created service.
    static func makeFactory(pdfFactory: PDFDocumentFactory) -> (PublicationServiceContext) -> LCPDFTableOfContentsService? {
        return { context in
            LCPDFTableOfContentsService(
                manifest: context.manifest,
                container: context.container,
                pdfFactory: pdfFactory
            )
        }
    }

    /// Returns the table of contents, going through the memoized wrapper
    /// around `makeTableOfContents()`.
    func tableOfContents() async -> ReadResult<[Link]> {
        await _tableOfContents()
    }

    // NOTE(review): `memoize` comes from ReadiumInternal; it presumably
    // caches the first result so the PDF is opened only once - confirm.
    private lazy var _tableOfContents = memoize(makeTableOfContents)

    /// Computes the table of contents.
    ///
    /// - Returns: The manifest's table of contents when it is not empty, when
    ///   the reading order does not hold exactly one resource, or when that
    ///   resource is not found in the container. Otherwise, the links built
    ///   from the PDF outline, or a `.decoding` failure if the PDF could not
    ///   be opened or its outline could not be read.
    private func makeTableOfContents() async -> ReadResult<[Link]> {
        let declaredTableOfContents = manifest.tableOfContents

        guard
            declaredTableOfContents.isEmpty,
            manifest.readingOrder.count == 1,
            let url = manifest.readingOrder.first?.url(),
            let resource = container[url]
        else {
            return .success(declaredTableOfContents)
        }

        do {
            let document = try await pdfFactory.open(resource: resource, at: url, password: nil)
            let outline = try await document.tableOfContents()
            return .success(outline.linksWithDocumentHREF(url))
        } catch {
            return .failure(.decoding(error))
        }
    }
}
1 change: 1 addition & 0 deletions Sources/Streamer/Parser/Readium/ReadiumWebPubParser.swift
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,7 @@ public class ReadiumWebPubParser: PublicationParser, Loggable {
$0.setLocatorServiceFactory(AudioLocatorService.makeFactory())

} else if manifest.conforms(to: .pdf), format.conformsTo(.lcp), let pdfFactory = pdfFactory {
$0.setTableOfContentsServiceFactory(LCPDFTableOfContentsService.makeFactory(pdfFactory: pdfFactory))
$0.setPositionsServiceFactory(LCPDFPositionsService.makeFactory(pdfFactory: pdfFactory))
}

Expand Down
3 changes: 3 additions & 0 deletions Support/Carthage/.xcodegen
Original file line number Diff line number Diff line change
Expand Up @@ -13841,6 +13841,8 @@
../../Sources/Shared/Publication/Services/Search
../../Sources/Shared/Publication/Services/Search/SearchService.swift
../../Sources/Shared/Publication/Services/Search/StringSearchService.swift
../../Sources/Shared/Publication/Services/Table Of Contents
../../Sources/Shared/Publication/Services/Table Of Contents/TableOfContentsService.swift
../../Sources/Shared/Publication/Subject.swift
../../Sources/Shared/Publication/User Settings
../../Sources/Shared/Publication/User Settings/UserProperties.swift
Expand Down Expand Up @@ -14011,6 +14013,7 @@
../../Sources/Streamer/Parser/PDF/PDFParser.swift
../../Sources/Streamer/Parser/PDF/Services
../../Sources/Streamer/Parser/PDF/Services/LCPDFPositionsService.swift
../../Sources/Streamer/Parser/PDF/Services/LCPDFTableOfContentsService.swift
../../Sources/Streamer/Parser/PDF/Services/PDFPositionsService.swift
../../Sources/Streamer/Parser/PublicationParser.swift
../../Sources/Streamer/Parser/Readium
Expand Down
16 changes: 16 additions & 0 deletions Support/Carthage/Readium.xcodeproj/project.pbxproj
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
0A6BF62D6FE0C04DA8B8D3CA /* AnyURL.swift in Sources */ = {isa = PBXBuildFile; fileRef = AE350D88BC82408491D8B516 /* AnyURL.swift */; };
0AF2BBF12939AFBF6173E333 /* Observable.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5BC6AE42A31D77B548CB0BB4 /* Observable.swift */; };
0B9AC6EF44DA518E9F37FB49 /* ContentService.swift in Sources */ = {isa = PBXBuildFile; fileRef = 18E809378D79D09192A0AAE1 /* ContentService.swift */; };
0BFCDAEC82CFF09AFC53A5D0 /* LCPDFTableOfContentsService.swift in Sources */ = {isa = PBXBuildFile; fileRef = 94414130EC3731CD9920F27D /* LCPDFTableOfContentsService.swift */; };
0ECE94F27E005FC454EA9D12 /* DecorableNavigator.swift in Sources */ = {isa = PBXBuildFile; fileRef = 626CFFF131E0E840B76428F1 /* DecorableNavigator.swift */; };
0F1AAB56A6ADEDDE2AD7E41E /* Content.swift in Sources */ = {isa = PBXBuildFile; fileRef = 1039900AC78465AD989D7464 /* Content.swift */; };
1004CE1C72C85CC3702C09C0 /* Asset.swift in Sources */ = {isa = PBXBuildFile; fileRef = AC811653B33761089E270C4A /* Asset.swift */; };
Expand Down Expand Up @@ -314,6 +315,7 @@
C9DAA3C193FA36B843113EC6 /* HTTPContainer.swift in Sources */ = {isa = PBXBuildFile; fileRef = C4BFD453E8BF6FA24F340EE0 /* HTTPContainer.swift */; };
C9FBD23E459FB395377E149E /* ReadiumWebPubParser.swift in Sources */ = {isa = PBXBuildFile; fileRef = E6E97CCA91F910315C260373 /* ReadiumWebPubParser.swift */; };
CAEBD6BA3F2F88E8752CB987 /* KeyEvent.swift in Sources */ = {isa = PBXBuildFile; fileRef = 422C1DA91ED351C9ABA139DF /* KeyEvent.swift */; };
CB95F5EAA4D0DB5177FED4F7 /* TableOfContentsService.swift in Sources */ = {isa = PBXBuildFile; fileRef = C5BCDE636CED5B883CC5F2B4 /* TableOfContentsService.swift */; };
CC85122A71D3145940827338 /* Comparable.swift in Sources */ = {isa = PBXBuildFile; fileRef = F90C4D94134D9F741D38D8AA /* Comparable.swift */; };
CCAF8FB4DBD81448C99D589A /* Language.swift in Sources */ = {isa = PBXBuildFile; fileRef = 7BB152578CBA091A41A51B25 /* Language.swift */; };
CD0243B5EB8B408E34786214 /* ReadiumInternal.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 42FD63C2720614E558522675 /* ReadiumInternal.framework */; };
Expand Down Expand Up @@ -688,6 +690,7 @@
925CDE3176715EBEBF40B21F /* GeneratedCoverService.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = GeneratedCoverService.swift; sourceTree = "<group>"; };
93BF3947EBA8736BF20F36FB /* WebView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = WebView.swift; sourceTree = "<group>"; };
9407E818636BEA4550E57F57 /* ReadiumNavigator.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; includeInIndex = 0; path = ReadiumNavigator.framework; sourceTree = BUILT_PRODUCTS_DIR; };
94414130EC3731CD9920F27D /* LCPDFTableOfContentsService.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = LCPDFTableOfContentsService.swift; sourceTree = "<group>"; };
9627A9AFF7C08010248E1700 /* Publication+Deprecated.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = "Publication+Deprecated.swift"; sourceTree = "<group>"; };
968B4EB4AD29DFA430C8A563 /* LicenseDocument.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = LicenseDocument.swift; sourceTree = "<group>"; };
97BC822B36D72EF548162129 /* ReadiumShared.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; includeInIndex = 0; path = ReadiumShared.framework; sourceTree = BUILT_PRODUCTS_DIR; };
Expand Down Expand Up @@ -764,6 +767,7 @@
C51C74A5990A3BA93B3DC587 /* ZIPArchiveOpener.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ZIPArchiveOpener.swift; sourceTree = "<group>"; };
C57EC6B0ADED2B0D395F2AEA /* ContentProtection.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ContentProtection.swift; sourceTree = "<group>"; };
C59803AADFCF32C93C9D9D29 /* ExplodedArchive.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ExplodedArchive.swift; sourceTree = "<group>"; };
C5BCDE636CED5B883CC5F2B4 /* TableOfContentsService.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = TableOfContentsService.swift; sourceTree = "<group>"; };
C5E7CEDF6EA681FE8119791B /* Feed.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = Feed.swift; sourceTree = "<group>"; };
C96FD34093B3C3E83827B70C /* FileSystemError.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = FileSystemError.swift; sourceTree = "<group>"; };
CAD79372361D085CA0500CF4 /* Properties+OPDS.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = "Properties+OPDS.swift"; sourceTree = "<group>"; };
Expand Down Expand Up @@ -1085,6 +1089,7 @@
isa = PBXGroup;
children = (
47B9196192A22B8AB80E6B2F /* LCPDFPositionsService.swift */,
94414130EC3731CD9920F27D /* LCPDFTableOfContentsService.swift */,
D0C2A38D366CE8560BCBAC8B /* PDFPositionsService.swift */,
);
path = Services;
Expand Down Expand Up @@ -1247,6 +1252,14 @@
path = Toolkit;
sourceTree = "<group>";
};
402E67E11F98508D372AC2BA /* Table Of Contents */ = {
isa = PBXGroup;
children = (
C5BCDE636CED5B883CC5F2B4 /* TableOfContentsService.swift */,
);
path = "Table Of Contents";
sourceTree = "<group>";
};
40D18A37080F5B1D114CE2E1 /* Extensions */ = {
isa = PBXGroup;
children = (
Expand Down Expand Up @@ -1288,6 +1301,7 @@
3118D7E15D685347720A0651 /* Locator */,
5BC52D8F4F854FDA56D10A8E /* Positions */,
F818D082B369A3D4BE617D46 /* Search */,
402E67E11F98508D372AC2BA /* Table Of Contents */,
);
path = Services;
sourceTree = "<group>";
Expand Down Expand Up @@ -2391,6 +2405,7 @@
914DEDFE5594761D3F180491 /* EPUBPositionsService.swift in Sources */,
EF15E9163EBC82672B22F6E0 /* ImageParser.swift in Sources */,
FCFFE5305127D9FC72549EAA /* LCPDFPositionsService.swift in Sources */,
0BFCDAEC82CFF09AFC53A5D0 /* LCPDFTableOfContentsService.swift in Sources */,
C1A94B2A9C446CB03650DC47 /* NCXParser.swift in Sources */,
01AD628D6DE82E1C1C4C281D /* NavigationDocumentParser.swift in Sources */,
2B8BC06B6B366E67C716DDA1 /* OPFMeta.swift in Sources */,
Expand Down Expand Up @@ -2667,6 +2682,7 @@
4DB4C10CB9AB5D38C56C1609 /* StringEncoding.swift in Sources */,
E6AC10CCF9711168BE2BE85C /* StringSearchService.swift in Sources */,
3E9F244ACDA938D330B9EAEA /* Subject.swift in Sources */,
CB95F5EAA4D0DB5177FED4F7 /* TableOfContentsService.swift in Sources */,
96048047B4205636ABB66DC9 /* TextTokenizer.swift in Sources */,
40A44414CC911BF49BB5EE60 /* Tokenizer.swift in Sources */,
035807359AFA2EE23E00F8AB /* TransformingContainer.swift in Sources */,
Expand Down
31 changes: 19 additions & 12 deletions TestApp/Sources/Reader/Common/Outline/OutlineTableView.swift
Original file line number Diff line number Diff line change
Expand Up @@ -25,28 +25,28 @@ struct OutlineTableView: View {
@State private var selectedSection: OutlineSection = .tableOfContents

// Outlines (list of links) to display for each section.
private var outlines: [OutlineSection: [(level: Int, link: ReadiumShared.Link)]] = [:]
@State private var outlines: [OutlineSection: [(level: Int, link: ReadiumShared.Link)]] = [:]

init(publication: Publication, bookId: Book.Id, bookmarkRepository: BookmarkRepository, highlightRepository: HighlightRepository) {
self.publication = publication
bookmarksModel = BookmarksViewModel(bookId: bookId, repository: bookmarkRepository)
highlightsModel = HighlightsViewModel(bookId: bookId, repository: highlightRepository)

func flatten(_ links: [ReadiumShared.Link], level: Int = 0) -> [(level: Int, link: ReadiumShared.Link)] {
links.flatMap { [(level, $0)] + flatten($0.children, level: level + 1) }
}

outlines = [
.tableOfContents: flatten(
!publication.tableOfContents.isEmpty
? publication.tableOfContents
: publication.readingOrder
),
.tableOfContents: [],
.landmarks: flatten(publication.landmarks),
.pageList: flatten(publication.pageList),
]
}

/// Loads the publication's table of contents and stores its flattened
/// form in `outlines`. The reading order is used instead when the table
/// of contents is empty. Nothing is changed if loading fails.
private func loadTableOfContents() async {
    if let links = try? await publication.tableOfContents().get() {
        let source = links.isEmpty ? publication.readingOrder : links
        outlines[.tableOfContents] = flatten(source)
    }
}

var body: some View {
VStack {
OutlineTablePicker(selectedSection: $selectedSection)
Expand All @@ -67,8 +67,6 @@ struct OutlineTableView: View {
}
}
}
} else {
preconditionFailure("Outline \(selectedSection) can't be nil!")
}

case .bookmarks:
Expand All @@ -93,6 +91,11 @@ struct OutlineTableView: View {
}
}
.frame(maxWidth: .infinity, maxHeight: .infinity, alignment: .top)
.onAppear {
Task {
await loadTableOfContents()
}
}
}

private let locatorSubject = PassthroughSubject<Locator, Never>()
Expand Down Expand Up @@ -123,3 +126,7 @@ enum OutlineTableViewConstants {
static let tabLandmarks = NSLocalizedString("reader_outline_tab_landmarks", comment: "Outline landmarks tab name")
static let tabHighlights = NSLocalizedString("reader_outline_tab_highlights", comment: "Outline highlights tab name")
}

/// Flattens a tree of links into a list, depth first, pairing each link with
/// its nesting level. Children are listed right after their parent, one
/// level deeper.
private func flatten(_ links: [ReadiumShared.Link], level: Int = 0) -> [(level: Int, link: ReadiumShared.Link)] {
    var rows: [(level: Int, link: ReadiumShared.Link)] = []
    for link in links {
        rows.append((level: level, link: link))
        rows += flatten(link.children, level: level + 1)
    }
    return rows
}

0 comments on commit 60aa620

Please sign in to comment.