PlatSim_Genova/TestEnvironment/env/site-packages/fpdf/linearization.py
2026-01-30 16:38:33 +01:00

291 lines
12 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# pylint: disable=fixme,protected-access
"""
This module is in a work-in-progress state.
Hint tables / hint streams have not been implemented yet,
and there are a few "TODO" comments remaining.
cf. https://github.com/py-pdf/fpdf2/issues/62
"""
from .output import ContentWithoutID, OutputProducer, PDFHeader
from .sign import sign_content
from .syntax import PDFArray, PDFContentStream, PDFObject
from .syntax import iobj_ref as pdf_ref
from .util import buffer_subst
try:
    from endesive import signer
except ImportError:
    # endesive could not be imported: keep the name bound so the module
    # still loads. `signer` is only handed to sign_content() when the
    # document has a signing key configured.
    signer = None
# Placeholder strings stored in the linearization parameter dictionary.
# Once the whole document has been written to the buffer,
# LinearizedOutputProducer.bufferize() swaps each of them (via buffer_subst)
# for the real value, formatted to the same number of characters
# as the placeholder it replaces.

# 27 characters, replaced by "[<offset: 12d> <length: 12d>]":
HINT_STREAM_OFFSET_LENGTH_PLACEHOLDER = "0%1%2%3%4%5%6%7%8%9%a%b%c%d"
# The three placeholders below are 12 characters each, replaced by a "12d" integer:
FIRST_PAGE_END_OFFSET_PLACEHOLDER = "1%2%3%4%5%6%"
MAIN_XREF_1ST_ENTRY_OFFSET_PLACEHOLDER = "2%3%4%5%6%7%"
FILE_LENGTH_PLACEHOLDER = "3%4%5%6%7%8%"
class PDFLinearization(PDFObject):
    """
    Linearization parameter dictionary (part 2 of a linearized file).

    Most entries depend on byte offsets that are unknown when this object
    is created, so they start out as placeholder strings that are
    substituted in the output buffer after serialization.
    """

    def __init__(self, pages_count):
        """
        Args:
            pages_count: number of pages in the document, stored as `n`.
        """
        super().__init__()
        # Version:
        self.linearized = "1"
        self.n = pages_count
        # Primary hint stream offset and length (part 5):
        self.h = HINT_STREAM_OFFSET_LENGTH_PLACEHOLDER
        # Object number of the first page's page object (part 6),
        # set by the output producer once object IDs are final:
        self.o = None
        # Offset of the end of the first page:
        self.e = FIRST_PAGE_END_OFFSET_PLACEHOLDER
        # Offset of the first entry in the main cross-reference table (part 11):
        self.t = MAIN_XREF_1ST_ENTRY_OFFSET_PLACEHOLDER
        # Length of the entire file in bytes:
        self.l = FILE_LENGTH_PLACEHOLDER
class PDFXrefAndTrailer(ContentWithoutID):
    """
    A cross-reference table together with its trailer dictionary,
    the "startxref" value and the "%%EOF" marker.

    A linearized document uses two linked instances: the first-page xref
    (its `main_xref` attribute is set) and the main xref (its `first_xref`
    attribute is set). An instance with neither link set serializes
    a single, standalone table.
    """

    # 12-character marker written after "/Prev" by the first-page xref;
    # the main xref replaces it in the buffer with its own start offset.
    PREV_MAIN_XREF_START_PLACEHOLDER = "0%1*2+3-2/1^"

    def __init__(self, output_builder):
        """
        Args:
            output_builder: the output producer; its `obj_id`, `buffer`,
                `offsets` and `fpdf` attributes are read by this class.
        """
        self.output_builder = output_builder
        self.count = output_builder.obj_id + 1
        self.start_obj_id = 1
        # Must be set before the call to serialize():
        self.catalog_obj = None
        self.info_obj = None
        self.first_xref = None
        self.main_xref = None
        # Computed at serialize() time based on output_builder.buffer size:
        self.startxref = None

    @property
    def is_first_xref(self):
        """True when this instance has been linked to a main xref."""
        return bool(self.main_xref)

    @property
    def is_main_xref(self):
        """True when this instance has been linked to a first-page xref."""
        return bool(self.first_xref)

    def serialize(self, _security_handler=None):
        """
        Render the table and trailer as a newline-joined string.

        Side effects: records the current buffer length in `self.startxref`
        and, for the main xref, substitutes the "/Prev" placeholder
        in the builder's buffer.
        """
        producer = self.output_builder
        self.startxref = str(len(producer.buffer))
        if self.is_main_xref:
            # The "/Prev" entry of the first-page trailer can now be resolved:
            producer.buffer = buffer_subst(
                producer.buffer,
                self.PREV_MAIN_XREF_START_PLACEHOLDER,
                self.startxref.rjust(12, " "),
            )

        # Subsection header: a table whose objects start at ID 1 is announced
        # as starting at 0.
        first_listed_id = 0 if self.start_obj_id == 1 else self.start_obj_id
        lines = ["xref", f"{first_listed_id} {self.count}"]
        if not self.is_first_xref:
            lines.append("0000000000 65535 f ")
        assert (
            len(producer.offsets) > 1
        ), "TODO: how to know the offsets in the 1st xref at this stage?"
        end_obj_id = self.start_obj_id + self.count
        lines.extend(
            f"{producer.offsets[obj_id]:010} 00000 n "
            for obj_id in range(self.start_obj_id, end_obj_id)
        )

        lines += ["trailer", "<<"]
        if self.is_main_xref:
            # The main trailer only carries /Size:
            lines.append(f"/Size {self.count - self.first_xref.count}")
        else:
            if self.is_first_xref:
                lines.append(f"/Size {self.main_xref.count}")
                lines.append(f"/Prev {self.PREV_MAIN_XREF_START_PLACEHOLDER}")
            else:
                lines.append(f"/Size {self.count}")
            lines.append(f"/Root {pdf_ref(self.catalog_obj.id)}")
            lines.append(f"/Info {pdf_ref(self.info_obj.id)}")
            document = producer.fpdf
            doc_id = document.file_id()
            if doc_id == -1:
                # -1 requests an ID derived from the buffer content:
                doc_id = document._default_file_id(producer.buffer)
            if doc_id:
                lines.append(f"/ID [{doc_id}]")
        lines.append(">>")

        # The first-page role takes precedence over the main role:
        if self.is_first_xref:
            xref_pointer = "0"
        elif self.is_main_xref:
            xref_pointer = self.first_xref.startxref
        else:
            xref_pointer = self.startxref
        lines += ["startxref", xref_pointer, "%%EOF"]
        return "\n".join(lines)
class PDFHintStream(PDFContentStream):
    """
    Primary hint stream (part 5 of a linearized file).

    Every hint-table entry is initialised to None:
    none of the hint tables is filled in by this module yet.
    """

    def __init__(self, contents, compress=False):
        """
        Args:
            contents: stream contents, forwarded to PDFContentStream.
            compress: forwarded to PDFContentStream.
        """
        super().__init__(contents=contents, compress=compress)
        # (Required) Shared object hint table
        self.s = None
        # (Present only if thumbnail images exist) Thumbnail hint table
        self.t = None
        # (Present only if a document outline exists) Outline hint table
        self.o = None
        # (Present only if article threads exist) Thread information hint table
        self.a = None
        # (Present only if named destinations exist) Named destination hint table
        self.e = None
        # (Present only if an interactive form dictionary exists)
        # Interactive form hint table
        self.v = None
        # (Present only if a document information dictionary exists)
        # Information dictionary hint table
        self.i = None
        # (Present only if a logical structure hierarchy exists; PDF 1.3)
        # Logical structure hint table
        self.c = None
        # (PDF 1.3) Page label hint table
        self.l = None
        # (Present only if a renditions name tree exists; PDF 1.5)
        # Renditions name tree hint table
        self.r = None
        # (Present only if embedded file streams exist; PDF 1.5)
        # Embedded file stream hint table
        self.b = None
class LinearizedOutputProducer(OutputProducer):
    """
    Output producer that arranges the PDF objects in the order
    of a linearized file.

    Work in progress: the hint stream is emitted with empty contents
    and several "TODO" markers remain.
    """

    def bufferize(self):
        """
        Build the whole document into `self.buffer` and return it.

        The method works in three steps:
          1. Setup: create every PDF object, in linearized order,
             and give each one a numeric ID.
          2. Plumbing: inject the cross-references between objects.
          3. Serializing: append every object to the buffer, recording
             its offset, then substitute the linearization placeholders.

        If `fpdf._sign_key` is set, the buffer is finally passed
        through `sign_content()`.

        Returns:
            The final content of `self.buffer`.
        """
        fpdf = self.fpdf

        # 1. Setup - Insert all PDF objects
        #    (in the order required to build a linearized PDF),
        #    and assign unique consecutive numeric IDs to all of them

        # Part 1: Header
        self.pdf_objs.append(PDFHeader(fpdf.pdf_version))
        # Part 2: Linearization parameter dictionary
        linearization_obj = PDFLinearization(fpdf.pages_count)
        self._add_pdf_obj(linearization_obj)
        # Part 3: First-page cross-reference table and trailer
        first_xref = PDFXrefAndTrailer(self)
        self.pdf_objs.append(first_xref)
        # Part 4: Document catalogue and other required document-level objects
        catalog_obj = self._add_catalog()
        # Part 5: Primary hint stream (may precede or follow part 6)
        hint_stream_obj = PDFHintStream("")  # TODO
        # Appended directly (not through _add_pdf_obj):
        # its ID is assigned explicitly further down.
        self.pdf_objs.append(hint_stream_obj)
        # Part 6: First-page section (may precede or follow part 5)
        page_objs = self._add_pages(slice(0, 1))
        # The following objects shall be contained in the first-page section:
        # + This page object shall explicitly specify all required attributes, e.g. Resources, MediaBox
        # + The entire outline hierarchy, if the PageMode entry in the catalogue is UseOutlines
        # + All objects that the page object refers to [including] Contents, Resources, Annots
        # TODO
        first_xref.count = self.obj_id + 1
        # Snapshot of the objects inserted so far: they get new IDs below,
        # after all the remaining objects have been numbered.
        first_xref_pdf_objs = list(self.pdf_objs)
        # Restart the numbering for the objects of the main xref table:
        self.obj_id = 0
        # Part 7: Remaining pages
        page_objs.extend(self._add_pages(slice(1, None)))
        # Part 8: Shared objects for all pages except the first
        # = resources, that are referenced from more than one page but [not] from the first page
        pages_root_obj = self._add_pages_root()
        sig_annotation_obj = self._add_annotations_as_objects()
        font_objs_per_index = self._add_fonts()
        img_objs_per_index = self._add_images()
        gfxstate_objs_per_name = self._add_gfxstates()
        resources_dict_obj = self._add_resources_dict(
            font_objs_per_index, img_objs_per_index, gfxstate_objs_per_name
        )
        # Part 9: Objects not associated with pages, if any
        for embedded_file in fpdf.embedded_files:
            self._add_pdf_obj(embedded_file, "embedded_files")
        struct_tree_root_obj = self._add_structure_tree()
        outline_dict_obj, outline_items = self._add_document_outline()
        xmp_metadata_obj = self._add_xmp_metadata()
        info_obj = self._add_info()
        # Part 11: Main cross-reference table and trailer
        main_xref = PDFXrefAndTrailer(self)
        self.pdf_objs.append(main_xref)

        # Re-assigning IDs of all PDF objects in the 1st xref table:
        first_xref.start_obj_id = self.obj_id + 1
        for pdf_obj in first_xref_pdf_objs:
            if (
                not isinstance(pdf_obj, ContentWithoutID)
                and pdf_obj is not hint_stream_obj
            ):
                self.obj_id += 1
                # Write through `.id`: this is the attribute read everywhere
                # else in this method (offsets, object references).
                # The previous `.obj_id` assignment left the original,
                # colliding IDs in place.
                pdf_obj.id = self.obj_id
        # The hint streams shall be assigned the last object numbers in the file:
        self.obj_id += 1
        hint_stream_obj.id = self.obj_id

        # 2. Plumbing - Inject all PDF object references required:
        linearization_obj.o = page_objs[0].id
        pages_root_obj.kids = PDFArray(page_objs)
        self._finalize_catalog(
            catalog_obj,
            pages_root_obj=pages_root_obj,
            first_page_obj=page_objs[0],
            sig_annotation_obj=sig_annotation_obj,
            xmp_metadata_obj=xmp_metadata_obj,
            struct_tree_root_obj=struct_tree_root_obj,
            outline_dict_obj=outline_dict_obj,
        )
        dests = []
        for page_obj in page_objs:
            page_obj.parent = pages_root_obj
            page_obj.resources = resources_dict_obj
            for annot in page_obj.annots:
                if annot.dest:
                    dests.append(annot.dest)
                if annot.a and hasattr(annot.a, "dest"):
                    dests.append(annot.a.dest)
            if not page_obj.annots:
                # Avoid serializing an empty PDFArray:
                page_obj.annots = None
        for outline_item in outline_items:
            dests.append(outline_item.dest)
        # Assigning the .page_ref property of all Destination objects:
        for dest in dests:
            dest.page_ref = pdf_ref(page_objs[dest.page_number - 1].id)
        for struct_elem in fpdf.struct_builder.doc_struct_elem.k:
            struct_elem.pg = page_objs[struct_elem.page_number() - 1]
        # Link both xref tables to each other: this is what makes
        # their is_main_xref / is_first_xref properties true.
        main_xref.first_xref = first_xref
        first_xref.main_xref = main_xref
        for xref in [main_xref, first_xref]:
            xref.catalog_obj = catalog_obj
            xref.info_obj = info_obj

        # 3. Serializing - Append all PDF objects to the buffer:
        assert (
            not self.buffer
        ), f"Nothing should have been appended to the .buffer at this stage: {self.buffer}"
        assert (
            not self.offsets
        ), f"No offset should have been set at this stage: {len(self.offsets)}"
        for pdf_obj in self.pdf_objs:
            if isinstance(pdf_obj, ContentWithoutID):
                # top header, xref table & trailer:
                trace_label = None
            else:
                # Record where this object starts, for the xref tables:
                self.offsets[pdf_obj.id] = len(self.buffer)
                trace_label = self.trace_labels_per_obj_id.get(pdf_obj.id)
            if trace_label:
                with self._trace_size(trace_label):
                    self._out(pdf_obj.serialize())
            else:
                self._out(pdf_obj.serialize())
        self._log_final_sections_sizes()

        # Now that the file size & all the offsets are known,
        # substitute the values of the Linearization properties:
        hs1_offset = self.offsets[hint_stream_obj.id]
        hs1_length = len(hint_stream_obj.serialize())
        self.buffer = buffer_subst(
            self.buffer,
            HINT_STREAM_OFFSET_LENGTH_PLACEHOLDER,
            f"[{hs1_offset: 12d} {hs1_length: 12d}]",
        )
        self.buffer = buffer_subst(
            self.buffer,
            FIRST_PAGE_END_OFFSET_PLACEHOLDER,
            # The end of the first page is taken to be the offset
            # of the object whose ID follows the first page's ID:
            f"{self.offsets[page_objs[0].id + 1]: 12d}",
        )
        self.buffer = buffer_subst(
            self.buffer,
            MAIN_XREF_1ST_ENTRY_OFFSET_PLACEHOLDER,
            f"{self.offsets[main_xref.start_obj_id]: 12d}",
        )
        self.buffer = buffer_subst(
            self.buffer,
            FILE_LENGTH_PLACEHOLDER,
            f"{len(self.buffer): 12d}",
        )

        if fpdf._sign_key:
            # NOTE(review): `signer` is None when endesive could not be
            # imported - confirm that sign_content() handles that case.
            self.buffer = sign_content(
                signer,
                self.buffer,
                fpdf._sign_key,
                fpdf._sign_cert,
                fpdf._sign_extra_certs,
                fpdf._sign_hashalgo,
                fpdf._sign_time,
            )
        return self.buffer