Source code for pdfnaut.objects.page

from typing import Literal, cast

from typing_extensions import Self

from ..common.dictmodels import dictmodel, field
from ..cos.helpers import is_null_like
from ..cos.objects.base import PdfName, PdfReference
from ..cos.objects.containers import PdfArray, PdfDictionary
from ..cos.objects.stream import PdfStream
from ..cos.parser import PdfParser
from ..cos.tokenizer import ContentStreamTokenizer
from .annotations import AnnotationList

TabOrder = Literal["R", "C", "S", "A", "W"]


[docs] @dictmodel(init=False) class Page(PdfDictionary): """A page in a PDF document (see ISO 32000-2:2020 ยง 7.7.3.3 "Page objects"). Arguments: size (tuple[float, float]): The width and height of the physical medium in which the page should be printed or displayed. Values shall be provided in multiples of 1/72 of an inch (points). pdf (PdfParser, optional): The PDF document that this page belongs to. In typical usage, this value need not be specified. pdfnaut will take care of populating it. indirect_ref (PdfReference, optional): The indirect reference that this page object is referred to by. As with ``pdf``, this value need not be specified in typical usage. """ mediabox: list[float] = field("MediaBox", encoder=PdfArray, decoder=list) """A rectangle defining the boundaries of the physical medium in which the page should be printed or displayed.""" cropbox: list[float] | None = field("CropBox", default=None, encoder=PdfArray, decoder=list) """A rectangle defining the visible region of the page. If none, the cropbox is the same as the mediabox. """ bleedbox: list[float] | None = field("BleedBox", default=None, encoder=PdfArray, decoder=list) """A rectangle defining the region to which the contents of the page shall be clipped when output in a production environment. If none, the bleedbox is the same as the cropbox. """ trimbox: list[float] | None = field("TrimBox", default=None, encoder=PdfArray, decoder=list) """A rectangle defining the intended dimensions of the finished page after trimming. If none, the trimbox is the same as the cropbox. """ artbox: list[float] | None = field("ArtBox", default=None, encoder=PdfArray, decoder=list) """A rectangle defining the extent of the page's meaningful content as intended by the page's creator. If none, the artbox is the same as the cropbox. """ resources: PdfDictionary | None = None """Resources required by the page contents. If the page requires no resources, this should return an empty resource dictionary. If the page inherits its resources from an ancestor, this should return None. """ tab_order: TabOrder | None = field("Tabs", default=None) """(optional; PDF 1.5) The tab order to be used for annotations on the page. If present, it shall be one of the following values: - R: Row order - C: Column order - S: Logical structure order - A: Annotations array order (PDF 2.0) - W: Widget order (PDF 2.0) """ user_unit: float = 1 """The size of a user space unit, in multiples of 1/72 of an inch (by default, 1).""" rotation: int = field("Rotate", default=0) """The number of degrees by which the page shall be visually rotated clockwise. The value is a multiple of 90 (by default, 0).""" metadata: PdfStream | None = None """A metadata stream, generally written in XMP, containing information about this page.""" @classmethod def from_dict( cls, mapping: PdfDictionary, pdf: PdfParser | None = None, indirect_ref: PdfReference | None = None, ) -> Self: dictionary = cls(size=(0, 0), pdf=pdf, indirect_ref=indirect_ref) dictionary.data = mapping.data return dictionary
[docs] def __init__( self, size: tuple[float, float], *, pdf: PdfParser | None = None, indirect_ref: PdfReference | None = None, ) -> None: super().__init__() self.pdf = pdf self.indirect_ref = indirect_ref self["Type"] = PdfName(b"Page") self["MediaBox"] = PdfArray([0, 0, *size])
def __repr__(self) -> str: return f"<{self.__class__.__name__} mediabox={self.mediabox!r} rotation={self.rotation!r}>" @property def content_stream(self) -> ContentStreamTokenizer | None: """An iterator over the instructions producing the contents of this page.""" contents = self.get("Contents") if is_null_like(contents): return contents = cast("PdfStream | PdfArray[PdfStream]", contents) if isinstance(contents, PdfArray): # when Contents is an array, it shall be concatenated into a single # content stream with at least one whitespace character in between. return ContentStreamTokenizer(b"\n".join(stm.decode() for stm in contents)) return ContentStreamTokenizer(contents.decode()) @property def annotations(self) -> AnnotationList | None: """All annotations associated with this page. If a page does not specify a list of annotations, this field is none.""" annots = self.get("Annots") if is_null_like(annots): return annots = cast(PdfArray, annots) return AnnotationList(annots, pdf=self.pdf)
[docs] def new_annotations(self) -> None: """Creates a new annotation list.""" self["Annots"] = PdfArray()