Source code for pdfnaut.cos.objects.stream
from dataclasses import dataclass, field
from typing import Any, cast
from typing_extensions import Self
from ...exceptions import PdfFilterError
from ...filters import SUPPORTED_FILTERS
from .base import PdfName, PdfNull, PdfObject
from .containers import PdfArray, PdfDictionary
[docs]
@dataclass
class PdfStream:
    """A sequence of bytes that may be of unlimited length. Objects with a large
    amount of data like images or fonts are usually represented by streams
    (see ISO 32000-2:2020 § 7.3.8 "Stream objects")."""

    details: PdfDictionary[str, PdfObject]
    """The stream extent dictionary as described in ISO 32000-2:2020 § 7.3.8.2 "Stream extent"."""

    raw: bytes = field(repr=False)
    """The raw (encoded) data in the stream."""

    _crypt_params: PdfDictionary[str, Any] = field(default_factory=PdfDictionary, repr=False)
    """Parameters specific to the Crypt filter."""

    @staticmethod
    def _filter_chain(
        details: PdfDictionary, crypt_params: PdfDictionary
    ) -> list[tuple[PdfName, PdfDictionary]]:
        """Returns the ``(filter, parameters)`` pairs declared by the ``Filter`` and
        ``DecodeParms`` entries of ``details``, in the order they are listed (which
        is the order used for decoding).

        A single filter name or a single parameter dictionary is treated as a
        one-element array. Filters without a matching ``DecodeParms`` entry (absent,
        null, or the array is too short) receive an empty dictionary. If the filter
        is ``Crypt`` and ``crypt_params`` has a ``Handler``, ``crypt_params`` is
        merged into that filter's parameters.

        Raises :class:`.pdfnaut.exceptions.PdfFilterError` if a filter used is unsupported.
        """
        filters = cast("PdfName | PdfArray[PdfName] | None", details.get("Filter"))
        if filters is None:
            return []
        if isinstance(filters, PdfName):
            filters = PdfArray([filters])

        decode_params = cast(
            "PdfDictionary | PdfArray[PdfDictionary] | None", details.get("DecodeParms")
        )
        if not isinstance(decode_params, PdfArray):
            decode_params = PdfArray([decode_params])

        filter_list = list(filters)
        param_list = list(decode_params)
        # Pad the parameters so that zip() never drops a filter. Without this, a
        # missing or short DecodeParms silently truncated the filter chain.
        param_list.extend([None] * (len(filter_list) - len(param_list)))

        chain: list[tuple[PdfName, PdfDictionary]] = []
        for filt, filt_params in zip(filter_list, param_list):
            if filt.value not in SUPPORTED_FILTERS:
                raise PdfFilterError(f"{filt.value.decode()}: Filter is unsupported.")

            if isinstance(filt_params, PdfNull) or filt_params is None:
                filt_params = PdfDictionary()

            if filt.value == b"Crypt" and crypt_params.get("Handler"):
                # NOTE: as before, the merge is applied to the parameter dictionary
                # itself; when it comes from ``details`` that entry is updated too.
                filt_params.update(crypt_params)

            chain.append((filt, filt_params))

        return chain

    @classmethod
    def _encode(cls, raw: bytes, details: PdfDictionary, crypt_params: PdfDictionary) -> bytes:
        """Encodes ``raw`` with the filters declared in ``details`` and returns the result.

        Returns ``raw`` unchanged when no filter is declared.
        """
        # Filters are listed in decoding order, so encoding applies them last to first.
        for filt, filt_params in reversed(cls._filter_chain(details, crypt_params)):
            raw = SUPPORTED_FILTERS[filt.value]().encode(raw, params=filt_params)
        return raw

    def decode(self) -> bytes:
        """Returns the decoded contents of the stream. If no filter is defined,
        it returns the original contents.

        When several filters are defined, they are applied in order and each one
        receives the output of the previous one.

        Raises :class:`.pdfnaut.exceptions.PdfFilterError` if a filter used is unsupported.
        """
        output = self.raw
        for filt, filt_params in self._filter_chain(self.details, self._crypt_params):
            # Feed the running output (not ``self.raw``) so that filters cascade.
            output = SUPPORTED_FILTERS[filt.value]().decode(output, params=filt_params)
        return output

    @classmethod
    def create(
        cls,
        raw: bytes,
        details: PdfDictionary | None = None,
        crypt_params: PdfDictionary | None = None,
    ) -> Self:
        """Creates a stream from unencoded data ``raw`` applying the filter(s) specified in
        ``details``. The length of the encoded output will automatically be written
        to the ``Length`` entry of ``details``.

        If ``details`` is omitted, an empty dictionary is used. ``crypt_params`` are
        only stored on the new stream when ``details`` declares a filter.

        Raises :class:`.pdfnaut.exceptions.PdfFilterError` if a filter used is unsupported.
        """
        if details is None:
            details = PdfDictionary()

        if details.get("Filter") is None:
            details["Length"] = len(raw)
            return cls(details, raw)

        if crypt_params is None:
            crypt_params = PdfDictionary()

        encoded = cls._encode(raw, details, crypt_params)
        details["Length"] = len(encoded)
        return cls(details, encoded, crypt_params)

    def modify(self, raw: bytes) -> None:
        """Modifies this stream in place by encoding the ``raw`` data according to
        the parameters specified in the stream's extent. The ``Length`` entry is
        updated to match the encoded data.

        Raises :class:`.pdfnaut.exceptions.PdfFilterError` if a filter used is
        unsupported; the stream is left untouched in that case.
        """
        # Encode first so a failure leaves ``self.raw`` and ``Length`` unchanged.
        self.raw = self._encode(raw, self.details, self._crypt_params)
        self.details["Length"] = len(self.raw)

    def __hash__(self) -> int:
        return hash((self.__class__, hash(self.details), self.raw, self._crypt_params))