Source code for pdfnaut.cos.objects.stream

from dataclasses import dataclass, field
from typing import Any, cast

from typing_extensions import Self

from ...exceptions import PdfFilterError
from ...filters import SUPPORTED_FILTERS
from .base import PdfName, PdfNull, PdfObject
from .containers import PdfArray, PdfDictionary


@dataclass
class PdfStream:
    """A sequence of bytes that may be of unlimited length.

    Objects with a large amount of data like images or fonts are usually
    represented by streams (see ISO 32000-2:2020 § 7.3.8 "Stream objects").
    """

    details: PdfDictionary[str, PdfObject]
    """The stream extent dictionary as described in ISO 32000-2:2020 § 7.3.8.2
    "Stream extent"."""

    raw: bytes = field(repr=False)
    """The raw data in the stream."""

    _crypt_params: PdfDictionary[str, Any] = field(default_factory=PdfDictionary, repr=False)
    """Parameters specific to the Crypt filter."""

    @staticmethod
    def _filter_chain(
        details: "PdfDictionary",
    ) -> "list[tuple[PdfName, PdfDictionary | PdfNull | None]] | None":
        """Pair each filter named in ``details`` with its decode parameters.

        Returns ``None`` when ``details`` has no ``Filter`` entry. Otherwise returns
        the ``(filter, params)`` pairs in decoding order (the order in which they
        appear in ``Filter``). A single filter name and a single ``DecodeParms``
        dictionary are each treated as one-element arrays. Filters without a
        matching ``DecodeParms`` entry are paired with ``None`` so that no filter
        is silently dropped.
        """
        filters = cast("PdfName | PdfArray[PdfName] | None", details.get("Filter"))
        if filters is None:
            return None

        if isinstance(filters, PdfName):
            filters = PdfArray([filters])

        params = cast(
            "PdfDictionary | PdfArray[PdfDictionary] | None", details.get("DecodeParms")
        )
        if not isinstance(params, PdfArray):
            params = PdfArray([params])

        filter_list = list(filters)
        param_list = list(params)

        # zip() stops at the shorter input, so pad the parameters: a missing or
        # short DecodeParms must not truncate the filter pipeline.
        param_list.extend([None] * (len(filter_list) - len(param_list)))

        return list(zip(filter_list, param_list))

    @staticmethod
    def _resolve_filter(
        filt: "PdfName",
        params: "PdfDictionary | PdfNull | None",
        crypt_params: "PdfDictionary",
    ) -> "tuple[Any, PdfDictionary]":
        """Return a filter instance for ``filt`` and the parameters to call it with.

        A null or missing ``params`` is replaced by an empty dictionary. For the
        ``Crypt`` filter, ``crypt_params`` is merged into the parameters when it has
        a truthy ``Handler`` entry.

        Raises :class:`.pdfnaut.exceptions.PdfFilterError` if ``filt`` is not in
        ``SUPPORTED_FILTERS``.
        """
        if filt.value not in SUPPORTED_FILTERS:
            raise PdfFilterError(f"{filt.value.decode()}: Filter is unsupported.")

        if isinstance(params, PdfNull) or params is None:
            params = PdfDictionary()

        if filt.value == b"Crypt" and crypt_params.get("Handler"):
            # NOTE(review): when ``params`` came from DecodeParms this updates that
            # dictionary in place (behaviour kept from the previous implementation).
            params.update(crypt_params)

        return SUPPORTED_FILTERS[filt.value](), params

    def decode(self) -> bytes:
        """Returns the decoded contents of the stream. If no filter is defined,
        it returns the original contents.

        When several filters are defined, they are applied in the order listed in
        ``Filter``, each one decoding the output of the previous one.

        Raises :class:`.pdfnaut.exceptions.PdfFilterError` if a filter used is unsupported.
        """
        chain = self._filter_chain(self.details)
        if chain is None:
            return self.raw

        output = self.raw
        for filt, params in chain:
            handler, params = self._resolve_filter(filt, params, self._crypt_params)
            # Feed the running output (not the original raw data) into each stage.
            output = handler.decode(output, params=params)

        return output

    @classmethod
    def create(
        cls,
        raw: bytes,
        details: PdfDictionary | None = None,
        crypt_params: PdfDictionary | None = None,
    ) -> Self:
        """Creates a stream from unencoded data ``raw`` applying the filter(s)
        specified in ``details``. The length of the encoded output will automatically
        be appended to ``details``.

        If ``details`` is omitted, an empty dictionary is used. ``crypt_params`` is
        only stored on the new stream when ``details`` has a ``Filter`` entry.

        Raises :class:`.pdfnaut.exceptions.PdfFilterError` if a filter used is unsupported.
        """
        if details is None:
            details = PdfDictionary()

        chain = cls._filter_chain(details)
        if chain is None:
            details["Length"] = len(raw)
            return cls(details, raw)

        if crypt_params is None:
            crypt_params = PdfDictionary()

        # Filters are applied from last to first
        for filt, params in reversed(chain):
            handler, params = cls._resolve_filter(filt, params, crypt_params)
            raw = handler.encode(raw, params=params)

        details["Length"] = len(raw)
        return cls(details, raw, crypt_params)

    def modify(self, raw: bytes) -> None:
        """Modifies this stream in place by encoding the ``raw`` data according to
        the parameters specified in the stream's extent.

        The stream's ``Length`` entry is updated to the length of the encoded data.

        Raises :class:`.pdfnaut.exceptions.PdfFilterError` if a filter used is
        unsupported; in that case the stream is left unmodified.
        """
        chain = self._filter_chain(self.details)

        if chain is not None:
            # Filters are applied from last to first
            for filt, params in reversed(chain):
                handler, params = self._resolve_filter(filt, params, self._crypt_params)
                raw = handler.encode(raw, params=params)

        self.raw = raw
        self.details["Length"] = len(self.raw)

    def __hash__(self) -> int:
        """Hash the stream from its class, extent dictionary, raw data and Crypt
        parameters."""
        return hash((self.__class__, hash(self.details), self.raw, self._crypt_params))