Source code for pdfnaut.security.standard_handler

from hashlib import md5
from typing import Literal

from ..cos.helpers import into_bytes, is_null_like
from ..cos.objects import PdfDictionary, PdfHexString, PdfName, PdfReference, PdfStream
from ..exceptions import MissingCryptProviderError
from .providers import CRYPT_PROVIDERS, CryptProvider

# Names of the crypt methods that the handler in this module can select for a
# piece of content (see the crypt-method lookups on StandardSecurityHandler).
CryptMethod = Literal["Identity", "ARC4", "AESV2"]
# Kinds of values accepted as ``contents`` when encrypting or decrypting an object.
Encryptable = PdfStream | PdfHexString | bytes

# 32-byte padding string used to bring passwords up to a fixed length.
PASSWORD_PADDING = b"(\xbfN^Nu\x8aAd\x00NV\xff\xfa\x01\x08..\x00\xb6\xd0h>\x80/\x0c\xa9\xfedSiz"


def pad_password(password: bytes) -> bytes:
    """Pads or truncates the input ``password`` to exactly 32 bytes.

    - If ``password`` is longer than 32 bytes, it is truncated to its first 32 bytes.
    - If ``password`` is shorter than 32 bytes, it is padded by appending as many
      leading bytes of :const:`.PASSWORD_PADDING` as needed.

    Arguments:
        password (bytes):
            The password string to normalize. May be empty.

    Returns the 32-byte padded or truncated password.
    """
    # Concatenate first and slice once. Slicing the padding with
    # ``32 - len(password)`` goes negative for passwords of 33 to 63 bytes, which
    # would append most of the padding instead of none of it.
    return (password + PASSWORD_PADDING)[:32]
class StandardSecurityHandler:
    """Password-based security handler described in ISO 32000-2:2020 § 7.6.4
    "Standard security handler".

    The handler works with access permissions and up to 2 passwords: the owner
    password, which has all permissions, and the user password, which should only
    have the permissions specified by the document.
    """

    def __init__(self, encryption: PdfDictionary, ids: list[PdfHexString | bytes]) -> None:
        """Stores the trailer data read by the algorithms of this handler.

        Arguments:
            encryption (PdfDictionary):
                The standard encryption dictionary specified in the document's trailer
                (see ISO 32000-2:2020 § 7.6.4 "Standard encryption dictionary").

            ids (PdfArray[PdfHexString | bytes]):
                The ID array specified in the document's trailer.
        """
        self.ids = ids
        self.encryption = encryption

    @property
    def key_length(self) -> int:
        """The length of the encryption key in bytes.

        Derived from the ``Length`` entry of the encryption dictionary (a bit count);
        when that entry is missing or null-like, 40 bits are assumed.
        """
        bits = self.encryption.get("Length")
        return (40 if is_null_like(bits) else bits) // 8

    @staticmethod
    def _xor_key(key: bytes, counter: int) -> bytearray:
        """Returns a copy of ``key`` with every byte XORed with ``counter``."""
        return bytearray(byte ^ counter for byte in key)

    def _derive_owner_key(self, password: bytes) -> bytes:
        """Performs steps (a) to (d) of Algorithm 3: derives the ARC4 key used for
        the O entry from ``password``."""
        # a), b) Pad or truncate the password and hash it with MD5.
        digest = md5(pad_password(password)).digest()

        # c) Revision 3 or greater: feed the digest back into MD5 another 50 times.
        if self.encryption["R"] >= 3:
            for _ in range(50):
                digest = md5(digest).digest()

        # d) The key is the digest truncated to "key length" bytes.
        return digest[: self.key_length]

    def _revision3_user_hash(self, arc4: type[CryptProvider], key: bytes) -> bytes:
        """Performs steps (b) to (e) of Algorithm 5 with the file encryption ``key``,
        using the ``arc4`` provider for every cipher invocation."""
        # b), c) Hash the padding string followed by the first file identifier.
        hashed = md5(PASSWORD_PADDING + into_bytes(self.ids[0])).digest()

        # d) Encrypt the digest with the unmodified key.
        result = arc4(key).encrypt(hashed)

        # e) 19 more rounds, each with the key XORed with the round counter.
        for counter in range(1, 20):
            result = arc4(self._xor_key(key, counter)).encrypt(result)
        return result

    def compute_encryption_key(self, password: bytes) -> bytes:
        """Computes an encryption key from ``password`` according to
        ISO 32000-2:2020 § 7.6.4.3.2 "Algorithm 2: Computing a file encryption key in
        order to encrypt a document (revision 4 and earlier)".

        Arguments:
            password (bytes):
                The password to derive the key from. It is padded or truncated to
                32 bytes before use.

        Returns the key, truncated to :attr:`.key_length` bytes.
        """
        # a), b) Start an MD5 hash from the padded (or truncated) password.
        hasher = md5(pad_password(password))

        # c) Feed the O entry of the Encrypt dictionary.
        hasher.update(into_bytes(self.encryption["O"]))

        # d) Feed the P entry as a 32-bit unsigned little-endian integer. P may be
        #    negative, hence the reduction modulo 2**32.
        unsigned_perms = self.encryption["P"] % (1 << 32)
        hasher.update(unsigned_perms.to_bytes(4, "little"))

        # e) Feed the first element of the file identifier array.
        hasher.update(into_bytes(self.ids[0]))

        # f) Revision 4 or greater with unencrypted metadata: feed 4 bytes of 0xFF.
        #    A missing EncryptMetadata entry counts as True.
        metadata_flag = self.encryption.get("EncryptMetadata")
        metadata_encrypted = True if is_null_like(metadata_flag) else metadata_flag
        revision = self.encryption["R"]
        if revision >= 4 and not metadata_encrypted:
            hasher.update(b"\xff\xff\xff\xff")

        # g) Finish the hash and keep its first "key length" bytes.
        key_size = self.key_length
        key = hasher.digest()[:key_size]

        # h) Revision 3 or greater: 50 times, hash the truncated output again and
        #    truncate the new digest in the same way.
        if revision >= 3:
            for _ in range(50):
                key = md5(key).digest()[:key_size]

        # i) The truncated result is the file encryption key.
        return key

    def compute_owner_password(self, owner_password: bytes, user_password: bytes) -> bytes:
        """Computes the O (``owner_password``) value in the Encrypt dictionary
        according to ISO 32000-2:2020 § 7.6.4.4.2 "Algorithm 3: Computing the
        encryption dictionary's O-entry value (revision 4 and earlier)".

        Arguments:
            owner_password (bytes):
                The owner password. If empty, ``user_password`` is used in its place
                for deriving the cipher key.

            user_password (bytes):
                The user password. This is the value that gets encrypted into the
                O entry.

        Returns the computed O value.
        """
        # a) to d) Derive the ARC4 key from the owner password, or from the user
        #    password if there is no owner password.
        owner_key = self._derive_owner_key(owner_password or user_password)

        # e) Pad or truncate the user password string.
        padded_user = pad_password(user_password)

        # f) Encrypt the result of (e) using ARC4 with the key from (d).
        arc4 = self._get_provider("ARC4")
        o_value = arc4(owner_key).encrypt(padded_user)

        # g) Revision 3 or greater: 19 more ARC4 rounds over the previous output,
        #    each keyed with the key from (d) XORed with the iteration counter.
        if self.encryption["R"] >= 3:
            for counter in range(1, 20):
                o_value = arc4(self._xor_key(owner_key, counter)).encrypt(o_value)

        # h) The result is the O value.
        return o_value

    def compute_user_password(self, password: bytes) -> bytes:
        """Computes the U (user password) value in the Encrypt dictionary according to
        the algorithms for revision 2 (Algorithm 4 in ISO 32000-2:2020 § 7.6.4.4.3)
        and revisions 3 and 4 (Algorithm 5 in ISO 32000-2:2020 § 7.6.4.4.4).

        Arguments:
            password (bytes):
                The user password.

        Returns the computed U value.
        """
        arc4 = self._get_provider("ARC4")

        # a) Both algorithms start from a file encryption key based on the password.
        file_key = self.compute_encryption_key(password)

        if self.encryption["R"] != 2:
            # Algorithm 5, steps (b) to (e), then (f): pad the result to 32 bytes.
            return pad_password(self._revision3_user_hash(arc4, file_key))

        # Algorithm 4, step (b): encrypt the 32-byte padding string with the key.
        return arc4(file_key).encrypt(PASSWORD_PADDING)

    def authenticate_user_password(self, password: bytes) -> tuple[bytes, bool]:
        """Authenticates the provided user ``password`` according to
        ISO 32000-2:2020 § 7.6.4.4.5 "Algorithm 6: Authenticating the user password
        (Security handlers of revision 4 and earlier)".

        Returns a tuple of two values: the encryption key that should decrypt the
        document and whether authentication was successful. On failure, the tuple
        is ``(b"", False)``.
        """
        arc4 = self._get_provider("ARC4")

        # a) Perform everything but the last step of Algorithms 4 and 5, starting
        #    with their common step (a).
        file_key = self.compute_encryption_key(password)
        stored = into_bytes(self.encryption["U"])

        if self.encryption["R"] == 2:
            # Algorithm 4, step (b). The whole value is compared with the U entry.
            computed = arc4(file_key).encrypt(PASSWORD_PADDING)
            authenticated = stored == computed
        else:
            # Algorithm 5, steps (b) to (e). Only the first 16 bytes of the stored
            # and the computed values are compared.
            computed = self._revision3_user_hash(arc4, file_key)
            authenticated = stored[:16] == computed[:16]

        # b) On a match, the password is the user password and the key from (a)
        #    is the one that decrypts the document.
        if authenticated:
            return (file_key, True)
        return (b"", False)

    def authenticate_owner_password(self, password: bytes) -> tuple[bytes, bool]:
        """Authenticates the provided owner ``password`` (or user ``password`` if none)
        according to ISO 32000-2:2020 § 7.6.4.4.6 "Algorithm 7: Authenticating the
        owner password (Security handlers of revision 4 and earlier)".

        Returns a tuple of two values: the encryption key that should decrypt the
        document and whether authentication was successful.
        """
        # a) Steps (a) to (d) of Algorithm 3 applied to the supplied password.
        owner_key = self._derive_owner_key(password)

        recovered = into_bytes(self.encryption["O"])
        arc4 = self._get_provider("ARC4")

        if self.encryption["R"] == 2:
            # b) Revision 2: decrypt the O value once with the computed key.
            recovered = arc4(owner_key).decrypt(recovered)
        else:
            # b) Revision 3 or greater: 20 rounds over the O entry (first round) or
            #    the previous output (later rounds), each keyed with the key from (a)
            #    XORed with the iteration counter, counting down from 19 to 0.
            # NOTE(review): the spec step says "decrypt" but ``encrypt`` is called
            # here; ARC4 is a symmetric stream cipher, so this is presumably
            # equivalent -- confirm against the ARC4 provider.
            for counter in reversed(range(20)):
                recovered = arc4(self._xor_key(owner_key, counter)).encrypt(recovered)

        # c) The result of (b) is presumably the user password. If it authenticates
        #    as such, the supplied password is the owner password.
        return self.authenticate_user_password(recovered)

    def compute_object_crypt(
        self,
        encryption_key: bytes,
        contents: Encryptable,
        reference: PdfReference,
        *,
        crypt_filter: PdfDictionary | None = None,
    ) -> tuple[CryptMethod, bytes, bytes]:
        """Computes all parameters needed to encrypt or decrypt ``contents`` according
        to ISO 32000-2:2020 § 7.6.3.2, "Algorithm 1: Encryption of data using the RC4
        and AES algorithms".

        This algorithm is only applicable for Encrypt versions 1 through 4 (deprecated
        in PDF 2.0). Version 5 uses a simpler algorithm that is not handled here.

        Arguments:
            encryption_key (bytes):
                An encryption key generated by the algorithm implemented in
                :meth:`.compute_encryption_key`.

            contents (PdfStream | PdfHexString | bytes):
                The contents to encrypt/decrypt. When no ``crypt_filter`` is given,
                the type of object determines the crypt filter used (StmF for
                streams, StrF for hex and literal strings).

            reference (PdfReference):
                The reference of either the object itself (in the case of a stream)
                or the object containing it (in the case of a string).

            crypt_filter (PdfDictionary, optional, keyword only):
                The specific crypt filter to be referenced when decrypting the
                document. If not specified (or empty), the default for this type of
                ``contents`` is used.

        Returns a tuple of 3 values specifying, in order, the crypt method to apply,
        the key to use with this method, and the data to encrypt or decrypt.

        Raises :class:`TypeError` if ``contents`` is not a PDF stream or string.
        """
        # a) The object and generation numbers come from the "reference" argument.

        # b) Treating both numbers as binary integers, extend the file encryption
        #    key by 5 bytes: the low-order 3 bytes of the object number followed by
        #    the low-order 2 bytes of the generation number, low-order byte first.
        gen_bytes = reference.generation.to_bytes(4, "little")
        num_bytes = reference.object_number.to_bytes(4, "little")
        key_material = encryption_key + num_bytes[:3] + gen_bytes[:2]

        if crypt_filter:
            method = self._get_cfm_method(crypt_filter)
        else:
            method = self._get_crypt_method(contents)

        #    When using the AES algorithm, the key is extended by a further 4 bytes
        #    with the value "sAlT".
        if method == "AESV2":
            key_material += b"sAlT"

        # c) Hash the result of step (b) with MD5.
        # d) Use the first "key length" + 5 bytes of the digest, up to a maximum of
        #    16 bytes, as the key of the encryption algorithm.
        object_key = md5(key_material).digest()[: self.key_length + 5][:16]

        if isinstance(contents, PdfStream):
            payload = contents.raw
        elif isinstance(contents, PdfHexString):
            payload = contents.value
        elif isinstance(contents, bytes):
            payload = contents
        else:
            raise TypeError("'contents' argument must be a PDF stream or string.")

        return (method, object_key, payload)

    def encrypt_object(
        self,
        encryption_key: bytes,
        contents: Encryptable,
        reference: PdfReference,
        *,
        crypt_filter: PdfDictionary | None = None,
    ) -> bytes:
        """Encrypts the specified ``contents`` according to ISO 32000-2:2020 § 7.6.3.2.

        For details on parameters, see :meth:`.compute_object_crypt`.

        Returns the output of the selected crypt provider's ``encrypt``.
        """
        method, object_key, plaintext = self.compute_object_crypt(
            encryption_key, contents, reference, crypt_filter=crypt_filter
        )
        cipher = self._get_provider(method)(object_key)
        return cipher.encrypt(plaintext)

    def decrypt_object(
        self,
        encryption_key: bytes,
        contents: Encryptable,
        reference: PdfReference,
        *,
        crypt_filter: PdfDictionary | None = None,
    ) -> bytes:
        """Decrypts the specified ``contents`` according to ISO 32000-2:2020 § 7.6.3.2.

        For details on parameters, see :meth:`.compute_object_crypt`.

        Returns the output of the selected crypt provider's ``decrypt``.
        """
        method, object_key, ciphertext = self.compute_object_crypt(
            encryption_key, contents, reference, crypt_filter=crypt_filter
        )
        cipher = self._get_provider(method)(object_key)
        return cipher.decrypt(ciphertext)

    def _get_provider(self, name: str) -> type[CryptProvider]:
        """Looks up the crypt provider registered as ``name`` in ``CRYPT_PROVIDERS``.

        Raises :class:`MissingCryptProviderError` if the lookup yields ``None``.
        """
        provider = CRYPT_PROVIDERS.get(name)
        if provider is not None:
            return provider

        raise MissingCryptProviderError(
            f"No crypt provider available for {name!r}. You must register one or "
            f"install a compatible module."
        )

    def _get_crypt_method(self, contents: Encryptable) -> CryptMethod:
        """Determines the default crypt method for ``contents`` from the encryption
        dictionary.

        Any ``V`` other than 4 (including a missing one) yields ``"ARC4"``. For
        version 4, the crypt filter named by ``StmF`` (streams) or ``StrF``
        (strings) is looked up in ``CF``; both default to ``Identity``.

        Raises :class:`TypeError` if ``contents`` is not a PDF stream or string.
        """
        declared = self.encryption.get("V")
        version = 0 if is_null_like(declared) else declared

        if version != 4:
            # todo: should we assume ARC4?
            return "ARC4"

        if isinstance(contents, PdfStream):
            default_entry = "StmF"
        elif isinstance(contents, (bytes, PdfHexString)):
            default_entry = "StrF"
        else:
            raise TypeError("'contents' argument must be a PDF stream or string.")

        filter_name = self.encryption.get(default_entry, PdfName(b"Identity")).value
        if filter_name == b"Identity":
            return "Identity"  # No processing needed

        filters = self.encryption.get("CF", {})
        return self._get_cfm_method(filters.get(filter_name.decode(), {}))

    def _get_cfm_method(self, crypt_filter: PdfDictionary) -> CryptMethod:
        """Maps the ``CFM`` entry of ``crypt_filter`` to a crypt method name.

        A missing ``CFM`` entry is treated as ``Identity``.

        Raises :class:`ValueError` for any value other than ``Identity``, ``AESV2``
        or ``V2``.
        """
        cfm = crypt_filter.get("CFM", PdfName(b"Identity")).value

        known_methods = (
            (b"Identity", "Identity"),
            (b"AESV2", "AESV2"),
            (b"V2", "ARC4"),
        )
        for raw_name, method in known_methods:
            if cfm == raw_name:
                return method

        raise ValueError(f"Unknown crypt filter for Standard security handler: {cfm!r}")