Source code for pdfnaut.common.dates

"""Utilities for parsing and encoding date formats: ISO 8601 and ISO 8824."""

import datetime
import re


def has_date(date: datetime.datetime) -> bool:
    """Tell whether ``date`` carries a meaningful date component.

    A date component is considered present when at least one of the year,
    month or day is greater than 1, i.e. differs from the value used as the
    default for that field.
    """
    calendar_fields = (date.year, date.month, date.day)
    return any(field > 1 for field in calendar_fields)
def has_time(date: datetime.datetime) -> bool:
    """Tell whether ``date`` carries a meaningful time component.

    A time component is considered present when at least one of the hour,
    minute, second or microsecond is greater than zero.
    """
    clock_fields = (date.hour, date.minute, date.second, date.microsecond)
    return any(field > 0 for field in clock_fields)
def has_timezone(date: datetime.datetime) -> bool:
    """Tell whether ``date`` is bound to a timezone that differs from UTC.

    Naive datetimes (no UTC offset) and datetimes whose offset is exactly
    zero both yield ``False``.
    """
    utc_delta = date.utcoffset()
    if utc_delta is None:
        return False

    return utc_delta.total_seconds() != 0
def parse_iso8824(date_string: str) -> datetime.datetime:
    """Parses an ISO/IEC 8824 date string into a :class:`datetime.datetime`
    object (for example, ``D:20010727133720``).

    This is the type of date string described in ISO 32000-2:2020 § 7.9.4 "Dates".
    Only the ``D:`` prefix and the year are mandatory. Missing month and day
    default to 1, missing time fields default to 0, and a missing offset (or
    ``Z``) is treated as UTC.

    Args:
        date_string: The date string to parse, e.g. ``D:20010727133720-05'30``.

    Returns:
        A timezone-aware :class:`datetime.datetime`.

    Raises:
        ValueError: If ``date_string`` does not match the expected format or
            contains out-of-range values (such as month 13).
    """
    # dates may end with an apostrophe (pdf 1.7 and below)
    if date_string.endswith("'"):
        date_string = date_string[:-1]

    pattern = re.compile(
        r"""^D:(?P<year>\d{4})(?P<month>\d{2})?(?P<day>\d{2})?  # date
        (?P<hour>\d{2})?(?P<minute>\d{2})?(?P<second>\d{2})?  # time
        (?P<offset>[-+Z])?(?P<offset_hour>\d{2})?(?P<offset_minute>'\d{2})?$  # offset
        """,
        re.X,
    )

    mat = pattern.match(date_string)
    if not mat:
        raise ValueError(f"Invalid date format: {date_string!r}")

    offset_sign = mat.group("offset")
    if offset_sign is None or offset_sign == "Z":
        delta = datetime.timedelta(0)
    else:
        offset_hour = int(mat.group("offset_hour") or 0)
        # The minute group includes its leading apostrophe; strip it.
        offset_minute = int((mat.group("offset_minute") or "'0")[1:])

        delta = datetime.timedelta(hours=offset_hour, minutes=offset_minute)
        # The sign applies to the whole offset, not only to the hours:
        # -05'30 is five and a half hours behind UTC.
        if offset_sign == "-":
            delta = -delta

    return datetime.datetime(
        year=int(mat.group("year")),
        month=int(mat.group("month") or 1),
        day=int(mat.group("day") or 1),
        hour=int(mat.group("hour") or 0),
        minute=int(mat.group("minute") or 0),
        second=int(mat.group("second") or 0),
        tzinfo=datetime.timezone(delta),
    )
def encode_iso8824(date: datetime.datetime, *, full: bool = True) -> str:
    """Encodes a :class:`datetime.datetime` object into an ISO 8824 date string
    suitable for storage in a PDF file.

    If ``full`` is True, this function will encode all date and time values.
    Otherwise, the function will perform partial encoding: trailing components
    that still hold their default value are dropped. A component is never
    dropped if a later one is written, because the format is positional and
    a gap would make the string ambiguous.

    Args:
        date: The datetime to encode. A naive datetime is assumed to be UTC.
        full: Whether to write every component regardless of its value.

    Returns:
        A string such as ``D:20010727133720Z`` or ``D:20010727133720-05'30``.
        Microseconds are not representable in this format and are not written.
    """
    offset = date.utcoffset()
    # If no offset, assume UTC
    offset_seconds = 0 if offset is None else offset.total_seconds()
    non_utc = offset_seconds != 0

    fields = (date.month, date.day, date.hour, date.minute, date.second)
    defaults = (1, 1, 0, 0, 0)

    if full or non_utc:
        # An explicit offset must follow a complete date and time.
        keep = len(fields)
    else:
        # Keep everything up to (and including) the last non-default field.
        keep = max(
            (
                index + 1
                for index, (value, default) in enumerate(zip(fields, defaults))
                if value != default
            ),
            default=0,
        )

    # The year is always four digits wide so the result can be parsed back.
    datestr = f"D:{date.year:04}" + "".join(f"{value:02}" for value in fields[:keep])

    # keep > 2 means at least the hour was written. A non-zero microsecond
    # also counts as "has a time" even though it is not itself encoded.
    with_time = full or non_utc or keep > 2 or date.microsecond > 0
    if not with_time:
        return datestr

    if not non_utc:
        return datestr + "Z"

    # Split the magnitude and prepend the sign separately: floor division on a
    # negative offset would turn -05:30 into -06 hours and +30 minutes.
    sign = "-" if offset_seconds < 0 else "+"
    offset_hours, offset_minutes = divmod(int(abs(offset_seconds)) // 60, 60)
    return f"{datestr}{sign}{offset_hours:02}'{offset_minutes:02}"
def parse_iso8601(date_string: str) -> datetime.datetime:
    """Parses a date string conforming to the ISO 8601 profile specified in
    https://www.w3.org/TR/NOTE-datetime into a :class:`datetime.datetime` object.

    Missing month and day default to 1, missing time fields default to 0, and
    a missing time zone designator (or ``Z``) is treated as UTC.

    Args:
        date_string: The string to parse, e.g. ``2001-07-27T13:37:20-05:30``.

    Returns:
        A timezone-aware :class:`datetime.datetime`.

    Raises:
        ValueError: If ``date_string`` does not match the expected format or
            contains out-of-range values.
    """
    pattern = re.compile(
        r"""^(?P<year>\d{4})(?:-(?P<month>\d{2}))?(?:-(?P<day>\d{2}))?  # yyyy-mm-dd
        (?:T(?P<hour>\d{2}):(?P<minute>\d{2})  # hh-mm
        (?::(?P<second>\d{2})(?:\.(?P<fraction>\d+))?)?  # ss.s
        (?P<tzd>Z|[-+]\d{2}:\d{2})?)?$  # Z or +hh:mm
        """,
        re.X,
    )

    mat = pattern.match(date_string)
    if not mat:
        raise ValueError(f"Expected an ISO 8601 string, received {date_string!r}")

    tzd = mat.group("tzd")
    if tzd is None or tzd == "Z":
        tz_offset = datetime.timedelta(hours=0, minutes=0)
    else:
        hh, mm = tzd[1:].split(":")
        tz_offset = datetime.timedelta(hours=int(hh), minutes=int(mm))
        # The sign applies to the whole offset, not only to the hours:
        # -05:30 is five and a half hours behind UTC.
        if tzd[0] == "-":
            tz_offset = -tz_offset

    fraction_str = mat.group("fraction") or "0"
    fraction_micro = (int(fraction_str) / 10 ** len(fraction_str)) * 1_000_000
    # Fractions finer than a microsecond (e.g. ".9999999") may round up to a
    # full second, which datetime rejects; clamp to the largest valid value.
    microsecond = min(round(fraction_micro), 999_999)

    return datetime.datetime(
        year=int(mat.group("year")),
        month=int(mat.group("month") or 1),
        day=int(mat.group("day") or 1),
        hour=int(mat.group("hour") or 0),
        minute=int(mat.group("minute") or 0),
        second=int(mat.group("second") or 0),
        microsecond=microsecond,
        tzinfo=datetime.timezone(tz_offset),
    )
def encode_iso8601(date: datetime.datetime, *, full: bool = True) -> str:
    """Encodes a :class:`datetime.datetime` object into a date string conforming
    to the ISO 8601 profile specified in https://www.w3.org/TR/NOTE-datetime.

    If ``full`` is True, this function will encode all date and time values.
    Otherwise, the function will perform partial encoding, only including
    components that aren't their default values. Whenever a time is written,
    the complete date (year, month and day) is written before it.

    Args:
        date: The datetime to encode. A naive datetime is assumed to be UTC.
        full: Whether to write every component regardless of its value.

    Returns:
        A string such as ``2001-07-27T13:37:20Z`` or
        ``2001-07-27T13:37:20-05:30``.
    """
    offset = date.utcoffset()
    # If no offset, assume UTC
    offset_seconds = 0 if offset is None else offset.total_seconds()
    non_utc = offset_seconds != 0

    has_clock = (
        date.hour > 0 or date.minute > 0 or date.second > 0 or date.microsecond > 0
    )
    with_time = full or has_clock or non_utc

    # The year is always four digits wide so the result can be parsed back.
    datestr = f"{date.year:04}"

    if with_time or date.year > 1 or date.month > 1 or date.day > 1:
        datestr += f"-{date.month:02}"
        # A time may only follow a complete date, so the day is required
        # whenever any time or timezone information will be written.
        if with_time or date.day > 1:
            datestr += f"-{date.day:02}"

    if not with_time:
        return datestr

    datestr += f"T{date.hour:02}:{date.minute:02}"

    # Whether we have a second
    if full or date.second > 0 or date.microsecond > 0:
        datestr += f":{date.second:02}"
        if date.microsecond > 0:
            # "d" rather than "n": the output must not depend on the locale.
            datestr += f".{date.microsecond:06d}"

    if not non_utc:
        return datestr + "Z"

    # Split the magnitude and prepend the sign separately: floor division on a
    # negative offset would turn -05:30 into -06 hours and +30 minutes.
    sign = "-" if offset_seconds < 0 else "+"
    offset_hours, offset_minutes = divmod(int(abs(offset_seconds)) // 60, 60)
    return f"{datestr}{sign}{offset_hours:02}:{offset_minutes:02}"