Source code for loguru._parser

import re
from os import PathLike


class Parser:
    """An object to more easily parse generated logs.

    The |Parser| provides a set of handy methods likely to be used while parsing logs for
    post-processing.

    You should not instantiate a |Parser| by yourself, use ``from loguru import parser`` instead.

    .. |Parser| replace:: :class:`~loguru._parser.Parser`

    .. |dict| replace:: :class:`dict`
    .. |str| replace:: :class:`str`
    .. |int| replace:: :class:`int`
    .. |Path| replace:: :class:`pathlib.Path`
    .. |match.groupdict| replace:: :meth:`re.Match.groupdict()`

    .. |file-like object| replace:: ``file-like object``
    .. _file-like object: https://docs.python.org/3/glossary.html#term-file-object
    .. |re.Pattern| replace:: ``re.Pattern``
    .. _re.Pattern: https://docs.python.org/3/library/re.html#re-objects
    .. |re.Match| replace:: ``re.Match``
    .. _re.Match: https://docs.python.org/3/library/re.html#match-objects
    """

    @staticmethod
    def cast(_dict, **kwargs):
        """Convert values of a dict to other defined types.

        This is a convenient function used to cast dict values resulting from parsed logs from
        |str| to a more appropriate type.

        Parameters
        ----------
        _dict : |dict|
            The dict whose values should be converted.
        **kwargs
            Mapping between keys of the input ``_dict`` and the function that should be used to
            convert the associated value. Keys absent from ``_dict`` are ignored.

        Returns
        -------
        :class:`dict`
            A copy of the input dictionary with values converted to the appropriate type.

        Example
        -------
        >>> dico = {"some": "text", "num": "42", "date": "2018-09-12 22:23:24"}
        >>> parser.cast(dico, num=int, date=lambda t: datetime.strptime(t, "%Y-%m-%d %H:%M:%S"))
        {'some': 'text', 'num': 42, 'date': datetime.datetime(2018, 9, 12, 22, 23, 24)}
        """
        # Work on a shallow copy so the caller's dict is left untouched.
        dict_ = _dict.copy()
        for key, converter in kwargs.items():
            if key in dict_:
                dict_[key] = converter(dict_[key])
        return dict_

    @staticmethod
    def parse(file, pattern, *, chunk=2 ** 16):
        """
        Parse raw logs to extract each entry as a |dict|.

        The logging format has to be specified as the regex ``pattern``, it will then be
        used to parse the ``file`` and retrieve each entry based on the named groups present
        in the regex.

        Parameters
        ----------
        file : |str|, |Path| or |file-like object|_
            The path of the log file to be parsed, or alternatively an already opened file object.
            A file opened here from a path is closed once iteration ends, fails, or the
            generator is closed; a file object passed by the caller is never closed.
        pattern : |str| or |re.Pattern|_
            The regex to use for logs parsing, it should contain named groups which will be included
            in the returned dict.
        chunk : |int|, optional
            The size passed to each ``read()`` call while iterating through the logs, this avoids
            having to load the whole file in memory.

        Yields
        ------
        :class:`dict`
            The dict mapping regex named groups to matched values, as returned by |match.groupdict|.

        Raises
        ------
        ValueError
            If ``file`` is neither a path nor an object with a callable ``read`` attribute, or if
            ``pattern`` is of a type that cannot be compiled as a regex. As this method is a
            generator, the error is raised when iteration starts.

        Examples
        --------
        >>> reg = r"(?P<lvl>[0-9]+): (?P<msg>.*)"    # If log format is "{level.no}: {message}"
        >>> for e in parser.parse("file.log", reg):  # A file line could be "10: A debug message"
        ...     print(e)                             # => {'lvl': '10', 'msg': 'A debug message'}
        """
        if isinstance(file, (str, PathLike)):
            should_close = True
            fileobj = open(str(file))
        elif hasattr(file, "read") and callable(file.read):
            should_close = False
            fileobj = file
        else:
            raise ValueError(
                "Invalid file, it should be a string path or a file object, not: '%s'"
                % type(file).__name__
            )

        # Everything past this point may raise or be interrupted (invalid pattern, read error,
        # consumer abandoning the generator), so the file we opened is released in ``finally``.
        try:
            try:
                regex = re.compile(pattern)
            except TypeError:
                # The internal TypeError adds nothing for the caller, hence ``from None``.
                raise ValueError(
                    "Invalid pattern, it should be a string or a compiled regex, not: '%s'"
                    % type(pattern).__name__
                ) from None

            for match in Parser._find_iter(fileobj, regex, chunk):
                yield match.groupdict()
        finally:
            if should_close:
                fileobj.close()

    @staticmethod
    def _find_iter(fileobj, regex, chunk):
        """Yield successive ``regex`` matches found in ``fileobj``, reading ``chunk`` at a time.

        The most recent match of each pass is held back until more data (or the end of the file)
        is seen, because the text read so far may only contain the beginning of that entry.
        """
        # A zero-length read gives an empty buffer of the same type as the stream's content.
        buffer = fileobj.read(0)

        while True:
            text = fileobj.read(chunk)
            buffer += text
            matches = list(regex.finditer(buffer))

            if not text:
                # End of file: nothing more can extend the pending matches, emit them all.
                yield from matches
                break

            if len(matches) > 1:
                # Drop the text consumed by all matches but the last one, which is re-scanned
                # on the next pass once more data has been appended to the buffer.
                end = matches[-2].end()
                buffer = buffer[end:]
                yield from matches[:-1]