# $Id: misc.py 9358 2023-04-19 23:31:13Z milde $
# Authors: David Goodger <goodger@python.org>; Dethe Elza
# Copyright: This module has been placed in the public domain.

"""Miscellaneous directives."""

__docformat__ = 'reStructuredText'

from pathlib import Path
import os
import re
import time
from urllib.request import urlopen
from urllib.error import URLError

import docutils
from docutils import io, nodes, statemachine, utils
from docutils.parsers.rst import Directive, convert_directive_function
from docutils.parsers.rst import directives, roles, states
from docutils.parsers.rst.directives.body import CodeBlock, NumberLines
from docutils.transforms import misc


class Include(Directive):

    """
    Include content read from a separate source file.

    Content may be parsed by the parser, or included as a literal
    block.  The encoding of the included file can be specified.  Only
    a part of the given file argument may be included by specifying
    start and end line or text to match before and/or after the text
    to be used.

    https://docutils.sourceforge.io/docs/ref/rst/directives.html#including-an-external-document-fragment
    """

    # One required argument: the path of the file to include
    # (read from ``self.arguments[0]`` in `run()`).
    required_arguments = 1
    optional_arguments = 0
    final_argument_whitespace = True
    # Maps option names to their conversion functions.
    option_spec = {'literal': directives.flag,
                   'code': directives.unchanged,
                   'encoding': directives.encoding,
                   'parser': directives.parser_name,
                   'tab-width': int,
                   'start-line': int,
                   'end-line': int,
                   'start-after': directives.unchanged_required,
                   'end-before': directives.unchanged_required,
                   # ignored except for 'literal' or 'code':
                   'number-lines': directives.unchanged,  # integer or None
                   'class': directives.class_option,
                   'name': directives.unchanged}

    # Base directory for paths written in angle brackets ("<name>"),
    # see the path handling at the start of `run()`.
    standard_include_path = Path(docutils._datadir(states.__file__)) / 'include'

    def run(self):
        """Include a file as part of the content of this reST file.

        Depending on the options, the file (or a clipping) is
        converted to nodes and returned or inserted into the input stream.

        Return value, by option:

        - "literal": a list with one `nodes.literal_block`,
        - "code": whatever `CodeBlock.run()` returns for the clipping,
        - "parser": the children of a dummy document parsed with the
          given parser,
        - otherwise: an empty list; the lines are inserted into the
          input of the running state machine instead.

        Problems are reported by raising the objects returned by
        `self.warning()`, `self.severe()`, or `self.error()`.
        """
        # File insertion can be switched off in the document settings.
        if not self.state.document.settings.file_insertion_enabled:
            raise self.warning('"%s" directive disabled.' % self.name)
        current_source = self.state.document.current_source
        path = directives.path(self.arguments[0])
        # "<name>" is looked up below `standard_include_path`,
        # anything else relative to the directory of the current source.
        if path.startswith('<') and path.endswith('>'):
            _base = self.standard_include_path
            path = path[1:-1]
        else:
            _base = Path(current_source).parent
        path = utils.relative_path(None, _base/path)
        # Directive options take precedence over the document settings.
        encoding = self.options.get(
            'encoding', self.state.document.settings.input_encoding)
        e_handler = self.state.document.settings.input_encoding_error_handler
        tab_width = self.options.get(
            'tab-width', self.state.document.settings.tab_width)
        try:
            include_file = io.FileInput(source_path=path,
                                        encoding=encoding,
                                        error_handler=e_handler)
        except UnicodeEncodeError:
            raise self.severe(f'Problems with "{self.name}" directive path:\n'
                              f'Cannot encode input file path "{path}" '
                              '(wrong locale?).')
        except OSError as error:
            raise self.severe(f'Problems with "{self.name}" directive '
                              f'path:\n{io.error_string(error)}.')
        else:
            # Only successfully opened files are recorded as dependencies.
            self.state.document.settings.record_dependencies.add(path)

        # Get to-be-included content
        startline = self.options.get('start-line', None)
        endline = self.options.get('end-line', None)
        try:
            # Split into lines only if a line-based clipping is requested;
            # a missing or zero "start-line" without "end-line" reads the
            # whole file in one go.
            if startline or (endline is not None):
                lines = include_file.readlines()
                rawtext = ''.join(lines[startline:endline])
            else:
                rawtext = include_file.read()
        except UnicodeError as error:
            raise self.severe(f'Problem with "{self.name}" directive:\n'
                              + io.error_string(error))
        # start-after/end-before: no restrictions on newlines in match-text,
        # and no restrictions on matching inside lines vs. line boundaries
        after_text = self.options.get('start-after', None)
        if after_text:
            # skip content in rawtext before *and incl.* a matching text
            after_index = rawtext.find(after_text)
            if after_index < 0:
                raise self.severe('Problem with "start-after" option of "%s" '
                                  'directive:\nText not found.' % self.name)
            rawtext = rawtext[after_index + len(after_text):]
        # "end-before" is searched in the text left over by "start-after".
        before_text = self.options.get('end-before', None)
        if before_text:
            # skip content in rawtext after *and incl.* a matching text
            before_index = rawtext.find(before_text)
            if before_index < 0:
                raise self.severe('Problem with "end-before" option of "%s" '
                                  'directive:\nText not found.' % self.name)
            rawtext = rawtext[:before_index]

        include_lines = statemachine.string2lines(rawtext, tab_width,
                                                  convert_whitespace=True)
        # Refuse clippings with a line longer than the configured limit
        # (the reported line number is 1-based).
        for i, line in enumerate(include_lines):
            if len(line) > self.state.document.settings.line_length_limit:
                raise self.warning('"%s": line %d exceeds the'
                                   ' line-length-limit.' % (path, i+1))

        # Variant 1: include as literal block.
        if 'literal' in self.options:
            # Don't convert tabs to spaces, if `tab_width` is negative.
            if tab_width >= 0:
                text = rawtext.expandtabs(tab_width)
            else:
                text = rawtext
            literal_block = nodes.literal_block(
                                rawtext, source=path,
                                classes=self.options.get('class', []))
            literal_block.line = 1
            self.add_name(literal_block)
            if 'number-lines' in self.options:
                # An empty option value means: start numbering at 1.
                try:
                    startline = int(self.options['number-lines'] or 1)
                except ValueError:
                    raise self.error(':number-lines: with non-integer '
                                     'start value')
                # `startline`/`endline` are reused here as the number
                # range handed to `NumberLines`.
                endline = startline + len(include_lines)
                if text.endswith('\n'):
                    text = text[:-1]
                tokens = NumberLines([([], text)], startline, endline)
                # Tokens with classes become inline nodes, others plain text.
                for classes, value in tokens:
                    if classes:
                        literal_block += nodes.inline(value, value,
                                                      classes=classes)
                    else:
                        literal_block += nodes.Text(value)
            else:
                literal_block += nodes.Text(text)
            return [literal_block]

        # Variant 2: delegate to the "code" directive class.
        if 'code' in self.options:
            # The path is handed to `CodeBlock` via the options dictionary.
            self.options['source'] = path
            # Don't convert tabs to spaces, if `tab_width` is negative:
            if tab_width < 0:
                include_lines = rawtext.splitlines()
            # The value of the "code" option becomes the CodeBlock argument
            # and is removed from the options passed on.
            codeblock = CodeBlock(self.name,
                                  [self.options.pop('code')],  # arguments
                                  self.options,
                                  include_lines,  # content
                                  self.lineno,
                                  self.content_offset,
                                  self.block_text,
                                  self.state,
                                  self.state_machine)
            return codeblock.run()

        # Prevent circular inclusion:
        clip_options = (startline, endline, before_text, after_text)
        include_log = self.state.document.include_log
        # log entries are tuples (<source>, <clip-options>)
        if not include_log:  # new document, initialize with document source
            include_log.append((utils.relative_path(None, current_source),
                                (None, None, None, None)))
        # The same file with the same clipping is already in the log:
        # report the chain of sources, most recent first.
        if (path, clip_options) in include_log:
            master_paths = (pth for (pth, opt) in reversed(include_log))
            inclusion_chain = '\n> '.join((path, *master_paths))
            raise self.warning('circular inclusion in "%s" directive:\n%s'
                               % (self.name, inclusion_chain))

        # Variant 3: parse with a custom parser.
        if 'parser' in self.options:
            # parse into a dummy document and return created nodes
            document = utils.new_document(path, self.state.document.settings)
            # The dummy document gets its own copy of the log (a new list),
            # extended by the current inclusion.
            document.include_log = include_log + [(path, clip_options)]
            parser = self.options['parser']()
            parser.parse('\n'.join(include_lines), document)
            # clean up doctree and complete parsing
            document.transformer.populate_from_components((parser,))
            document.transformer.apply_transforms()
            return document.children

        # Include as rST source:
        #
        # mark end (cf. parsers.rst.states.Body.comment())
        include_lines += ['', '.. end of inclusion from "%s"' % path]
        self.state_machine.insert_input(include_lines, path)
        # update include-log
        include_log.append((path, clip_options))
        return []


class Raw(Directive):

    """
    Pass through content unchanged

    Content is included in output based on type argument

    Content may be included inline (content section of directive) or
    imported from a file or url.
    """

    # One required argument: the output format(s) the content is meant for.
    required_arguments = 1
    optional_arguments = 0
    final_argument_whitespace = True
    option_spec = {'file': directives.path,
                   'url': directives.uri,
                   'encoding': directives.encoding,
                   'class': directives.class_option}
    has_content = True

    def run(self):
        """Return a list with one `nodes.raw` element holding the content.

        The text is taken from exactly one of: the directive content,
        the file named by the "file" option, or the resource named by
        the "url" option.

        Problems are reported by raising the objects returned by
        `self.warning()` (directive disabled), `self.error()`
        (conflicting sources), or `self.severe()` (I/O and decoding
        problems).
        """
        # The directive needs `raw_enabled`; external sources additionally
        # need `file_insertion_enabled`.
        if (not self.state.document.settings.raw_enabled
            or (not self.state.document.settings.file_insertion_enabled
                and ('file' in self.options
                     or 'url' in self.options))):
            raise self.warning('"%s" directive disabled.' % self.name)
        # Normalize the format argument: lowercase, single spaces.
        attributes = {'format': ' '.join(self.arguments[0].lower().split())}
        encoding = self.options.get(
            'encoding', self.state.document.settings.input_encoding)
        e_handler = self.state.document.settings.input_encoding_error_handler
        if self.content:
            if 'file' in self.options or 'url' in self.options:
                raise self.error(
                    '"%s" directive may not both specify an external file '
                    'and have content.' % self.name)
            text = '\n'.join(self.content)
        elif 'file' in self.options:
            if 'url' in self.options:
                raise self.error(
                    'The "file" and "url" options may not be simultaneously '
                    'specified for the "%s" directive.' % self.name)
            path = self.options['file']
            # File paths are relative to the directory of the current source.
            _base = Path(self.state.document.current_source).parent
            path = utils.relative_path(None, _base/path)
            try:
                raw_file = io.FileInput(source_path=path,
                                        encoding=encoding,
                                        error_handler=e_handler)
            except OSError as error:
                raise self.severe(f'Problems with "{self.name}" directive '
                                  f'path:\n{io.error_string(error)}.')
            else:
                # TODO: currently, raw input files are recorded as
                # dependencies even if not used for the chosen output format.
                self.state.document.settings.record_dependencies.add(path)
            try:
                text = raw_file.read()
            except UnicodeError as error:
                raise self.severe(f'Problem with "{self.name}" directive:\n'
                                  + io.error_string(error))
            attributes['source'] = path
        elif 'url' in self.options:
            source = self.options['url']
            try:
                # The context manager closes the response (and with it the
                # underlying connection or file) as soon as the data is
                # read, also if reading fails.
                with urlopen(source) as response:
                    raw_text = response.read()
            except (URLError, OSError) as error:
                raise self.severe(f'Problems with "{self.name}" directive URL '
                                  f'"{self.options["url"]}":\n'
                                  f'{io.error_string(error)}.')
            # Decode the downloaded data with the requested encoding.
            raw_file = io.StringInput(source=raw_text, source_path=source,
                                      encoding=encoding,
                                      error_handler=e_handler)
            try:
                text = raw_file.read()
            except UnicodeError as error:
                raise self.severe(f'Problem with "{self.name}" directive:\n'
                                  + io.error_string(error))
            attributes['source'] = source
        else:
            # This will always fail because there is no content.
            self.assert_has_content()
        raw_node = nodes.raw('', text, classes=self.options.get('class', []),
                             **attributes)
        (raw_node.source,
         raw_node.line) = self.state_machine.get_source_and_line(self.lineno)
        return [raw_node]


class Replace(Directive):

    """
    Provide the replacement content of a substitution definition.

    The directive content is parsed as body text.  The parse result may
    consist of one paragraph plus system messages; anything else is
    reported as an error.
    """

    has_content = True

    def run(self):
        """Return system messages followed by the paragraph's children.

        Raises the object returned by `self.error()` when used outside
        of a substitution definition.
        """
        if not isinstance(self.state, states.SubstitutionDef):
            raise self.error(
                'Invalid context: the "%s" directive can only be used within '
                'a substitution definition.' % self.name)
        self.assert_has_content()
        container = nodes.Element('\n'.join(self.content))
        self.state.nested_parse(self.content, self.content_offset,
                                container)
        # container might hold [paragraph] + system_message(s)
        paragraph = None
        system_messages = []
        for child in container:
            # Only the first paragraph is accepted as replacement content.
            if not paragraph and isinstance(child, nodes.paragraph):
                paragraph = child
                continue
            if isinstance(child, nodes.system_message):
                child['backrefs'] = []
                system_messages.append(child)
                continue
            # Second paragraph or any other body element.
            return [
                self.reporter.error(
                    f'Error in "{self.name}" directive: may contain '
                    'a single paragraph only.', line=self.lineno)]
        if paragraph:
            return system_messages + paragraph.children
        return system_messages


class Unicode(Directive):

    r"""
    Convert Unicode character codes (numbers) to characters.  Codes may be
    decimal numbers, hexadecimal numbers (prefixed by ``0x``, ``x``, ``\x``,
    ``U+``, ``u``, or ``\u``; e.g. ``U+262E``), or XML-style numeric character
    entities (e.g. ``&#x262E;``).  Text following ".." is a comment and is
    ignored.  Spaces are ignored, and any other text remains as-is.
    """

    required_arguments = 1
    optional_arguments = 0
    final_argument_whitespace = True
    option_spec = {'trim': directives.flag,
                   'ltrim': directives.flag,
                   'rtrim': directives.flag}

    # Matches the ".. " that starts the trailing comment.
    comment_pattern = re.compile(r'( |\n|^)\.\. ')

    def run(self):
        """Return a list of Text nodes, one per code in the argument.

        Also records the trim options as attributes of the enclosing
        substitution definition node.
        """
        if not isinstance(self.state, states.SubstitutionDef):
            raise self.error(
                'Invalid context: the "%s" directive can only be used within '
                'a substitution definition.' % self.name)
        definition_attributes = self.state_machine.node.attributes
        # "trim" is shorthand for "ltrim" plus "rtrim".
        for side in ('ltrim', 'rtrim'):
            if 'trim' in self.options or side in self.options:
                definition_attributes[side] = 1
        # Drop the comment part, then split the rest at whitespace.
        code_text = self.comment_pattern.split(self.arguments[0])[0]
        collector = nodes.Element()
        for code in code_text.split():
            try:
                character = directives.unicode_code(code)
            except ValueError as error:
                raise self.error('Invalid character code: %s\n%s'
                                 % (code, io.error_string(error)))
            collector += nodes.Text(character)
        return collector.children


class Class(Directive):

    """
    Set a "class" attribute on the directive content or the next element.
    When applied to the next element, a "pending" element is inserted, and a
    transform does the work later.
    """

    required_arguments = 1
    optional_arguments = 0
    final_argument_whitespace = True
    has_content = True

    def run(self):
        """Return the classified content nodes or a single pending node.

        Raises the object returned by `self.error()` if the argument is
        rejected by `directives.class_option()`.
        """
        try:
            class_names = directives.class_option(self.arguments[0])
        except ValueError:
            raise self.error(
                'Invalid class attribute value for "%s" directive: "%s".'
                % (self.name, self.arguments[0]))
        if not self.content:
            # No content: leave the work to the `ClassAttribute` transform.
            placeholder = nodes.pending(
                misc.ClassAttribute,
                {'class': class_names, 'directive': self.name},
                self.block_text)
            self.state_machine.document.note_pending(placeholder)
            return [placeholder]
        # Parse the content and add the classes to every top-level node.
        wrapper = nodes.Element()
        self.state.nested_parse(self.content, self.content_offset,
                                wrapper)
        for child in wrapper:
            child['classes'].extend(class_names)
        return list(wrapper.children)


class Role(Directive):

    """
    Define a new interpreted text role, optionally based on an existing one.

    The first content line holds the new role name, optionally followed by
    the base role name in parentheses.
    """

    has_content = True

    # "<new role name>" or "<new role name> (<base role name>)"
    argument_pattern = re.compile(
        r'(%s)\s*(\(\s*(%s)\s*\)\s*)?$'
        % (states.Inliner.simplename, states.Inliner.simplename))

    def run(self):
        """Dynamically create and register a custom interpreted text role."""
        if self.content_offset > self.lineno or not self.content:
            raise self.error('"%s" directive requires arguments on the first '
                             'line.' % self.name)
        first_line = self.content[0]
        parsed = self.argument_pattern.match(first_line)
        if not parsed:
            raise self.error('"%s" directive arguments not valid role names: '
                             '"%s".' % (self.name, first_line))
        new_role_name, base_role_name = parsed.group(1, 3)

        def source_block():
            # Literal copy of the directive source, attached to error reports.
            return nodes.literal_block(self.block_text, self.block_text)

        messages = []
        if not base_role_name:
            base_role = roles.generic_custom_role
        else:
            base_role, messages = roles.role(
                base_role_name, self.state_machine.language, self.lineno,
                self.state.reporter)
            if base_role is None:
                unknown = self.state.reporter.error(
                    'Unknown interpreted text role "%s".' % base_role_name,
                    source_block(),
                    line=self.lineno)
                return messages + [unknown]
        assert not hasattr(base_role, 'arguments'), (
            'Supplemental directive arguments for "%s" directive not '
            'supported (specified by "%r" role).' % (self.name, base_role))
        # Parse the remaining content lines as if they belonged to a
        # directive made from the base role.
        try:
            converted_role = convert_directive_function(base_role)
            block_parts = self.state.parse_directive_block(
                self.content[1:], self.content_offset,
                converted_role, option_presets={})
            arguments, options, content, content_offset = block_parts
        except states.MarkupError as detail:
            failure = self.reporter.error(
                'Error in "%s" directive:\n%s.' % (self.name, detail),
                source_block(),
                line=self.lineno)
            return messages + [failure]
        # Without an explicit "class" option, derive it from the role name.
        if 'class' not in options:
            try:
                options['class'] = directives.class_option(new_role_name)
            except ValueError as detail:
                failure = self.reporter.error(
                    'Invalid argument for "%s" directive:\n%s.'
                    % (self.name, detail),
                    source_block(),
                    line=self.lineno)
                return messages + [failure]
        custom_role = roles.CustomRole(new_role_name, base_role, options,
                                       content)
        roles.register_local_role(new_role_name, custom_role)
        return messages


class DefaultRole(Directive):

    """Set the default interpreted text role."""

    optional_arguments = 1
    final_argument_whitespace = False

    def run(self):
        """Register the named role under the empty role name.

        Without argument, remove a registered entry for the empty role
        name.  Returns the messages from the role lookup, plus an error
        message if the role is unknown.
        """
        if self.arguments:
            requested = self.arguments[0]
            role, messages = roles.role(requested,
                                        self.state_machine.language,
                                        self.lineno, self.state.reporter)
            if role is not None:
                roles._roles[''] = role
                return messages
            unknown = self.state.reporter.error(
                'Unknown interpreted text role "%s".' % requested,
                nodes.literal_block(self.block_text, self.block_text),
                line=self.lineno)
            return messages + [unknown]
        # No argument: restore the "default" default role
        if '' in roles._roles:
            del roles._roles['']
        return []


class Title(Directive):

    """Store the directive argument as the document's "title" attribute."""

    required_arguments = 1
    optional_arguments = 0
    final_argument_whitespace = True

    def run(self):
        """Set ``document['title']``; no nodes are returned."""
        document = self.state_machine.document
        document['title'] = self.arguments[0]
        return []


class MetaBody(states.SpecializedBody):

    """Parser state that converts field markers into `meta` nodes."""

    def field_marker(self, match, context, next_state):
        """Meta element."""
        meta_or_message, blank_finish = self.parsemeta(match)
        self.parent += meta_or_message
        return [], next_state, []

    def parsemeta(self, match):
        """Return a tuple (node, blank_finish) for one field.

        The node is a `nodes.meta` element or, if the field has no
        content or an attribute cannot be parsed, a system message.
        """
        field_name = nodes.unescape(
            utils.escape2null(self.parse_field_marker(match)))
        (indented, indent, line_offset, blank_finish
         ) = self.state_machine.get_first_known_indented(match.end())
        meta = nodes.meta()
        content = ' '.join(indented)
        meta['content'] = nodes.unescape(utils.escape2null(content))
        if not indented:
            source_line = self.state_machine.line
            notice = self.reporter.info(
                'No content for meta tag "%s".' % field_name,
                nodes.literal_block(source_line, source_line))
            return notice, blank_finish
        tokens = field_name.split()
        # The first token is either a name=value pair or the plain name.
        try:
            attname, val = utils.extract_name_value(tokens[0])[0]
            meta[attname.lower()] = val
        except utils.NameValueError:
            meta['name'] = tokens[0]
        # All further tokens must be name=value pairs.
        for token in tokens[1:]:
            try:
                attname, val = utils.extract_name_value(token)[0]
                meta[attname.lower()] = val
            except utils.NameValueError as detail:
                source_line = self.state_machine.line
                problem = self.reporter.error(
                    'Error parsing meta tag attribute "%s": %s.'
                    % (token, detail),
                    nodes.literal_block(source_line, source_line))
                return problem, blank_finish
        return meta, blank_finish


class Meta(Directive):

    """Parse the content with `MetaBody` and insert the result nodes."""

    has_content = True

    # Keyword arguments for the nested state machine.
    SMkwargs = {'state_classes': (MetaBody,)}

    def run(self):
        """Insert the parsed nodes into the document; return no nodes."""
        self.assert_has_content()
        collector = nodes.Element()
        end_offset, blank_finish = self.state.nested_list_parse(
            self.content, self.content_offset, collector,
            initial_state='MetaBody', blank_finish=True,
            state_machine_kwargs=self.SMkwargs)
        parsed_line_count = end_offset - self.content_offset
        if parsed_line_count != len(self.content):
            # incomplete parse of block?
            collector += self.reporter.error(
                'Invalid meta directive.',
                nodes.literal_block(self.block_text, self.block_text),
                line=self.lineno)
        # insert at begin of document
        document = self.state.document
        position = document.first_child_not_matching_class(
            (nodes.Titular, nodes.meta)) or 0
        document[position:position] = collector.children
        return []


class Date(Directive):

    """Insert a formatted date; the content is the format string."""

    has_content = True

    def run(self):
        """Return a list with one Text node holding the formatted time.

        The format defaults to '%Y-%m-%d'.  Raises the object returned
        by `self.error()` when used outside of a substitution definition.
        """
        if not isinstance(self.state, states.SubstitutionDef):
            raise self.error(
                'Invalid context: the "%s" directive can only be used within '
                'a substitution definition.' % self.name)
        format_str = '\n'.join(self.content) or '%Y-%m-%d'
        # @@@
        # Use timestamp from the `SOURCE_DATE_EPOCH`_ environment variable?
        # Pro: Docutils-generated documentation
        #      can easily be part of `reproducible software builds`__
        #
        #      __ https://reproducible-builds.org/
        #
        # Con: Changes the specs, hard to predict behaviour,
        #
        # See also the discussion about \date \time \year in TeX
        # http://tug.org/pipermail/tex-k/2016-May/002704.html
        source_date_epoch = os.environ.get('SOURCE_DATE_EPOCH')
        # A non-empty value is taken as seconds since the epoch (UTC);
        # without it, `time.strftime()` is called without a time tuple.
        if source_date_epoch:
            time_tuple = (time.gmtime(int(source_date_epoch)),)
        else:
            time_tuple = ()
        return [nodes.Text(time.strftime(format_str, *time_tuple))]


class TestDirective(Directive):

    """This directive is useful only for testing purposes."""

    optional_arguments = 1
    final_argument_whitespace = True
    option_spec = {'option': directives.unchanged_required}
    has_content = True

    def run(self):
        """Return one "info" system message describing the directive call."""
        details = (self.name, self.arguments, self.options)
        if not self.content:
            return [self.reporter.info(
                'Directive processed. Type="%s", arguments=%r, options=%r, '
                'content: None' % details,
                line=self.lineno)]
        # With content: attach it to the message as a literal block.
        text = '\n'.join(self.content)
        return [self.reporter.info(
            'Directive processed. Type="%s", arguments=%r, options=%r, '
            'content:' % details,
            nodes.literal_block(text, text), line=self.lineno)]