""" Reads the Morphalou dataset, in its TSV form """

import typing as t
from lxml import etree
from pathlib import Path
import itertools

from .word_db import Adjectif, Adverbe, Genre, Nom, Nombre, Temps, Verbe, WordDb

# Prefix -> namespace URI map handed to every lxml find()/iterfind() call in this
# module. Note that the "tsv" prefix is bound to the TEI namespace URI.
TSV_NS: dict[str, str] = dict(
    tsv="http://www.tei-c.org/ns/1.0",
    xml="http://www.w3.org/XML/1998/namespace",
)


class MorphalouSet:
    """Loads word forms from the Morphalou dataset files into a ``WordDb``.

    One XML file per word category (see ``CAT_MAPPING``) is read from
    ``MORPHALOU_DIR_PATH``. Despite the "tsv" naming used by the helpers and by
    the XPath prefix, that prefix is bound by ``TSV_NS`` to the TEI namespace.

    Usage: instantiate, call :meth:`parse`, then read ``word_db``.
    """

    # Directory holding the dataset files, located two levels above this file.
    MORPHALOU_DIR_PATH = (
        Path(__file__).parent.parent
        / "data/raw/morphalou/morphalou/5/Morphalou3.1_formatTEI"
    )
    # File name of one category's dataset; ``cat_name`` is a CAT_MAPPING value.
    MORPHALOU_FILENAME_TEMPLATE = "{cat_name}_Morphalou3.1_TEI.xml"

    # Word category class -> category name used in the dataset file names.
    CAT_MAPPING: dict[t.Type[t.NamedTuple], str] = {
        Nom: "commonNoun",
        Adjectif: "adjective",
        Verbe: "verb",
        Adverbe: "adverb",
    }

    # Created fresh in __init__ and populated by parse().
    word_db: WordDb

    def __init__(self):
        """Create the loader with a new ``WordDb``; no file is read yet."""
        self.word_db = WordDb()

    def parse(self):
        """Parse every category of ``CAT_MAPPING`` and store it in ``word_db``.

        For each category class, the attribute name given by
        ``WordDb.CATEGORY_TO_ATTR`` selects both the ``_parse_<attr>`` method
        that is called and the ``word_db`` attribute that receives its result.
        """
        cls = type(self)
        for cat, cat_file in cls.CAT_MAPPING.items():
            word_db_elt = WordDb.CATEGORY_TO_ATTR[cat]
            parse_category = getattr(self, f"_parse_{word_db_elt}")
            tsv_path = cls.MORPHALOU_DIR_PATH / (
                cls.MORPHALOU_FILENAME_TEMPLATE.format(cat_name=cat_file)
            )
            setattr(self.word_db, word_db_elt, parse_category(tsv_path))

    def _tsv_elems(self, tsv_path: Path):
        """Open a dataset file and return its <body> node, direct parent of all
        the relevant <entry> nodes.

        Raises:
            ValueError: if the document has no ``text/body`` element.
        """
        # Binary mode on purpose: the XML parser decodes the bytes itself. In
        # text mode Python would first decode with the platform's default
        # encoding, which may not match the file's actual encoding.
        with tsv_path.open("rb") as h:
            tree = etree.parse(h)
        body = tree.getroot().find("./tsv:text/tsv:body", TSV_NS)
        if body is None:
            # Fail here with the file name rather than later on `None.iterfind`.
            raise ValueError(f"{tsv_path}: no <text>/<body> element found")
        return body

    def _parse_noms(self, tsv_path: Path) -> list[Nom]:
        """Parse the nouns.

        An entry is kept only if its lemma form carries a gender and its
        inflected forms provide both a singular and a plural spelling.
        """
        root = self._tsv_elems(tsv_path)
        out: list[Nom] = []

        for entry in root.iterfind("./tsv:entry", TSV_NS):
            gen_node = entry.find(
                "./tsv:form[@type='lemma']/tsv:gramGrp/tsv:gen", TSV_NS
            )
            if gen_node is None:
                continue  # some nouns don't have a gender defined, somehow -- ignore
            genre = self._genre(gen_node.text)

            # Number -> spelling; an "invariable" form fills both numbers, and a
            # later form overwrites an earlier one for the same number.
            forms: dict[Nombre, str] = {}
            for inflected in entry.iterfind("./tsv:form[@type='inflected']", TSV_NS):
                orth = inflected.find("./tsv:orth", TSV_NS).text
                nombres = self._nombre_set(
                    inflected.find("./tsv:gramGrp/tsv:number", TSV_NS).text
                )
                for form in nombres:
                    forms[form] = orth
            try:
                out.append(
                    Nom(
                        genre=genre,
                        sing=forms[Nombre.SING],
                        plur=forms[Nombre.PLUR],
                    )
                )
            except KeyError:
                continue  # cannot be inflected to all required forms: skip

        return out

    def _parse_adjectifs(self, tsv_path: Path) -> list[Adjectif]:
        """Parse the adjectives.

        An entry is kept only if its inflected forms cover all four
        (gender, number) combinations.
        """
        root = self._tsv_elems(tsv_path)
        out: list[Adjectif] = []

        for entry in root.iterfind("./tsv:entry", TSV_NS):
            # (gender, number) -> spelling; "invariable" values expand to every
            # gender and/or number they stand for.
            forms: dict[tuple[Genre, Nombre], str] = {}
            for inflected in entry.iterfind("./tsv:form[@type='inflected']", TSV_NS):
                orth = inflected.find("./tsv:orth", TSV_NS).text
                gram_grp = inflected.find("./tsv:gramGrp", TSV_NS)
                genres = self._genre_set(gram_grp.find("./tsv:gen", TSV_NS).text)
                nombres = self._nombre_set(gram_grp.find("./tsv:number", TSV_NS).text)

                for form in itertools.product(genres, nombres):
                    forms[form] = orth
            try:
                out.append(
                    Adjectif(
                        masc_sing=forms[Genre.MASC, Nombre.SING],
                        masc_plur=forms[Genre.MASC, Nombre.PLUR],
                        fem_sing=forms[Genre.FEM, Nombre.SING],
                        fem_plur=forms[Genre.FEM, Nombre.PLUR],
                    )
                )
            except KeyError:
                continue  # cannot be inflected to all required forms: skip

        return out

    def _parse_verbes(self, tsv_path: Path) -> list[Verbe]:
        """Parse the verbs.

        Only third-person indicative forms in the tenses known to
        :meth:`_tense` are read. An entry is kept only if it provides a singular
        and a plural spelling for each of those tenses.
        """
        root = self._tsv_elems(tsv_path)
        out: list[Verbe] = []

        for entry in root.iterfind("./tsv:entry", TSV_NS):
            # (tense, number) -> spelling
            forms: dict[tuple[Temps, Nombre], str] = {}
            for inflected in entry.iterfind("./tsv:form[@type='inflected']", TSV_NS):
                gram_grp = inflected.find("./tsv:gramGrp", TSV_NS)

                # Order of tests is important! If mood == 'participle', there is no
                # 'person' defined, so the mood test must short-circuit first.
                if (
                    gram_grp.find("./tsv:mood", TSV_NS).text != "indicative"
                    or gram_grp.find("./tsv:per", TSV_NS).text != "thirdPerson"
                ):
                    continue  # irrelevant for us

                temps = self._tense(gram_grp.find("./tsv:tns", TSV_NS).text)
                if temps is None:
                    continue  # irrelevant for us

                nombres = self._nombre_set(gram_grp.find("./tsv:number", TSV_NS).text)

                orth = inflected.find("./tsv:orth", TSV_NS).text
                for nombre in nombres:
                    forms[(temps, nombre)] = orth
            try:
                out.append(
                    Verbe(
                        present_sing=forms[Temps.PRESENT, Nombre.SING],
                        present_plur=forms[Temps.PRESENT, Nombre.PLUR],
                        futur_sing=forms[Temps.FUTUR, Nombre.SING],
                        futur_plur=forms[Temps.FUTUR, Nombre.PLUR],
                        imparfait_sing=forms[Temps.IMPARFAIT, Nombre.SING],
                        imparfait_plur=forms[Temps.IMPARFAIT, Nombre.PLUR],
                    )
                )
            except KeyError:
                continue  # cannot be inflected to all required forms: skip

        return out

    def _parse_adverbes(self, tsv_path: Path) -> list[Adverbe]:
        """Parse the adverbs; only the lemma spelling of each entry is read."""
        root = self._tsv_elems(tsv_path)
        out: list[Adverbe] = []

        for entry in root.iterfind("./tsv:entry", TSV_NS):
            # We're only interested in the lemma form
            orth = entry.find("./tsv:form[@type='lemma']/tsv:orth", TSV_NS)
            assert orth is not None, "adverb entry without a lemma <orth> element"
            adv = orth.text
            out.append(Adverbe(adv=adv))

        return out

    @staticmethod
    def _genre_set(genre: str) -> list[Genre]:
        """Return every gender a dataset gender value stands for.

        "invariable" expands to both masculine and feminine. Raises ``KeyError``
        for any other unlisted value.
        """
        return {
            "masculine": [Genre.MASC],
            "feminine": [Genre.FEM],
            "invariable": [Genre.MASC, Genre.FEM],
        }[genre]

    @staticmethod
    def _genre(genre: str) -> Genre:
        """Map a dataset gender value to a single ``Genre``.

        Unlike :meth:`_genre_set`, "invariable" maps to ``Genre.INV``. Raises
        ``KeyError`` for an unlisted value.
        """
        return {
            "masculine": Genre.MASC,
            "feminine": Genre.FEM,
            "invariable": Genre.INV,
        }[genre]

    @staticmethod
    def _nombre(nombre: str) -> Nombre:
        """Map "singular"/"plural" to a ``Nombre``.

        "invariable" is not accepted here. Raises ``KeyError`` for an unlisted
        value. The parsers of this class use :meth:`_nombre_set` instead.
        """
        return {
            "singular": Nombre.SING,
            "plural": Nombre.PLUR,
        }[nombre]

    @staticmethod
    def _nombre_set(nombre: str) -> list[Nombre]:
        """Return every number a dataset number value stands for.

        "invariable" expands to both singular and plural. Raises ``KeyError``
        for any other unlisted value.
        """
        return {
            "singular": [Nombre.SING],
            "plural": [Nombre.PLUR],
            "invariable": [Nombre.SING, Nombre.PLUR],
        }[nombre]

    @staticmethod
    def _tense(tense: str) -> t.Optional[Temps]:
        """Map a dataset tense value to a ``Temps``, or ``None`` if it is not
        one of present, imperfect or future."""
        return {
            "present": Temps.PRESENT,
            "imperfect": Temps.IMPARFAIT,
            "future": Temps.FUTUR,
        }.get(tense, None)