""" A pre-processed database of words, independant of their source """ import gzip import json import secrets import typing as t from enum import Enum from pathlib import Path class Genre(Enum): MASC = "masculin" FEM = "féminin" INV = "invariable" # pour les noms uniquement @classmethod def pick(cls) -> "Genre": """random-pick (avoids inv)""" return secrets.choice([cls.masc, cls.fem]) class Nombre(Enum): SING = "singulier" PLUR = "pluriel" @classmethod def pick(cls) -> "Nombre": """random-pick""" return secrets.choice(list(cls)) class Temps(Enum): PRESENT = "present" FUTUR = "futur" IMPARFAIT = "imparfait" @classmethod def pick(cls) -> "Temps": """random-pick""" return secrets.choice(list(cls)) class Nom(t.NamedTuple): """Nom commun""" genre: Genre sing: str plur: str def __str__(self) -> str: return f"{self.sing}" def accord(self, nombre: Nombre) -> str: """Accorde en nombre""" return getattr(self, nombre.name.lower()) @property def genre_or_pick(self) -> Genre: """Genre of the noun, or random-pick if invariable""" if self.genre == Genre.INV: return Genre.pick() return self.genre @property def serialized(self): return {"genre": self.genre.name, "sing": self.sing, "plur": self.plur} @classmethod def unserialized(cls, kwargs): genre = Genre[kwargs.pop("genre")] return cls(**kwargs, genre=genre) class Adjectif(t.NamedTuple): masc_sing: str masc_plur: str fem_sing: str fem_plur: str def __str__(self) -> str: return f"{self.masc_sing}/{self.fem_sing}" def accord(self, genre: Genre, nombre: Nombre) -> str: """Accorde en genre et en nombre""" return getattr(self, f"{genre.name.lower()}_{nombre.name.lower()}") @property def serialized(self): return self._asdict() @classmethod def unserialized(cls, kwargs): return cls(**kwargs) class Verbe(t.NamedTuple): present_sing: str present_plur: str futur_sing: str futur_plur: str imparfait_sing: str imparfait_plur: str def __str__(self) -> str: return f"{self.present_sing}" def accord(self, temps: Temps, nombre: Nombre) -> str: """Accorde en temps et en nombre (seule la 3è pers. est utilisée)""" return getattr(self, f"{temps.name.lower()}_{nombre.name.lower()}") @property def serialized(self): return self._asdict() @classmethod def unserialized(cls, kwargs): return cls(**kwargs) class Adverbe(t.NamedTuple): """Packed as named tuple for consistence""" adv: str def __str__(self) -> str: return self.adv def accord(self) -> str: """for consistence""" return self.adv @property def serialized(self): return self._asdict() @classmethod def unserialized(cls, kwargs): return cls(**kwargs) class WordDb: """Base de donnée de mots, sérialisable""" SERIALIZED_GZ_LOCATION = Path(__file__).parent.parent / "morphalou_full.json.gz" _serialize_data: dict[str, t.Type[t.NamedTuple]] = { "noms": Nom, "adjectifs": Adjectif, "verbes": Verbe, "adverbes": Adverbe, } CATEGORY_TO_ATTR: dict = { Nom: "noms", Adjectif: "adjectifs", Verbe: "verbes", Adverbe: "adverbes", } noms: list[Nom] adjectifs: list[Adjectif] verbes: list[Verbe] adverbes: list[Adverbe] def __init__( self, noms: t.Optional[list[Nom]] = None, adjectifs: t.Optional[list[Adjectif]] = None, verbes: t.Optional[list[Verbe]] = None, adverbes: t.Optional[list[Adverbe]] = None, ): self.noms = noms or [] self.adjectifs = adjectifs or [] self.verbes = verbes or [] self.adverbes = adverbes or [] def serialize(self) -> dict: """Serialize to plain dictionary (no classes)""" return { attr: [x.serialized for x in getattr(self, attr)] for attr in self.__class__._serialize_data } def save(self, fd): """Serialize to this stream""" json.dump(self.serialize(), fd) @classmethod @t.no_type_check # serialization is messy def unserialize(cls, data: dict) -> "WordDb": """Reverses :serialize:""" parsed = {} for attr, attr_cls in cls._serialize_data.items(): parsed[attr] = list(map(attr_cls.unserialized, data[attr])) return cls(**parsed) @classmethod def load(cls, fd) -> "WordDb": """Unserialize from this stream""" return cls.unserialize(json.load(fd)) @classmethod def autoload(cls) -> "WordDb": """Unserialize from default source""" with gzip.open(cls.SERIALIZED_GZ_LOCATION) as h: return cls.load(h)