pwgen-fr/pwgen_fr/word_db.py

140 lines
3.3 KiB
Python

""" A pre-processed database of words, independant of their source """
import typing as t
from enum import Enum
import json
class Genre(Enum):
    """Grammatical gender of a word."""

    MASC = "masculin"
    FEM = "féminin"
    # Only meaningful for nouns.
    INV = "invariable"
class Nombre(Enum):
    """Grammatical number of a word."""

    SING = "singulier"
    PLUR = "pluriel"
class Temps(Enum):
    """Verb tense."""

    PRESENT = "present"
    FUTUR = "futur"
    IMPARFAIT = "imparfait"
class Nom(t.NamedTuple):
    """Common noun, stored with its gender and its singular and plural forms."""

    genre: Genre
    sing: str
    plur: str

    def __str__(self) -> str:
        """Return the singular form."""
        return str(self.sing)

    def accord(self, nombre: Nombre) -> str:
        """Return the form of the noun that agrees in number with ``nombre``."""
        # The lower-cased member name ("sing" / "plur") is also the field name.
        field = nombre.name.lower()
        return getattr(self, field)
class Adjectif(t.NamedTuple):
    """Adjective, stored in its four gender/number forms."""

    masc_sing: str
    masc_plur: str
    fem_sing: str
    fem_plur: str

    def __str__(self) -> str:
        """Return the masculine and feminine singular forms, slash-separated."""
        return "{}/{}".format(self.masc_sing, self.fem_sing)

    def accord(self, genre: Genre, nombre: Nombre) -> str:
        """Return the form that agrees in gender and number.

        The field name is built from the lower-cased member names, e.g.
        ``fem_plur``. There is no ``inv_*`` field, so ``Genre.INV`` raises
        ``AttributeError``.
        """
        genre_part = genre.name.lower()
        nombre_part = nombre.name.lower()
        return getattr(self, genre_part + "_" + nombre_part)
class Verbe(t.NamedTuple):
    """Verb, stored in singular and plural forms for each supported tense."""

    present_sing: str
    present_plur: str
    futur_sing: str
    futur_plur: str
    imparfait_sing: str
    imparfait_plur: str

    def __str__(self) -> str:
        """Return the present singular form."""
        return str(self.present_sing)

    def accord(self, temps: Temps, nombre: Nombre) -> str:
        """Return the form for this tense and number (only the 3rd person is used)."""
        # Field names follow the "<tense>_<number>" pattern of the member names.
        temps_part = temps.name.lower()
        nombre_part = nombre.name.lower()
        return getattr(self, temps_part + "_" + nombre_part)
class Adverbe(t.NamedTuple):
    """Adverb, wrapped in a named tuple for consistency with the other word classes."""

    adv: str

    def __str__(self) -> str:
        """Return the adverb itself."""
        # ``adv`` is the only field, hence index 0.
        return self[0]

    def accord(self) -> str:
        """Return the adverb unchanged; exists for consistency with the other classes."""
        return self.adv
class WordDb:
    """Serializable database of words, grouped by grammatical category.

    Each category (``noms``, ``adjectifs``, ``verbes``, ``adverbes``) is a list
    of the matching named tuple. The database can be converted to and from a
    plain, JSON-compatible dictionary.
    """

    # Serialized key (which is also the attribute name) -> class of its entries.
    _serialize_data: dict[str, t.Type[t.NamedTuple]] = {
        "noms": Nom,
        "adjectifs": Adjectif,
        "verbes": Verbe,
        "adverbes": Adverbe,
    }

    # Reverse mapping: entry class -> name of the attribute holding its entries.
    CATEGORY_TO_ATTR: dict = {
        Nom: "noms",
        Adjectif: "adjectifs",
        Verbe: "verbes",
        Adverbe: "adverbes",
    }

    noms: list[Nom]
    adjectifs: list[Adjectif]
    verbes: list[Verbe]
    adverbes: list[Adverbe]

    def __init__(
        self,
        noms: t.Optional[list[Nom]] = None,
        adjectifs: t.Optional[list[Adjectif]] = None,
        verbes: t.Optional[list[Verbe]] = None,
        adverbes: t.Optional[list[Adverbe]] = None,
    ):
        """Build a database; any omitted (or empty) category starts as a new empty list."""
        self.noms = noms or []
        self.adjectifs = adjectifs or []
        self.verbes = verbes or []
        self.adverbes = adverbes or []

    @staticmethod
    def _entry_to_plain(entry) -> dict:
        """Turn one named tuple into a dict holding no Enum members."""
        return {
            field: value.value if isinstance(value, Enum) else value
            for field, value in entry._asdict().items()
        }

    @staticmethod
    def _enum_fields(entry_cls) -> dict:
        """Map each Enum-typed field of ``entry_cls`` to its Enum class."""
        return {
            field: hint
            for field, hint in t.get_type_hints(entry_cls).items()
            if isinstance(hint, type) and issubclass(hint, Enum)
        }

    @staticmethod
    def _entry_from_plain(entry_cls, enum_fields: dict, row):
        """Rebuild one ``entry_cls`` instance from its serialized ``row``."""
        if not isinstance(row, dict):
            # Not produced by :meth:`serialize`; keep the historical behaviour
            # of passing the value as the single positional argument.
            return entry_cls(row)
        kwargs = {
            # Calling an Enum class with one of its own members returns that
            # member, so rows that still hold Enum members are accepted too.
            field: enum_fields[field](value) if field in enum_fields else value
            for field, value in row.items()
        }
        return entry_cls(**kwargs)

    def serialize(self) -> dict:
        """Serialize to a plain dictionary (no classes).

        Every entry becomes a ``{field: value}`` dict. Enum-valued fields
        (e.g. ``Nom.genre``) are replaced by their value, so the result can be
        handed to :func:`json.dump`.
        """
        return {
            attr: [self._entry_to_plain(entry) for entry in getattr(self, attr)]
            for attr in type(self)._serialize_data
        }

    def save(self, fd):
        """Serialize to this stream, as JSON."""
        json.dump(self.serialize(), fd)

    @classmethod
    @t.no_type_check  # serialization is messy
    def unserialize(cls, data: dict) -> "WordDb":
        """Reverse :meth:`serialize`.

        Raises ``KeyError`` if a category is missing from ``data``,
        ``TypeError`` if a row does not match the fields of its class, and
        ``ValueError`` if an Enum field holds an unknown value.
        """
        parsed = {}
        for attr, attr_cls in cls._serialize_data.items():
            # Resolved once per category rather than once per row.
            enum_fields = cls._enum_fields(attr_cls)
            parsed[attr] = [
                cls._entry_from_plain(attr_cls, enum_fields, row)
                for row in data[attr]
            ]
        return cls(**parsed)

    @classmethod
    def load(cls, fd) -> "WordDb":
        """Unserialize from this stream (JSON as written by :meth:`save`)."""
        return cls.unserialize(json.load(fd))