pwgen-fr/pwgen_fr/word_db.py

184 lines
4.3 KiB
Python
Raw Normal View History

2024-09-10 00:30:03 +02:00
""" A pre-processed database of words, independant of their source """
2024-09-16 22:41:45 +02:00
import gzip
import json
2024-09-10 00:30:03 +02:00
import typing as t
from enum import Enum
2024-09-16 22:41:45 +02:00
from pathlib import Path
2024-09-10 00:30:03 +02:00
class Genre(Enum):
    """Grammatical gender of a word."""

    MASC = "masculin"
    FEM = "féminin"
    INV = "invariable"  # for nouns only
2024-09-10 00:30:03 +02:00
class Nombre(Enum):
    """Grammatical number of a word."""

    SING = "singulier"
    PLUR = "pluriel"
class Temps(Enum):
    """Verb tense."""

    PRESENT = "present"
    FUTUR = "futur"
    IMPARFAIT = "imparfait"
class Nom(t.NamedTuple):
    """Common noun: its gender plus its singular and plural forms."""

    genre: Genre
    sing: str
    plur: str

    def __str__(self) -> str:
        """Display the noun by its singular form."""
        return f"{self.sing}"

    def accord(self, nombre: Nombre) -> str:
        """Return the form of the noun that agrees with ``nombre``."""
        # The field names (`sing`, `plur`) are the lowercased member names
        # of `Nombre`, so the member name selects the field directly.
        return getattr(self, nombre.name.lower())

    @property
    def serialized(self) -> dict:
        """Plain-dict form of the noun; the gender is stored by member name."""
        return {"genre": self.genre.name, "sing": self.sing, "plur": self.plur}

    @classmethod
    def unserialized(cls, kwargs) -> "Nom":
        """Rebuild a noun from the dict produced by ``serialized``.

        The mapping passed in is not modified, so the same data can be
        deserialized more than once.

        Raises:
            KeyError: if ``"genre"`` is missing or is not a ``Genre``
                member name.
        """
        # Work on a copy: popping from `kwargs` itself would strip the
        # "genre" key from the caller's data.
        fields = dict(kwargs)
        fields["genre"] = Genre[fields["genre"]]
        return cls(**fields)
2024-09-10 00:30:03 +02:00
class Adjectif(t.NamedTuple):
    """Adjective, stored as its four gender/number forms."""

    masc_sing: str
    masc_plur: str
    fem_sing: str
    fem_plur: str

    def __str__(self) -> str:
        """Display the adjective as ``masculine/feminine`` singular forms."""
        return f"{self.masc_sing}/{self.fem_sing}"

    def accord(self, genre: Genre, nombre: Nombre) -> str:
        """Return the form that agrees with ``genre`` and ``nombre``."""
        # The field is looked up by name: "<genre>_<nombre>", lowercased.
        # NOTE(review): there is no `inv_*` field, so passing Genre.INV
        # raises AttributeError -- callers presumably resolve INV to a
        # concrete gender first; confirm.
        return getattr(self, f"{genre.name.lower()}_{nombre.name.lower()}")

    @property
    def serialized(self) -> dict:
        """Plain-dict form of the adjective (field name -> form)."""
        return self._asdict()

    @classmethod
    def unserialized(cls, kwargs) -> "Adjectif":
        """Rebuild an adjective from the dict produced by ``serialized``."""
        return cls(**kwargs)
2024-09-10 00:30:03 +02:00
class Verbe(t.NamedTuple):
    """Verb, stored as one singular and one plural form per tense."""

    present_sing: str
    present_plur: str
    futur_sing: str
    futur_plur: str
    imparfait_sing: str
    imparfait_plur: str

    def __str__(self) -> str:
        """Display the verb by its present singular form."""
        return f"{self.present_sing}"

    def accord(self, temps: Temps, nombre: Nombre) -> str:
        """Return the form for ``temps`` and ``nombre`` (3rd person only)."""
        # The field is looked up by name: "<temps>_<nombre>", lowercased.
        return getattr(self, f"{temps.name.lower()}_{nombre.name.lower()}")

    @property
    def serialized(self) -> dict:
        """Plain-dict form of the verb (field name -> form)."""
        return self._asdict()

    @classmethod
    def unserialized(cls, kwargs) -> "Verbe":
        """Rebuild a verb from the dict produced by ``serialized``."""
        return cls(**kwargs)
2024-09-10 00:30:03 +02:00
class Adverbe(t.NamedTuple):
    """Adverb, packed as a named tuple for consistency with other words."""

    adv: str

    def __str__(self) -> str:
        """Display the adverb itself."""
        return self.adv

    def accord(self) -> str:
        """Return the adverb unchanged; present for interface consistency."""
        return self.adv

    @property
    def serialized(self) -> dict:
        """Plain-dict form of the adverb (``{"adv": ...}``)."""
        return self._asdict()

    @classmethod
    def unserialized(cls, kwargs) -> "Adverbe":
        """Rebuild an adverb from the dict produced by ``serialized``."""
        return cls(**kwargs)
2024-09-10 00:30:03 +02:00
class WordDb:
    """Serializable database of words, grouped by grammatical category."""

    # Default location of the gzipped JSON dump read by `autoload`.
    SERIALIZED_GZ_LOCATION = Path(__file__).parent.parent / "morphalou_full.json.gz"

    # Serialized key (also the attribute name) -> class of the entries.
    _serialize_data: dict[str, t.Type[t.NamedTuple]] = {
        "noms": Nom,
        "adjectifs": Adjectif,
        "verbes": Verbe,
        "adverbes": Adverbe,
    }

    # Word class -> name of the attribute holding words of that class.
    CATEGORY_TO_ATTR: dict = {
        Nom: "noms",
        Adjectif: "adjectifs",
        Verbe: "verbes",
        Adverbe: "adverbes",
    }

    noms: list[Nom]
    adjectifs: list[Adjectif]
    verbes: list[Verbe]
    adverbes: list[Adverbe]

    def __init__(
        self,
        noms: t.Optional[list[Nom]] = None,
        adjectifs: t.Optional[list[Adjectif]] = None,
        verbes: t.Optional[list[Verbe]] = None,
        adverbes: t.Optional[list[Adverbe]] = None,
    ):
        """Build a database; any category left out starts as an empty list."""
        self.noms = noms or []
        self.adjectifs = adjectifs or []
        self.verbes = verbes or []
        self.adverbes = adverbes or []

    def serialize(self) -> dict:
        """Serialize to a plain dictionary (no classes).

        Each key of ``_serialize_data`` maps to the list of ``serialized``
        forms of the words stored under the attribute of the same name.
        """
        return {
            attr: [x.serialized for x in getattr(self, attr)]
            for attr in self.__class__._serialize_data
        }

    def save(self, fd):
        """Write the serialized database as JSON to the stream ``fd``."""
        json.dump(self.serialize(), fd)

    @classmethod
    @t.no_type_check  # serialization is messy
    def unserialize(cls, data: dict) -> "WordDb":
        """Reverse ``serialize``: rebuild a database from a plain dictionary.

        Raises:
            KeyError: if ``data`` lacks one of the ``_serialize_data`` keys.
        """
        parsed = {}
        for attr, attr_cls in cls._serialize_data.items():
            parsed[attr] = list(map(attr_cls.unserialized, data[attr]))
        return cls(**parsed)

    @classmethod
    def load(cls, fd) -> "WordDb":
        """Unserialize a database from the JSON stream ``fd``."""
        return cls.unserialize(json.load(fd))

    @classmethod
    def autoload(cls) -> "WordDb":
        """Unserialize a database from ``SERIALIZED_GZ_LOCATION``."""
        # gzip.open defaults to binary mode; json.load accepts binary files.
        with gzip.open(cls.SERIALIZED_GZ_LOCATION) as h:
            return cls.load(h)