2024-09-10 00:30:03 +02:00
|
|
|
""" A pre-processed database of words, independant of their source """
|
|
|
|
|
2024-09-16 22:41:45 +02:00
|
|
|
import gzip
|
|
|
|
import json
|
2024-09-10 00:30:03 +02:00
|
|
|
import typing as t
|
|
|
|
from enum import Enum
|
2024-09-16 22:41:45 +02:00
|
|
|
from pathlib import Path
|
2024-09-10 00:30:03 +02:00
|
|
|
|
|
|
|
|
|
|
|
class Genre(Enum):
    """Grammatical gender.

    Member names, lower-cased, are used elsewhere in this module to build
    the attribute names of inflected forms (e.g. ``masc_sing``).
    """

    MASC = "masculin"
    FEM = "féminin"
    INV = "invariable"  # nouns only
|
2024-09-10 00:30:03 +02:00
|
|
|
|
|
|
|
|
|
|
|
class Nombre(Enum):
    """Grammatical number.

    Member names, lower-cased, are used elsewhere in this module to pick
    the matching inflected form (``sing`` / ``plur``).
    """

    SING = "singulier"
    PLUR = "pluriel"
|
|
|
|
|
|
|
|
|
|
|
|
class Temps(Enum):
    """Verb tense.

    Member names, lower-cased, are used elsewhere in this module as the
    prefix of conjugated-form attribute names (e.g. ``futur_plur``).
    """

    PRESENT = "present"
    FUTUR = "futur"
    IMPARFAIT = "imparfait"
|
|
|
|
|
|
|
|
|
|
|
|
class Nom(t.NamedTuple):
    """Common noun.

    Fields:
        genre: grammatical gender, a ``Genre`` member.
        sing: singular form.
        plur: plural form.
    """

    genre: Genre
    sing: str
    plur: str

    def __str__(self) -> str:
        """Return the singular form."""
        return f"{self.sing}"

    def accord(self, nombre: Nombre) -> str:
        """Return the form agreeing in number with ``nombre``.

        The field is selected from the lower-cased member name, so
        ``SING`` reads ``sing`` and ``PLUR`` reads ``plur``.
        """
        return getattr(self, nombre.name.lower())

    @property
    def serialized(self):
        """Plain-dict form of this noun; the gender is stored by member name."""
        return {"genre": self.genre.name, "sing": self.sing, "plur": self.plur}

    @classmethod
    def unserialized(cls, kwargs):
        """Rebuild a ``Nom`` from a mapping shaped like ``serialized``.

        The input mapping is left untouched: the conversion is done on a
        shallow copy, so the same record can be unserialized repeatedly.

        Raises:
            KeyError: if ``"genre"`` is missing or is not a ``Genre`` member name.
            TypeError: if the mapping holds keys that are not fields of ``Nom``.
        """
        # Copy first: popping from the caller's dict would consume the record.
        fields = dict(kwargs)
        fields["genre"] = Genre[fields["genre"]]
        return cls(**fields)
|
|
|
|
|
2024-09-10 00:30:03 +02:00
|
|
|
|
|
|
|
class Adjectif(t.NamedTuple):
    """Adjective, stored as its four gender/number inflections.

    Only masculine and feminine forms exist; asking for a gender whose
    lower-cased name has no matching field (e.g. ``INV``) raises
    ``AttributeError``.
    """

    masc_sing: str
    masc_plur: str
    fem_sing: str
    fem_plur: str

    def __str__(self) -> str:
        """Return the masculine and feminine singular forms, slash-separated."""
        return f"{self.masc_sing}/{self.fem_sing}"

    def accord(self, genre: Genre, nombre: Nombre) -> str:
        """Return the form agreeing in gender and number."""
        # Field names follow the "<gender>_<number>" pattern.
        gender_part = genre.name.lower()
        number_part = nombre.name.lower()
        return getattr(self, f"{gender_part}_{number_part}")

    @property
    def serialized(self):
        """Plain-dict form: field name -> inflected form."""
        return dict(zip(self._fields, self))

    @classmethod
    def unserialized(cls, kwargs):
        """Rebuild an ``Adjectif`` from a mapping shaped like ``serialized``."""
        return cls(**kwargs)
|
|
|
|
|
2024-09-10 00:30:03 +02:00
|
|
|
|
|
|
|
class Verbe(t.NamedTuple):
    """Verb, stored as its conjugated forms per tense and number."""

    present_sing: str
    present_plur: str
    futur_sing: str
    futur_plur: str
    imparfait_sing: str
    imparfait_plur: str

    def __str__(self) -> str:
        """Return the present singular form."""
        return f"{self.present_sing}"

    def accord(self, temps: Temps, nombre: Nombre) -> str:
        """Return the form for this tense and number (only the 3rd person is used)."""
        # Field names follow the "<tense>_<number>" pattern.
        tense_part = temps.name.lower()
        number_part = nombre.name.lower()
        return getattr(self, f"{tense_part}_{number_part}")

    @property
    def serialized(self):
        """Plain-dict form: field name -> conjugated form."""
        return dict(zip(self._fields, self))

    @classmethod
    def unserialized(cls, kwargs):
        """Rebuild a ``Verbe`` from a mapping shaped like ``serialized``."""
        return cls(**kwargs)
|
|
|
|
|
2024-09-10 00:30:03 +02:00
|
|
|
|
|
|
|
class Adverbe(t.NamedTuple):
    """Adverb, wrapped in a named tuple to mirror the other word classes."""

    adv: str

    def __str__(self) -> str:
        """Return the adverb itself."""
        return self.adv

    def accord(self) -> str:
        """Return the adverb unchanged; present to mirror the other word classes."""
        return self.adv

    @property
    def serialized(self):
        """Plain-dict form with the single ``adv`` entry."""
        return {"adv": self.adv}

    @classmethod
    def unserialized(cls, kwargs):
        """Rebuild an ``Adverbe`` from a mapping shaped like ``serialized``."""
        return cls(**kwargs)
|
|
|
|
|
2024-09-10 00:30:03 +02:00
|
|
|
|
|
|
|
class WordDb:
    """Serializable database of words, grouped by grammatical category."""

    # Gzip-compressed JSON dump read by `autoload`.
    SERIALIZED_GZ_LOCATION = Path(__file__).parent.parent / "morphalou_full.json.gz"

    # Attribute name -> record class; drives `serialize` and `unserialize`.
    _serialize_data: dict[str, t.Type[t.NamedTuple]] = {
        "noms": Nom,
        "adjectifs": Adjectif,
        "verbes": Verbe,
        "adverbes": Adverbe,
    }

    # Reverse lookup: record class -> name of the attribute holding its instances.
    CATEGORY_TO_ATTR: dict = {
        record_cls: attr for attr, record_cls in _serialize_data.items()
    }

    noms: list[Nom]
    adjectifs: list[Adjectif]
    verbes: list[Verbe]
    adverbes: list[Adverbe]

    def __init__(
        self,
        noms: t.Optional[list[Nom]] = None,
        adjectifs: t.Optional[list[Adjectif]] = None,
        verbes: t.Optional[list[Verbe]] = None,
        adverbes: t.Optional[list[Adverbe]] = None,
    ):
        """Store the given word lists; a missing or empty one becomes a fresh list."""
        self.noms = noms if noms else []
        self.adjectifs = adjectifs if adjectifs else []
        self.verbes = verbes if verbes else []
        self.adverbes = adverbes if adverbes else []

    def serialize(self) -> dict:
        """Return a plain dictionary (no classes) describing the whole database.

        Each category name maps to the list of its entries' ``serialized`` values.
        """
        dump = {}
        for attr in type(self)._serialize_data:
            dump[attr] = [entry.serialized for entry in getattr(self, attr)]
        return dump

    def save(self, fd):
        """Write the database as JSON to the stream ``fd``."""
        payload = self.serialize()
        json.dump(payload, fd)

    @classmethod
    @t.no_type_check  # serialization is messy
    def unserialize(cls, data: dict) -> "WordDb":
        """Rebuild a database from the dictionary produced by ``serialize``.

        Every category key must be present in ``data``. Each record dict is
        handed as-is (no copy is made here) to the ``unserialized`` hook of
        the matching record class.
        """
        return cls(
            **{
                attr: [record_cls.unserialized(record) for record in data[attr]]
                for attr, record_cls in cls._serialize_data.items()
            }
        )

    @classmethod
    def load(cls, fd) -> "WordDb":
        """Read JSON from the stream ``fd`` and rebuild the database from it."""
        raw = json.load(fd)
        return cls.unserialize(raw)

    @classmethod
    def autoload(cls) -> "WordDb":
        """Load the database from ``SERIALIZED_GZ_LOCATION``."""
        # gzip.open defaults to binary mode; json.load accepts a binary stream.
        with gzip.open(cls.SERIALIZED_GZ_LOCATION) as stream:
            return cls.load(stream)
|