Compare commits

..

No commits in common. "3c10d987e66bf56d1ca4df75135ac6185e9febc4" and "a9c3c904056e1f6ffe00869015ae15f216e9484b" have entirely different histories.

2 changed files with 31 additions and 99 deletions

View file

@ -1,72 +1,37 @@
import secrets import secrets
from . import word_db from . import lexique
wdb = word_db.WordDb.autoload() lex = lexique.Lexique.parse()
def gen_phrase4(): def gen_phrase4():
"""Generates a sentence with four words, of structure Adjective Noun Verb Adverb""" out = []
nombre = word_db.Nombre.pick() out.append(secrets.choice(lex.most_common(lexique.CatGram.ADJECTIF)))
temps = word_db.Temps.pick() out.append(secrets.choice(lex.most_common(lexique.CatGram.NOM)))
out.append(secrets.choice(lex.most_common(lexique.CatGram.VERBE)))
adj = secrets.choice(wdb.adjectifs) out.append(secrets.choice(lex.most_common(lexique.CatGram.NOM)))
nom = secrets.choice(wdb.noms) return " ".join(map(lambda x: x.word, out))
verbe = secrets.choice(wdb.verbes)
adverbe = secrets.choice(wdb.adverbes)
return " ".join(
[
adj.accord(nom.genre_or_pick, nombre),
nom.accord(nombre),
verbe.accord(temps, nombre),
adverbe.accord(),
]
)
def gen_phrase6():
    """Build a six-word phrase: Adjective Noun Verb Adjective Noun Adverb.

    Two grammatical numbers and one tense are picked at random. The first
    adjective/noun pair and the verb are accorded with the first number,
    the second adjective/noun pair with the second number. Each adjective
    takes the gender reported by its noun's ``genre_or_pick``.

    Returns:
        The six accorded words joined by single spaces.
    """
    first_nombre, second_nombre = (word_db.Nombre.pick() for _ in range(2))
    tense = word_db.Temps.pick()

    # Draw the six base words, in phrase order.
    first_adj = secrets.choice(wdb.adjectifs)
    first_noun = secrets.choice(wdb.noms)
    verb = secrets.choice(wdb.verbes)
    second_adj = secrets.choice(wdb.adjectifs)
    second_noun = secrets.choice(wdb.noms)
    adverb = secrets.choice(wdb.adverbes)

    words = (
        first_adj.accord(first_noun.genre_or_pick, first_nombre),
        first_noun.accord(first_nombre),
        verb.accord(tense, first_nombre),
        second_adj.accord(second_noun.genre_or_pick, second_nombre),
        second_noun.accord(second_nombre),
        adverb.accord(),
    )
    return " ".join(words)
def gen_rand(n=4): def gen_rand(n=4):
"""Generates a fully random sequence of n words, without grammatical consistency"""
out = [] out = []
for _ in range(n): for _ in range(n):
word_cat = secrets.choice(list(wdb.CATEGORY_TO_ATTR)) cat = secrets.choice(
if word_cat == word_db.Nom: (
nombre = word_db.Nombre.pick() lexique.CatGram.ADJECTIF,
out.append(secrets.choice(wdb.noms).accord(nombre)) lexique.CatGram.NOM,
elif word_cat == word_db.Adjectif: lexique.CatGram.VERBE,
genre = word_db.Genre.pick() lexique.CatGram.ADVERBE,
nombre = word_db.Nombre.pick() )
out.append(secrets.choice(wdb.adjectifs).accord(genre, nombre)) )
elif word_cat == word_db.Verbe: out.append(secrets.choice(lex.most_common(cat)))
temps = word_db.Temps.pick() return " ".join(map(lambda x: x.word, out))
nombre = word_db.Nombre.pick()
out.append(secrets.choice(wdb.verbes).accord(temps, nombre))
elif word_cat == word_db.Adverbe:
out.append(secrets.choice(wdb.adverbes).accord())
return " ".join(out)
def gen_nom(n=4):
    """Return ``n`` randomly chosen nouns joined by single spaces.

    Each entry is drawn with ``secrets.choice`` from
    ``lex.most_common(lexique.CatGram.NOM)``; the ``word`` attribute of
    every drawn entry is what ends up in the output.

    Args:
        n: How many nouns to draw (defaults to 4).

    Returns:
        The chosen words separated by spaces.
    """
    drawn = [
        secrets.choice(lex.most_common(lexique.CatGram.NOM)) for _ in range(n)
    ]
    return " ".join(entry.word for entry in drawn)

View file

@ -1,11 +1,8 @@
""" A pre-processed database of words, independent of their source """ """ A pre-processed database of words, independent of their source """
import gzip
import json
import secrets
import typing as t import typing as t
from enum import Enum from enum import Enum
from pathlib import Path import json
class Genre(Enum): class Genre(Enum):
@ -13,32 +10,17 @@ class Genre(Enum):
FEM = "féminin" FEM = "féminin"
INV = "invariable" # pour les noms uniquement INV = "invariable" # pour les noms uniquement
@classmethod
def pick(cls) -> "Genre":
    """Return a randomly chosen gender, never the invariable one.

    The draw is made with ``secrets.choice`` between the masculine and
    feminine members only.

    Returns:
        Either ``cls.MASC`` or ``cls.FEM``.
    """
    # Enum member lookup is case-sensitive and the members of this class are
    # declared in upper case (FEM, INV), so the former lower-case
    # ``cls.masc`` / ``cls.fem`` raised AttributeError on every call.
    # NOTE(review): MASC is assumed to be the masculine member's name, by
    # analogy with FEM -- confirm against the class body.
    return secrets.choice([cls.MASC, cls.FEM])
class Nombre(Enum): class Nombre(Enum):
SING = "singulier" SING = "singulier"
PLUR = "pluriel" PLUR = "pluriel"
@classmethod
def pick(cls) -> "Nombre":
    """Draw one member of this enum at random with ``secrets.choice``."""
    members = list(cls)
    return secrets.choice(members)
class Temps(Enum): class Temps(Enum):
PRESENT = "present" PRESENT = "present"
FUTUR = "futur" FUTUR = "futur"
IMPARFAIT = "imparfait" IMPARFAIT = "imparfait"
@classmethod
def pick(cls) -> "Temps":
    """Draw one member of this enum at random with ``secrets.choice``."""
    options = [*cls]
    return secrets.choice(options)
class Nom(t.NamedTuple): class Nom(t.NamedTuple):
"""Nom commun""" """Nom commun"""
@ -54,20 +36,13 @@ class Nom(t.NamedTuple):
"""Accorde en nombre""" """Accorde en nombre"""
return getattr(self, nombre.name.lower()) return getattr(self, nombre.name.lower())
@property
def genre_or_pick(self) -> Genre:
    """Gender of this noun; a random ``Genre.pick()`` when it is ``Genre.INV``."""
    return Genre.pick() if self.genre == Genre.INV else self.genre
@property @property
def serialized(self): def serialized(self):
return {"genre": self.genre.name, "sing": self.sing, "plur": self.plur} return {"genre": self.genre.name, "sing": self.sing, "plur": self.plur}
@classmethod @classmethod
def unserialized(cls, kwargs): def unserialized(cls, **kwargs):
genre = Genre[kwargs.pop("genre")] genre = Genre(kwargs.pop("genre"))
return cls(**kwargs, genre=genre) return cls(**kwargs, genre=genre)
@ -89,7 +64,7 @@ class Adjectif(t.NamedTuple):
return self._asdict() return self._asdict()
@classmethod @classmethod
def unserialized(cls, kwargs): def unserialized(cls, **kwargs):
return cls(**kwargs) return cls(**kwargs)
@ -113,7 +88,7 @@ class Verbe(t.NamedTuple):
return self._asdict() return self._asdict()
@classmethod @classmethod
def unserialized(cls, kwargs): def unserialized(cls, **kwargs):
return cls(**kwargs) return cls(**kwargs)
@ -134,15 +109,13 @@ class Adverbe(t.NamedTuple):
return self._asdict() return self._asdict()
@classmethod @classmethod
def unserialized(cls, kwargs): def unserialized(cls, **kwargs):
return cls(**kwargs) return cls(**kwargs)
class WordDb: class WordDb:
"""Base de donnée de mots, sérialisable""" """Base de donnée de mots, sérialisable"""
SERIALIZED_GZ_LOCATION = Path(__file__).parent.parent / "morphalou_full.json.gz"
_serialize_data: dict[str, t.Type[t.NamedTuple]] = { _serialize_data: dict[str, t.Type[t.NamedTuple]] = {
"noms": Nom, "noms": Nom,
"adjectifs": Adjectif, "adjectifs": Adjectif,
@ -198,9 +171,3 @@ class WordDb:
def load(cls, fd) -> "WordDb": def load(cls, fd) -> "WordDb":
"""Unserialize from this stream""" """Unserialize from this stream"""
return cls.unserialize(json.load(fd)) return cls.unserialize(json.load(fd))
@classmethod
def autoload(cls) -> "WordDb":
    """Build a WordDb from the gzip file at ``cls.SERIALIZED_GZ_LOCATION``.

    The file is opened with ``gzip.open`` and the resulting stream is
    handed to ``cls.load``.
    """
    with gzip.open(cls.SERIALIZED_GZ_LOCATION) as stream:
        database = cls.load(stream)
    return database