Compare commits
3 commits
a9c3c90405
...
3c10d987e6
Author | SHA1 | Date | |
---|---|---|---|
3c10d987e6 | |||
62bb8076e9 | |||
695813d35f |
2 changed files with 99 additions and 31 deletions
|
@ -1,37 +1,72 @@
|
||||||
import secrets
|
import secrets
|
||||||
|
|
||||||
from . import lexique
|
from . import word_db
|
||||||
|
|
||||||
lex = lexique.Lexique.parse()
|
wdb = word_db.WordDb.autoload()
|
||||||
|
|
||||||
|
|
||||||
def gen_phrase4():
|
def gen_phrase4():
|
||||||
out = []
|
"""Generates a sentence with four words, of structure Adjective Noun Verb Adverb"""
|
||||||
out.append(secrets.choice(lex.most_common(lexique.CatGram.ADJECTIF)))
|
nombre = word_db.Nombre.pick()
|
||||||
out.append(secrets.choice(lex.most_common(lexique.CatGram.NOM)))
|
temps = word_db.Temps.pick()
|
||||||
out.append(secrets.choice(lex.most_common(lexique.CatGram.VERBE)))
|
|
||||||
out.append(secrets.choice(lex.most_common(lexique.CatGram.NOM)))
|
adj = secrets.choice(wdb.adjectifs)
|
||||||
return " ".join(map(lambda x: x.word, out))
|
nom = secrets.choice(wdb.noms)
|
||||||
|
verbe = secrets.choice(wdb.verbes)
|
||||||
|
adverbe = secrets.choice(wdb.adverbes)
|
||||||
|
|
||||||
|
return " ".join(
|
||||||
|
[
|
||||||
|
adj.accord(nom.genre_or_pick, nombre),
|
||||||
|
nom.accord(nombre),
|
||||||
|
verbe.accord(temps, nombre),
|
||||||
|
adverbe.accord(),
|
||||||
|
]
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def gen_phrase6():
    """Generate a six-word sentence: Adjective Noun Verb Adjective Noun Adverb.

    Two grammatical numbers are drawn. The first adjective, the first noun and
    the verb are all inflected with the first one; the second adjective and the
    second noun are inflected with the second one. Each adjective is inflected
    with the ``genre_or_pick`` value of the noun that follows it.
    """
    first_number = word_db.Nombre.pick()
    second_number = word_db.Nombre.pick()
    tense = word_db.Temps.pick()

    first_adj = secrets.choice(wdb.adjectifs)
    first_noun = secrets.choice(wdb.noms)
    verb = secrets.choice(wdb.verbes)
    second_adj = secrets.choice(wdb.adjectifs)
    second_noun = secrets.choice(wdb.noms)
    adverb = secrets.choice(wdb.adverbes)

    # Inflect in sentence order, then glue the words with single spaces.
    words = (
        first_adj.accord(first_noun.genre_or_pick, first_number),
        first_noun.accord(first_number),
        verb.accord(tense, first_number),
        second_adj.accord(second_noun.genre_or_pick, second_number),
        second_noun.accord(second_number),
        adverb.accord(),
    )
    return " ".join(words)
|
||||||
|
|
||||||
|
|
||||||
def gen_rand(n=4):
|
def gen_rand(n=4):
|
||||||
|
"""Generates a fully random sequence of n words, without grammatical consistency"""
|
||||||
out = []
|
out = []
|
||||||
for _ in range(n):
|
for _ in range(n):
|
||||||
cat = secrets.choice(
|
word_cat = secrets.choice(list(wdb.CATEGORY_TO_ATTR))
|
||||||
(
|
if word_cat == word_db.Nom:
|
||||||
lexique.CatGram.ADJECTIF,
|
nombre = word_db.Nombre.pick()
|
||||||
lexique.CatGram.NOM,
|
out.append(secrets.choice(wdb.noms).accord(nombre))
|
||||||
lexique.CatGram.VERBE,
|
elif word_cat == word_db.Adjectif:
|
||||||
lexique.CatGram.ADVERBE,
|
genre = word_db.Genre.pick()
|
||||||
)
|
nombre = word_db.Nombre.pick()
|
||||||
)
|
out.append(secrets.choice(wdb.adjectifs).accord(genre, nombre))
|
||||||
out.append(secrets.choice(lex.most_common(cat)))
|
elif word_cat == word_db.Verbe:
|
||||||
return " ".join(map(lambda x: x.word, out))
|
temps = word_db.Temps.pick()
|
||||||
|
nombre = word_db.Nombre.pick()
|
||||||
|
out.append(secrets.choice(wdb.verbes).accord(temps, nombre))
|
||||||
|
elif word_cat == word_db.Adverbe:
|
||||||
|
out.append(secrets.choice(wdb.adverbes).accord())
|
||||||
|
|
||||||
|
return " ".join(out)
|
||||||
def gen_nom(n=4):
|
|
||||||
out = []
|
|
||||||
for _ in range(n):
|
|
||||||
cat = lexique.CatGram.NOM
|
|
||||||
out.append(secrets.choice(lex.most_common(cat)))
|
|
||||||
return " ".join(map(lambda x: x.word, out))
|
|
||||||
|
|
|
@ -1,8 +1,11 @@
|
||||||
""" A pre-processed database of words, independant of their source """
|
""" A pre-processed database of words, independant of their source """
|
||||||
|
|
||||||
|
import gzip
|
||||||
|
import json
|
||||||
|
import secrets
|
||||||
import typing as t
|
import typing as t
|
||||||
from enum import Enum
|
from enum import Enum
|
||||||
import json
|
from pathlib import Path
|
||||||
|
|
||||||
|
|
||||||
class Genre(Enum):
|
class Genre(Enum):
|
||||||
|
@ -10,17 +13,32 @@ class Genre(Enum):
|
||||||
FEM = "féminin"
|
FEM = "féminin"
|
||||||
INV = "invariable" # pour les noms uniquement
|
INV = "invariable" # pour les noms uniquement
|
||||||
|
|
||||||
|
@classmethod
def pick(cls) -> "Genre":
    """Return a randomly chosen gender, never the invariable one.

    The choice is made with ``secrets.choice`` between the masculine and
    feminine members only.

    Enum member names are case-sensitive: the members are declared in upper
    case, so the lower-case spellings used previously raised
    ``AttributeError`` on every call.
    """
    # NOTE(review): FEM and INV are visible in this hunk; MASC is assumed to
    # be declared on the line just above it -- confirm against the full file.
    return secrets.choice([cls.MASC, cls.FEM])
|
||||||
|
|
||||||
|
|
||||||
class Nombre(Enum):
|
class Nombre(Enum):
|
||||||
SING = "singulier"
|
SING = "singulier"
|
||||||
PLUR = "pluriel"
|
PLUR = "pluriel"
|
||||||
|
|
||||||
|
@classmethod
def pick(cls) -> "Nombre":
    """Return one member of this enum, chosen at random with ``secrets``."""
    candidates = tuple(cls)
    return secrets.choice(candidates)
|
||||||
|
|
||||||
|
|
||||||
class Temps(Enum):
|
class Temps(Enum):
|
||||||
PRESENT = "present"
|
PRESENT = "present"
|
||||||
FUTUR = "futur"
|
FUTUR = "futur"
|
||||||
IMPARFAIT = "imparfait"
|
IMPARFAIT = "imparfait"
|
||||||
|
|
||||||
|
@classmethod
def pick(cls) -> "Temps":
    """Return one member of this enum, chosen at random with ``secrets``."""
    candidates = tuple(cls)
    return secrets.choice(candidates)
|
||||||
|
|
||||||
|
|
||||||
class Nom(t.NamedTuple):
|
class Nom(t.NamedTuple):
|
||||||
"""Nom commun"""
|
"""Nom commun"""
|
||||||
|
@ -36,13 +54,20 @@ class Nom(t.NamedTuple):
|
||||||
"""Accorde en nombre"""
|
"""Accorde en nombre"""
|
||||||
return getattr(self, nombre.name.lower())
|
return getattr(self, nombre.name.lower())
|
||||||
|
|
||||||
|
@property
def genre_or_pick(self) -> Genre:
    """Gender to use when agreeing another word with this noun.

    Returns the noun's own gender, except when that gender is ``Genre.INV``,
    in which case the result of ``Genre.pick()`` is returned instead.
    """
    own_genre = self.genre
    return Genre.pick() if own_genre == Genre.INV else own_genre
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def serialized(self):
|
def serialized(self):
|
||||||
return {"genre": self.genre.name, "sing": self.sing, "plur": self.plur}
|
return {"genre": self.genre.name, "sing": self.sing, "plur": self.plur}
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def unserialized(cls, **kwargs):
|
def unserialized(cls, kwargs):
|
||||||
genre = Genre(kwargs.pop("genre"))
|
genre = Genre[kwargs.pop("genre")]
|
||||||
return cls(**kwargs, genre=genre)
|
return cls(**kwargs, genre=genre)
|
||||||
|
|
||||||
|
|
||||||
|
@ -64,7 +89,7 @@ class Adjectif(t.NamedTuple):
|
||||||
return self._asdict()
|
return self._asdict()
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def unserialized(cls, **kwargs):
|
def unserialized(cls, kwargs):
|
||||||
return cls(**kwargs)
|
return cls(**kwargs)
|
||||||
|
|
||||||
|
|
||||||
|
@ -88,7 +113,7 @@ class Verbe(t.NamedTuple):
|
||||||
return self._asdict()
|
return self._asdict()
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def unserialized(cls, **kwargs):
|
def unserialized(cls, kwargs):
|
||||||
return cls(**kwargs)
|
return cls(**kwargs)
|
||||||
|
|
||||||
|
|
||||||
|
@ -109,13 +134,15 @@ class Adverbe(t.NamedTuple):
|
||||||
return self._asdict()
|
return self._asdict()
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def unserialized(cls, **kwargs):
|
def unserialized(cls, kwargs):
|
||||||
return cls(**kwargs)
|
return cls(**kwargs)
|
||||||
|
|
||||||
|
|
||||||
class WordDb:
|
class WordDb:
|
||||||
"""Base de donnée de mots, sérialisable"""
|
"""Base de donnée de mots, sérialisable"""
|
||||||
|
|
||||||
|
SERIALIZED_GZ_LOCATION = Path(__file__).parent.parent / "morphalou_full.json.gz"
|
||||||
|
|
||||||
_serialize_data: dict[str, t.Type[t.NamedTuple]] = {
|
_serialize_data: dict[str, t.Type[t.NamedTuple]] = {
|
||||||
"noms": Nom,
|
"noms": Nom,
|
||||||
"adjectifs": Adjectif,
|
"adjectifs": Adjectif,
|
||||||
|
@ -171,3 +198,9 @@ class WordDb:
|
||||||
def load(cls, fd) -> "WordDb":
|
def load(cls, fd) -> "WordDb":
|
||||||
"""Unserialize from this stream"""
|
"""Unserialize from this stream"""
|
||||||
return cls.unserialize(json.load(fd))
|
return cls.unserialize(json.load(fd))
|
||||||
|
|
||||||
|
@classmethod
def autoload(cls) -> "WordDb":
    """Load the database from the bundled gzip archive.

    Opens ``cls.SERIALIZED_GZ_LOCATION`` with ``gzip.open`` and hands the
    resulting stream to ``cls.load``.
    """
    archive_path = cls.SERIALIZED_GZ_LOCATION
    with gzip.open(archive_path) as stream:
        database = cls.load(stream)
    return database
|
||||||
|
|
Loading…
Reference in a new issue