Compare commits
No commits in common. "3c10d987e66bf56d1ca4df75135ac6185e9febc4" and "a9c3c904056e1f6ffe00869015ae15f216e9484b" have entirely different histories.
3c10d987e6
...
a9c3c90405
2 changed files with 31 additions and 99 deletions
|
@ -1,72 +1,37 @@
|
||||||
import secrets
|
import secrets
|
||||||
|
|
||||||
from . import word_db
|
from . import lexique
|
||||||
|
|
||||||
wdb = word_db.WordDb.autoload()
|
lex = lexique.Lexique.parse()
|
||||||
|
|
||||||
|
|
||||||
def gen_phrase4():
|
def gen_phrase4():
|
||||||
"""Generates a sentence with four words, of structure Adjective Noun Verb Adverb"""
|
out = []
|
||||||
nombre = word_db.Nombre.pick()
|
out.append(secrets.choice(lex.most_common(lexique.CatGram.ADJECTIF)))
|
||||||
temps = word_db.Temps.pick()
|
out.append(secrets.choice(lex.most_common(lexique.CatGram.NOM)))
|
||||||
|
out.append(secrets.choice(lex.most_common(lexique.CatGram.VERBE)))
|
||||||
adj = secrets.choice(wdb.adjectifs)
|
out.append(secrets.choice(lex.most_common(lexique.CatGram.NOM)))
|
||||||
nom = secrets.choice(wdb.noms)
|
return " ".join(map(lambda x: x.word, out))
|
||||||
verbe = secrets.choice(wdb.verbes)
|
|
||||||
adverbe = secrets.choice(wdb.adverbes)
|
|
||||||
|
|
||||||
return " ".join(
|
|
||||||
[
|
|
||||||
adj.accord(nom.genre_or_pick, nombre),
|
|
||||||
nom.accord(nombre),
|
|
||||||
verbe.accord(temps, nombre),
|
|
||||||
adverbe.accord(),
|
|
||||||
]
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def gen_phrase6():
|
|
||||||
"""Generates a sentence with six words, of structure Adjective Noun Verb Adjective
|
|
||||||
Noun Adverb"""
|
|
||||||
nombres = [word_db.Nombre.pick() for _ in range(2)]
|
|
||||||
temps = word_db.Temps.pick()
|
|
||||||
|
|
||||||
adj0 = secrets.choice(wdb.adjectifs)
|
|
||||||
nom0 = secrets.choice(wdb.noms)
|
|
||||||
verbe = secrets.choice(wdb.verbes)
|
|
||||||
adj1 = secrets.choice(wdb.adjectifs)
|
|
||||||
nom1 = secrets.choice(wdb.noms)
|
|
||||||
adverbe = secrets.choice(wdb.adverbes)
|
|
||||||
|
|
||||||
return " ".join(
|
|
||||||
[
|
|
||||||
adj0.accord(nom0.genre_or_pick, nombres[0]),
|
|
||||||
nom0.accord(nombres[0]),
|
|
||||||
verbe.accord(temps, nombres[0]),
|
|
||||||
adj1.accord(nom1.genre_or_pick, nombres[1]),
|
|
||||||
nom1.accord(nombres[1]),
|
|
||||||
adverbe.accord(),
|
|
||||||
]
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def gen_rand(n=4):
|
def gen_rand(n=4):
|
||||||
"""Generates a fully random sequence of n words, without grammatical consistency"""
|
|
||||||
out = []
|
out = []
|
||||||
for _ in range(n):
|
for _ in range(n):
|
||||||
word_cat = secrets.choice(list(wdb.CATEGORY_TO_ATTR))
|
cat = secrets.choice(
|
||||||
if word_cat == word_db.Nom:
|
(
|
||||||
nombre = word_db.Nombre.pick()
|
lexique.CatGram.ADJECTIF,
|
||||||
out.append(secrets.choice(wdb.noms).accord(nombre))
|
lexique.CatGram.NOM,
|
||||||
elif word_cat == word_db.Adjectif:
|
lexique.CatGram.VERBE,
|
||||||
genre = word_db.Genre.pick()
|
lexique.CatGram.ADVERBE,
|
||||||
nombre = word_db.Nombre.pick()
|
)
|
||||||
out.append(secrets.choice(wdb.adjectifs).accord(genre, nombre))
|
)
|
||||||
elif word_cat == word_db.Verbe:
|
out.append(secrets.choice(lex.most_common(cat)))
|
||||||
temps = word_db.Temps.pick()
|
return " ".join(map(lambda x: x.word, out))
|
||||||
nombre = word_db.Nombre.pick()
|
|
||||||
out.append(secrets.choice(wdb.verbes).accord(temps, nombre))
|
|
||||||
elif word_cat == word_db.Adverbe:
|
|
||||||
out.append(secrets.choice(wdb.adverbes).accord())
|
|
||||||
|
|
||||||
return " ".join(out)
|
|
||||||
|
def gen_nom(n=4):
|
||||||
|
out = []
|
||||||
|
for _ in range(n):
|
||||||
|
cat = lexique.CatGram.NOM
|
||||||
|
out.append(secrets.choice(lex.most_common(cat)))
|
||||||
|
return " ".join(map(lambda x: x.word, out))
|
||||||
|
|
|
@ -1,11 +1,8 @@
|
||||||
""" A pre-processed database of words, independant of their source """
|
""" A pre-processed database of words, independant of their source """
|
||||||
|
|
||||||
import gzip
|
|
||||||
import json
|
|
||||||
import secrets
|
|
||||||
import typing as t
|
import typing as t
|
||||||
from enum import Enum
|
from enum import Enum
|
||||||
from pathlib import Path
|
import json
|
||||||
|
|
||||||
|
|
||||||
class Genre(Enum):
|
class Genre(Enum):
|
||||||
|
@ -13,32 +10,17 @@ class Genre(Enum):
|
||||||
FEM = "féminin"
|
FEM = "féminin"
|
||||||
INV = "invariable" # pour les noms uniquement
|
INV = "invariable" # pour les noms uniquement
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def pick(cls) -> "Genre":
|
|
||||||
"""random-pick (avoids inv)"""
|
|
||||||
return secrets.choice([cls.masc, cls.fem])
|
|
||||||
|
|
||||||
|
|
||||||
class Nombre(Enum):
|
class Nombre(Enum):
|
||||||
SING = "singulier"
|
SING = "singulier"
|
||||||
PLUR = "pluriel"
|
PLUR = "pluriel"
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def pick(cls) -> "Nombre":
|
|
||||||
"""random-pick"""
|
|
||||||
return secrets.choice(list(cls))
|
|
||||||
|
|
||||||
|
|
||||||
class Temps(Enum):
|
class Temps(Enum):
|
||||||
PRESENT = "present"
|
PRESENT = "present"
|
||||||
FUTUR = "futur"
|
FUTUR = "futur"
|
||||||
IMPARFAIT = "imparfait"
|
IMPARFAIT = "imparfait"
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def pick(cls) -> "Temps":
|
|
||||||
"""random-pick"""
|
|
||||||
return secrets.choice(list(cls))
|
|
||||||
|
|
||||||
|
|
||||||
class Nom(t.NamedTuple):
|
class Nom(t.NamedTuple):
|
||||||
"""Nom commun"""
|
"""Nom commun"""
|
||||||
|
@ -54,20 +36,13 @@ class Nom(t.NamedTuple):
|
||||||
"""Accorde en nombre"""
|
"""Accorde en nombre"""
|
||||||
return getattr(self, nombre.name.lower())
|
return getattr(self, nombre.name.lower())
|
||||||
|
|
||||||
@property
|
|
||||||
def genre_or_pick(self) -> Genre:
|
|
||||||
"""Genre of the noun, or random-pick if invariable"""
|
|
||||||
if self.genre == Genre.INV:
|
|
||||||
return Genre.pick()
|
|
||||||
return self.genre
|
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def serialized(self):
|
def serialized(self):
|
||||||
return {"genre": self.genre.name, "sing": self.sing, "plur": self.plur}
|
return {"genre": self.genre.name, "sing": self.sing, "plur": self.plur}
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def unserialized(cls, kwargs):
|
def unserialized(cls, **kwargs):
|
||||||
genre = Genre[kwargs.pop("genre")]
|
genre = Genre(kwargs.pop("genre"))
|
||||||
return cls(**kwargs, genre=genre)
|
return cls(**kwargs, genre=genre)
|
||||||
|
|
||||||
|
|
||||||
|
@ -89,7 +64,7 @@ class Adjectif(t.NamedTuple):
|
||||||
return self._asdict()
|
return self._asdict()
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def unserialized(cls, kwargs):
|
def unserialized(cls, **kwargs):
|
||||||
return cls(**kwargs)
|
return cls(**kwargs)
|
||||||
|
|
||||||
|
|
||||||
|
@ -113,7 +88,7 @@ class Verbe(t.NamedTuple):
|
||||||
return self._asdict()
|
return self._asdict()
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def unserialized(cls, kwargs):
|
def unserialized(cls, **kwargs):
|
||||||
return cls(**kwargs)
|
return cls(**kwargs)
|
||||||
|
|
||||||
|
|
||||||
|
@ -134,15 +109,13 @@ class Adverbe(t.NamedTuple):
|
||||||
return self._asdict()
|
return self._asdict()
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def unserialized(cls, kwargs):
|
def unserialized(cls, **kwargs):
|
||||||
return cls(**kwargs)
|
return cls(**kwargs)
|
||||||
|
|
||||||
|
|
||||||
class WordDb:
|
class WordDb:
|
||||||
"""Base de donnée de mots, sérialisable"""
|
"""Base de donnée de mots, sérialisable"""
|
||||||
|
|
||||||
SERIALIZED_GZ_LOCATION = Path(__file__).parent.parent / "morphalou_full.json.gz"
|
|
||||||
|
|
||||||
_serialize_data: dict[str, t.Type[t.NamedTuple]] = {
|
_serialize_data: dict[str, t.Type[t.NamedTuple]] = {
|
||||||
"noms": Nom,
|
"noms": Nom,
|
||||||
"adjectifs": Adjectif,
|
"adjectifs": Adjectif,
|
||||||
|
@ -198,9 +171,3 @@ class WordDb:
|
||||||
def load(cls, fd) -> "WordDb":
|
def load(cls, fd) -> "WordDb":
|
||||||
"""Unserialize from this stream"""
|
"""Unserialize from this stream"""
|
||||||
return cls.unserialize(json.load(fd))
|
return cls.unserialize(json.load(fd))
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def autoload(cls) -> "WordDb":
|
|
||||||
"""Unserialize from default source"""
|
|
||||||
with gzip.open(cls.SERIALIZED_GZ_LOCATION) as h:
|
|
||||||
return cls.load(h)
|
|
||||||
|
|
Loading…
Reference in a new issue