Commit 2ecfa8d3 authored by Patrick Schlindwein
Browse files

Merge branch 'feat/#28-Überarbeitung_ID-Generierung' into 'master'

#28 überarbeitung id generierung

See merge request !53
parents bcca6a60 f53e017d
Pipeline #72933 passed with stages
in 2 minutes and 38 seconds
......@@ -8,3 +8,9 @@ class TestIntentHandler(TestCase):
" über das Virus informieren?")
id = obj.generate_intent_id(3)
assert id == "buergerinnen_virus_informieren"
def test_normalize(self):
    """Check that normalize() yields "sein" as the lemma of the second token."""
    handler = generator.IntentHandler("Wo können sich hessische Bürger*Innen"
                                      " über das Virus informieren?")
    lemmas = handler.normalize("Ich bin test.")
    # "bin" is the second token of the sample sentence.
    assert lemmas[1] == "sein"
......@@ -10,6 +10,7 @@ class IntentHandler:
def __init__(self, intent: str):
    """Store the intent text and parse it with the spaCy pipeline.

    Args:
        intent: The raw intent text this handler works on.
    """
    self.intent = intent
    self.nlp = spacy.load("de_core_news_md")
    # Parse with the pipeline loaded on this instance (the same one
    # normalize() uses) rather than a class-level ``IntentHandler.nlp``.
    self.doc = self.nlp(intent)
def generate_intent_id(self, max_tokens: int):
......@@ -44,6 +45,13 @@ class IntentHandler:
text = re.sub('[^a-zA-Z0-9]+', '', text)
return text
def normalize(self, text):
    """Return the lemmas of *text* as a list of strings.

    The text is processed with ``self.nlp``; for each resulting token the
    ``lemma_`` is taken and every "ß" in it is replaced by "ss".
    """
    return [tok.lemma_.replace("ß", "ss") for tok in self.nlp(text)]
# def removeStopWords(tokens: []):
# filtered = []
# nlp = German()
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment