Commit f53e017d, authored by Ala Rouis and committed by Patrick Schlindwein
Browse files

#28 überarbeitung id generierung

parent bcca6a60
......@@ -8,3 +8,9 @@ class TestIntentHandler(TestCase):
" über das Virus informieren?")
id = obj.generate_intent_id(3)
assert id == "buergerinnen_virus_informieren"
def test_normalize(self):
    """Check that ``normalize`` yields the expected lemma for a sample.

    For the input "Ich bin test." the second entry of the returned list
    is expected to be "sein".
    """
    question = ("Wo können sich hessische Bürger*Innen"
                " über das Virus informieren?")
    handler = generator.IntentHandler(question)
    lemmas = handler.normalize("Ich bin test.")
    assert lemmas[1] == "sein"
......@@ -10,6 +10,7 @@ class IntentHandler:
def __init__(self, intent: str):
    """Store the intent text and parse it with the spaCy pipeline.

    Args:
        intent: Raw intent text this handler works on.
    """
    self.intent = intent
    self.nlp = spacy.load("de_core_news_md")
    # Parse with the pipeline loaded just above rather than a class-level
    # ``IntentHandler.nlp``, which this method never sets; this keeps the
    # stored document consistent with what ``self.nlp`` produces elsewhere.
    self.doc = self.nlp(intent)
def generate_intent_id(self, max_tokens: int):
......@@ -44,6 +45,13 @@ class IntentHandler:
text = re.sub('[^a-zA-Z0-9]+', '', text)
return text
def normalize(self, text):
    """Return the lemma of every token in *text*, with "ß" written as "ss".

    The text is run through ``self.nlp``; the result holds one string per
    token, in token order.
    """
    return [word.lemma_.replace("ß", "ss") for word in self.nlp(text)]
# def removeStopWords(tokens: []):
# filtered = []
# nlp = German()
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment