Commit f30c8fb7 authored by Patrick Schlindwein
Browse files

Merge branch 'feat/#5_implement_strategy_TFIDF_for_summary' into 'master'

Feat/#5 implement strategy tfidf for summary

See merge request !30
parents a3e26980 522ec20d
Pipeline #70347 passed with stages
in 10 minutes and 48 seconds
...@@ -2,6 +2,7 @@ from fastapi import FastAPI, Request ...@@ -2,6 +2,7 @@ from fastapi import FastAPI, Request
from app.utilities import generator from app.utilities import generator
from app.summary.simple_spacy_summarizer import SimpleSpacySummarizer from app.summary.simple_spacy_summarizer import SimpleSpacySummarizer
from app.summary.summary_sentence_embedding import SentenceEmbeddingSummarizer from app.summary.summary_sentence_embedding import SentenceEmbeddingSummarizer
from app.summary.summarization_with_strategy_TFIDF import SummaryTFIDF
app = FastAPI( app = FastAPI(
title="IntentFinder: NLP-API", title="IntentFinder: NLP-API",
...@@ -11,8 +12,10 @@ app = FastAPI( ...@@ -11,8 +12,10 @@ app = FastAPI(
" IntentFinder" " IntentFinder"
) )
strategies = [SimpleSpacySummarizer(), strategies = [
SentenceEmbeddingSummarizer()] SimpleSpacySummarizer(),
SentenceEmbeddingSummarizer(),
SummaryTFIDF()]
@app.get("/") @app.get("/")
......
...@@ -38,16 +38,15 @@ class SummaryTFIDF(ISummaryStrategy): ...@@ -38,16 +38,15 @@ class SummaryTFIDF(ISummaryStrategy):
stopWords = self.nlpEng.Defaults.stop_words stopWords = self.nlpEng.Defaults.stop_words
for sent in summary: for sent in summary:
# dictionary mit 'words' als Schlüssel und ihrer 'Häufigkeit' als # dictionary with 'words' as the key
# Wert # and their 'frequency' as the value
freq_table = {} freq_table = {}
words = [word.text.lower() for word in sent if word.text.isalnum()] words = [word.text.lower() for word in sent if word.text.isalnum()]
for word in words: for word in words:
word = self.lemmatizer.lemmatize( word = self.lemmatizer.lemmatize(word) # Lemmatize the word
word) # Lemmatisieren Sie das Wort if word not in stopWords: # Reject stopWords
if word not in stopWords: # Stoppwörter ablehnen
if word in freq_table: if word in freq_table:
freq_table[word] += 1 freq_table[word] += 1
else: else:
...@@ -68,7 +67,7 @@ class SummaryTFIDF(ISummaryStrategy): ...@@ -68,7 +67,7 @@ class SummaryTFIDF(ISummaryStrategy):
tf_matrix = {} tf_matrix = {}
for sent, freq_table in freq_matrix.items(): for sent, freq_table in freq_matrix.items():
# dictionary mit 'word' selbst als Schlüssel und dessen TF als Wert # dictionary with 'word' itself as the key and its TF as the value
tf_table = {} tf_table = {}
total_words_in_sentence = len(freq_table) total_words_in_sentence = len(freq_table)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment