Commit 016d5722 authored by Patrick Schlindwein
Browse files

Merge branch 'feat/#65_update_ISummaryStrategy' into 'master'

#65 add max length parameter to ISummaryStrategy

See merge request !44
parents f9651faf f1eb645e
Pipeline #71729 passed with stages
in 13 minutes and 36 seconds
......@@ -47,11 +47,12 @@ async def api_strategies():
@app.get("/summarize/{strategy_id}", summary="Generate a summary of the given"
" text.")
async def summarize(strategy_id: str, text: str):
async def summarize(strategy_id: str, max_length: int, text: str):
"""
This function will summarize a given text with a given summarization
strategy
:param max_length: max number of characters in the summarization
:param strategy_id: The id of the strategy
:param text: The text to be summarized
:return: The summary, strategy and quality of the summary in JSON format
......@@ -59,7 +60,7 @@ async def summarize(strategy_id: str, text: str):
for strategy in strategies:
if strategy.id == strategy_id:
quality = 0.5
summary = strategy.summarize(text)
summary = strategy.summarize(text, max_length)
return {"strategy": strategy_id, "quality": quality,
"summary": summary}
......
......@@ -15,7 +15,7 @@ class SimpleSpacySummarizer(ISummaryStrategy):
def id(self):
return self._id
def summarize(self, text: str):
def summarize(self, text: str, max_length: int):
doc = self.nlp(text)
# Divide into tokens, vectorize and remove stop words
......
from app.summary.summary_strategy_interface import ISummaryStrategy
import spacy
import math
from nltk.stem import WordNetLemmatizer
import nltk
from app.summary.summary_strategy_interface import ISummaryStrategy
nltk.download('wordnet')
......@@ -204,7 +204,7 @@ class SummaryTFIDF(ISummaryStrategy):
return summary[1:]
def summarize(self, text: str) -> str:
def summarize(self, text: str, max_length: int) -> str:
text = self.nlpGer(text)
# put all sentences in a list
......
......@@ -23,7 +23,7 @@ class SentenceEmbeddingSummarizer(ISummaryStrategy):
def id(self):
return self._id
def summarize(self, text: str) -> str:
def summarize(self, text: str, max_length: int) -> str:
# convert the article/passage to a list of sentences using nltk’s
# sentence tokenizer.
sentences = nltk.sent_tokenize(text)
......
......@@ -11,11 +11,12 @@ class ISummaryStrategy(ABC):
raise NotImplementedError
@abstractmethod
def summarize(self, text: str) -> str:
def summarize(self, text: str, max_length: int) -> str:
"""
This method generates the summary of a given text
:param text: text to summarize
:param max_length: max number of characters in the summarization
:returns: summary: a string representing a summarized version of the
input text
"""
......
......@@ -18,7 +18,7 @@ class WordEmbeddingSummarizer(ISummaryStrategy):
def id(self):
return self._id
def summarize(self, text: str) -> str:
def summarize(self, text: str, max_length: int) -> str:
extra_words = list(STOP_WORDS) + list(punctuation) + ['\n']
docx = self.nlp(text)
# Technik um ein Vokabular anzulegen
......
......@@ -12,7 +12,7 @@ class TestStrategy1(ISummaryStrategy):
id = "test1"
def summarize(self, text: str) -> str:
def summarize(self, text: str, max_length: int) -> str:
return text
......@@ -24,7 +24,7 @@ class TestStrategy2(ISummaryStrategy):
id = "test2"
def summarize(self, text: str) -> str:
def summarize(self, text: str, max_length: int) -> str:
return "result text"
......@@ -65,8 +65,10 @@ class TestNlpApi(TestCase):
client = TestClient(app.nlp_server.app)
# run test
response1 = client.get("/summarize/test1?text=test%20text")
response2 = client.get("/summarize/test2?text=test%20text")
response1 = client.get(
"/summarize/test1?max_length=130&text=test%20text")
response2 = client.get(
"/summarize/test2?max_length=130&text=test%20text")
# assert result
assert response1.status_code == 200
......
......@@ -53,5 +53,5 @@ class TestBasicSimpleSpacySummarization(TestCase):
# init test data
summarizer = SimpleSpacySummarizer()
summarized_text = summarizer.summarize(self.test_text)
summarized_text = summarizer.summarize(self.test_text, max_length=130)
self.assertLess(len(summarized_text), len(self.test_text))
......@@ -165,5 +165,5 @@ class TestSummarizationSpacy(TestCase):
def test_summarize_text(self):
summary_tfidf = SummaryTFIDF()
summary = summary_tfidf.summarize(self.test_text)
summary = summary_tfidf.summarize(self.test_text, max_length=130)
assert type(summary) == str
import math
from unittest import TestCase
import nltk
from unittest import TestCase
from app.summary.summary_sentence_embedding import SentenceEmbeddingSummarizer
......@@ -172,7 +171,8 @@ class TestSummarizationSentenceEmbedding(TestCase):
summary_sentence_embedding = SentenceEmbeddingSummarizer()
summary = summary_sentence_embedding.summarize(self.test_text)
summary = summary_sentence_embedding.summarize(self.test_text,
max_length=130)
assert type(summary) == str
def test_summarize_text_length(self):
......@@ -182,14 +182,18 @@ class TestSummarizationSentenceEmbedding(TestCase):
summary_sentence_embedding = SentenceEmbeddingSummarizer()
summary = summary_sentence_embedding.summarize(self.test_text)
summary = summary_sentence_embedding.summarize(self.test_text,
max_length=130)
sentences = nltk.sent_tokenize(self.test_text)
sentences = [sentence.strip() for sentence in sentences]
total_sentences_text = len(sentences)
min_num_sentences_for_summary = 1
max_num_sentences_for_summary = 10
num_sentences_for_summary = math.floor(total_sentences_text * 0.3)
num_sentences_in_percent = 0.3
num_sentences_for_summary = math.floor(total_sentences_text *
num_sentences_in_percent)
if num_sentences_for_summary < min_num_sentences_for_summary:
num_sentences_for_summary = min_num_sentences_for_summary
......
......@@ -8,7 +8,7 @@ class TestImplementationValid(ISummaryStrategy):
"""
id = "id1"
def summarize(self, text: str) -> str:
def summarize(self, text: str, max_length: int) -> str:
return text
......@@ -18,7 +18,7 @@ class TestImplementationInvalidOne(ISummaryStrategy):
This class will create a warning but the mistake is intended.
"""
def summarize(self, text: str) -> str:
def summarize(self, text: str, max_length: int) -> str:
return text
......@@ -35,7 +35,7 @@ class TestISummaryStrategy(TestCase):
# run test functions
ti = TestImplementationValid()
summary = ti.summarize(test_text)
summary = ti.summarize(test_text, max_length=130)
# assert result
self.assertEqual(summary, test_text)
......
......@@ -164,5 +164,6 @@ class TestSummarizationSentenceEmbedding(TestCase):
def test_summarize_text(self):
summary_word_embedding = WordEmbeddingSummarizer()
summary = summary_word_embedding.summarize(self.text)
summary = summary_word_embedding.summarize(self.text, max_length=130)
assert type(summary) == str
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment