Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
pse-trapp-public
IntentFinder
Commits
f1eb645e
Commit
f1eb645e
authored
May 21, 2021
by
Christof Walther
Committed by
Patrick Schlindwein
May 21, 2021
Browse files
#65
add max length parameter to ISummaryStrategy
parent
f9651faf
Changes
12
Hide whitespace changes
Inline
Side-by-side
src/nlp/app/nlp_server.py
View file @
f1eb645e
...
...
@@ -47,11 +47,12 @@ async def api_strategies():
@
app
.
get
(
"/summarize/{strategy_id}"
,
summary
=
"Generate a summary of the given"
" text."
)
async
def
summarize
(
strategy_id
:
str
,
text
:
str
):
async
def
summarize
(
strategy_id
:
str
,
max_length
:
int
,
text
:
str
):
"""
This function will summarize a given text with a given summarization
strategy
:param max_length: max number of characters in the summarization
:param strategy_id: The id of the strategy
:param text: The text to be summarized
:return: The summary, strategy and quality of the summary in JSON format
...
...
@@ -59,7 +60,7 @@ async def summarize(strategy_id: str, text: str):
for
strategy
in
strategies
:
if
strategy
.
id
==
strategy_id
:
quality
=
0.5
summary
=
strategy
.
summarize
(
text
)
summary
=
strategy
.
summarize
(
text
,
max_length
)
return
{
"strategy"
:
strategy_id
,
"quality"
:
quality
,
"summary"
:
summary
}
...
...
src/nlp/app/summary/simple_spacy_summarizer.py
View file @
f1eb645e
...
...
@@ -15,7 +15,7 @@ class SimpleSpacySummarizer(ISummaryStrategy):
def
id
(
self
):
return
self
.
_id
def
summarize
(
self
,
text
:
str
):
def
summarize
(
self
,
text
:
str
,
max_length
:
int
):
doc
=
self
.
nlp
(
text
)
# Divide into tokens, vectorize and remove stop words
...
...
src/nlp/app/summary/summarization_with_strategy_TFIDF.py
View file @
f1eb645e
from
app.summary.summary_strategy_interface
import
ISummaryStrategy
import
spacy
import
math
from
nltk.stem
import
WordNetLemmatizer
import
nltk
from
app.summary.summary_strategy_interface
import
ISummaryStrategy
nltk
.
download
(
'wordnet'
)
...
...
@@ -204,7 +204,7 @@ class SummaryTFIDF(ISummaryStrategy):
return
summary
[
1
:]
def
summarize
(
self
,
text
:
str
)
->
str
:
def
summarize
(
self
,
text
:
str
,
max_length
:
int
)
->
str
:
text
=
self
.
nlpGer
(
text
)
# put all sentences in a list
...
...
src/nlp/app/summary/summary_sentence_embedding.py
View file @
f1eb645e
...
...
@@ -23,7 +23,7 @@ class SentenceEmbeddingSummarizer(ISummaryStrategy):
def
id
(
self
):
return
self
.
_id
def
summarize
(
self
,
text
:
str
)
->
str
:
def
summarize
(
self
,
text
:
str
,
max_length
:
int
)
->
str
:
# convert the article/passage to a list of sentences using nltk’s
# sentence tokenizer.
sentences
=
nltk
.
sent_tokenize
(
text
)
...
...
src/nlp/app/summary/summary_strategy_interface.py
View file @
f1eb645e
...
...
@@ -11,11 +11,12 @@ class ISummaryStrategy(ABC):
raise
NotImplementedError
@
abstractmethod
def
summarize
(
self
,
text
:
str
)
->
str
:
def
summarize
(
self
,
text
:
str
,
max_length
:
int
)
->
str
:
"""
This method generates the summary of a given text
:param text: text to summarize
:param max_length: max number of characters in the summarization
:returns: summary: a string representing a summarized version of the
input text
"""
...
...
src/nlp/app/summary/summary_word_embedding.py
View file @
f1eb645e
...
...
@@ -18,7 +18,7 @@ class WordEmbeddingSummarizer(ISummaryStrategy):
def
id
(
self
):
return
self
.
_id
def
summarize
(
self
,
text
:
str
)
->
str
:
def
summarize
(
self
,
text
:
str
,
max_length
:
int
)
->
str
:
extra_words
=
list
(
STOP_WORDS
)
+
list
(
punctuation
)
+
[
'
\n
'
]
docx
=
self
.
nlp
(
text
)
# Technique for building a vocabulary
...
...
src/nlp/app/tests/test_nlp_server.py
View file @
f1eb645e
...
...
@@ -12,7 +12,7 @@ class TestStrategy1(ISummaryStrategy):
id
=
"test1"
def
summarize
(
self
,
text
:
str
)
->
str
:
def
summarize
(
self
,
text
:
str
,
max_length
:
int
)
->
str
:
return
text
...
...
@@ -24,7 +24,7 @@ class TestStrategy2(ISummaryStrategy):
id
=
"test2"
def
summarize
(
self
,
text
:
str
)
->
str
:
def
summarize
(
self
,
text
:
str
,
max_length
:
int
)
->
str
:
return
"result text"
...
...
@@ -65,8 +65,10 @@ class TestNlpApi(TestCase):
client
=
TestClient
(
app
.
nlp_server
.
app
)
# run test
response1
=
client
.
get
(
"/summarize/test1?text=test%20text"
)
response2
=
client
.
get
(
"/summarize/test2?text=test%20text"
)
response1
=
client
.
get
(
"/summarize/test1?max_length=130&text=test%20text"
)
response2
=
client
.
get
(
"/summarize/test2?max_length=130&text=test%20text"
)
# assert result
assert
response1
.
status_code
==
200
...
...
src/nlp/app/tests/test_simple_spacy_summarizer.py
View file @
f1eb645e
...
...
@@ -53,5 +53,5 @@ class TestBasicSimpleSpacySummarization(TestCase):
# init test data
summarizer
=
SimpleSpacySummarizer
()
summarized_text
=
summarizer
.
summarize
(
self
.
test_text
)
summarized_text
=
summarizer
.
summarize
(
self
.
test_text
,
max_length
=
130
)
self
.
assertLess
(
len
(
summarized_text
),
len
(
self
.
test_text
))
src/nlp/app/tests/test_summarization_with_strategy_tfidf.py
View file @
f1eb645e
...
...
@@ -165,5 +165,5 @@ class TestSummarizationSpacy(TestCase):
def
test_summarize_text
(
self
):
summary_tfidf
=
SummaryTFIDF
()
summary
=
summary_tfidf
.
summarize
(
self
.
test_text
)
summary
=
summary_tfidf
.
summarize
(
self
.
test_text
,
max_length
=
130
)
assert
type
(
summary
)
==
str
src/nlp/app/tests/test_summary_sentence_embedding.py
View file @
f1eb645e
import
math
from
unittest
import
TestCase
import
nltk
from
unittest
import
TestCase
from
app.summary.summary_sentence_embedding
import
SentenceEmbeddingSummarizer
...
...
@@ -172,7 +171,8 @@ class TestSummarizationSentenceEmbedding(TestCase):
summary_sentence_embedding
=
SentenceEmbeddingSummarizer
()
summary
=
summary_sentence_embedding
.
summarize
(
self
.
test_text
)
summary
=
summary_sentence_embedding
.
summarize
(
self
.
test_text
,
max_length
=
130
)
assert
type
(
summary
)
==
str
def
test_summarize_text_length
(
self
):
...
...
@@ -182,14 +182,18 @@ class TestSummarizationSentenceEmbedding(TestCase):
summary_sentence_embedding
=
SentenceEmbeddingSummarizer
()
summary
=
summary_sentence_embedding
.
summarize
(
self
.
test_text
)
summary
=
summary_sentence_embedding
.
summarize
(
self
.
test_text
,
max_length
=
130
)
sentences
=
nltk
.
sent_tokenize
(
self
.
test_text
)
sentences
=
[
sentence
.
strip
()
for
sentence
in
sentences
]
total_sentences_text
=
len
(
sentences
)
min_num_sentences_for_summary
=
1
max_num_sentences_for_summary
=
10
num_sentences_for_summary
=
math
.
floor
(
total_sentences_text
*
0.3
)
num_sentences_in_percent
=
0.3
num_sentences_for_summary
=
math
.
floor
(
total_sentences_text
*
num_sentences_in_percent
)
if
num_sentences_for_summary
<
min_num_sentences_for_summary
:
num_sentences_for_summary
=
min_num_sentences_for_summary
...
...
src/nlp/app/tests/test_summary_strategy_interface.py
View file @
f1eb645e
...
...
@@ -8,7 +8,7 @@ class TestImplementationValid(ISummaryStrategy):
"""
id
=
"id1"
def
summarize
(
self
,
text
:
str
)
->
str
:
def
summarize
(
self
,
text
:
str
,
max_length
:
int
)
->
str
:
return
text
...
...
@@ -18,7 +18,7 @@ class TestImplementationInvalidOne(ISummaryStrategy):
This class will create a warning but the mistake is intended.
"""
def
summarize
(
self
,
text
:
str
)
->
str
:
def
summarize
(
self
,
text
:
str
,
max_length
:
int
)
->
str
:
return
text
...
...
@@ -35,7 +35,7 @@ class TestISummaryStrategy(TestCase):
# run test functions
ti
=
TestImplementationValid
()
summary
=
ti
.
summarize
(
test_text
)
summary
=
ti
.
summarize
(
test_text
,
max_length
=
130
)
# assert result
self
.
assertEqual
(
summary
,
test_text
)
...
...
src/nlp/app/tests/test_summary_word_embedding.py
View file @
f1eb645e
...
...
@@ -164,5 +164,6 @@ class TestSummarizationSentenceEmbedding(TestCase):
def
test_summarize_text
(
self
):
summary_word_embedding
=
WordEmbeddingSummarizer
()
summary
=
summary_word_embedding
.
summarize
(
self
.
text
)
summary
=
summary_word_embedding
.
summarize
(
self
.
text
,
max_length
=
130
)
assert
type
(
summary
)
==
str
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment