Unverified Commit 7dc99552 authored by Ala Rouis's avatar Ala Rouis
Browse files

Merge branch 'master' into feat/#95-Update_im_History_tauchen_lassen_

parents 9f27d971 7d2063f7
Pipeline #79256 passed with stages
in 2 minutes and 31 seconds
package de.h_da.fbi.smebt.intentfinder.server.nlp
import de.h_da.fbi.smebt.intentfinder.server.nlp.client.RetrofitClient
import de.h_da.fbi.smebt.intentfinder.server.nlp.dto.Summary
import de.h_da.fbi.smebt.intentfinder.server.nlp.dto.SummaryBody
import java.util.concurrent.ExecutionException
import java.util.concurrent.ExecutorCompletionService
import java.util.concurrent.Executors
import kotlin.collections.ArrayList
class PythonBridge(private val client: RetrofitClient = RetrofitClient()) {
/**
 * Requests one summary per strategy reported by the client, running the
 * requests in parallel, and collects the bodies of all responses whose
 * status code is 200.
 *
 * The result order follows task completion, not the order of the strategies.
 *
 * @param text the text to summarize
 * @param maxLength forwarded unchanged to [RetrofitClient.getSummary]
 * @return the bodies of all successful summary responses; empty when the
 * client reports no strategies
 * @throws java.util.concurrent.ExecutionException if one of the summary
 * requests failed with an exception
 */
private fun getAllSummaries(text: String, maxLength: Int): List<SummaryBody> {
    val strategyIds = client.getStrategies().body
    // Executors.newFixedThreadPool rejects a pool size of 0, so there is
    // nothing to schedule (and nothing to return) without strategies.
    if (strategyIds.isEmpty()) return emptyList()

    val executor = Executors.newFixedThreadPool(strategyIds.size)
    try {
        val completionService = ExecutorCompletionService<Summary>(executor)
        strategyIds.forEach { strategy ->
            completionService.submit { client.getSummary(strategy, text, maxLength) }
        }

        val summaries = ArrayList<SummaryBody>()
        repeat(strategyIds.size) {
            // take() blocks until the next task has finished; get() rethrows
            // a task failure wrapped in an ExecutionException.
            val response = completionService.take().get()
            if (response.statusCode == 200) {
                summaries.add(response.body)
            }
        }
        return summaries
    } finally {
        // The pool's worker threads are non-daemon: without a shutdown they
        // would outlive every call. shutdownNow also cancels outstanding
        // requests when an earlier one has already failed.
        executor.shutdownNow()
    }
}
/**
 * Returns all successfully generated summaries of [text], ordered by
 * descending quality (best summary first).
 *
 * @param text the text to summarize
 * @param maxLength forwarded unchanged to the summarization requests
 * @return the summaries sorted from highest to lowest quality
 */
fun getSummary(text: String, maxLength: Int): List<SummaryBody> {
    return getAllSummaries(text, maxLength).sortedByDescending { it.quality }
}
fun getIntentId(text: String): String {
......
package de.h_da.fbi.smebt.intentfinder.server.nlp.client
import de.h_da.fbi.smebt.intentfinder.server.nlp.dto.SummaryRequest
import de.h_da.fbi.smebt.intentfinder.server.nlp.dto.Strategies
import de.h_da.fbi.smebt.intentfinder.server.nlp.dto.Summary
import de.h_da.fbi.smebt.intentfinder.server.nlp.dto.SummarizeResponse
import okhttp3.OkHttpClient
import retrofit2.Retrofit
import retrofit2.converter.gson.GsonConverterFactory
class RetrofitClient(private val baseURL: String = "http://127.0.0.1:8000") {
class RetrofitClient {
companion object {
private const val BASE_URL = "http://127.0.0.1:8000"
}
private var client: SummarizationAPI = buildRetrofitClient()
private fun buildRetrofitClient(): SummarizationAPI {
val retrofit = Retrofit.Builder()
.baseUrl(baseURL)
.baseUrl(BASE_URL)
.addConverterFactory(GsonConverterFactory.create())
.client(OkHttpClient.Builder().build())
.build()
......@@ -21,12 +23,8 @@ class RetrofitClient(private val baseURL: String = "http://127.0.0.1:8000") {
return retrofit.create(SummarizationAPI::class.java)
}
/**
 * Builds the strategies call on the API client and runs it through a fresh
 * [RetrofitExecutor].
 *
 * @return the result produced by the executor for the strategies call
 */
fun getStrategies(): Strategies =
    RetrofitExecutor().execute(client.getStrategies())
fun getSummary(strategy: String, text: String, maxLength: Int): Summary {
return RetrofitExecutor().execute(client.getSummary(strategy, SummaryRequest(text, maxLength)))
/**
 * Wraps [text] and [maxLength] in a [SummaryRequest], builds the summaries
 * call on the API client and runs it through a fresh [RetrofitExecutor].
 *
 * @param text the text to summarize
 * @param maxLength passed on as the second field of the [SummaryRequest]
 * @return the result produced by the executor for the summaries call
 */
fun getSummaries(text: String, maxLength: Int): SummarizeResponse {
    val executor = RetrofitExecutor()
    val request = SummaryRequest(text, maxLength)
    val call = client.getSummaries(request)
    return executor.execute(call)
}
fun getIntentId(text: String): String {
......
package de.h_da.fbi.smebt.intentfinder.server.nlp.client
import com.google.gson.JsonObject
import de.h_da.fbi.smebt.intentfinder.server.nlp.dto.SummaryRequest
import de.h_da.fbi.smebt.intentfinder.server.nlp.dto.Strategies
import de.h_da.fbi.smebt.intentfinder.server.nlp.dto.SummarizeResponse
import de.h_da.fbi.smebt.intentfinder.server.nlp.dto.Summary
import retrofit2.Call
import retrofit2.http.*
interface SummarizationAPI {
@GET("/strategies")
fun getStrategies(): Call<Strategies>
@POST("/summarize/{strategy}")
fun getSummary(@Path("strategy") strategy: String, @Body summaryRequest: SummaryRequest): Call<Summary>
@POST("/summarize")
fun getSummaries(@Body summaryRequest: SummaryRequest): Call<SummarizeResponse>
@POST("/intentid")
fun getIntentId(@Body text: String) : Call<String>
......
......@@ -3,15 +3,23 @@ package de.h_da.fbi.smebt.intentfinder.server.nlp.dto
import com.papsign.ktor.openapigen.annotations.Response
import kotlinx.serialization.Serializable
/**
 * Response envelope returned by the summaries call: a status code, a message
 * and the list of generated summaries.
 *
 * NOTE(review): presumably mirrors the JSON envelope produced by the NLP
 * server's summarize endpoint - confirm against the server implementation.
 *
 * @property statusCode numeric status of the response (documented as 200 for
 * the success case by the [Response] annotation)
 * @property message accompanying message of the response
 * @property body the summaries contained in the response
 */
@Serializable
@Response("Summarization response.", statusCode = 200)
data class SummarizeResponse(
val statusCode: Int,
val message: String,
val body: List<SummaryBody>
)
@Serializable
@Response("Summary including used strategy and quality.", statusCode = 200)
data class SummaryBody(
val strategy: String,
val quality: Double,
val quality: Int,
val summary: String,
) {
companion object {
val EXAMPLE = SummaryBody("UsedStrategy", 0.9, "My short and good strategy")
val EXAMPLE = SummaryBody("UsedStrategy", 7, "My short and good strategy")
}
}
......
......@@ -107,7 +107,7 @@ data class Dialog (
"message",
SummaryBody(
"strategy",
0.0,
0,
"summary"
)
)
......
......@@ -2,7 +2,7 @@ package de.h_da.fbi.smebt.intentfinder.server.sources
import de.h_da.fbi.smebt.intentfinder.server.nlp.PythonBridge
import de.h_da.fbi.smebt.intentfinder.server.nlp.client.RetrofitClient
import de.h_da.fbi.smebt.intentfinder.server.nlp.dto.Strategies
import de.h_da.fbi.smebt.intentfinder.server.nlp.dto.SummarizeResponse
import de.h_da.fbi.smebt.intentfinder.server.nlp.dto.Summary
import de.h_da.fbi.smebt.intentfinder.server.nlp.dto.SummaryBody
import org.hamcrest.CoreMatchers
......@@ -29,40 +29,33 @@ class PythonBridgeTest {
fun `returns strategy with the highest quality`() {
val strategyNameOne = "first"
val strategyNameTwo = "second"
val strategies = Strategies(
200,
"ok",
listOf(strategyNameOne, strategyNameTwo)
)
val summaryOne = Summary(
200,
"",
val summaryOne =
SummaryBody(
strategyNameOne,
0.7, ""
7, ""
)
)
val summaryTwo = Summary(
200,
"",
val summaryTwo =
SummaryBody(
strategyNameTwo,
0.8,
8,
""
)
)
`when`(retrofitClient.getStrategies()).thenReturn(strategies)
`when`(retrofitClient.getSummary(strategyNameOne, "", Integer.MAX_VALUE)).thenReturn(summaryOne)
`when`(retrofitClient.getSummary(strategyNameTwo, "", Integer.MAX_VALUE)).thenReturn(summaryTwo)
`when`(retrofitClient.getSummaries("", Integer.MAX_VALUE)).thenReturn(
SummarizeResponse(
200,
"ok",
listOf(summaryTwo, summaryOne)
)
)
val result = bridge.getSummary("", Integer.MAX_VALUE)
verify(retrofitClient, times(1)).getStrategies()
verify(retrofitClient, times(2)).getSummary(anyString(), anyString(), anyInt())
verify(retrofitClient, times(1)).getSummaries(anyString(), anyInt())
assertEquals(2, result.size)
assertThat(result, CoreMatchers.`is`(listOf(summaryTwo.body, summaryOne.body)))
assertThat(result, CoreMatchers.`is`(listOf(summaryTwo, summaryOne)))
}
}
\ No newline at end of file
}
......@@ -92,7 +92,7 @@ ipython_config.py
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
......
import os

# MongoDB connection settings.
# Each value can be overridden through an environment variable of the same
# name, so credentials do not have to live in the source tree; the fallbacks
# are the previously hard-coded values.
MONGODB_USER = os.environ.get('MONGODB_USER', 'root')
MONGODB_HOST = os.environ.get('MONGODB_HOST', 'localhost:27017')
MONGODB_PASSWORD = os.environ.get('MONGODB_PASSWORD', 'rootpassword')
MONGODB_DB_NAME = os.environ.get('MONGODB_DB_NAME', 'chats')
import pymongo
from app.quality.intent import Intent
from app.quality.intent_list import IntentList
class MongoDBConnector:
    """
    Small wrapper around a pymongo client that reads intents from the
    'faqData' collection of one database.
    """

    def __init__(self, user, host, password, database_name):
        """
        Constructor of MongoDBConnector

        :param user: username for authentication (raw, not percent-escaped)
        :param host: hostname for mongodb
        :param password: password for authentication (raw, not
                         percent-escaped)
        :param database_name: database name where collection with intents is
                              stored
        """
        # Imported locally so the module's import block stays untouched.
        from urllib.parse import quote_plus

        self.user = user
        self.host = host
        self.password = password
        self.database_name = database_name
        # Reserved characters such as '@', ':' or '/' inside the credentials
        # would corrupt the connection string, so both parts are
        # percent-escaped before they are placed into the URI.
        self.client = pymongo.MongoClient(
            'mongodb://' + quote_plus(self.user) + ':'
            + quote_plus(self.password) + '@' + self.host)
        self.database = self.client[database_name]

    def get_intents(self) -> 'IntentList':
        """
        Read every dialog of every document in the 'faqData' collection and
        turn it into an Intent.

        :returns: IntentList: list of intents for similarity report
        """
        intent_list = [
            Intent(dialog['id'], dialog['question'],
                   dialog['alternativeQuestions'],
                   dialog['answer'])
            for faq in self.database['faqData'].find()
            for dialog in faq['dialogs']
        ]
        return IntentList(intent_list)
import os
import json
import os
from chatette.facade import Facade
from fastapi import FastAPI, Request
from fastapi.encoders import jsonable_encoder
from fastapi.exceptions import RequestValidationError
from fastapi.responses import JSONResponse
from fastapi.encoders import jsonable_encoder
from pydantic import BaseModel # pylint: disable=no-name-in-module
from chatette.facade import Facade
from app import config
from app.db.mongodb_connector import MongoDBConnector
from app.quality.balance_report import BalanceReport
from app.quality.similarity_report import SimilarityReport
from app.questiongenerator import QuestionGenerator
from app.summary.simple_spacy_summarizer import SimpleSpacySummarizer
from app.summary.summarization_with_strategy_tfidf import SummaryTFIDF
from app.summary.summary_bert import BertSummary
from app.summary.summary_sentence_embedding import SentenceEmbeddingSummarizer
from app.summary.summary_word_embedding import WordEmbeddingSummarizer
from app.utilities.success_response import SuccessResponse
from app.utilities.client_error_response import ClientErrorResponse
from app.utilities.server_error_response import ServerErrorResponse
from app.utilities import generator
from app.utilities.models import nlp, bert_model, bert_tokenizer
from app.quality.intent_id_similarity import intent_id_similarity
from app.quality.intent_question_similarity import intent_question_similarity
from app.quality.intent_answer_similarity import intent_answer_similarity
from app.quality.similarity_report import SimilarityReport
from app.tests.test_constants import test_intents
from app.utilities.server_error_response import ServerErrorResponse
from app.utilities.success_response import SuccessResponse
from app.utilities.utilities import Utilities
class Item(BaseModel):
......@@ -57,6 +56,10 @@ strategies = [
BertSummary(_bert_model=bert_model, _bert_tokenizer=bert_tokenizer),
WordEmbeddingSummarizer(_nlp=nlp)]
mongo_db_connector = MongoDBConnector(
config.MONGODB_USER, config.MONGODB_HOST,
config.MONGODB_PASSWORD, config.MONGODB_DB_NAME)
@app.exception_handler(RequestValidationError)
async def validation_exception_handler(request,
......@@ -65,7 +68,7 @@ async def validation_exception_handler(request,
"""
Handle all raised request exceptions and return them to the client as JSON
"""
return JSONResponse(content=jsonable_encoder({'statusCode': 400,
return JSONResponse(content=jsonable_encoder({'status_code': 400,
'message': str(exc),
'body': None}))
......@@ -99,16 +102,17 @@ async def test(item2: Item2):
force_overwriting=True
)
facade.run()
return_json = open(os.path.join(
with open(os.path.join(
"./app/output/train/output.json"),
"r"
).read()
return json.loads(return_json)
) as json_output:
return_json = json_output.read()
return json.loads(return_json)
@app.get("/")
async def root():
return {"message": "nlp server is available"}
return SuccessResponse({"message": "nlp server is available"})
@app.get("/strategies")
......@@ -126,57 +130,39 @@ async def api_strategies():
return res
async def call_strategy(strategy_id: str, text: str, num_sentences: int):
@app.post("/summarize",
summary="Get all summaries to a given text.")
async def post_summarize(req: Request):
"""
This function will summarize a given text with a given summarization
strategy
Get all summaries to a given text.
:param num_sentences: max number of sentences in the summarization
:param strategy_id: The id of the strategy
:param text: The text to be summarized
:param req: The post body of the Request. Should have a text and maxLength
field.
:return: The summary, strategy and quality of the summary in JSON format
"""
for strategy in strategies:
if strategy.id == strategy_id:
quality = 0.5
summary = strategy.summarize(text, num_sentences)
response = SuccessResponse({"strategy": strategy_id,
"quality": quality,
"summary": summary})
return response
response = ClientErrorResponse("error: unknown strategy")
return response
req_json = await req.json()
text = req_json["text"]
text = Utilities.clean_up_text(text)
length = req_json["maxLength"]
res = []
summaries = {}
static_ranks = {}
@app.get("/summarize/{strategy_id}", summary="Generate a summary of the given"
" text.")
async def get_summarize(strategy_id: str, num_sentences: int, text: str):
"""
get variant of the summarize api endpoint
for strategy in strategies:
summaries[strategy.id] = strategy.summarize(text, length)
static_ranks[strategy.id] = strategy.rank
:param num_sentences: max number of characters in the summarization
:param strategy_id: The id of the strategy
:param text: The text to be summarized
:return: The summary, strategy and quality of the summary in JSON format
"""
return await call_strategy(strategy_id, text, num_sentences)
ranking = Utilities.generate_quality_scores(text, summaries)
for key, rank in ranking.items():
res.append({"strategy": key,
"quality": rank,
"summary": summaries[key]})
@app.post("/summarize/{strategy_id}", summary="Generate a summary of the given"
" text.")
async def post_summarize(strategy_id: str, req: Request):
"""
post variant of the summarize api endpoint
res.sort(key=lambda x: (x["quality"], static_ranks[x["strategy"]]))
res.reverse()
:param strategy_id: The id of the strategy
:param req: The post body of the Request. Should have a text and maxLength
field.
:return: The summary, strategy and quality of the summary in JSON format
"""
req_json = await req.json()
return await call_strategy(strategy_id, req_json["text"],
req_json["maxLength"])
return SuccessResponse(res)
@app.get("/{bot}/similarIntents", summary="Get similar Intents.")
......@@ -228,13 +214,11 @@ async def get_similar_intents( # pylint: disable=unused-argument,invalid-name
:param t: The threshold of for the cosine similarity
:returns: The result of the analysis
"""
intents = mongo_db_connector.get_intents()
# get intents from db
intents = test_intents
similar_by_answer = intent_answer_similarity(intents, t)
similar_by_id = intent_id_similarity(intents, t)
similar_by_question = intent_question_similarity(intents, t)
similar_by_answer = intents.similar_by_answer(t)
similar_by_id = intents.similar_by_intent_id(t)
similar_by_question = intents.similar_by_question(t)
return SuccessResponse(SimilarityReport(intents,
similar_by_answer,
......@@ -242,6 +226,23 @@ async def get_similar_intents( # pylint: disable=unused-argument,invalid-name
similar_by_question))
@app.get(
    "/{bot}/intentBalance",
    summary="Get alternative question balance of the intents.",
)
async def get_intent_balance(
        bot: int,  # pylint: disable=unused-argument,invalid-name
):
    """
    Analyse how evenly alternative questions are spread over the bot's
    intents.

    The intents are loaded through the module-level MongoDB connector; the
    three values returned by their balance analysis are packed into a
    BalanceReport.

    :param bot: The id of the bot
    :returns: The result of the analysis
    """
    all_intents = mongo_db_connector.get_intents()
    balance = all_intents.alternative_question_balance_analysis()
    average, below_average, above_average = balance
    report = BalanceReport(all_intents, average, below_average, above_average)
    return SuccessResponse(report)
@app.post("/intentid", summary="Generate an intent id from a given intent"
" text")
async def generate_intent_id(intent: str, max_tokens: int):
......
from app.quality.intent_list import IntentList
class BalanceReport:
    """
    Plain value holder bundling the analysed intents with the outcome of an
    alternative-question balance analysis.
    """

    def __init__(self,
                 intents: IntentList,
                 average_alt_questions: float,
                 under_average: [int],
                 over_average: [int]):
        """
        Store the given analysis results unchanged.

        :param intents: The intents the analysis was run on
        :param average_alt_questions: The average value reported by the
                                      analysis
        :param under_average: Entries reported as below the average
                              (presumably intent indexes - confirm against
                              the analysis implementation)
        :param over_average: Entries reported as above the average
                             (presumably intent indexes - confirm against
                             the analysis implementation)
        """
        self.over_average = over_average
        self.under_average = under_average
        self.average_alt_questions = average_alt_questions
        self.intents = intents
......@@ -2,7 +2,7 @@ class Intent:
def __init__(self,
intent_id: str,
question: str,
alternative_questions: list[str],
alternative_questions: [str],
answer: str):
"""
Constructor of the Intent class
......@@ -31,13 +31,13 @@ class Intent:
"""
return self.question
def get_alternative_questions(self) -> list[str]:
def get_alternative_questions(self) -> [str]:
"""
:returns: The alternative questions of the Intent
"""
return self.alternative_questions
def get_all_questions(self) -> list[str]:
def get_all_questions(self) -> [str]:
"""
:returns: The question and all alternative questions in one list
"""
......
from app.quality.intent_list import IntentList
from app.quality.string_similarity import spacy_cosine_matrix_similarity
def intent_answer_similarity(intents: IntentList,
                             threshold: float = 0.8) -> list[list[int]]:
    """
    Compare the answers of all intents with each other.

    The answers are collected in intent order and handed to the spaCy
    cosine matrix similarity helper together with the threshold.

    :param intents: The intents
    :param threshold: The threshold for the cosine similarity algorithm
    :returns: a list of indexes of similar answers for every intent
    """
    answers = [intent.answer for intent in intents]
    return spacy_cosine_matrix_similarity(answers, threshold)
from app.quality.intent_list import IntentList
from app.quality.string_similarity import spacy_cosine_matrix_similarity
def intent_id_similarity(intents: IntentList,
                         threshold: float = 0.8) -> list[list[int]]:
    """
    Compare the ids of all intents with each other.

    Underscores in every id are replaced by spaces before the ids are handed
    to the spaCy cosine matrix similarity helper.

    :param intents: The intents
    :param threshold: The threshold for the cosine similarity algorithm
    :returns: a list of indexes of similar intents
    """
    readable_ids = [
        intent.get_intent_id().replace("_", " ") for intent in intents
    ]
    return spacy_cosine_matrix_similarity(readable_ids, threshold)
from app.quality.intent import Intent
from app.quality.string_similarity import spacy_cosine_matrix_similarity
from app.quality.string_similarity import spacy_cosine_similarity
class IntentList(list[Intent]):
pass
class IntentList(list):
@staticmethod
def __compare_intent(intent_a: Intent,
                     intent_b: Intent,
                     threshold: float = 0.8) -> [[int]]:
    """
    Compare every question of one intent with every question of another.

    :param intent_a: First intent
    :param intent_b: Second intent
    :param threshold: The threshold for the cosine similarity algorithm
    :returns: a list of [i, j] index pairs, where question i of intent_a and
              question j of intent_b have a similarity strictly greater than
              the threshold
    """
    first_questions = intent_a.get_all_questions()
    second_questions = intent_b.get_all_questions()
    # Pairs are produced in row-major order: all questions of intent_b are
    # checked for one question of intent_a before moving to the next.
    return [
        [idx_a, idx_b]
        for idx_a, question_a in enumerate(first_questions)
        for idx_b, question_b in enumerate(second_questions)
        if spacy_cosine_similarity(question_a, question_b) > threshold
    ]
def __get_answers(self):
"""
Create a list of all answers
:returns: A list of all answers
"""