Commit 53e0bd25 authored by Ala Rouis's avatar Ala Rouis
Browse files

Revert "Merge branch 'feat/#71-Aenderungsverfolgung' into 'master'"

This reverts merge request !101
parent 6cab49a4
Pipeline #79967 passed with stages
in 2 minutes and 45 seconds
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/intentfinder.iml" filepath="$PROJECT_DIR$/.idea/intentfinder.iml" />
</modules>
</component>
</project>
\ No newline at end of file
......@@ -25,14 +25,13 @@ There are two types of data to be processed:
{
"type": "url", # type either "doc" or "url"
"name": "https://www.berlin.de/sen/finanzen/steuern/informationen-fuer-steuerzahler-/faq-steuern/artikel.697552.php", # URI of the website
"update_frequency":"daily", # update frequency, valid values are [daily|weekly|monthly|yearly]
"question" : {
"type": "text", # "text" indicates the content is HTML on the same page
"css_selector": "div.block .land-toggler-button" # css selector to the question, you can check it using the webconsole document.querySelector(css_selector)
"css_selector": "div.block a" # css selector to the question, you can check it using the webconsole document.querySelector(css_selector)
},
"answer": {
"type": "text", # same as above
"css_selector": "div.block .html5-section.collapse" # same as above
"css_selector": "div.block .text" # same as above
}
}
```
......@@ -42,14 +41,14 @@ There are two types of data to be processed:
{
"type": "url", # same as above
"name": "https://www.stvo.de/info/faq", # same as above
"update_frequency":"daily", # same as above
"question" : {
"type": "text", # same as above
"css_selector": "table.category > tbody td.list-title > a", # same as above
},
"answer": {
"type": "href", # link to answer found in the href attribute of the question selector
"css_selector": "content > div.item-page > [itemprop = articleBody]" # where to find the text data of the answer on the new page
"type": "href", # link to answer found in the href attribute of the href_css_selector
"href_css_selector": "table.category > tbody td.list-title > a", # path to the link, found in the href attribute of the object if selected using document.querySelector(href_css_selector)
"css_selector": "div.item-page > div:nth-child(5)" # where to find the text data of the answer on the new page
}
}
```
......
{
"type": "url",
"name": "https://www.berlin.de/sen/finanzen/steuern/informationen-fuer-steuerzahler-/faq-steuern/artikel.697552.php",
"update_frequency":"daily",
"question" : {
"type": "text",
"css_selector": "div.block .land-toggler-button"
......
{
"type": "url",
"name": "https://www.kletterfabrik.koeln/faq.html",
"update_frequency":"weekly",
"question" : {
"type": "text",
"css_selector": "div.toggler h3"
......
{
"type": "url",
"name": "https://www.stvo.de/info/faq",
"update_frequency":"monthly",
"question" : {
"type": "text",
"css_selector": "table.category > tbody td.list-title > a"
......
......@@ -71,14 +71,7 @@ dependencies {
testImplementation 'org.hamcrest:hamcrest:2.2'
// for MongoDB
implementation("org.litote.kmongo:kmongo:4.2.7")
// https://mvnrepository.com/artifact/org.mongodb/mongodb-driver-sync
implementation group: 'org.mongodb', name: 'mongodb-driver-sync', version: '4.2.3'
// https://mvnrepository.com/artifact/org.mongodb/mongodb-driver-core
implementation group: 'org.mongodb', name: 'mongodb-driver-core', version: '4.2.3'
implementation "org.mongodb:mongodb-driver:3.12.3"
//jackson for reading json
implementation 'com.fasterxml.jackson.module:jackson-module-kotlin:2.12.1'
......
......@@ -11,7 +11,6 @@ import de.h_da.fbi.smebt.intentfinder.server.routing.quality.qualityRoutes
import de.h_da.fbi.smebt.intentfinder.server.routing.summarization.summarizationRoutes
import de.h_da.fbi.smebt.intentfinder.server.routing.tags.Tags
import de.h_da.fbi.smebt.intentfinder.server.routing.upload.uploadRoutes
import de.h_da.fbi.smebt.intentfinder.server.sources.*
import io.ktor.application.*
import io.ktor.features.*
import io.ktor.http.*
......@@ -19,8 +18,6 @@ import io.ktor.response.*
import io.ktor.routing.*
import io.ktor.serialization.*
import kotlin.reflect.KType
import kotlinx.coroutines.delay
import kotlinx.coroutines.launch
fun main(args: Array<String>): Unit = io.ktor.server.netty.EngineMain.main(args)
......@@ -83,22 +80,4 @@ fun Application.module() {
qualityRoutes()
}
}
// checks if website is updated automatically
launch {
while (true) {
// check every 24 hours
delay(86400000)
val db = FAQDataRepository()
val data = db.getFaqDataByTimeOfUpdate(calculateUpdateTime(" "))
for (d in data) {
if (d.type == "url") {
val urlReader = UrlReader(d.jsonUrlConfig)
val qAaMap = urlReader.extraction()
val changeHandler = ChangesHandlerUrl()
changeHandler.updateDialog(qAaMap, d.jsonUrlConfig.name)
db.updateTimeForUpdate(d.jsonUrlConfig.name, calculateUpdateTime(d.jsonUrlConfig.update_frequency))
}
}
}
}
}
......@@ -165,58 +165,6 @@ fun NormalOpenAPIRoute.uploadRoutes() {
val urlReader = UrlReader(config)
// save in db vgl. #39
val qAaMap = urlReader.extraction()
val db = FAQDataRepository()
var counter = 0
var dialog = mutableListOf(Dialog())
for ((question, answer: String) in qAaMap) {
dialog.add(Dialog("" + counter++, question, mutableListOf(), answer, mutableListOf()))
}
val faqData = FAQData(
"url",
config.name,
setTimestamp(),
calculateUpdateTime(config.update_frequency),
mutableListOf(setTimestamp()),
status.msg,
dialog,
mutableListOf(History()),
mutableListOf(String()),
config
)
db.insertFaqDataUrlInMongo(faqData)
respond(JsonResp(status.path, status.msg, qAaMap.size))
} else {
respond(JsonResp(status.path, status.msg))
}
} catch (e: Exception) {
respond(JsonResp(status.path, e.toString()))
}
}
}
route("{chatbotId}/configs/update") {
post<BotIDPathParam, JsonResp, JsonReq>(
info(
summary = "Check if Website ist updated",
description = "Parse a FAQ website for QuestionAnswers Pairs according to a .json file"
),
exampleResponse = JsonResp.EXAMPLE
) { params, body ->
var status = FileHandler.Status("", "")
try {
val handler = FileHandler(body.file, params.chatbotId)
status = handler.version(body.file.name ?: return@post)
if (status.msg == "updated") {
val config = handler.parseJSON("url") as JSONUrlConfig
// extract question answer pairs
val urlReader = UrlReader(config)
val qAaMap = urlReader.extraction()
// updates date if changes are detected
val changeHandler = ChangesHandlerUrl()
changeHandler.updateDialog(qAaMap, config.name)
respond(JsonResp(status.path, status.msg, qAaMap.size))
} else {
respond(JsonResp(status.path, status.msg))
......
package de.h_da.fbi.smebt.intentfinder.server.sources
import de.h_da.fbi.smebt.intentfinder.server.nlp.dto.Summary
/**
 * Compares freshly extracted question/answer pairs of a website with the
 * dialogs stored for that website and writes the differences back through
 * [FAQDataRepository]: changed answers are updated, unknown questions are
 * inserted as new dialogs, and dialogs whose question is no longer present
 * are deleted. Every overwritten or deleted dialog is first archived as a
 * [History] entry.
 */
class ChangesHandlerUrl {
    /**
     * Synchronizes the stored dialogs of one source with the extracted pairs.
     *
     * Does nothing when no stored entry with the given name exists.
     *
     * @param qAndA extracted pairs, keyed by question with the answer as value
     * @param configName name under which the stored FAQ data is looked up
     */
    fun updateDialog(qAndA: HashMap<String, String>, configName: String) {
        val faq = FAQDataRepository()
        // reads data from database; nothing to compare against if it is missing
        val faqData = faq.getFaqDataByName(configName) ?: return
        // updates dialogs and adds history
        updateDialogContent(qAndA, faqData, faq)
        // deletes dialogs which are not found on the webpage anymore
        deleteDataInDialog(qAndA, faqData, faq)
    }

    /**
     * Archives and deletes every stored dialog whose question is not a key of [qAndA].
     */
    private fun deleteDataInDialog(qAndA: HashMap<String, String>, faqData: FAQData, faq: FAQDataRepository) {
        for (dialog in faqData.dialogs) {
            // direct key lookup instead of scanning all extracted pairs
            if (qAndA.containsKey(dialog.question)) continue
            faq.insertHistoryDataByName(faqData.name, historyOf(dialog))
            faq.deleteDialog(faqData.name, dialog.d_id)
        }
    }

    /**
     * Updates the answer of every stored dialog whose question was extracted
     * again with a different answer, and inserts a new dialog for every
     * extracted question that has no stored dialog yet.
     */
    private fun updateDialogContent(
        qAndA: HashMap<String, String>,
        faqData: FAQData,
        faq: FAQDataRepository
    ) {
        var counter = 0
        for ((question, answer) in qAndA) {
            val matchingDialogs = faqData.dialogs.filter { it.question == question }
            for (dialog in matchingDialogs) {
                // Any difference counts as a change. A substring test would miss
                // edits that merely extend the old text and would never fire
                // for an empty stored answer.
                if (answer != dialog.answer) {
                    val history = historyOf(dialog)
                    faq.updateFaqDataDialogAnswer(faqData.name, answer, dialog.d_id)
                    faq.insertHistoryDataByName(faqData.name, history)
                }
            }
            // The counter advances once per stored dialog for every extracted
            // question; ids of new dialogs are derived from it.
            // NOTE(review): uniqueness of these ids across runs is not guaranteed by this scheme - confirm.
            counter += faqData.dialogs.size
            if (matchingDialogs.isEmpty()) {
                // TODO: fill the field if Issue #39 implemented
                val alternativeQuestions = mutableListOf<String>()
                // TODO: fill the field if Issue #39 implemented
                val summary = mutableListOf<Summary>()
                val dialog = Dialog(
                    counter++.toString(),
                    question,
                    alternativeQuestions,
                    answer,
                    summary
                )
                faq.insertDialogDataByName(faqData.name, dialog)
            }
        }
    }

    /** Builds a history entry holding the current content of [dialog], stamped with the current time. */
    private fun historyOf(dialog: Dialog): History = History(
        setTimestamp(),
        dialog.question,
        dialog.alternativeQuestions,
        dialog.answer,
        dialog.summaries
    )
}
......@@ -4,20 +4,17 @@ import de.h_da.fbi.smebt.intentfinder.server.nlp.dto.Summary
import de.h_da.fbi.smebt.intentfinder.server.nlp.dto.SummaryBody
import java.text.DateFormat
import java.text.SimpleDateFormat
import java.time.LocalDate
import java.time.format.DateTimeFormatter
import java.util.*
import java.util.Date
import java.util.TimeZone
data class FAQData(
// @BsonId val id: Id<FAQData>,
var type: String,
var name: String,
var creationTimestamp: String,
var nextContentUpdate: String,
var changeTimestamp: MutableList<String>,
var status: String,
var dialogs: MutableList<Dialog>,
var history: MutableList<History>,
var unprocessed: MutableList<String>
) {
var jsonDocxConfig: JSONDocxConfig = JSONDocxConfig()
......@@ -29,66 +26,53 @@ data class FAQData(
"docx",
"name",
setTimestamp(),
calculateUpdateTime("Daily"),
mutableListOf("2021-06-13T11:45", "2021-06-13T11:46"),
"status",
mutableListOf(Dialog()),
mutableListOf(History()),
mutableListOf("text 1", "text 2")
)
constructor(
// id: Id<FAQData>,
// id: Id<FAQData>,
type: String,
name: String,
creationTimestamp: String,
nextContentUpdate: String,
changeTimestamp: MutableList<String>,
status: String,
dialogs: MutableList<Dialog>,
history: MutableList<History>,
unprocessed: MutableList<String>,
jsonDocxConfig: JSONDocxConfig
) :
this(
// id,
type,
name,
creationTimestamp,
nextContentUpdate,
changeTimestamp,
status,
dialogs,
history,
unprocessed
) {
this.jsonDocxConfig = jsonDocxConfig
}
constructor(
// id: Id<FAQData>,
// id: Id<FAQData>,
type: String,
name: String,
creationTimestamp: String,
nextContentUpdate: String,
changeTimestamp: MutableList<String>,
status: String,
dialogs: MutableList<Dialog>,
history: MutableList<History>,
unprocessed: MutableList<String>,
jsonUrlConfig: JSONUrlConfig
) :
this(
// id,
type,
name,
creationTimestamp,
nextContentUpdate,
changeTimestamp,
status,
dialogs,
history,
unprocessed
) {
this.jsonUrlConfig = jsonUrlConfig
......@@ -99,11 +83,9 @@ data class FAQData(
type: String,
name: String,
creationTimestamp: String,
nextContentUpdate: String,
changeTimestamp: MutableList<String>,
status: String,
dialogs: MutableList<Dialog>,
history: MutableList<History>,
unprocessed: MutableList<String>,
jsonCSVConfig: JSONCSVConfig
) :
......@@ -111,11 +93,9 @@ data class FAQData(
type,
name,
creationTimestamp,
nextContentUpdate,
changeTimestamp,
status,
dialogs,
history,
unprocessed
) {
this.jsonCSVConfig = jsonCSVConfig
......@@ -123,43 +103,14 @@ data class FAQData(
}
/**
 * One stored question/answer pair.
 *
 * @property d_id identifier of the dialog
 * @property question the question text
 * @property alternativeQuestions other phrasings of the same question
 * @property answer the answer text
 * @property summaries summaries attached to the dialog
 */
data class Dialog(
    var d_id: String,
    var question: String,
    var alternativeQuestions: MutableList<String>,
    var answer: String,
    var summaries: MutableList<Summary>
) {
    /** Creates a dialog in which every field is filled with a fixed sample value. */
    constructor() : this(
        d_id = "d1",
        question = "What time is it?",
        alternativeQuestions = mutableListOf("time?", "what is the time?"),
        answer = "12h00",
        summaries = mutableListOf(
            Summary(200, "message", SummaryBody("strategy", 0, "summary"))
        )
    )
}
data class History(
val history_id: String,
val id: String,
val question: String,
val alternativeQuestions: MutableList<String>,
val answer: String,
val summaries: MutableList<Summary>
) {
constructor() : this(
setTimestamp(),
"d1",
"What time is it?",
mutableListOf(
"time?",
......@@ -189,22 +140,3 @@ fun setTimestamp(): String {
return df.format(Date())
}
/** Date pattern used for the next-update date strings. */
const val dataFormatDate = "yyyy-MM-dd"

/**
 * Calculates the date of the next content update, counted from today.
 *
 * The frequency is matched case-insensitively and surrounding whitespace is
 * ignored, so "Daily" and " daily " behave like "daily".
 *
 * @param frequency one of "daily", "weekly", "monthly" or "yearly"
 * @return the resulting date formatted with [dataFormatDate]; for any other
 * value (including a blank string) today's date is returned unchanged
 */
fun calculateUpdateTime(frequency: String): String {
    val today = LocalDate.now()
    val normalized = frequency.trim()
    val nextUpdate = when {
        normalized.equals("daily", ignoreCase = true) -> today.plusDays(1)
        normalized.equals("weekly", ignoreCase = true) -> today.plusWeeks(1)
        normalized.equals("monthly", ignoreCase = true) -> today.plusMonths(1)
        normalized.equals("yearly", ignoreCase = true) -> today.plusYears(1)
        // unknown frequency: keep today's date
        else -> today
    }
    return nextUpdate.format(DateTimeFormatter.ofPattern(dataFormatDate))
}
package de.h_da.fbi.smebt.intentfinder.server.sources
import com.mongodb.MongoClientSettings
import com.mongodb.client.FindIterable
import com.mongodb.client.MongoClients
import com.mongodb.client.MongoCollection
import com.mongodb.client.MongoDatabase
import com.mongodb.client.model.Filters.eq
import com.mongodb.client.model.Updates
import de.h_da.fbi.smebt.intentfinder.server.nlp.dto.Summary
import org.bson.Document
import org.bson.codecs.configuration.CodecRegistries
......@@ -30,7 +28,6 @@ class FAQDataRepository {
CodecRegistries.fromRegistries(MongoClientSettings.getDefaultCodecRegistry(), pojoCodecRegistry)
val mongoClient = MongoClients.create("mongodb://root:rootpassword@mongodb_container:27017")
// val mongoClient = MongoClients.create("mongodb://localhost:27017")
database = mongoClient.getDatabase("FaqDev").withCodecRegistry(codecRegistry)
col = database.getCollection(FAQData::class.java.name, FAQData::class.java)
......@@ -48,11 +45,9 @@ class FAQDataRepository {
faqData.type,
faqData.name,
faqData.creationTimestamp,
faqData.nextContentUpdate,
faqData.changeTimestamp,
faqData.status,
faqData.dialogs,
faqData.history,
faqData.unprocessed,
faqData.jsonDocxConfig
)
......@@ -64,15 +59,12 @@ class FAQDataRepository {
if (faqData.type == "url")
col.insertOne(
FAQData(
// faqData.id,
faqData.type,
faqData.name,
faqData.creationTimestamp,
faqData.nextContentUpdate,
faqData.changeTimestamp,
faqData.status,
faqData.dialogs,
faqData.history,
faqData.unprocessed,
faqData.jsonUrlConfig
)
......@@ -88,11 +80,9 @@ class FAQDataRepository {
faqData.type,
faqData.name,
faqData.creationTimestamp,
faqData.nextContentUpdate,
faqData.changeTimestamp,
faqData.status,
faqData.dialogs,
faqData.history,
faqData.unprocessed,
faqData.jsonDocxConfig
)
......@@ -104,21 +94,13 @@ class FAQDataRepository {
return col.find(eq("_id", ObjectId(id))).first()
}
/**
 * Queries the collection for entries whose "name" field equals [name].
 *
 * @return the first result of that query, which may be null
 */
fun getFaqDataByName(name: String): FAQData? =
    col.find(eq("name", name)).first()
/**
 * Queries the collection for entries whose "nextContentUpdate" field equals
 * [nextContentUpdate].
 *
 * @return the query result as an iterable
 */
fun getFaqDataByTimeOfUpdate(nextContentUpdate: String): FindIterable<FAQData> =
    col.find(eq("nextContentUpdate", nextContentUpdate))
/**
 * Reads every entry of the collection.
 *
 * @return all entries collected into a list
 */
fun getAllFaqData(): List<FAQData> = col.find().toList()
fun addTimestamp(id: String): Boolean {
val getFaqDataById = getFaqDataById(id)
var getFaqDataById = getFaqDataById(id)
var status = true
if (getFaqDataById.toString() == "null") {
status = false
......@@ -130,27 +112,6 @@ class FAQDataRepository {
return status
}
/**
 * Sets the "nextContentUpdate" field to [date] on one entry whose "name"
 * field equals [name].
 */
fun updateTimeForUpdate(name: String, date: String) {
    val byName = eq("name", name)
    val setNextUpdate = Document("\$set", Document("nextContentUpdate", date))
    col.updateOne(byName, setNextUpdate)
}
/**
 * Adds [history] to the "history" field of one entry whose "name" field
 * equals [name], using an add-to-set update.
 */
fun insertHistoryDataByName(name: String, history: History) {
    val byName = eq("name", name)
    val addHistory = Updates.addToSet("history", history)
    col.updateOne(byName, addHistory)
}