Skip to content
Snippets Groups Projects
Commit bbb20f6d authored by Jacob Benz's avatar Jacob Benz
Browse files

initial commit

parents
No related branches found
No related tags found
No related merge requests found
.venv
__pycache__
\ No newline at end of file
# Base image providing Python 3.13.
FROM python:3.13
# All following paths are relative to /code inside the image.
WORKDIR /code
# The requirements file is copied and installed before the application code,
# so the dependency layer can be reused when only files under ./app change.
COPY ./requirements.txt /code/requirements.txt
RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
COPY ./app /code/app
# Serve app/main.py with the FastAPI CLI using 8 worker processes on port 5700.
CMD ["fastapi", "run", "--workers", "8", "app/main.py", "--port", "5700"]
\ No newline at end of file
This diff is collapsed.
# General
This FastAPI-based application provides a REST API for LEAF-Writer to query when annotating objects. To annotate objects, information from authorities such as GND, Wikidata and so on can be used. LEAF-Writer used to query these authorities directly; however, an increasing number of authorities set CORS headers, making this strategy unavailable. Therefore, a backend service is needed. This backend service queries authorities on behalf of LEAF-Writer and returns the results.
Upstream LEAF-Writer developers use their LINCS-API for this purpose. To make standalone deployments of LEAF-Writer possible that do not rely on upstream services, the ```reconcile``` API has been re-implemented in this package for use by LEAF-Writer.
This application is intended to run as a dependency of LEAF-Writer. It may or may not be of any use outside the context of LEAF-Writer.
# Development
1. Create a virtual Python environment and install all requirements specified in ```requirements.txt``` with the tooling of your choice.
2. Run ```fastapi dev app/main.py``` for a development environment
# Deployment
This application is intended to be deployed together with a LEAF-Writer instance, see [LEAF-Writer Deployment](https://code.fbi.h-da.de/leaf-writer-x/leafwriter-x/-/wikis/Simplified_Deployment).
If you want or need a standalone deployment, you can start by building upon the provided ```Dockerfile```. When using this Dockerfile the application runs on port ```5700```.
\ No newline at end of file
"""
Server-side component to get authority information to annotate elements in LEAF-Writer
Copyright (C) 2025 University of Applied Sciences Darmstadt, Jacob Benz (jacob.benz@h-da.de)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
"""
from typing import Union
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
import requests
import os
import json
import xmltodict
from wikibaseintegrator import wbi_helpers
# Implementation is based on LINCS-API (https://gitlab.com/calincs/infrastructure/lincs-api/-/blob/main/src/services/link.ts) and the old implementation of the authority queries in
# LEAF-Writer
class Reconcile(BaseModel):
    """Request body accepted by the ``/reconcile/`` endpoint."""

    # Identifiers of the authorities to query (e.g. "Wikidata", "GND-Person").
    # Declared as a list of strings so that malformed entries are rejected by
    # request validation instead of failing later when they are compared and
    # concatenated as strings.
    authorities: list[str]
    # Search term that is looked up in every requested authority.
    entity: str
    # When true, more results per authority are requested.
    moreResults: bool | None = False
class ResultItem(BaseModel):
    """A single match returned by an authority; every field is optional."""

    # URI identifying the matched entity at the authority.
    uri: str | None = None
    # Human-readable name of the matched entity.
    label: str | None = None
    # Additional text describing the match (content differs per authority).
    description: str | None = None
class ResultForGivenAuthority(BaseModel):
    """All matches that one authority returned for the requested entity."""

    # Identifier of the authority, echoed from the request.
    authority: str
    # Matches found at this authority; may be empty.
    matches: list[ResultItem | None]
# Application object; the route handlers of this module are registered on it.
app = FastAPI()
def dbpedia(entity: str, typeName: str | None, numEntities: int, timeout: float = 10.0) -> list[dict]:
    """Search the DBpedia Lookup service for ``entity``.

    Args:
        entity: Search term.
        typeName: DBpedia type (e.g. "Person") to restrict the search to;
            ``None`` or an empty string searches all types.
        numEntities: Maximum number of results to request.
        timeout: Seconds to wait for the remote service before giving up.

    Returns:
        A list of dicts with the optional keys ``uri``, ``label`` and
        ``description``. The list is empty when the request fails, the
        service does not answer with HTTP 200, or the payload has no
        ``docs`` list.
    """
    params = {
        "maxResults": str(numEntities),
        "format": "JSON_RAW",
        "query": entity,
    }
    if typeName:
        params["typeName"] = typeName

    try:
        response = requests.get(
            "https://lookup.dbpedia.org/api/search",
            params=params,
            timeout=timeout,
        )
        if response.status_code != requests.codes.ok:
            return []
        # Parse the body once instead of re-parsing it on every access.
        payload = response.json()
    except (requests.RequestException, ValueError):
        # Best effort: an unreachable authority or an unparsable body yields
        # no matches instead of failing the whole reconcile request.
        return []

    docs = payload.get("docs") if isinstance(payload, dict) else None
    if not isinstance(docs, list):
        return []

    # The service wraps every value in a list; only the first entry is used.
    field_map = (("resource", "uri"), ("label", "label"), ("comment", "description"))
    matches = []
    for doc in docs:
        if not isinstance(doc, dict):
            continue
        match = {}
        for source_key, target_key in field_map:
            values = doc.get(source_key)
            # Guard against empty lists, which would raise IndexError.
            if isinstance(values, list) and values:
                match[target_key] = values[0]
        matches.append(match)
    return matches
def geonames(entity: str, numEntities: int, timeout: float = 10.0) -> list[dict]:
    """Search GeoNames for ``entity``.

    The GeoNames user name is read from the ``GEONAMES_USERNAME`` environment
    variable; without it no request is made. The user name is deliberately
    not printed or logged.

    Args:
        entity: Search term.
        numEntities: Maximum number of rows to request.
        timeout: Seconds to wait for the remote service before giving up.

    Returns:
        A list of dicts with the optional keys ``uri``, ``label`` and
        ``description``. The list is empty when no user name is configured,
        the request fails, the service does not answer with HTTP 200, or the
        payload has no ``geonames`` list.
    """
    # A GeoNames username is required; the service does not answer without one.
    username = os.getenv("GEONAMES_USERNAME")
    if not username:
        return []

    params = {
        "username": username,
        "maxRows": str(numEntities),
        "q": entity,
    }
    try:
        response = requests.get(
            "https://secure.geonames.org/searchJSON",
            params=params,
            timeout=timeout,
        )
        if response.status_code != requests.codes.ok:
            return []
        payload = response.json()
    except (requests.RequestException, ValueError):
        # Best effort: a failing authority yields no matches.
        return []

    places = payload.get("geonames") if isinstance(payload, dict) else None
    if not isinstance(places, list):
        return []

    matches = []
    for place in places:
        if not isinstance(place, dict):
            continue
        match = {}
        geoname_id = place.get("geonameId")
        if isinstance(geoname_id, int):
            match["uri"] = "https://sws.geonames.org/" + str(geoname_id)
        # Build the label from whichever name parts are present; collecting
        # them first avoids a KeyError when "toponymName" is missing.
        label_parts = [
            place[key]
            for key in ("toponymName", "adminName1", "countryName")
            if isinstance(place.get(key), str)
        ]
        if label_parts:
            match["label"] = " ".join(label_parts)
        if isinstance(place.get("fcodeName"), str):
            match["description"] = place["fcodeName"]
        matches.append(match)
    return matches
def getty(entity: str, queryType: str, numEntities: int, timeout: float = 10.0) -> list[dict]:
    """Query the Getty vocabularies reconciliation service for ``entity``.

    Args:
        entity: Search term.
        queryType: Vocabulary selector sent as the query's ``type``
            (e.g. "/aat", "/tgn", "/ulan", "/all").
        numEntities: Maximum number of results to request and return.
        timeout: Seconds to wait for the remote service before giving up.

    Returns:
        A list of dicts, ordered by descending score, with the optional keys
        ``uri``, ``label`` and ``description`` (the score). The list is empty
        when the request fails, the service does not answer with HTTP 200, or
        the payload has no ``q1.result`` list.
    """
    queries = {
        "q1": {
            "query": entity,
            "type": queryType,
            "limit": numEntities,
        }
    }
    headers = {
        "Accept": "application/json",
        "Content-Type": "application/x-www-form-urlencoded",
    }
    try:
        response = requests.post(
            "https://services.getty.edu/vocab/reconcile",
            # Passing a dict lets requests URL-encode the JSON; a hand-built
            # "queries=..." string breaks on '&', '+', '%' etc. in the entity.
            data={"queries": json.dumps(queries)},
            headers=headers,
            timeout=timeout,
        )
        if response.status_code != requests.codes.ok:
            return []
        payload = response.json()
    except (requests.RequestException, ValueError):
        # Best effort: a failing authority yields no matches.
        return []

    answer = payload.get("q1") if isinstance(payload, dict) else None
    candidates = answer.get("result") if isinstance(answer, dict) else None
    if not isinstance(candidates, list):
        return []

    def _score(candidate: dict):
        # Candidates without a numeric score sort as if they scored 0.
        score = candidate.get("score")
        return score if isinstance(score, (int, float)) else 0

    ranked = sorted(
        (c for c in candidates if isinstance(c, dict)),
        key=_score,
        reverse=True,
    )

    matches = []
    for candidate in ranked[:numEntities]:
        match = {}
        if isinstance(candidate.get("id"), str):
            match["uri"] = "http://vocab.getty.edu/" + candidate["id"]
        if isinstance(candidate.get("name"), str):
            match["label"] = candidate["name"]
        if "score" in candidate:
            match["description"] = "Score: " + str(candidate["score"])
        matches.append(match)
    return matches
def gettyCONA(entity: str, numEntities: int, timeout: float = 10.0) -> list[dict]:
    """Search the Getty CONA web service for ``entity``.

    Args:
        entity: Search term.
        numEntities: Maximum number of results to return.
        timeout: Seconds to wait for the remote service before giving up.

    Returns:
        A list of dicts with the optional keys ``uri``, ``label`` and
        ``description``. The list is empty when the request fails, the
        service does not answer with HTTP 200, the XML cannot be parsed, or
        the answer reports no subjects.
    """
    # Local import keeps this block self-contained; xmltodict is expected to
    # surface expat's parse errors for malformed XML.
    from xml.parsers.expat import ExpatError

    # The service is called with every filter parameter present but empty;
    # only "term" carries a value. Passing them as params lets requests
    # URL-encode the entity instead of splicing it into the URL unescaped.
    params = {"term": entity}
    for name in (
        "logop", "notes", "facet", "wtype", "creator", "material", "location",
        "number", "geographic", "creation_start", "creation_end",
        "general_subject", "specific_subject",
    ):
        params[name] = ""

    try:
        response = requests.get(
            "http://vocabsservices.getty.edu/CONAService.asmx/CONAGetTermMatch",
            params=params,
            timeout=timeout,
        )
        if response.status_code != requests.codes.ok:
            return []
        parsed = xmltodict.parse(response.text)
    except (requests.RequestException, ExpatError, ValueError):
        # Best effort: a failing authority yields no matches.
        return []

    vocabulary = parsed.get("Vocabulary") if isinstance(parsed, dict) else None
    if not isinstance(vocabulary, dict):
        return []
    try:
        count = int(vocabulary.get("Count", 0))
    except (TypeError, ValueError):
        return []
    if count <= 0:
        return []

    subjects = vocabulary.get("Subject")
    # xmltodict yields a dict instead of a list when only one <Subject>
    # element is present; normalise so a single hit is not dropped.
    if isinstance(subjects, dict):
        subjects = [subjects]
    if not isinstance(subjects, list):
        return []

    matches = []
    for subject in subjects[:numEntities]:
        if not isinstance(subject, dict):
            continue
        match = {}
        if isinstance(subject.get("Subject_ID"), str):
            match["uri"] = "http://vocab.getty.edu/page/cona/" + subject["Subject_ID"]
        preferred = subject.get("Preferred_Term")
        if isinstance(preferred, dict) and "#text" in preferred:
            match["label"] = preferred["#text"]
        if isinstance(subject.get("Cona_Label"), str):
            match["description"] = subject["Cona_Label"]
        matches.append(match)
    return matches
def gnd(entity: str, queryType: str, numEntities: int, timeout: float = 10.0) -> list[dict]:
    """Search the GND via the lobid-gnd API for ``entity``.

    Args:
        entity: Search term.
        queryType: GND entity type used in the ``type:`` filter
            (e.g. "Person", "Work").
        numEntities: Maximum number of results to request.
        timeout: Seconds to wait for the remote service before giving up.

    Returns:
        A list of dicts with the optional keys ``uri``, ``label`` and
        ``description``. The list is empty when the request fails, the
        service does not answer with HTTP 200, or the payload has no
        ``member`` list.
    """
    params = {
        "q": entity,
        "filter": "type:" + queryType,
        "format": "json",
        "size": str(numEntities),
    }
    try:
        response = requests.get(
            "https://lobid.org/gnd/search",
            params=params,
            timeout=timeout,
        )
        if response.status_code != requests.codes.ok:
            return []
        payload = response.json()
    except (requests.RequestException, ValueError):
        # Best effort: a failing authority yields no matches.
        return []

    members = payload.get("member") if isinstance(payload, dict) else None
    if not isinstance(members, list):
        return []

    matches = []
    for member in members:
        if not isinstance(member, dict):
            continue
        match = {}
        if isinstance(member.get("id"), str):
            match["uri"] = member["id"]
        if isinstance(member.get("preferredName"), str):
            match["label"] = member["preferredName"]
        info = member.get("biographicalOrHistoricalInformation")
        # Only the first entry is used; an empty list must not raise.
        if isinstance(info, list) and info:
            match["description"] = info[0]
        matches.append(match)
    return matches
def viaf(entity: str, queryType: str, numEntities: int, timeout: float = 10.0) -> list[dict]:
    """Search VIAF for ``entity``.

    Args:
        entity: Search term.
        queryType: Index to search (e.g. "local.personalNames").
        numEntities: Maximum number of records to request.
        timeout: Seconds to wait for the remote service before giving up.

    Returns:
        A list of dicts with the keys ``uri``, ``label`` and ``description``
        (empty string when the record carries only one heading). Records
        that do not have the expected structure are skipped. The list is
        empty when the request fails, the service does not answer with
        HTTP 200, or the payload has no record list.
    """
    # Escape backslashes and double quotes so the entity cannot terminate the
    # quoted search term early.
    quoted = entity.replace("\\", "\\\\").replace('"', '\\"')
    params = {
        "query": queryType + "=\"" + quoted + "\"",
        "sortKeys": "holdingscount",
        "maximumRecords": numEntities,
        "recordSchema": "BriefVIAF",
    }
    headers = {"Accept": "application/json"}

    try:
        response = requests.get(
            "https://viaf.org/viaf/search",
            params=params,
            headers=headers,
            timeout=timeout,
        )
        if response.status_code != requests.codes.ok:
            return []
        payload = response.json()
    except (requests.RequestException, ValueError):
        # Best effort: a failing authority yields no matches.
        return []

    try:
        records = payload["searchRetrieveResponse"]["records"]["record"]
    except (KeyError, TypeError):
        return []
    if not isinstance(records, list):
        return []

    matches = []
    for record in records:
        try:
            cluster = record["recordData"]["v:VIAFCluster"]
            headings = cluster["v:mainHeadings"]["v:data"]
            if isinstance(headings, list):
                label = headings[0]["v:text"]
                # The second heading, when present, serves as description.
                description = headings[1]["v:text"] if len(headings) > 1 else ""
            else:
                label = headings["v:text"]
                description = ""
            uri = "http://viaf.org/viaf/" + str(cluster["v:viafID"]["content"])
        except (KeyError, IndexError, TypeError):
            # Skip only the malformed record instead of discarding every
            # result that was already collected.
            continue
        matches.append({"uri": uri, "label": label, "description": description})
    return matches
def wikidata(entity: str, numEntities: int):
    """Search Wikidata for ``entity`` and return at most ``numEntities`` matches.

    Each match is a dict with the keys ``uri``, ``label`` and ``description``
    taken from the search hits returned by ``wbi_helpers.search_entities``.
    """
    hits = wbi_helpers.search_entities(
        search_string=entity,
        max_results=numEntities,
        language="en",
        user_agent="LEAF-Writer-X",
        dict_result=True,
    )
    return [
        {
            "uri": "http://www.wikidata.org/entity/" + hit["id"],
            "label": hit["label"],
            "description": hit["description"],
        }
        for hit in hits[:numEntities]
    ]
def lincs(entity: str, typeFilter: str | list | None, numEntities: int, timeout: float = 10.0) -> list[dict]:
    """Query the LINCS authority reconciliation service for ``entity``.

    Args:
        entity: Search term.
        typeFilter: Value sent as the query's ``type``: a single type URI, a
            list of type URIs, or ``None``.
        numEntities: Maximum number of results to request.
        timeout: Seconds to wait for the remote service before giving up.

    Returns:
        A list of dicts with the optional keys ``uri``, ``label`` and
        ``description`` (the id of the first reported type). The list is
        empty when the request fails, the service does not answer with
        HTTP 200, or the payload has no ``q1.result`` list.
    """
    queries = {
        "q1": {
            "query": entity,
            "type": typeFilter,
            "limit": numEntities,
        }
    }
    headers = {
        "Accept": "application/json",
        "Content-Type": "application/x-www-form-urlencoded",
    }
    try:
        response = requests.post(
            "https://authority.lincsproject.ca/reconcile/any",
            # Passing a dict lets requests URL-encode the JSON; a hand-built
            # "queries=..." string breaks on '&', '+', '%' etc. in the entity.
            data={"queries": json.dumps(queries)},
            headers=headers,
            timeout=timeout,
        )
        if response.status_code != requests.codes.ok:
            return []
        payload = response.json()
    except (requests.RequestException, ValueError):
        # Best effort: a failing authority yields no matches.
        return []

    answer = payload.get("q1") if isinstance(payload, dict) else None
    candidates = answer.get("result") if isinstance(answer, dict) else None
    if not isinstance(candidates, list):
        return []

    matches = []
    for candidate in candidates:
        if not isinstance(candidate, dict):
            continue
        match = {}
        if "id" in candidate:
            match["uri"] = candidate["id"]
        if "name" in candidate:
            match["label"] = candidate["name"]
        types = candidate.get("type")
        # Only the first type is reported; guard against an empty list or a
        # non-dict entry, which would otherwise raise.
        if isinstance(types, list) and types and isinstance(types[0], dict) and "id" in types[0]:
            match["description"] = types[0]["id"]
        matches.append(match)
    return matches
# Maps every supported authority identifier to the lookup function and the
# extra positional arguments that go between the entity and the result limit.
_AUTHORITY_QUERIES = {
    # DBpedia
    "DBpedia-ALL": (dbpedia, (None,)),
    "DBpedia-Event": (dbpedia, ("Event",)),
    "DBpedia-Organisation": (dbpedia, ("Organisation",)),
    "DBpedia-Person": (dbpedia, ("Person",)),
    "DBpedia-Place": (dbpedia, ("Place",)),
    "DBpedia-Work": (dbpedia, ("Work",)),
    # Geonames
    "Geonames": (geonames, ()),
    # Getty
    "Getty-All": (getty, ("/all",)),
    "Getty-AAT": (getty, ("/aat",)),
    "Getty-CONA": (gettyCONA, ()),
    "Getty-TGN": (getty, ("/tgn",)),
    "Getty-ULAN": (getty, ("/ulan",)),
    # GND
    "GND-Organisation": (gnd, ("CorporateBody",)),
    "GND-Person": (gnd, ("Person",)),
    "GND-Place": (gnd, ("PlaceOrGeographicName",)),
    "GND-Subject": (gnd, ("SubjectHeading",)),
    "GND-Work": (gnd, ("Work",)),
    # VIAF
    "VIAF-Bibliographic": (viaf, ("local.title",)),
    "VIAF-Corporate": (viaf, ("local.corporateNames",)),
    "VIAF-Expressions": (viaf, ("local.uniformTitleExpressions",)),
    "VIAF-Geographic": (viaf, ("local.geographicNames",)),
    "VIAF-Personal": (viaf, ("local.personalNames",)),
    "VIAF-Works": (viaf, ("local.uniformTitleWorks",)),
    # Wikidata
    "Wikidata": (wikidata, ()),
    # LINCS
    "LINCS": (lincs, (None,)),
    "LINCS-ALL": (lincs, (None,)),
    "LINCS-Person": (lincs, ("http://www.cidoc-crm.org/cidoc-crm/E21_Person",)),
    "LINCS-Place": (lincs, ("http://www.cidoc-crm.org/cidoc-crm/E53_Place",)),
    "LINCS-Work": (lincs, ([
        "http://iflastandards.info/ns/fr/frbr/frbroo/F1_Work",
        "http://iflastandards.info/ns/fr/frbr/frbroo/F2_Expression",
        "http://www.wikidata.org/entity/Q15306849",
    ],)),
    "LINCS-Group": (lincs, ("http://www.cidoc-crm.org/cidoc-crm/E74_Group",)),
    "LINCS-Event": (lincs, ([
        "http://www.cidoc-crm.org/cidoc-crm/E5_Event",
        "http://www.cidoc-crm.org/cidoc-crm/E7_Activity",
        "http://www.cidoc-crm.org/cidoc-crm/E8_Acquisition",
        "http://www.cidoc-crm.org/cidoc-crm/E12_Production",
        "http://www.cidoc-crm.org/cidoc-crm/E13_Attribute_Assignment",
        "http://www.cidoc-crm.org/cidoc-crm/E65_Creation",
        "http://www.cidoc-crm.org/cidoc-crm/E66_Formation",
        "http://www.cidoc-crm.org/cidoc-crm/E67_Birth",
        "http://www.cidoc-crm.org/cidoc-crm/E69_Death",
        "http://www.cidoc-crm.org/cidoc-crm/E85_Joining",
        "http://www.cidoc-crm.org/cidoc-crm/crmtex/TX6_Transcription",
        "http://iflastandards.info/ns/fr/frbr/frbroo/F29_Recording_Event",
        "http://iflastandards.info/ns/fr/frbr/frbroo/F31_Performance",
        "http://iflastandards.info/ns/fr/frbr/frbroo/F51_Pursuit",
    ],)),
}


@app.post("/reconcile/")
def read_item(reconcile: Reconcile) -> list[ResultForGivenAuthority]:
    """Look up ``reconcile.entity`` in every requested authority.

    Five results per authority are requested by default, ten when
    ``reconcile.moreResults`` is set.

    Returns:
        One entry per requested authority, in request order, each holding the
        authority identifier and its list of matches.

    Raises:
        HTTPException: With status 400 when an unknown authority is
            requested. All identifiers are checked before any authority is
            queried, so an invalid request causes no outbound traffic.
    """
    num_results = 10 if reconcile.moreResults else 5

    for authority in reconcile.authorities:
        if not isinstance(authority, str) or authority not in _AUTHORITY_QUERIES:
            raise HTTPException(status_code=400, detail="Invalid authority: " + str(authority))

    results: list = []
    for authority in reconcile.authorities:
        lookup, extra_args = _AUTHORITY_QUERIES[authority]
        results.append({
            "authority": authority,
            "matches": lookup(reconcile.entity, *extra_args, num_results),
        })
    return results
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment