Source code for digital_milliet.lib.commentaries
import datetime
from uuid import uuid4
from urllib.parse import urlparse
import re
from flask_pymongo import PyMongo
from bson.objectid import ObjectId
from MyCapytain.common.reference import URN
[docs]class CommentaryHandler(object):
""" Parses data for retrieval/storage to/from the database
"""
def __init__(self, db=None, authors=None, config=None, auth=None):
    """ Build a CommentaryHandler bound to its collaborators

    Nothing is validated or contacted here: the four arguments are only
    stored on the instance for the other methods to use.

    :param db: Mongo Db Handle (kept as ``self.mongo``)
    :type db: PyMongo
    :param authors: helper for building new Author records (kept as ``self.author_builder``)
    :type authors: AuthorBuilder
    :param config: configuration dictionary (kept as ``self.config``)
    :type config: dict
    :param auth: authentication helper (kept as ``self.auth``); its ``current_user()`` is called when a Person is built
    """
    self.mongo, self.author_builder = db, authors
    self.config, self.auth = config, auth
def create_commentary(self, form):
    """Save a new set of annotations from the input form

    The form is converted to an annotation record. The record is stored
    only when no record with the same commentary body ``@id`` exists yet
    and the record passes ``validate_annotation``; the author index is
    rebuilt from the record right after it is stored.

    :param form: key/value pairs from input form
    :type form: dict
    :return: the Milliet number (``form["milnum"]`` as submitted) for the saved annotations or None if the record couldn't be saved
    :rtype: string
    :raises ValueError: propagated from ``validate_annotation`` when it rejects a URI
    """
    data = self.form_to_OpenAnnotation(form)
    cid = data["commentary"][0]["hasBody"]["@id"]
    collection = self.mongo.db.annotation

    # The "create" path must never overwrite an existing commentary.
    if collection.find_one({"commentary.hasBody.@id": cid}):
        return None
    if not self.validate_annotation(data):
        return None

    # insert_one() replaces Collection.insert(), which PyMongo deprecated
    # in 3.0 and removed in 4.0. Like insert(), it adds "_id" to ``data``.
    collection.insert_one(data)
    # Now compile author info
    self.author_builder.author_db_build(data)
    return form["milnum"]
def update_commentary(self, form):
    """Save an edited set of annotations to the db

    Loads the record whose ``_id`` is ``form['mongo_id']``, copies the
    edited commentary, bibliography, translation, primary text, image and
    tag values onto it, stamps the four text annotations with the
    modification time and the current contributor, then validates and
    stores the record and rebuilds the author index from it.

    :param form: key/value pairs from edit form
    :type form: dict
    :return: the ``_id`` of the stored record, or None if ``validate_annotation`` returned a false value
    :raises 404 Not Found Exception: if no record matches ``form['mongo_id']``
    :raises ValueError: propagated from ``validate_annotation`` when it rejects a URI
    """
    modtime = datetime.datetime.utcnow().isoformat()
    collection = self.mongo.db.annotation
    record = collection.find_one_or_404(
        {'_id': ObjectId(form['mongo_id'])})

    commentary = record['commentary'][0]
    bibliography = record['bibliography'][0]
    translations = record['translation']

    commentary['hasBody']['chars'] = form['c1text']
    bibliography['hasBody']['chars'] = form['b1text']
    cite_urn = commentary['hasBody']['@id']
    # NOTE(review): taking the third dot-separated field assumes the URI
    # in front of the Milliet number contains exactly two dots - confirm
    # against the configured CITE_URI_PREFIX / CITE_COLLECTION.
    millnum = cite_urn.split('.')[2]

    def edit_translation(position):
        """Apply the form fields of translation t<position + 1>, if submitted."""
        num = 't' + str(position + 1)
        if num + '_text' not in form:
            return
        entry = translations[position]
        text = form[num + '_text']
        if text == '':
            # no embedded text: the translation is referenced by URI
            entry['hasBody'] = form[num + '_uri']
        elif isinstance(entry['hasBody'], dict):
            entry['hasBody']['chars'] = text
            entry['hasBody']['language'] = form['lang' + str(position + 1)]
        else:
            # if we have switched from a uri to text then make sure we
            # have the structure in place
            entry['hasBody'] = self.format_translation_annotation(
                num, millnum, text, None, None,
                form['lang' + str(position + 1)])

    edit_translation(0)
    edit_translation(1)

    edited = (commentary, bibliography, translations[0], translations[1])
    for anno in edited:
        anno['modified'] = modtime
    if form['orig_uri'] != '':
        translations[0]['hasTarget'] = form['orig_uri']
        translations[1]['hasTarget'] = form['orig_uri']
    for anno in edited:
        self.update_contributors(anno)

    # Older records may hold a bare target; normalise to [uri, embedded text]
    if not isinstance(commentary['hasTarget'], list):
        main_text = {
            "@id": self.format_uri(millnum, 'l1'),
            "format": "text",
            "chars": "",
            "language": ""
        }
        commentary['hasTarget'] = ["", main_text]
    commentary['hasTarget'][0] = form['orig_uri']
    commentary['hasTarget'][1]['chars'] = form['orig_text']
    commentary['hasTarget'][1]['language'] = form['orig_lang']

    if "iiif" in form and len(form["iiif"]):
        submitted = dict(zip(form["iiif"], form["iiif_publisher"]))
        # .get(): tolerate records stored without an "images" key
        images = {anno["oa:hasBody"]["@id"]: anno
                  for anno in record.get("images", [])}
        for manifest_uri in list(images):
            if manifest_uri not in submitted:
                # manifest was removed in the form
                del images[manifest_uri]
                continue
            # Consume the submitted entry whether or not the publisher
            # changed; otherwise an unchanged manifest would fall through to
            # the "new manifests" loop below and be re-created with a fresh
            # @id on every save.
            publisher = submitted.pop(manifest_uri)
            existing = images[manifest_uri]
            if publisher != existing["oa:hasBody"]["dc:publisher"]:
                images[manifest_uri] = self.format_manifests_from_form(
                    manifest_uri, publisher, modtime, millnum,
                    update_anno=existing)
        # whatever is left was not stored before: create it
        for manifest_uri, publisher in submitted.items():
            if manifest_uri != '':
                images[manifest_uri] = self.format_manifests_from_form(
                    manifest_uri, publisher, modtime, millnum)
        record["images"] = list(images.values())
    else:
        record["images"] = []

    # we're just going to recreate the tags for now
    # iterating through and adding/deleting would be the better thing to do
    person = self.format_person_from_authentificated_user()
    if "tags" in form:
        record["tags"] = [
            self.create_tag_annotation(tag, cite_urn, person, modtime)
            for tag in filter(len, (form["tags"] + form["semantic_tags"]))
        ]

    if not self.validate_annotation(record):
        return None
    # Collection.save() was deprecated in PyMongo 3.0 and removed in 4.0.
    # For a document that already has an _id it performed an upserting
    # replace and returned that _id, which is reproduced here.
    collection.replace_one({'_id': record['_id']}, record, upsert=True)
    self.author_builder.author_db_build(record)
    return record['_id']
def form_to_OpenAnnotation(self, form):
    """ Make a structure for the annotation from a set of key/value pairs

    Builds one record holding a commentary annotation, a bibliography
    annotation, two translation annotations and, when the form carries
    them, IIIF image annotations and tag annotations. Every identifier in
    the record is derived from the Milliet number zero-padded to three
    characters.

    :param form: key/value pairs from the form
    :type form: dict
    :return: the annotation
    :rtype: dict
    """
    date = datetime.datetime.today()
    stamp = str(date)
    milnum = form['milnum'].zfill(3)
    person = self.format_person_from_authentificated_user()
    commentary_uri = self.format_uri(milnum, 'c1')
    # a URI picked from the repository wins over a user-supplied one
    primary_source_uri = form['l1uri'] or form['own_uri_l1'] or ""
    main_text = {
        "@id": self.format_uri(milnum, 'l1'),
        "format": "text",
        "chars": form['l1text'],
        "language": form['select_l1']
    }

    def envelope(body, target, motivation):
        """Wrap a body/target pair in the properties shared by all four annotations."""
        return {
            "@context": "http://www.w3.org/ns/oa-context-20130208.json",
            "@id": self.generate_uuid(),
            "@type": "oa:Annotation",
            "annotatedAt": stamp,
            "creator": person,
            "hasBody": body,
            "hasTarget": target,
            "motivatedBy": motivation
        }

    def translation(num):
        """Build the translation annotation for form fields suffixed with ``num``."""
        # Pass the padded ``milnum`` (not form['milnum']) so the translation
        # body id agrees with the commentary, bibliography and text ids.
        body = self.format_translation_annotation(
            num, milnum, form[num + 'text'],
            form[num + 'uri'], form['own_uri_' + num], form['lang_' + num]
        )
        return envelope(body, primary_source_uri, "oa:linking")

    annotation = {
        "commentary": [
            envelope(
                {
                    "@id": commentary_uri,
                    "format": "text",
                    "chars": form['c1text'],
                    "language": "eng",
                },
                [primary_source_uri, main_text],
                "oa:commenting"
            )
        ],
        "bibliography": [
            envelope(
                {
                    "@id": self.format_uri(milnum, 'b1'),
                    "format": "text",
                    "chars": form['b1text'],
                    "language": "eng"
                },
                commentary_uri,
                "oa:linking"
            )
        ],
        "translation": [translation("t1"), translation("t2")],
        "tags": [],
        "images": []
    }
    if "iiif" in form:
        # rows of the form left blank come through as empty manifest URIs
        annotation["images"] = [
            self.format_manifests_from_form(
                manifest_uri, publisher, date, milnum)
            for manifest_uri, publisher in zip(
                form["iiif"], form["iiif_publisher"])
            if manifest_uri != ''
        ]
    if "tags" in form:
        annotation["tags"] = [
            self.create_tag_annotation(tag, commentary_uri, person, date)
            for tag in filter(len, (form["tags"] + form["semantic_tags"]))
        ]
    return annotation
def create_tag_annotation(self, tag, target, creator, date):
    """ Create a tag annotation

    A tag whose URL scheme is ``http`` or ``https`` becomes a semantic tag
    identified by the URI itself; any other tag becomes a plain text tag
    with a generated id and lower-cased text.

    :param tag: the tag (text or a URI)
    :type tag: string
    :param target: the target of the annotation
    :type target: string
    :param creator: the creator of the annotation
    :type creator: dict
    :param date: the date the annotation was created (stored as ``str(date)``)
    :type date: date
    :return: Annotation content to set at annotation["tags"]
    """
    # the annotation id is generated before any body id
    annotation_id = self.generate_uuid()
    if urlparse(tag).scheme in ("http", "https"):
        body = {"@id": tag, "@type": "oa:SemanticTag"}
    else:
        body = {
            "@id": self.generate_uuid(),
            "@type": "oa:Tag",
            "format": "text",
            # normalize tags to lower case
            "chars": tag.lower(),
        }
    return {
        "@context": "http://www.w3.org/ns/oa-context-20130208.json",
        "@id": annotation_id,
        "@type": "oa:Annotation",
        "annotatedAt": str(date),
        "creator": creator,
        "hasTarget": target,
        "motivatedBy": "oa:tagging",
        "hasBody": body,
    }
def format_manifests_from_form(
        self,
        manifest_uri,
        publisher,
        date,
        milnum,
        update_anno=None):
    """ Helper to format IIIF Manifests given a form

    Without ``update_anno`` a new image annotation targeting the ``c1``
    URI of the Milliet number is built. With ``update_anno`` that
    annotation is modified in place: its body is replaced and
    ``oa:serializedAt`` is set, every other property is left alone.

    :param manifest_uri: Manifest URI
    :param publisher: Publisher
    :param date: Current date (stored as ``str(date)``)
    :param milnum: Current milnum
    :param update_anno: existing image annotation to update instead of creating one
    :return: Value to set at annotation["images"]
    """
    body = {"@id": manifest_uri, "dc:publisher": publisher}
    stamp = str(date)

    if update_anno is None:
        return {
            "@context": {
                "oa": "http://www.w3.org/ns/oa-context-20130208.json",
                "dc": "http://purl.org/dc/elements/1.1/"
            },
            "@id": self.generate_uuid(),
            "@type": "oa:Annotation",
            "oa:annotatedAt": stamp,
            "oa:hasBody": body,
            "oa:hasTarget": self.format_uri(milnum, 'c1'),
            "oa:motivatedBy": "oa:linking"
        }

    update_anno["oa:hasBody"] = body
    update_anno["oa:serializedAt"] = stamp
    return update_anno
def generate_uuid(self):
    """Create a unique id for an annotation

    The id is a random (version 4) UUID prefixed with ``digmilann.``.

    :return: uid
    :rtype: string
    """
    return 'digmilann.{}'.format(uuid4())
def format_translation_annotation(
        self, num, milnum, text, uri, own_uri, lang):
    """ Build the body of a translation annotation.

    Precedence is ``uri``, then ``own_uri``, then an embedded text body;
    the embedded body is only built when both URIs are empty.

    :param num: the translation identifier (t1 or t2)
    :type num: string
    :param milnum: the Milliet number for the annotation
    :type milnum: string
    :param text: the text of the translation (None if uri or own_uri is supplied)
    :type text: String
    :param uri: the uri of a translation - this is expected to be a CTS URN that appears in the linked cts repository
    :type uri: string
    :param own_uri: an user-supplied uri for a translation - this is for an externally linked translation text
    :type own_uri: string
    :param lang: the language code of the translation ('fra' or 'eng')
    :type lang: string
    :return: the body of the translation annotation
    :rtype: string (for a URI) or dict (if an embedded body)
    """
    if uri:
        return uri
    if own_uri:
        return own_uri
    return {
        "@id": self.format_uri(milnum, num),
        "format": "text",
        "chars": text,
        "language": lang
    }
def get_milliet(self, milliet_id, simplify=True):
    """Get the first set of annotations that target the supplied Milliet Number

    With ``simplify=False`` the raw record is returned on its own, minus
    its ``_id``. Otherwise the record is simplified for the view and
    paired with the author, work title and passage found for the number.

    :param milliet_id: Milliet Number
    :type milliet_id: string
    :param simplify: If set to True, simplify for the view
    :type simplify: bool
    :return: Tuple where first element is the set of annotations and the second the author informations (only the record itself when simplify is False)
    :rtype: (dict, dict)
    :raises 404 Not Found Exception: if the annotation is not found
    """
    record = self.mongo.db.annotation.find_one_or_404(
        {"commentary.hasBody.@id": self.format_uri(milliet_id, 'c1')})
    if simplify is False:
        del record['_id']
        return record

    view = self.simplify_milliet(record)
    view["millnum"] = milliet_id

    auth_info = dict(auth="", work="", passage="")
    matches = self.author_builder.search(query=milliet_id, milliet_id=True)
    for author in matches:
        auth_info['auth'] = author['name']
        for work in author['works']:
            # only the first entry of a work that mentions the number counts
            hit = next(
                (tup for tup in work['millnums'] if milliet_id in tup), None)
            if hit is not None:
                auth_info['work'] = work['title']
                auth_info['passage'] = hit[1]
    return view, auth_info
def remove_milliet(self, milliet_id):
    """Remove the annotation set that targets the supplied Milliet Number

    Every record whose commentary body id is the ``c1`` URI of the number
    is deleted. The number is removed from the author records only when at
    least one annotation record was deleted.

    :param milliet_id: Milliet Number
    :type milliet_id: string
    :return: the number of annotation records removed plus the value returned by the author clean-up
    :rtype: int
    """
    target = {"commentary.hasBody.@id": self.format_uri(milliet_id, 'c1')}
    deleted = self.mongo.db.annotation.delete_many(target).deleted_count
    if deleted > 0:
        return deleted + \
            self.author_builder.remove_milliet_id_from_author(milliet_id)
    return deleted
def simplify_milliet(self, annotation_set):
    """ Parse a db record into a dict setup for views

    Keys written: ``mid``, ``bibl``, ``comm``, ``comm@id``, ``creator``,
    ``contributor``, per translation ``tN_text`` or ``tN_uri`` plus
    ``tN_lang``, ``images``, ``tags``, ``semantic_tags``, ``orig_uri`` and,
    unless the commentary target is a bare string, ``orig_text``.

    :param annotation_set: the db record
    :type annotation_set: dict
    :return: Parsed version of the record
    :rtype: dict
    """
    commentary = annotation_set['commentary'][0]
    result = {
        'mid': annotation_set['_id'],
        'bibl': annotation_set['bibliography'][0]['hasBody']['chars'],
        'comm': commentary['hasBody']['chars'],
        'comm@id': commentary['hasBody']['@id'],
        # None when the record carries no creator / contributor
        'creator': commentary.get('creator'),
        'contributor': commentary.get('contributor'),
    }

    for position, transl in enumerate(annotation_set['translation'], start=1):
        body = transl['hasBody']
        if isinstance(body, dict):
            # embedded text: the slot name is the last segment of the body id
            t_num = body['@id'].split('.')[-1]
            text, lang = body['chars'], body['language']
            result[t_num + '_text'] = text
            result[t_num + '_lang'] = lang
            continue

        # otherwise the body is a URI and the slot comes from its position
        t_num = "t" + str(position)
        try:
            # the language is the leading non-digit run of whatever follows
            # the first '-' (e.g. "eng" in "...perseus-eng2:1.1")
            lang = re.search(r'\D+', body.split('-')[1]).group(0)
        except (AttributeError, IndexError, TypeError):
            # invalid URN we need to recover: show the value as English text
            result[t_num + '_text'] = body
            result[t_num + '_lang'] = "eng"
        else:
            result[t_num + '_uri'] = body
            result[t_num + '_lang'] = lang

    # List is executed to avoid generators
    result["images"] = [
        {
            "manifestUri": iiif_anno['oa:hasBody']["@id"],
            "location": iiif_anno['oa:hasBody']["dc:publisher"]
        } for iiif_anno in annotation_set["images"]
    ]
    # text tags carry "chars"; semantic tags only carry their URI as "@id"
    result["tags"] = [
        tag['hasBody']['chars']
        for tag in annotation_set["tags"] if 'chars' in tag['hasBody']]
    result["semantic_tags"] = [
        tag['hasBody']['@id']
        for tag in annotation_set["tags"] if 'chars' not in tag['hasBody']]

    target = commentary['hasTarget']
    if isinstance(target, list):
        result['orig_uri'] = target[0]
        result['orig_text'] = target[1]['chars']
    elif isinstance(target, dict):
        result['orig_uri'] = ""
        result['orig_text'] = target['chars']
    else:
        # bare URI target: no embedded text, so 'orig_text' is not set
        result['orig_uri'] = target
    return result
def validate_annotation(self, annotation):
    """Validate the structure of an annotation.

    This is not foolproof but it attempts to catch some errors that could come in from mistakes
    in data entry. It would be good to make sure these all couldn't occur to begin with.

    Checked, in this order: the first commentary target when it is not
    empty, then the body of each of the two translations when that body is
    a string. Each checked value is handed to ``URN``.

    :param annotation: the annotation record
    :type annotation: dict
    :return: True when no check failed
    :rtype: bool
    :raises ValueError: when ``URN`` raises ValueError for a checked value
    """
    target = annotation['commentary'][0]['hasTarget'][0]
    try:
        if target != '':
            URN(target)
    except ValueError:
        raise ValueError(
            "Invalid commentary target - not parseable as URN")

    translation_errors = (
        "Invalid translation 1 uri - not parseable as URN",
        "Invalid translation 2 uri - not parseable as URN",
    )
    for position, message in enumerate(translation_errors):
        body = annotation['translation'][position]['hasBody']
        try:
            # embedded (dict) bodies hold text, only string bodies are URIs
            if isinstance(body, str):
                URN(body)
        except ValueError:
            raise ValueError(message)
    return True
def retrieve_millietId_in_commentaries(self, commentaries):
    """ Extract a sorted list of Milliet ID from a set of commentary annotations

    The Milliet number is the third dot-separated field of the commentary
    body ``@id``. Rows from which it cannot be read, or where it is empty,
    are skipped. Numbers are ordered by the digits that precede their first
    letter; numbers without such a numeric start sort after all numeric
    ones, by that leading text. Ties keep their input order.

    :param commentaries: set of commentary annotations
    :type commentaries: list
    :return: sorted list of extracted Milliet numbers
    :rtype: list
    """
    def sort_key(millnum):
        prefix = re.split('([A-Za-z]+)', millnum)[0]
        # Ranking numeric and textual prefixes separately keeps int and str
        # from being compared with each other, which raises TypeError.
        # isdecimal() (unlike isdigit()) is only true for text int() accepts.
        if prefix.isdecimal():
            return (0, int(prefix), '')
        return (1, 0, prefix)

    millnum_list = []
    for row in commentaries:
        try:
            cite_urn = str(row['commentary'][0]['hasBody']['@id'])
            millnum = cite_urn.split('.')[2]
        except (KeyError, IndexError, TypeError):
            # malformed record: best effort, leave it out of the listing
            continue
        if millnum:
            millnum_list.append(millnum)
    return sorted(millnum_list, key=sort_key)
def format_uri(self, milliet_id, subcollection_id=None):
    """ Make a Cite Collection URI for an annotation

    N.B. this is not a valid implementation of the CITE protocol, as it does not support
    CITE collections. Future implementations should consider replacing this with a different identifier syntax.

    The URI is the configured ``CITE_URI_PREFIX`` and ``CITE_COLLECTION``
    run together, then the Milliet number and, when given, the
    subcollection, each joined with a dot.

    :param: milliet_id: The Milliet number
    :type: milliet_id: string
    :param: subcollection_id: the subcollection identifier (e.g. commentary, bibliography, etc.)
    :type: string
    :return: the compiled URI
    :rtype: string
    """
    segments = [
        self.config['CITE_URI_PREFIX'] + self.config['CITE_COLLECTION'],
        milliet_id,
    ]
    if subcollection_id is not None:
        segments.append(subcollection_id)
    return '.'.join(segments)
def format_person_from_authentificated_user(self):
    """ Make a Person for an annotation (i.e for contributor or creator)

    Uses the ``uri`` and ``name`` of whatever ``self.auth.current_user()``
    returns.

    :return: Person properties suitable for inclusion in the annotation, or None when there is no current user
    :rtype: dict
    """
    user = self.auth.current_user()
    if not user:
        return None
    return {"@id": user['uri'], "type": "Person", "name": user['name']}
def update_contributors(self, annotation_dict=None):
    """ Update the contributors for an annotation

    Inserts a Person object for the currently authenticated user if she doesn't already appear
    as either creator or contributor. The annotation is modified in place
    and always ends up with a ``contributor`` list, even when nobody is
    authenticated.

    :param annotation_dict: the annotation to update
    :type annotation_dict: dict
    """
    if annotation_dict is None:
        annotation_dict = {}
    contributors = annotation_dict.setdefault('contributor', [])
    person = self.format_person_from_authentificated_user()
    if not person:
        return
    if any(c['@id'] == person['@id'] for c in contributors):
        return
    # An annotation created without an authenticated user stores
    # ``creator`` as None; treat that (and a creator lacking "@id") the
    # same as having no creator instead of failing on the subscript.
    creator = annotation_dict.get('creator') or {}
    if creator.get('@id') != person['@id']:
        contributors.append(person)
def get_milliet_identifier_list(self):
    """ List all known milliet numbers

    Fetches every record that has a ``commentary`` field, ordered by
    commentary body id, and hands the result to
    ``retrieve_millietId_in_commentaries``.

    :return: whatever ``retrieve_millietId_in_commentaries`` returns for those records
    """
    has_commentary = {"commentary": {'$exists': 1}}
    by_commentary_id = [("commentary.hasBody.@id", 1)]
    cursor = self.mongo.db.annotation.find(has_commentary)
    return self.retrieve_millietId_in_commentaries(
        cursor.sort(by_commentary_id))
def get_existing_tags(self):
    """ List all existing tag body values

    Reads every record that has at least one tag and collects the text of
    ``oa:Tag`` bodies and the ``@id`` of ``oa:SemanticTag`` bodies. Each
    value is listed once, in order of first appearance.

    :return: tags and semantic tags
    :rtype: tuple
    """
    # "tags.0 exists" selects records whose tags array is non-empty. It
    # replaces a ``$where`` JavaScript predicate, which the server evaluates
    # per document and which is unavailable when server-side scripting is
    # disabled.
    tag_list = self.mongo.db.annotation.find({"tags.0": {'$exists': True}})
    # dicts are used as ordered sets
    tags = {}
    semantic_tags = {}
    for row in tag_list:
        for tag in row["tags"]:
            body = tag['hasBody']
            kind = body.get('@type')
            if kind == 'oa:Tag':
                tags[body['chars']] = 1
            elif kind == 'oa:SemanticTag':
                semantic_tags[body['@id']] = 1
    return list(tags), list(semantic_tags)
def search(self, query, tags=None):
    """ Search commentary record (Filters are exclusive)

    currently only searching in tags is supported: a query with an
    ``http``/``https`` scheme is matched against tag body ids, any other
    query against tag body text. ``tags`` is accepted but not read.

    :param query: String to search
    :param tags: Search in tags
    :return: List of matching records
    """
    looks_like_uri = urlparse(query).scheme in ("http", "https")
    field = "tags.hasBody.@id" if looks_like_uri else "tags.hasBody.chars"
    matches = self.mongo.db.annotation.find({field: query})
    ordered = matches.sort([("commentary.hasBody.@id", 1)])
    return self.retrieve_millietId_in_commentaries(ordered)
def get_surrounding_identifier(self, cid):
    """ Given a Milliet number, return the previous and next numbers available

    Neighbours are taken from the list returned by
    ``get_milliet_identifier_list``. A missing neighbour (the number is
    first or last) is reported as None, and a number that is not in the
    list at all has no neighbours.

    :param cid: Milliet number
    :type cid: string
    :return: pair of Milliet numbers
    :rtype: (string, string)
    """
    identifiers = self.get_milliet_identifier_list()
    try:
        index = identifiers.index(cid)
    except ValueError:
        # unknown number: report "no neighbours" rather than leaking the
        # bare "x is not in list" error to the caller
        return (None, None)
    previous_id = identifiers[index - 1] if index > 0 else None
    next_id = identifiers[index + 1] if index + 1 != len(identifiers) else None
    return (previous_id, next_id)