import uuid
import re
import json
import unidecode
from subprocess import Popen, PIPE
from os import path
from typing import List, Optional, Tuple, Dict
from deprecated.sphinx import deprecated
from deprecated.sphinx import versionadded
from deprecated.sphinx import versionchanged
import logging
logger = logging.getLogger(__name__)
class Annotation:
    """Base object holding a single annotation.

    Each Annotator must return a list of Annotation objects. Besides its own
    payload (type, value, span, attributes, ...) an Annotation carries three
    graph links -- ``parent``, ``children`` and ``root`` -- which all start as
    ``None`` and are filled in through :meth:`set_parent`, :meth:`add_child`
    and :meth:`set_root`.
    """

    def __init__(self, type:str, value:str, source:str, source_ID:str,
                 span:Optional[Tuple[int,int]] = None, attributes:Optional[Dict] = None,
                 isEntity:bool=False, ID:Optional[str] = None, ngram:Optional[str] = None):
        """Initialize an Annotation object.

        :param type: annotation type defined by the user (linked to the Annotator)
        :param value: the annotation value, has to be a string
        :param source: the name of the Annotator
        :param source_ID: the Annotator id
        :param span: the (start, end) position of the annotation
        :param attributes: extra key elements stored as a dict when the value
            alone is not enough
        :param isEntity: True when the annotation is an entity, i.e. an
            annotation which can be normalized (e.g. by a specific uri from an
            ontology); not the case for a segment
        :param ID: ID of this specific annotation; a uuid1 string is generated
            when omitted
        :param ngram: raw text covered by the annotation
        :returns: Annotation
        :rtype: Annotation
        """
        self.value = value
        self.type = type
        self.source = source
        self.span = span
        self.source_ID = source_ID
        self.attributes = attributes
        # Only generate an identifier when the caller did not provide one;
        # a caller-supplied ID must never be replaced, and a generated one
        # must not be overwritten by the ``None`` default.
        self.ID = str(uuid.uuid1()) if ID is None else ID
        self.isEntity = isEntity
        # graph properties
        self.ngram = ngram  # should be called raw_value?
        self.parent = None
        self.children = None
        self.root = None

    def to_json(self) -> str:
        """Transform the Annotation to a JSON string.

        :returns: JSON serialization of :meth:`to_dict`
        :rtype: str
        """
        return json.dumps(self.to_dict())

    def to_dict(self) -> dict:
        """Transform the Annotation to a dict object.

        :returns: dict describing the annotation
        :rtype: dict
        """
        # NOTE(review): only the 'type', 'source_ID', 'isEntity' and
        # 'attributes' keys were visible in the reviewed snapshot; the other
        # keys mirror the constructor fields -- confirm their names against
        # the consumers of this dict.
        return {
            'type': self.type,
            'value': self.value,
            'ngram': self.ngram,
            'span': self.span,
            'source': self.source,
            'source_ID': self.source_ID,
            'isEntity': self.isEntity,
            'attributes': self.attributes,
            'id': self.ID,
        }

    def get_attributes(self):
        """Get the attributes of the current Annotation.

        :returns: attributes
        :rtype: a dict (or None when no attributes were set)
        """
        # NOTE(review): the original summary mentions "current and parents
        # Node"; only the current node's attributes are returned here --
        # confirm whether parent attributes are expected to be merged in.
        return self.attributes

    def get_ngram(self):
        """Get the ngram of the current Annotation.

        :returns: raw ngram
        :rtype: string
        """
        return self.ngram

    def set_ngram(self):
        """Set the ngram of the current Annotation from the root value.

        The ngram is the slice ``root.value[span[0]:span[1]]``, so both
        ``root`` and ``span`` must be set before calling this method.

        :returns: 1
        :rtype: int
        """
        self.ngram = self.root.value[self.span[0]:self.span[1]]
        return 1

    def get_span(self):
        """Return the current Annotation span.

        :returns: span(start,end)
        :rtype: tuple
        """
        return self.span

    def get_children_span(self):
        """Return the span of every direct child of the current node.

        :returns: spans of the children, empty when there are no children
        :rtype: list of tuple
        """
        if self.children is None:
            return []
        return [child.get_span() for child in self.children]

    def get_entities_children(self):
        """Collect the Annotations flagged ``isEntity`` below the current node.

        :returns: entity annotations found in the subtree
        :rtype: list
        """
        # NOTE(review): reconstructed from a partially visible body. Each
        # child contributes the entities of its own subtree, and the current
        # node is appended last when it is itself an entity -- confirm the
        # expected ordering and whether the node itself should be included.
        entities = []
        for child in self.children or []:
            entities.extend(child.get_entities_children())
        if self.isEntity:
            entities.append(self)
        return entities

    def get_properties(self, filter_type:List[str]):
        """Return the current node properties if its type is in ``filter_type``.

        :param filter_type: list of Annotation types
        :returns: a one-element list with the node properties, or an empty
            list when the type does not match
        :rtype: list of dictionary
        """
        if self.type not in filter_type:
            return []
        # NOTE(review): the exact property keys were not visible in the
        # reviewed snapshot -- confirm against callers.
        return [{
            "type": self.type,
            "value": self.value,
            "attributes": self.attributes,
            "span": self.span,
        }]

    def get_parents_properties(self, filter_type:List[str]):
        """Return the properties of the current node and of its ancestors.

        Only nodes whose type belongs to ``filter_type`` contribute.

        :param filter_type: list of Annotation types
        :returns: list of current and parents Annotation properties
        :rtype: list of dict
        """
        properties = list(self.get_properties(filter_type))
        if self.parent is not None:
            properties.extend(self.parent.get_parents_properties(filter_type))
        return properties

    def set_parent(self, parent):
        """Set the parent of the current Annotation.

        :param parent: Annotation
        :returns: 1
        :rtype: int
        """
        self.parent = parent
        return 1

    def set_root(self, root):
        """Set the root of the current Annotation.

        :param root: Annotation
        :returns: 1
        :rtype: int
        """
        self.root = root
        return 1

    def add_child(self, child):
        """Add a child to the current Annotation.

        :param child: an Annotation to set as child of the current node
        :returns: None
        :rtype: None
        """
        # ``children`` stays None until the first child is attached.
        if self.children is None:
            self.children = [child]
        else:
            self.children.append(child)

    def add_property(self, neighbor):
        """Add a neighbor as a property of the current Annotation.

        The neighbor's :meth:`to_dict` output is appended to
        ``attributes["properties"]``; the attributes dict and the list are
        created on first use.

        :param neighbor: the Annotation to record as a property (intended for
            a neighbor sharing the same span; the span is not checked here)
        :returns: None
        :rtype: None
        """
        if self.attributes is None:
            self.attributes = dict()
        self.attributes.setdefault("properties", []).append(neighbor.to_dict())

    def get_parent(self, from_type) -> "Optional[Annotation]":
        """Return the closest ancestor of the current Annotation with a given type.

        :param from_type: specific type to find
        :returns: the closest ancestor of that type, or None when there is none
        :rtype: Annotation
        """
        node = self.parent
        while node is not None:
            if node.type == from_type:
                return node
            node = node.parent
        return None
class Annotator:
    """Abstract base class of every Annotator.

    A concrete Annotator must implement :meth:`annotate_function`, which
    returns a list of Annotation objects.
    """

    def __init__(self, key_input:List[str], key_output:str, ID:str):
        """Initialize an Annotator.

        :param key_input: a list of input annotation types (an annotator may
            use more than one type of annotation)
        :param key_output: a string which is the type of the Annotator
        :param ID: an identifier (e.g. a uuid) generated by the user; it must
            be unique
        :returns: Annotator
        :rtype: Annotator
        """
        self.key_input = key_input  # list
        self.key_output = key_output  # str
        self.ID = ID

    def annotate_function(self, _input):
        """Main annotation function; each Annotator must implement it.

        :param _input: the input to annotate (described upstream as a list of
            Annotation types)
        :returns: a list of annotations. They will be added to
            Document.annotations
        :rtype: List[Annotation]
        :raises NotImplementedError: always, on the base class
        """
        # NOTE(review): the base implementation was not visible in the
        # reviewed snapshot. Failing loudly keeps a subclass that forgot to
        # override this method from silently producing ``None``.
        raise NotImplementedError(
            "%s must implement annotate_function()" % type(self).__name__
        )
class Relation:
    """Base object holding a relation between two entities."""

    def __init__(self, type: str, head: str, target:str, source:str,
                 source_ID:str, attributes:Optional[List] = None, ID:Optional[str] = None):
        """Initialize a Relation object.

        :param type: relation type defined by the user (linked to the Annotator)
        :param head: head of the relation, ID of the source entity
        :param target: target of the relation, ID of the target entity
        :param source: the name of the Annotator
        :param source_ID: the Annotator id
        :param attributes: extra key elements to keep alongside the relation
            when head and target alone are not enough
        :param ID: ID of this specific relation; a uuid1 string is generated
            when omitted
        :returns: Relation
        :rtype: Relation
        """
        # head and target are stored separately: a chained assignment here
        # would overwrite the head with the target and leave ``target`` unset.
        self.head = head
        self.target = target
        self.type = type
        self.source = source
        self.source_ID = source_ID
        self.attributes = attributes
        # Keep a caller-supplied ID; only generate one when it is missing.
        self.ID = str(uuid.uuid1()) if ID is None else ID
        # TODO: add graph properties (ngram, parent, children, root)

    def to_json(self) -> str:
        """Transform the Relation to a JSON string.

        :returns: JSON serialization of :meth:`to_dict`
        :rtype: str
        """
        # json.dumps returns a string; json.dump would require a file object.
        return json.dumps(self.to_dict())

    def to_dict(self) -> dict:
        """Transform the Relation to a dict object.

        :returns: dict describing the relation
        :rtype: dict
        """
        # NOTE(review): only the 'type', 'source_ID' and 'attributes' keys
        # were visible in the reviewed snapshot; the other keys mirror the
        # constructor fields -- confirm their names against the consumers of
        # this dict.
        return {
            'type': self.type,
            'head': self.head,
            'target': self.target,
            'source': self.source,
            'source_ID': self.source_ID,
            'attributes': self.attributes,
            'id': self.ID,
        }