#!/usr/bin/env python3
import base64
import logging
import uuid
import xml.etree.ElementTree as ET

from .datatransform import DataTransform
from .document import Document
from .annotators import Annotation

logger = logging.getLogger(__name__)

# FHIR XML elements are namespaced; the prefix is stripped from the keys of the
# dictionaries built below.
FHIR_NS = "{http://hl7.org/fhir}"
class FHIR(DataTransform):
    """Transform FHIR XML bundles into PyMedExt Documents."""

    @staticmethod
    def __parse_xml__(root, getResources=(FHIR_NS + "DocumentReference", FHIR_NS + "Binary")):
        """Walk a FHIR Bundle element tree and collect the requested resource
        types as nested dictionaries keyed by tag name (namespace stripped)."""
        fhir_list = []
        for entry in root:
            for resources in entry:
                for resource in resources:
                    if resource.tag not in getResources:
                        continue
                    logger.debug("%s %s %s", resource.tag, resource.attrib, resource.text)
                    resourceDict = dict()
                    for attributes in resource:
                        attrDict = dict()
                        for attribute in attributes:
                            attrKey = attribute.tag.replace(FHIR_NS, "")
                            # keep the element's XML attributes when present, otherwise an empty value
                            attrDict[attrKey] = attribute.attrib if attribute.attrib else {"value": ""}
                            attrDict[attrKey].update({"text": attribute.text if attribute.text else ""})
                            if len(attribute) != 0:
                                # one more level of nesting, e.g. content/attachment/url
                                elementDict = dict()
                                for element in attribute:
                                    elementKey = element.tag.replace(FHIR_NS, "")
                                    elementDict[elementKey] = element.attrib if element.attrib else {"value": ""}
                                    elementDict[elementKey].update({"text": element.text if element.text else ""})
                                attrDict[attrKey].update({"element": elementDict})
                        resourceKey = attributes.tag.replace(FHIR_NS, "")
                        resourceDict[resourceKey] = attributes.attrib if attributes.attrib else {"value": ""}
                        resourceDict[resourceKey].update({"text": attributes.text if attributes.text else ""})
                        resourceDict[resourceKey].update({"attributes": attrDict})
                    fhir_list.append({resource.tag.replace(FHIR_NS, ""): resourceDict})
        return fhir_list
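    # Illustrative sketch (values are hypothetical): for a Bundle holding one
    # text/plain Binary resource, __parse_xml__ returns entries shaped like
    #   [{"Binary": {"id": {"value": "doc-1", "text": "", "attributes": {...}},
    #                "contentType": {"value": "text/plain", "text": "", "attributes": {...}},
    #                "content": {"value": "<base64 text>", "text": "", "attributes": {...}}}}]
    # which is the structure __orderDocument__ below relies on.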
    @staticmethod
    def __orderDocument__(fhir_list):
        """Group parsed Binary and DocumentReference resources by Binary id,
        decoding text/plain attachments."""
        fhir_dict = dict()
        for data in fhir_list:
            if "Binary" in data.keys():
                logger.debug("Binary contentType: %s", data["Binary"]["contentType"])
                tmpKey = data["Binary"]["id"]["value"]
                # only text/plain attachments are decoded; other content types such as
                # 'image/jpeg', 'application/pdf' and 'application/dicom' are skipped
                if data["Binary"]["contentType"]["value"] in ["text/plain"]:
                    raw_text = base64.b64decode(data["Binary"]["content"]["value"]).decode("utf-8")
                    if tmpKey in fhir_dict.keys():
                        fhir_dict[tmpKey].update({"raw_text": raw_text})
                    else:
                        fhir_dict[tmpKey] = {"raw_text": raw_text}
            if "DocumentReference" in data.keys():
                subject = data["DocumentReference"]["subject"]["attributes"]["reference"]["value"]
                thisDate = data["DocumentReference"]["created"]["value"]
                tmpKey = data["DocumentReference"]["content"]["attributes"]["attachment"]["element"]["url"]["value"].replace("/Binary/", "")
                if tmpKey in fhir_dict.keys():
                    fhir_dict[tmpKey].update({"subject": subject, "date": thisDate})
                else:
                    fhir_dict[tmpKey] = {"subject": subject, "date": thisDate}
        return fhir_dict
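    # Illustrative sketch (id, subject and date values are hypothetical): after
    # __orderDocument__ the entries are keyed by the Binary resource id and merge
    # the decoded text with the matching DocumentReference metadata, e.g.
    #   {"doc-1": {"raw_text": "...", "subject": "Patient/123", "date": "2020-01-01"}}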
    @staticmethod
    def load_xml(fhir_input: str):
        """Load a FHIR XML bundle and build one Document per text/plain Binary resource.

        :param fhir_input: file name of a FHIR XML file
        :returns: list of Documents
        :rtype: list of PyMedExt Document
        """
        tree = ET.parse(fhir_input)
        root = tree.getroot()
        fhir_dict = FHIR.__orderDocument__(FHIR.__parse_xml__(root))
        documents_collection = []
        for key in fhir_dict.keys():
            raw_text_ID = str(uuid.uuid1())
            thisDocument = Document(raw_text=fhir_dict[key]["raw_text"],
                                    ID=raw_text_ID,
                                    source="FHIR/" + fhir_dict[key]["subject"] + "/" + key,
                                    documentDate=fhir_dict[key]["date"])
            documents_collection.append(thisDocument)
        return documents_collection
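

# Minimal usage sketch (the input file name is hypothetical): load_xml returns
# one Document per text/plain Binary resource found in the bundle.
if __name__ == "__main__":
    documents = FHIR.load_xml("bundle.xml")  # hypothetical FHIR XML file
    print(f"loaded {len(documents)} document(s)")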