Source code for pymedextcore.fhirtransform

#!/usr/bin/env python3

import base64
import xml.etree.ElementTree as ET
from .datatransform import DataTransform
from .document import Document
from .annotators import Annotation
import uuid
import logging
logger = logging.getLogger(__name__)

# XML namespace prefix that ElementTree prepends to every FHIR tag name.
_FHIR_NS = "{http://hl7.org/fhir}"


def _strip_fhir_ns(tag):
    """Return *tag* with the FHIR namespace prefix removed."""
    return tag.replace(_FHIR_NS, "")


def _node_to_dict(node):
    """Return the XML attributes of *node* plus its text as a new dict.

    A node without XML attributes yields ``{"value": ""}`` as its base so
    that callers can always look up ``"value"`` on such nodes.  The node's
    text (or ``""`` when it has none) is stored under ``"text"``.

    The attributes are copied so the parsed tree itself is never modified.
    """
    entry = dict(node.attrib) if node.attrib else {"value": ""}
    entry["text"] = node.text or ""
    return entry


class FHIR(DataTransform):
    """Load the text documents contained in a FHIR XML file."""

    @staticmethod
    def __parse_xml__(
        root,
        getResources=(
            "{http://hl7.org/fhir}DocumentReference",
            "{http://hl7.org/fhir}Binary",
        ),
    ):
        """Convert the selected resources of a parsed FHIR tree to dicts.

        The elements three levels below ``root`` (presumably
        Bundle > entry > resource > <Resource>; confirm against the input
        files) are inspected and those whose tag is listed in
        ``getResources`` are converted.

        :param root: root element of the parsed XML tree
        :param getResources: namespaced tags of the resources to extract
        :returns: one single-key dict per matching resource, shaped as
            ``{resource: {child: {<xml attrs>, "text": ..., "attributes":
            {grandchild: {<xml attrs>, "text": ..., "element":
            {great_grandchild: {<xml attrs>, "text": ...}}}}}}}``.
            The ``"element"`` key is only present when the grandchild has
            children of its own.  Names have the FHIR namespace removed.
        :rtype: list
        """
        fhir_list = []
        for entry in root:
            for wrapper in entry:
                for resource in wrapper:
                    if resource.tag not in getResources:
                        continue
                    # Lazy %-style arguments: passing the values as bare
                    # positional arguments makes logging treat the tag as a
                    # format string and fail to render the record.
                    logger.debug(
                        "%s %s %s",
                        resource.tag, resource.attrib, resource.text,
                    )
                    logger.debug("###########")

                    resource_dict = {}
                    for field in resource:
                        field_dict = _node_to_dict(field)
                        children = {}
                        for child in field:
                            child_dict = _node_to_dict(child)
                            if len(child) != 0:
                                child_dict["element"] = {
                                    _strip_fhir_ns(leaf.tag): _node_to_dict(leaf)
                                    for leaf in child
                                }
                            # Repeated tags overwrite each other: last wins.
                            children[_strip_fhir_ns(child.tag)] = child_dict
                        field_dict["attributes"] = children
                        resource_dict[_strip_fhir_ns(field.tag)] = field_dict

                    fhir_list.append(
                        {_strip_fhir_ns(resource.tag): resource_dict}
                    )
        return fhir_list

    @staticmethod
    def __orderDocument__(fhir_list):
        """Group Binary contents and DocumentReference metadata by binary id.

        ``text/plain`` Binary resources contribute the base64-decoded,
        UTF-8 ``"raw_text"``; other content types are ignored.
        DocumentReference resources contribute ``"subject"`` and ``"date"``
        and are matched to a Binary through their attachment url with
        ``"/Binary/"`` removed.

        :param fhir_list: list of dicts as returned by ``__parse_xml__``
        :returns: dict mapping a binary id to a dict that may hold the
            keys ``"raw_text"``, ``"subject"`` and ``"date"``
        :rtype: dict
        :raises KeyError: if a resource lacks one of the fields read here
        """
        fhir_dict = {}
        for data in fhir_list:
            if "Binary" in data:
                binary = data["Binary"]
                logger.debug("%s", binary["contentType"])
                binary_id = binary["id"]["value"]
                # Other types seen in the wild: 'image/jpeg',
                # 'application/pdf', 'application/dicom'.
                if binary["contentType"]["value"] in ["text/plain"]:
                    raw_text = base64.b64decode(
                        binary["content"]["value"]
                    ).decode("utf-8")
                    fhir_dict.setdefault(binary_id, {})["raw_text"] = raw_text

            if "DocumentReference" in data:
                reference = data["DocumentReference"]
                subject = reference["subject"]["attributes"]["reference"]["value"]
                created = reference["created"]["value"]
                attachment = reference["content"]["attributes"]["attachment"]
                binary_id = attachment["element"]["url"]["value"].replace(
                    "/Binary/", ""
                )
                fhir_dict.setdefault(binary_id, {}).update(
                    {"subject": subject, "date": created}
                )
        return fhir_dict

    @staticmethod
    def load_xml(fhir_input: str) -> list:
        """Build one Document per complete text document of a FHIR file.

        Entries that lack the text, the subject or the date (for example a
        DocumentReference pointing at a non ``text/plain`` Binary) are
        skipped with a warning instead of aborting the whole load.

        :param fhir_input: file name of a fhir file
        :returns: the documents found in the file
        :rtype: list of PyMedExt Document
        """
        root = ET.parse(fhir_input).getroot()
        fhir_dict = FHIR.__orderDocument__(FHIR.__parse_xml__(root))

        documents_collection = []
        for key, fields in fhir_dict.items():
            missing = [
                name for name in ("raw_text", "subject", "date")
                if name not in fields
            ]
            if missing:
                logger.warning(
                    "Skipping FHIR entry %s: missing %s",
                    key, ", ".join(missing),
                )
                continue
            documents_collection.append(
                Document(
                    raw_text=fields["raw_text"],
                    ID=str(uuid.uuid1()),
                    source="FHIR/" + fields["subject"] + "/" + key,
                    documentDate=fields["date"],
                )
            )
        return documents_collection