# Module: gd071
# Author: Johannes Busse, jbusse at jbusse dot de
# Homepage of GenDifS: (URL not preserved in this copy of the file)
# Version: gd071, 2024-10-23
from lxml import etree #
import rdflib
import owlrl
# global variables

# Verbosity level consulted by the `if verbose >= N` checks throughout this
# module; GenDifS.__init__ overwrites it with its `verb` argument.
# Starts at 0 (silent) instead of None so that those comparisons do not raise
# TypeError when a class is used before any GenDifS instance has been created.
verbose = 0

# Counter used to mint "NewID_<n>" identifiers for TAXONOMY nodes that have no
# ID or a duplicate one.
NewID = 1000
# main class
class GenDifS:
    """Main class managing file info; contains dict of GenDifS_taxonomy classes.

    Holds the parsed mindmap XML and, after compile(), one GenDifS_Taxonomy
    object per TAXONOMY node found in the mindmap (taxonomies_by_ID and
    taxonomies_by_name).
    """

    def __init__(self, mindmap_file_location=None, *, from_string=None, verb = 1):
        """Load a mindmap from an .mm file or from an indented text outline.

        Args:
            mindmap_file_location: source handed to etree.parse(); takes
                precedence over from_string.
            from_string: indented plain-text outline, converted by
                xml_mindmap_from_string().
            verb: verbosity level stored in the module-global `verbose`.

        Raises:
            ValueError: if neither a file location nor a string is given.
        """
        global verbose
        verbose = verb
        self.mindmap_file_location = mindmap_file_location
        if mindmap_file_location is not None:
            # open .mm file, parse xml
            xml_parser = etree.XMLParser(remove_blank_text=True)
            self.mindmap_xml = etree.parse(self.mindmap_file_location, xml_parser)
        elif from_string is not None:
            self.mindmap_xml = self.xml_mindmap_from_string(from_string)
        else:
            # Previously this was only printed; execution then failed on the
            # unset self.mindmap_xml. Fail explicitly instead.
            raise ValueError("ERROR 53: no mindmap given, nothing to do")
        # upmost xml node with xml name "node"
        self.mindmap_topnode = self.mindmap_xml.getroot().find("node")
        # filled by compile()
        self.dict_of_all_taxonomy_xlm_nodes = None
        self.taxonomies_by_ID = None
        self.taxonomies_by_name = None
        self.code = None
        self.languages = None

    def xml_mindmap_from_string(self, string):
        """Convert an indented text outline into a mindmap-like ElementTree.

        Every non-empty line becomes a <node> element whose TEXT attribute is
        the stripped line; nesting follows the leading whitespace. A node's ID
        attribute is taken from an "ID" entry of string2dict(line) if present,
        otherwise the line index is used. All lines are placed below one
        synthetic top node with TEXT "xml_mindmap_from_string".

        Returns:
            etree.ElementTree with a <map> root element.
        """
        def depth_lstrip(line):
            # returns (number of leading whitespace characters, stripped text)
            line = line.rstrip()
            stripped = line.lstrip()
            return len(line) - len(stripped), stripped

        def texttree_to_xml(current_depth, parent):
            # Consumes lines starting at index i while they are at least
            # current_depth deep; returns as soon as a shallower line is seen.
            nonlocal i
            new_parent = parent
            while i < texttree_len:
                d, txt = depth_lstrip(texttree[i])
                if d == current_depth:
                    if len(txt) > 0:
                        child = etree.SubElement(parent, "node")
                        child.set("TEXT", txt)
                        dct = string2dict(txt)
                        if "ID" in dct:
                            child.set("ID", "_".join(dct["ID"]))
                        else:
                            child.set("ID", f"{i}")
                        new_parent = child
                    i += 1
                elif d > current_depth:
                    # deeper line: it belongs to the most recent sibling
                    texttree_to_xml(d, new_parent)
                else:
                    return parent
            return parent

        i = 0
        # synthetic top line at depth 0; user lines are shifted one column right
        texttree = ["xml_mindmap_from_string"]
        texttree.extend(' ' + s for s in string.split("\n"))
        texttree_len = len(texttree)
        root_map = etree.Element("map")
        xml = texttree_to_xml(i, root_map)
        return etree.ElementTree(xml)

    def compile(self):
        """Find all TAXONOMY nodes and build one GenDifS_Taxonomy per node.

        Fills dict_of_all_taxonomy_xlm_nodes, taxonomies_by_ID and
        taxonomies_by_name. If a taxonomy name is used more than once, the
        later taxonomy is registered under its node ID instead.
        """
        # create dict of TAXONOMY xml nodes
        self.dict_of_all_taxonomy_xlm_nodes = {}
        self.search_mm_for_taxonomy_xml_nodes(self.mindmap_topnode, self.dict_of_all_taxonomy_xlm_nodes)
        if verbose >= 3:
            print("INFO 131", self.dict_of_all_taxonomy_xlm_nodes)

        # for each TAXONOMY node: allocate a GenDifS_Taxonomy object
        self.taxonomies_by_ID = {}
        self.taxonomies_by_name = {}
        for xml_node_ID, xml_node in self.dict_of_all_taxonomy_xlm_nodes.items():
            t = GenDifS_Taxonomy(xml_node_ID, xml_node)
            self.taxonomies_by_ID[xml_node_ID] = t
            # t.tree is the root node of each taxonomy tree, i.e. the node with "TAXONOMY"
            t_name = t.tree.context["taxonomy_name"]
            if t_name in self.taxonomies_by_name:
                print(f"WARNING 62: multiple use of taxonomy name {t_name}; using ID {xml_node_ID} instead.")
                t_name = xml_node_ID
            self.taxonomies_by_name[t_name] = t

    def describe_mindmap(self):
        """Mindmap metadata: #nodes etc."""
        print(f"{len(self.mindmap_xml.getroot().xpath('.//node'))=} nodes")

    def search_mm_for_taxonomy_xml_nodes(self, xml_node, dict_of_all_taxonomy_nodes):
        """Collect the xml nodes which define a taxonomy.

        Each taxonomy will be translated into an own standalone named rdf graph.
        All taxonomies together will contribute to a rdf dataset.
        The default graph contains the metadata of the whole ensemble.

        A node defines a taxonomy if its TEXT is "TAXONOMY" or starts with
        "TAXONOMY ". Children carrying a "button_cancel" icon are skipped
        together with their subtrees.

        Args:
            xml_node: node at which the recursive search starts.
            dict_of_all_taxonomy_nodes: dict {ID: xml node}, updated in place.
        """
        global NewID
        # nodes without a TEXT attribute are treated as empty text
        my_text = xml_node.get('TEXT') or ''
        if my_text == "TAXONOMY" or my_text.startswith("TAXONOMY "):
            # it's essential to have unique node IDs
            ID = xml_node.get('ID', None)
            if ID is None or ID in dict_of_all_taxonomy_nodes:
                NewID += 1
                ID = f"NewID_{NewID}"
                # attributes are written with .set(); item assignment with a
                # string key is not supported by lxml elements
                xml_node.set("ID", ID)
            dict_of_all_taxonomy_nodes[ID] = xml_node

        # we allow for nested TAXONOMY trees
        for child_xml_node in xml_node.findall('node'):
            # .get() tolerates icon elements without a BUILTIN attribute
            button_cancel = any(icon.get('BUILTIN') == 'button_cancel'
                                for icon in child_xml_node.findall("icon"))
            if not button_cancel:
                self.search_mm_for_taxonomy_xml_nodes(child_xml_node, dict_of_all_taxonomy_nodes)
class GenDifS_Taxonomy():
    """class managing a stand alone taxonomy; contains root element of a tree of gdn objects

    Attributes set by __init__:
        ID, TEXT: ID and TEXT of the TAXONOMY xml node.
        dict_of_all_gdn: dict {ID: GenDifS node} of all nodes of this taxonomy.
        tree: root GenDifS node (the TAXONOMY node itself).
        raw_ttl: dict {language: Turtle source text}.
        rdf_graphs: dict {language: rdflib.Graph}.
    """

    def __init__(self, xml_ID, xml_node):
        """Build the gdn tree below xml_node and generate code for every node.

        Args:
            xml_ID: unique ID of the TAXONOMY xml node.
            xml_node: the TAXONOMY xml node itself.
        """
        self.ID = xml_ID
        self.TEXT = xml_node.get("TEXT")

        # TBD: generate terminology specific namespaces?
        # NOTE(review): the project-specific namespace IRIs are empty ("<>") in
        # this copy of the file, so these prefixes all resolve to the same base
        # IRI. Fill them in. The W3C prefixes below use the standard IRIs.
        self.default_namespaces_dict = {
            "ex": "<>",        # examples of OWL A-Box
            "cpt": "<>",       # concept; instance of skos:concept
            "sheet": "<>",     # instances from Excel-Sheet
            "": "<>",          # OWL T-Box
            "owl": "<http://www.w3.org/2002/07/owl#>",
            "rdf": "<http://www.w3.org/1999/02/22-rdf-syntax-ns#>",
            "xml": "<http://www.w3.org/XML/1998/namespace>",
            "xsd": "<http://www.w3.org/2001/XMLSchema#>",
            "rdfs": "<http://www.w3.org/2000/01/rdf-schema#>",
            "skos": "<http://www.w3.org/2004/02/skos/core#>",
            "gendifs": "<>",
        }
        self.ttl_namespaces = "\n".join(
            f"@prefix {key}: {val} ." for key, val in self.default_namespaces_dict.items())
        self.sparql_namespaces = "\n".join(
            f"PREFIX {key}: {val} " for key, val in self.default_namespaces_dict.items())

        # Dictionary { ID: GenDifS Node } of all GenDifS nodes of the current taxonomy.
        self.dict_of_all_gdn = {}

        # walk the tree several times, add more and more information

        # Step 1: build tree of gdn (GenDifS nodes);
        # afterwards self.dict_of_all_gdn is a dict of all gdn objects
        self.tree = self.build_gdntree_from_mm(xml_node, None, 0, self.dict_of_all_gdn)

        # Step 2: collect context information of each gdn to prepare
        # context free code generation
        for gdn in self.dict_of_all_gdn.values():
            gdn.collect_context()

        # Step 3: generate ttl code and integrity information
        for gdn in self.dict_of_all_gdn.values():
            gdn.compile()

        # Step 4: collect ttl code per language and allocate respective rdflib graphs
        raw_ttl_lists = {}
        self.rdf_graphs = {}
        for gdn_val in self.dict_of_all_gdn.values():
            for code_lang, code_val in gdn_val.code.items():
                if code_lang not in raw_ttl_lists:
                    raw_ttl_lists[code_lang] = [self.ttl_namespaces]
                    self.rdf_graphs[code_lang] = rdflib.Graph()
                raw_ttl_lists[code_lang].append(code_val)
                try:
                    # newline keeps the snippet off the last @prefix line
                    self.rdf_graphs[code_lang].parse(
                        data=self.ttl_namespaces + "\n" + code_val, format="turtle")
                except Exception as err:
                    # one unparsable snippet must not abort the whole taxonomy
                    print(f"ERROR 220: RDFLIB complains about")
                    print(code_val, "\n")
                    print(err)
        self.raw_ttl = {k: "\n\n".join(v) for k, v in raw_ttl_lists.items()}

    def build_gdntree_from_mm(self, xml_node, parent_gdn, depth, dict_of_all_gdn):
        """build tree of gdn (GenDifS nodes); store result in self.dict_of_all_gdn

        Node types alternate: the root is a differentia, children of a
        differentia are taxa, children of a taxon are differentiae (or
        attributes if their TEXT starts with 'ATT'). Children with a
        "button_cancel" icon and nested TAXONOMY nodes are not included.

        Args:
            xml_node: xml node to convert.
            parent_gdn: gdn of the parent node, None for the taxonomy root.
            depth: depth of xml_node below the taxonomy root.
            dict_of_all_gdn: dict {ID: gdn}, updated in place.

        Returns:
            The gdn allocated for xml_node.

        Raises:
            TypeError: if parent_gdn is neither None, a differentia nor a taxon.
        """
        # allocate new instance of gdn (gdn = GenDifS Node)
        if parent_gdn is None:  # we are called from __init__
            this_gdn = GenDifS_Differentia(xml_node, depth=depth, parent_gdn=None)
        elif isinstance(parent_gdn, GenDifS_Differentia):
            this_gdn = GenDifS_Taxon(xml_node, depth=depth, parent_gdn=parent_gdn)
        elif isinstance(parent_gdn, GenDifS_Taxon):  # new 2024-09-19
            if (xml_node.get('TEXT') or '').startswith('ATT'):
                this_gdn = gdn_attribute(xml_node, depth=depth, parent_gdn=parent_gdn)  # 2024-09-19: TBD
            else:
                this_gdn = GenDifS_Differentia(xml_node, depth=depth, parent_gdn=parent_gdn)
        else:
            # a print alone would leave this_gdn unbound and fail two lines below
            raise TypeError("ERROR 294: 'else' should not be selected")

        # add current gdn to dict_of_all_gdn
        dict_of_all_gdn[this_gdn.id] = this_gdn

        for child_xml_node in xml_node.findall('node'):
            button_cancel = any(icon.get('BUILTIN') == 'button_cancel'
                                for icon in child_xml_node.findall("icon"))
            taxonomy_node = (child_xml_node.get('TEXT') or '').startswith('TAXONOMY')
            if not (button_cancel or taxonomy_node):
                this_gdn.children_gdn_list.append(
                    self.build_gdntree_from_mm(child_xml_node, this_gdn, depth+1, dict_of_all_gdn))
        return this_gdn

    def DEPRECATED_augment(self, abox):
        """Merge an A-Box into the "RDFS_OWL" graph and compute the OWL-RL closure.

        Deprecated. Stores the merged graph in self.g_augment_before and the
        expanded graph in self.g_augment.

        NOTE(review): parts of this method were missing in the reviewed copy.
        The parsing of `abox`, the second verbose print and the per-node call at
        the end are reconstructions; confirm them against the original.

        Args:
            abox: RDF data handed to rdflib.Graph.parse(data=...).

        Returns:
            None.
        """
        if "RDFS_OWL" not in self.rdf_graphs:
            print(f"249, {self=}: 'RDFS_OWL' not in {self.rdf_graphs=}")
            return None

        self.g_augment_before = rdflib.Graph()
        self.g_augment_before.parse(data=self.rdf_graphs["RDFS_OWL"].serialize())
        try:
            self.g_augment_before.parse(data=abox)
        except Exception:
            if verbose >= 1:
                print(f"WARNING 258: RDFLIB complains about abox, augmentation failed")
        if verbose >= 2:
            print(f"augment() 264: {len(self.g_augment_before)=}", end=", ")

        # expand a copy, so that the graph before inferencing stays available
        self.g_augment = rdflib.Graph()
        self.g_augment.parse(data=self.g_augment_before.serialize())
        owlrl.DeductiveClosure(owlrl.OWLRL_Semantics, axiomatic_triples = False).expand(self.g_augment)
        if verbose >= 2:
            print(f"{len(self.g_augment)=}")

        for gdn in self.dict_of_all_gdn.values():
            gdn.augment_genus_species_DEPRECATED(self.g_augment)

    def describe(self):
        """Print ID, TEXT and the number of GenDifS nodes of this taxonomy."""
        print(f"{self.ID} {self.TEXT}: #{len(self.dict_of_all_gdn)} GenDifS nodes")
# ## gdn
class GenDifS_Node():
    """Base class of all GenDifS nodes (gdn): one node of the mindmap tree.

    Parses the node text into keyword/value pairs, stores the context
    collected by the subclasses' collect_context() and holds the generated
    Turtle snippets in self.code as a dict {language: text}.
    """

    def __init__(self, xml_node, *,
                 parent_gdn = None,
                 depth = None
                 ):
        """
        Args:
            xml_node: mindmap xml node; its ID and TEXT attributes are read.
            parent_gdn: gdn of the parent node, None for a taxonomy root.
            depth: depth of the node below the taxonomy root.
        """
        self.id = xml_node.get('ID')
        self.text = xml_node.get('TEXT')
        self.depth = depth
        self.parent_gdn = parent_gdn
        self.xml_node = xml_node
        self.python_type = self.__class__.__name__
        # filled by the tree builder, read by collect_context()
        self.children_gdn_list = []
        self.dct, self.entities = self.string_to_dict_of_entities(self.text)
        self.gendifs_type = self.gendifs_dct_type()

        # computed by collect_context()
        self.context = {}
        self.context["id"] = self.id
        self.entity_context = {}
        self.entity_context["id"] = self.id

        # computed by compile()
        self.code = {}
        self.molecule_dict = {}   # legacy; still shown by __str__
        self.augment_dict = {}
        self.augment_set = {}     # written by augment_genus_species_DEPRECATED()

    def __str__(self):
        """string representation of parsed XML Element node/@TEXT"""
        return f"{self.id} ({self.python_type}) {self.text}\n {self.molecule_dict}"

    def gendifs_dct_type(self):
        """Return the first keyword of the node text other than "default".

        Returns:
            The keyword (e.g. "ISA", "BY"), or None if the text has no keyword.
        """
        keys = list(self.dct)
        # guard against an empty dict before looking at keys[0]
        if keys and keys[0] == "default":
            keys.pop(0)
        return keys[0] if keys else None

    def string_to_dict_of_entities(self, string):
        """parse a string like `ONE text one TWO some other text`
        into a dict like `{"ONE": "text_one", "TWO": "some_other_text" }`

        Text before the first keyword is stored under the key "default";
        since only isupper() tokens are accepted as keywords from user input,
        there is no conflict with that key.

        Returns:
            Tuple (dict of underscore-joined strings, dict of Entity objects).
        """
        dct = string2dict( string )
        # NOTE(review): the body of this `if` was missing in the reviewed copy.
        # Dropping an empty "default" entry is what the .get("default", ...)
        # fallbacks elsewhere in the file suggest. Confirm.
        if len(dct["default"]) == 0:
            del dct["default"]
        dct2 = { k: "_".join(v) for k,v in dct.items() }
        dct3 = { k: Entity( self, "_".join(v) ) for k,v in dct.items() }
        if verbose >= 3: print(f"318: {dct3=}")
        return dct2, dct3

    def compile(self):
        """Generate the code of this node by dispatching on context["codeclass"].

        The selected code_* method is called with the entries of
        self.entity_context as keyword arguments and stores its result in
        self.code. An unknown or missing codeclass only yields a warning.

        NOTE(review): "TAXONOMY" and "UNCONDITIONAL_SOME" are set by
        collect_context() but are not in the dispatch table, so they only
        produce WARNING 513. Confirm this is intended.

        Returns:
            self.code, a dict {language: Turtle text}.
        """
        codeclass2fn = {
            'taxon': self.code_taxon,
            'ISA': self.code_ISA,
            'BY': self.code_BY_FROM,
            'BY_SOME': self.code_BY_FROM_SOME,
            'SUP': self.code_SUP,
            ## 'ISA_SOME': self.code_ISA_SOME,
            'DEF': self.code_DEF,
            # 'REL': self.code_REL
        }
        codeclass = self.context.get("codeclass")  # we get a string
        if codeclass is None:
            if verbose >= 1:
                print(f"WARNING 510: {self.context=}")
        elif codeclass not in codeclass2fn:
            if verbose >= 1:
                print(f"WARNING 513: {codeclass=} not in codeclass2fn")
        else:
            # call the code generating function; the code_* methods call
            # .c()/.e() on their arguments, hence entity_context, not context
            codeclass2fn[codeclass](**self.entity_context)
        return self.code

    # the "meat" of the library: the functions which actually generate code
    #
    # NOTE(review): several OWL snippets below emit "a owl:class" (lower case);
    # the OWL vocabulary term is owl:Class. The strings are left unchanged here.

    def code_taxon(self, *, id, codeclass, species):
        """Declare the class of a single taxon plus one example instance.

        These definitions are redundant: most other snippets also define the
        involved classes.
        """
        lang = "RDFS"
        ttl = [ f"""# {id} {lang}""" ,
                f"""{ species.c() } a owl:Class . """ ]
        self.code[lang] = "\n".join(ttl)

        lang = "RDFStest"
        ttl = [ f"""# {id} {lang}""" ,
                f"""{ species.e(id) } a { species.c() } . """ ]
        self.code[lang] = "\n".join(ttl)

    def code_TAXONOMY(self, *, id, codeclass, taxonomy_name, graph, species_list):
        """Emit the SKOS concept scheme of a taxonomy with its top concepts.

        TBD: more detailed ontology metadata etc.
        """
        lang = "SKOS"
        ttl = [ f"""# {id} {lang}""" ,
                f"""[ owl:imports <> ] .""" ,
                f"{taxonomy_name.e(None, prefix='cpt')} a skos:ConceptScheme ." ]
        ttl += [ f"{ species.e(None, prefix='cpt') } a skos:Concept ;\n skos:inScheme {taxonomy_name.e(None, prefix='cpt')} ." for species in species_list ]
        self.code[lang] = "\n".join(ttl)

    def code_DEF(self, *, id, codeclass, genus, source, text_list):
        """Class definition text; currently generates no code."""
        # all former snippets of this method are disabled in the reviewed copy
        return None

    def code_ISA(self, *, id, codeclass, genus, species_list):
        """Emit subclass axioms (RDFS), test instances and SKOS broader links."""
        # class definitions
        lang = "RDFS"
        ttl = [ f"""# {id} {lang}""" ,
                f"{ genus.c() } a owl:Class ." ]
        ttl += [ f"{ species.c() } a owl:Class;\n rdfs:subClassOf { genus.c() } ." for species in species_list ]
        self.code[lang] = "\n".join(ttl)

        lang = "RDFStest"
        ttl = [ f"""# {id} {lang}""" ,
                f"{ genus.e(id) } a { genus.c() } ." ]
        ttl += [ f"{ species.e(id) } a { species.c() } ." for species in species_list ]
        # bottom up inferencing "ISA"
        ttl += [ f"{ species.e(id) } gendifs:classify_similar {genus.e(id)} ." for species in species_list ]
        self.code[lang] = "\n".join(ttl)

        lang = "SKOS"
        ttl = [ f"""# {id} {lang}""" ,
                f"{ genus.e(None, prefix='cpt') } a skos:Concept ." ]
        ttl += [ f"{ species.e(None, prefix='cpt') } a skos:Concept ;\n skos:broader { genus.e(None, prefix='cpt') } ." for species in species_list ]
        self.code[lang] = "\n".join(ttl)

    def code_BY_FROM(self, *, id, codeclass, genus, by, frm, species_list):
        """Emit code for `genus BY property FROM range` with its species.

        Generates RDFS subclass axioms, OWL restriction/intersection classes,
        an OWL test instance and a SKOS collection; records the generated
        class names in self.augment_dict["MODALITY"].
        """
        # `lang` was used here without being assigned; "RDFS" and the header
        # line follow the pattern of the other code_* methods
        lang = "RDFS"
        ttl = [ f"""# {id} {lang}""" ]
        ttl += [ f"{ species.c() } a owl:Class;\n rdfs:subClassOf { genus.c() } ." for species in species_list ]
        self.code[lang] = "\n".join(ttl)

        restriction_technically_owlthing = f"BY_{id}_restriction_owlthing"
        restriction_userfriendly_owlthing = f"BY_{ by.e(None) }_FROM_owlthing"

        restriction_technically = f"BY_{id}_restriction"
        restriction_userfriendly = f"BY_{ by.e(None) }_FROM_{ frm.e(None) }"

        intersection_technically = f"BY_{id}_intersection"
        intersection_userfriendly = f"{restriction_userfriendly}_INTERSECT_{ genus.e(None) }"

        lang = "OWL"
        ttl = [ f"""# {id} {lang}""" ,
                f"""{ by.c() } rdf:type owl:ObjectProperty .""" ]
        ttl += [ f""":{restriction_technically_owlthing} a owl:class ;""" ,
                 f""" a owl:Restriction ;""" ,
                 f""" rdfs:label "{restriction_userfriendly_owlthing}" ;""" ,
                 f""" owl:onProperty { by.c() } ;""" ,
                 f""" owl:someValuesFrom owl:Thing .""" ]
        ttl += [ f""":{restriction_technically} a owl:class ;""" ,
                 f""" a owl:Restriction ;""" ,
                 f""" rdfs:label "{restriction_userfriendly}" ;""" ,
                 f""" owl:onProperty { by.c() } ;""" ,
                 f""" owl:someValuesFrom { frm.c() } .""" ]
        ttl += [ f""":{intersection_technically} a owl:class ;""" ,
                 f""" rdfs:label "{intersection_userfriendly}" ;""" ,
                 f""" rdfs:subClassOf { genus.c() } ;""" ,
                 f""" owl:intersectionOf (:{restriction_technically} { genus.c() } ) .""" ]
        self.code[lang] = "\n".join(ttl)

        # assess MODALITY
        # collect all information which we need to assess MUST (and also SHOULD, MAY, SHOULDNOT, MUSTNOT)
        self.augment_dict["MODALITY"] = {
            "restriction_technically_owlthing": restriction_technically_owlthing,
            "restriction_technically": restriction_technically,
            "intersection_technically": intersection_technically,
        }

        # allocate example instance plus test condition
        lang = "OWLtest"
        ttl = [ f"""# {id} {lang}""" ,
                f"""ex:{restriction_technically}_example a :{restriction_technically} .""" ,
                f"""{ genus.e(id) } a { genus.c() } ;""" ,
                f""" { by.c() } { frm.e(id) } ;""" ,
                f""" gendifs:classify_similar ex:{restriction_technically}_example .""" ,
                f"""{frm.e(id) } a { frm.c() } .""" ]
        self.code[lang] = "\n".join(ttl)

        self.inspect_inferencing = Test_classify_similar(id, self.code["OWL"] + self.code["OWLtest"] )

        skos_collection_name_technically = f"cpt:BY_{id}_intersection"
        skos_collection_name_userfriendly = f"""{ genus.e(None, prefix='cpt') } BY { by.e(None, prefix='cpt') } FROM { frm.e(None, prefix='cpt') }"""

        lang = "SKOS"
        ttl = [ f"""# {id} {lang}""" ,
                f"{ genus.e(None, prefix='cpt') } a skos:Concept ." ,
                f"{skos_collection_name_technically} a skos:Collection ;" ,
                f""" rdfs:label "{skos_collection_name_userfriendly}" .""" ]
        ttl += [ f"{ species.e(None, prefix='cpt') } a skos:Concept ;\n skos:broader { genus.e(None, prefix='cpt') } ." for species in species_list ]
        ttl += [ f"{skos_collection_name_technically} skos:member { species.e(None, prefix='cpt') } ." for species in species_list ]
        ttl += [ f"{by.e(None, prefix='cpt')} a rdfs:Property ;\n rdfs:subPropertyOf skos:related ." ]
        self.code[lang] = "\n".join(ttl)

    def code_BY_FROM_SOME(self, *, id, codeclass, genus, by, frm, some, species):
        """Emit code for a species defined by `BY property FROM range SOME value`.

        Notebook: reference-BY-FROM-SOME.html

        NOTE(review): reverse() is not defined in the reviewed part of the
        file; presumably a module-level helper. Confirm it exists.
        """
        lang = "DE"
        ttl = [ f"""# {id} {lang}""" ]
        ttl += [ f"""{ species.c() } :explanation "*{ species.c(reverse = True) }* ist eine Subklasse der Klasse *{ genus.c(reverse = True) }* ." . """ ]
        ttl += [ f"""{ some.c() } :explanation "*{reverse(some)}* ist eine Subklasse der Klasse *{reverse(frm)}* ." . """ ]
        # single braces: doubled braces emitted the literal expression text
        # instead of the species name
        ttl += [ f"""{ genus.c() } :explanation "Ein *{ genus.c(reverse = True) }*, das für das Attribut *{reverse(by)}* einen Wert aus *{reverse(some)}* hat, ist ein *{ species.c(reverse = True) }*." . """ ]
        self.code[lang] = "\n".join(ttl)

        lang = "RDFS"
        ttl = [ f"""# {id} {lang}""" ,
                f"{ species.c() } a owl:Class;\n rdfs:subClassOf { genus.c() } ." ]
        ttl += [ f"""{ some.c() } rdfs:subClassOf { frm.c() } .""" ]
        self.code[lang] = "\n".join(ttl)

        restriction_technically = f"BY_SOME_{id}_restriction"
        restriction_userfriendly = f"BY_{ by.e(None) }_FROM_{ frm.e(None) }_SOME_{some.e(None) }"

        intersection_technically = f"BY_SOME_{id}_intersection"
        intersection_userfriendly = f"({restriction_userfriendly})_INTERSECT_{ genus.e(None) }"

        lang = "OWL"
        ttl = [ f"""# {id} {lang}""" ,
                f"""{ by.c() } rdf:type owl:ObjectProperty .""" ,
                f""":{restriction_technically} a owl:class ;""" ,
                f""" a owl:Restriction ;""" ,
                f""" rdfs:label "{restriction_userfriendly}" ;""" ,
                f""" owl:onProperty { by.c() } ;""" ,
                f""" owl:someValuesFrom { some.c() } .""" ]  # some
        # change e.g. genus to species in the intersection to test Test_classify_similar
        ttl += [ f""":{intersection_technically} a owl:class ;""" ,
                 f""" rdfs:label "{intersection_userfriendly}" ;""" ,
                 f""" rdfs:subClassOf { species.c() } ;""" ,
                 f""" owl:intersectionOf (:{restriction_technically} { genus.c() } ) .""" ]
        self.code[lang] = "\n".join(ttl)

        # allocate example instance plus test condition
        lang = "OWLtest"
        ttl = [ f"""# {id} {lang}""" ,
                f"""{ species.e(id) } a { species.c() } .""" ,
                f"""{ genus.e(id) } a { genus.c() } ;""" ,
                f""" { by.c() } { some.e(id) } ;""" ,
                f""" gendifs:classify_similar { species.e(id) } .""" ,
                f"""{ some.e(id) } a { some.c() } .""" ]
        self.code[lang] = "\n".join(ttl)

        self.inspect_inferencing = Test_classify_similar(id, self.code["RDFS"] + self.code["OWL"] + self.code["OWLtest"] )

        lang = "SKOS"
        ttl = [ f"""# {id} {lang}""" ,
                f"{ genus.e(None, prefix='cpt') } a skos:Concept ." ,
                f"{ species.e(None, prefix='cpt') } a skos:Concept ;\n skos:broader { genus.e(None, prefix='cpt') } ." ,
                f"{ frm.e(None, prefix='cpt') } a skos:Concept ." ,
                f"{ some.e(None, prefix='cpt') } a skos:Concept ;\n skos:broader { frm.e(None, prefix='cpt') } .",
                f"{ by.e(None, prefix='cpt') } a rdfs:Property ;\n rdfs:subPropertyOf skos:related ." ]
        self.code[lang] = "\n".join(ttl)

    def code_UNCONDITIONAL_SOME(self, *, id, codeclass, some, species):
        """Emit code for a species defined by any property pointing into `some`."""
        restriction_technically = f"SOME_{id}_restriction"
        restriction_userfriendly = f"SOME_{ some.e(None) }"

        lang = "DE"
        ttl = [ f"""# {id} {lang}""" ]
        ttl += [ f"""{ species.c() } :explanation "Ein beliebiges Ding, bei dem ein beliebiges Attribut einen Wert aus *{ some.c(reverse = True) }* hat, ist ein *{ species.c(reverse = True) }*." . """ ]
        self.code[lang] = "\n".join(ttl)

        lang = "RDFS"
        ttl = [ f"""# {id} {lang}""" ,
                f"""{ species.c() } a owl:Class .""" ]
        self.code[lang] = "\n".join(ttl)

        lang = "OWL"
        ttl = [ f"""# {id} {lang}""" ,
                f""":{restriction_technically} a owl:class ;""" ,
                f""" rdfs:subClassOf { species.c() } ;""" ,
                f""" a owl:Restriction ;""" ,
                f""" rdfs:label "{restriction_userfriendly}" ;""" ,
                f""" owl:onProperty owl:topObjectProperty ;""" ,
                f""" owl:someValuesFrom { some.c() } .""" ]  # some
        self.code[lang] = "\n".join(ttl)

        # allocate example instance plus test condition
        lang = "OWLtest"
        ttl = [ f"""# {id} {lang}""" ,
                f"""{ species.e(id) } a { species.c() } .""" ,
                f"""ex:thing_{id} a owl:Thing ;""" ,
                f""" owl:topObjectProperty { some.e(id) } ;""" ,
                f""" gendifs:classify_similar { species.e(id) } .""" ,
                f"""{ some.e(id) } a { some.c() } .""" ]
        self.code[lang] = "\n".join(ttl)

        self.inspect_inferencing = Test_classify_similar(id, self.code["RDFS"] + self.code["OWL"] + self.code["OWLtest"] )

    def code_SUP(self, *, id, codeclass, species, genus_list):
        """intersection of all genus in genus_list is subclass of species"""
        # bottom up inferencing:
        # if x is a stallion, then x is a horse; and x is a male
        lang = "RDFS"
        ttl = [ f"""# {id} {lang}""" ,
                f"{ species.c() } a owl:Class ." ]
        ttl += [ f"{ genus.c() } a owl:Class.\n { species.c() } rdfs:subClassOf { genus.c() } ." for genus in genus_list ]
        self.code[lang] = "\n".join(ttl)

        # top down inferencing:
        # if x is a horse and x is a male, then x is a stallion
        intersection_technically = f"SUP_{id}"
        sup_genus_join = " ".join([ f"{ g.c() }" for g in genus_list ])

        lang = "OWL"
        ttl = [ f"""# {id} {lang}""" ,
                f""":{intersection_technically} a owl:class ;""" ,
                f""" rdfs:subClassOf { species.c() } ;""" ,  # comment out to test self.inspect_inferencing
                f""" owl:intersectionOf ({ sup_genus_join } ) .""" ]
        self.code[lang] = "\n".join(ttl)

        # we have one single example of species:
        # this species example is element of all genus classes involved, but NOT element of the intersection
        # (which in turn is a subclass of the species in question).
        lang = "OWLtest"
        ttl = [ f"""# {id} {lang}""" ]
        ttl += [ f"""{ species.e(id) } a { genus.c() } .""" for genus in genus_list ]  # species genus is correct here
        ttl += [ f"""{ species.e(id) }_2 a { species.c() } .""" ]
        ttl += [ f"""{ species.e(id) } gendifs:classify_similar { species.e(id) }_2 .""" ]
        self.code[lang] = "\n".join(ttl)

        self.inspect_inferencing = Test_classify_similar(id, self.code["OWL"] + self.code["OWLtest"] )

    def code_ISA_SOME(self, *, id, codeclass, genus, by, some, species_list): ...

    def code_REL(self, *, id, codeclass, by, some, species_list): ...
class Entity():
    """A named thing taken from the text of a GenDifS node.

    The text is either a free name or a full IRI written as `<...>`. The
    methods c() and e() turn it into Turtle terms.

    Attributes:
        source_gdn: the GenDifS node the entity was read from.
        text: the entity text as given.
        it_is_a_uri: True if text is enclosed in `<` and `>`.
    """

    def __init__(self, source_gdn, text ):
        self.source_gdn = source_gdn
        self.text = text
        self.it_is_a_uri = len(text) > 0 and text[0] == '<' and text[-1] == '>'

    def __str__(self):
        return self.text

    def __repr__(self):
        return f"""Entity({self.source_gdn.id}, {self.text=})"""

    def c(self, *, prefix = "", reverse = False):
        """create category curi

        A full IRI is returned unchanged; any other text becomes
        `prefix:name` with non-alphanumeric characters replaced by "_".
        `reverse` is accepted but currently has no effect.
        """
        return self.text if self.it_is_a_uri else prefix + ":" + self.uri_to_string(self.text)

    def e(self, id, *, prefix = "ex", reverse = False):
        """create example, i.e. an instance

        Args:
            id: suffix appended as `_<id>`; None means no suffix.
            prefix: namespace prefix of the result.
            reverse: accepted but currently has no effect.

        Returns:
            `prefix:name` or `prefix:name_<id>`; for a full IRI the enclosing
            `<` and `>` are dropped before the name is built.
        """
        name = self.text[1:-1] if self.it_is_a_uri else self.text
        r = prefix + ":" + self.uri_to_string(name)
        if id is not None:
            r += f"_{id}"
        return r

    def uri_to_string(self, uri):
        """Replace every non-alphanumeric character of `uri` by "_"."""
        # operate on the argument; using self.text here made the `<`/`>`
        # stripping done by e() ineffective
        return "".join([ c if c.isalnum() else "_" for c in uri ])
class GenDifS_Differentia(GenDifS_Node):
def __init__(self, xml_node, *, parent_gdn=None, depth=None):
super().__init__(xml_node, parent_gdn=parent_gdn, depth=depth)
def describe(self, *, indent = False):
d = self.depth if indent else 0
print(f"--- {d*' '}differentia {self.gendifs_type}: {self.dct} --- \n")
print(f"{d*' '}{self.context=} \n")
# print(f"{d*' '}{self.molecule_per_language=} \n")
print(f"{d*' '}{self.code.keys()=}\n")
print(f"{d*' '}{self.code=} \n")
print(f"{d*' '}{self.augment_dict=} \n")
#print(f"{d*' '}{(20-d)*'-'}")
def collect_context(self):
#print("367:", self.gendifs_type)
match self.gendifs_type:
case 'TAXONOMY':
self.context["codeclass"] = "TAXONOMY"
self.context["taxonomy_name"] = self.dct.get("TAXONOMY") if self.dct.get("TAXONOMY") != '' else
self.context["graph"] = f"graph_{}"
self.context["species_list"] = [ child.dct["default"] for child in self.children_gdn_list ]
self.entity_context["codeclass"] = "TAXONOMY"
self.entity_context["taxonomy_name"] = self.entities.get("TAXONOMY") if self.entities.get("TAXONOMY") != '' else Entity(self,
self.entity_context["graph"] = Entity(self, f"graph_{}")
self.entity_context["species_list"] = [ child.entities["default"] for child in self.children_gdn_list ]
case 'ISA':
self.context["codeclass"] = "ISA"
self.context["genus"] = "anything" if self.parent_gdn == None else self.parent_gdn.dct["default"]
self.context["species_list"] = [ child.dct["default"] for child in self.children_gdn_list ]
self.entity_context["codeclass"] = "ISA"
self.entity_context["genus"] = Entity(self, "anything") if self.parent_gdn == None else self.parent_gdn.entities["default"]
self.entity_context["species_list"] = [ child.entities["default"] for child in self.children_gdn_list ]
case 'BY':
self.context["codeclass"] = "BY"
self.context["genus"] = "anything" if self.parent_gdn == None else self.parent_gdn.dct["default"]
self.context["by"] = self.dct.get("BY", "anyproperty")
self.context["frm"] = self.dct.get("FROM", "anything")
self.context["species_list"] = [ child.dct["default"] for child in self.children_gdn_list ]
self.entity_context["codeclass"] = "BY"
self.entity_context["genus"] = Entity(self, "anything") if self.parent_gdn == None else self.parent_gdn.entities["default"]
self.entity_context["by"] = self.entities.get("BY", "anyproperty")
self.entity_context["frm"] = self.entities.get("FROM", "anything")
self.entity_context["species_list"] = [ child.entities["default"] for child in self.children_gdn_list ]
case 'SUP':
"""SUP: species, list of genus1, genus2 (to generate intersection)"""
self.context["codeclass"] = "SUP"
self.context["species"] = "anything" if self.parent_gdn == None else self.parent_gdn.dct.get("default", "context_SUP_NONE_species")
self.context["genus_list"] = [ child.dct["default"] for child in self.children_gdn_list ]
self.entity_context["codeclass"] = "SUP"
self.entity_context["species"] = Entity(self, "anything") if self.parent_gdn == None else self.parent_gdn.entities["default"]
self.entity_context["genus_list"] = [ child.entities["default"] for child in self.children_gdn_list ]
case 'SOME':
parent_differentia_gendifs_type = self.parent_gdn.parent_gdn.gendifs_type
parent_differentia_gendifs_type = None
match parent_differentia_gendifs_type:
case 'BY':
self.context["codeclass"] = "BY_SOME"
self.context["genus"] = self.parent_gdn.parent_gdn.parent_gdn.dct.get("default", "anything")
self.context["by"] = self.parent_gdn.parent_gdn.dct.get("BY", "anyproperty")
self.context["frm"] = self.parent_gdn.parent_gdn.dct.get("FROM", "anything")
self.context["some"] = self.dct.get("SOME", "anything")
self.context["species"] = self.parent_gdn.dct.get("default", "context_BY_SOME_NONE_species")
self.entity_context["codeclass"] = "BY_SOME"
self.entity_context["genus"] = self.parent_gdn.parent_gdn.parent_gdn.entities.get("default", Entity(self, "anything"))
self.entity_context["by"] = self.parent_gdn.parent_gdn.entities.get("BY", Entity(self, "anyproperty"))
self.entity_context["frm"] = self.parent_gdn.parent_gdn.entities.get("FROM", Entity(self, "anything"))
self.entity_context["some"] = self.entities.get("SOME", Entity(self, "anything"))
self.entity_context["species"] = self.parent_gdn.entities.get("default", Entity(self, "context_BY_SOME_NONE_species"))
case 'ISA':
self.context["codeclass"] = "BY_SOME"
self.context["genus"] = self.parent_gdn.parent_gdn.parent_gdn.dct.get("default", "anything")
self.context["by"] = "anyproperty"
self.context["frm"] = "anything"
self.context["some"] = self.dct.get("SOME", "anything")
self.context["species"] = self.parent_gdn.dct.get("default", "context_ISA_SOME_NONE_species")
self.entity_context["codeclass"] = "BY_SOME"
self.entity_context["genus"] = self.parent_gdn.parent_gdn.parent_gdn.entities.get("default", Entity(self, "anything"))
self.entity_context["by"] = Entity(self, "anyproperty")
self.entity_context["frm"] = Entity(self, "anything")
self.entity_context["some"] = self.entities.get("SOME", Entity(self, "anything"))
self.entity_context["species"] = self.parent_gdn.entities.get("default", Entity(self, "context_ISA_SOME_NONE_species"))
case _:
self.context["codeclass"] = "UNCONDITIONAL_SOME"
self.context["some"] = self.dct.get("SOME", "anything")
self.context["species"] = self.parent_gdn.dct.get("default", "context_MINIMAL_SOME_NONE_species")
self.entity_context["codeclass"] = "UNCONDITIONAL_SOME"
self.entity_context["some"] = self.entities.get("SOME", Entity(self, "anything"))
self.entity_context["species"] = self.parent_gdn.entities.get("default", Entity(self, "context_MINIMAL_SOME_NONE_species"))
case 'REL':
case 'DEF':
self.context["codeclass"] = "DEF"
self.context["genus"] = self.parent_gdn.dct["default"]
self.context["source"] = self.dct.get("DEF", "this_taxonomy")
self.context["text_list"] = [ child.text for child in self.children_gdn_list ]
self.entity_context["codeclass"] = "DEF"
self.entity_context["genus"] = self.parent_gdn.entities["default"]
self.entity_context["source"] = self.entities.get("DEF", Entity(self, "this_taxonomy"))
self.entity_context["text_list"] = [ child.text for child in self.children_gdn_list ]
case _:
if verbose >= 1:
print(f'Error 297: collect_context(): unknown gendifs_type {self.gendifs_type} in node "{self.text}"')
def augment_genus_species_DEPRECATED(self, augment_graph):
    """Compute the "edge" and the non-disjoint species pairs for this node.

    Deprecated helper. Reads ``self.entity_context["genus"]`` and
    ``self.entity_context["species_list"]``; when either is missing the
    method does nothing and returns ``None``.

    Results are stored in ``self.augment_set``:

    * ``"edge"``: instances of the genus that are not an instance of any
      of the species.
    * ``"non_disjoint"``: dict mapping a pair of species class terms to the
      non-empty set of instances the two species share.

    Args:
        augment_graph: graph object offering ``query(sparql_string)``; every
            result row's first element must offer ``n3()``.

    Returns:
        None.
    """
    def instances(graph, category):
        # all instances (as N3 strings) typed with the given class term
        q = f"""SELECT ?i WHERE {{ ?i a {category} . }}"""
        return {row[0].n3() for row in graph.query(q)}

    context = self.entity_context
    if "genus" not in context or "species_list" not in context:
        return None

    genus_instances = instances(augment_graph, context["genus"].c())

    # for each species: all instances (species sharing a class term are merged)
    species_instances_dict = {}
    for species in context["species_list"]:
        term = species.c()
        species_instances_dict.setdefault(term, set()).update(
            instances(augment_graph, term))
    if verbose >= 3:
        print(f"557 {species_instances_dict=}")

    # edge: borderline between data lake and land, coming from direction lake
    union_of_species_instances = set().union(*species_instances_dict.values())
    edge = genus_instances - union_of_species_instances
    if verbose >= 2:
        # NOTE(review): the first replacement field was empty (a SyntaxError);
        # the node text is printed instead -- confirm the intended value.
        print(f"546: {self.text}, {edge=})")
    self.augment_set["edge"] = edge

    # every unordered pair of species that shares at least one instance
    non_disjoint = {}
    terms = list(species_instances_dict)
    for i, first in enumerate(terms):
        for second in terms[i + 1:]:
            shared = species_instances_dict[first] & species_instances_dict[second]
            if shared:
                non_disjoint[(first, second)] = shared
    self.augment_set["non_disjoint"] = non_disjoint
class GenDifS_Taxon(GenDifS_Node):
    """Node type whose context consists only of its own "default" entry.

    ``collect_context`` records the code class ``"taxon"`` and the node's
    own ``"default"`` value as the species.
    """

    def __init__(self, xml_node, *, parent_gdn=None, depth=None):
        """Delegate construction unchanged to ``GenDifS_Node``."""
        super().__init__(xml_node, parent_gdn=parent_gdn, depth=depth)

    def describe(self, *, indent = False):
        """Print the node's default text, its context, and a separator line.

        Args:
            indent: when true, every line is indented by ``self.depth``
                spaces and the separator is shortened accordingly.
        """
        d = self.depth if indent else 0
        pad = d * ' '
        print(f"{pad}{self.dct.get('default')}")
        print(f"{pad}{self.context=}")
        print(f"{pad}{(20-d)*'='}")

    def collect_context(self):
        """Fill both context dicts with the code class and the species."""
        self.context["codeclass"] = "taxon"
        self.context["species"] = self.dct.get("default")
        self.entity_context["codeclass"] = "taxon"
        self.entity_context["species"] = self.entities.get("default")

    def augment_genus_species_DEPRECATED(self, augment_graph):
        """Deprecated hook; does nothing for this node type.

        NOTE(review): the method had no body (a syntax error); restored as
        an explicit no-op -- confirm that nothing else was intended here.
        """
        return None
# generate ttl code
class Test_classify_similar:
    """Check the ``gendifs:classify_similar`` statements of a Turtle snippet.

    For every statement ``?example1 gendifs:classify_similar ?example2`` the
    classes of both examples are collected before inferencing, and the
    classes of ``example1`` again after computing the OWL-RL deductive
    closure. The pair passes when

    * both examples have at least one class before inferencing,
    * their class sets are disjoint before inferencing, and
    * every class ``example2`` had before inferencing is a class of
      ``example1`` after inferencing.

    Attributes set by the constructor:
        ns: prefix declarations prepended to ``ttl`` before parsing.
        source, ttl: the constructor arguments, stored unchanged.
        test_errors: always ``None`` here.
        g1: graph parsed from ``ns + ttl`` (no inferencing).
        g2: same data, expanded with the OWL-RL deductive closure.
        test_results: ``{(example1, example2): {check name: bool, "ok": bool}}``;
            the examples are stored as ``bytes`` (result of ``.encode()``).
        test_ok: ``True`` iff every pair is "ok" (also ``True`` with no pairs).
    """

    def __init__(self, id, ttl, *, source = None ):
        """Run the test.

        Args:
            id: identifier that is only echoed in diagnostic messages.
            ttl: Turtle text to be tested (without prefix declarations).
            source: optional origin information, stored unchanged.
        """
        # NOTE(review): the rdf/rdfs/owl IRIs were empty (`<>`); the standard
        # W3C namespace IRIs are restored here.
        self.ns = """
        PREFIX : <urn:default#>
        PREFIX ex: <urn:ex#>
        PREFIX cpt: <urn:cpt#>
        PREFIX gendifs: <urn:gendifs#>
        PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        PREFIX owl: <http://www.w3.org/2002/07/owl#>
        """
        self.test_errors = None
        self.source = source
        self.ttl = ttl
        self.test_results = {}

        # NOTE(review): the WHERE clauses and closing quotes of both queries
        # were missing and are reconstructed.
        # classes of example1
        q1 = """
            SELECT ?example1 ?example2 ?category1
            WHERE {
                ?example1 gendifs:classify_similar ?example2 .
                ?example1 a ?category1 .
            }
        """
        # classes of example2
        q2 = """
            SELECT ?example1 ?example2 ?category2
            WHERE {
                ?example1 gendifs:classify_similar ?example2 .
                ?example2 a ?category2 .
            }
        """

        # {(example1, example2, label): set of classes}
        categories = {}

        self.g1 = rdflib.Graph()
        self.g1.parse(data = self.ns + self.ttl, format = "turtle")
        self._collect_categories(self.g1, q1, "cat1_before_inferencing", categories)
        self._collect_categories(self.g1, q2, "cat2_before_inferencing", categories)

        self.g2 = rdflib.Graph()
        self.g2.parse(data = self.ns + self.ttl, format = "turtle")
        owlrl.DeductiveClosure(owlrl.OWLRL_Semantics, axiomatic_triples = False).expand(self.g2)

        # After inferencing, instance ex_1 is potentially an instance of many
        # classes cat_1 (the same holds for ex_2). One of these classes must
        # be the (ideally single) class cat_2 that ex_2 already belonged to
        # *before* inferencing.
        self._collect_categories(self.g2, q1, "cat1_after_inferencing", categories)

        # regroup: {(example1, example2): {label: set of classes}}
        sets_by_pair = {}
        for (ex_1, ex_2, label), classes in categories.items():
            sets_by_pair.setdefault((ex_1, ex_2), {})[label] = classes

        for pair, sets in sets_by_pair.items():
            # a missing label means "no class found": treat it as an empty
            # set so the pair is reported as failed instead of raising KeyError
            cat1_before = sets.get("cat1_before_inferencing", set())
            cat2_before = sets.get("cat2_before_inferencing", set())
            cat1_after = sets.get("cat1_after_inferencing", set())

            result = {
                # are cat1 and cat2 not empty, are they instances of some classes?
                "cat1 not empty": bool(cat1_before),
                "cat2 not empty": bool(cat2_before),
                # are cat1_before_inferencing and cat2_before_inferencing disjoint?
                "disjoint before": cat1_before.isdisjoint(cat2_before),
                # has cat2_before_inferencing become a subset of cat1_after_inferencing?
                "subset after": cat2_before <= cat1_after,
            }
            result["ok"] = all(result.values())
            self.test_results[pair] = result

            if result["ok"]:
                if verbose >= 3:
                    print(f"""766: Test_classify_similar OK, {id=}, tuple={pair!r}.""")
            elif verbose >= 1:
                print(f"""WARNING 767: Test_classify_similar FAILED, {id=},\n {self.test_results=}""")
                # NOTE(review): only these two labels are printed; a dump of
                # the graphs seems to be intended here -- confirm.
                print("-- self.g1 --")
                print("-- self.g2 --")

        self.test_ok = all( v["ok"] for v in self.test_results.values() )

    @staticmethod
    def _collect_categories(graph, query, label, categories):
        """Add the classes returned by `query` on `graph` to `categories`.

        Every result row is ``(example1, example2, category)``; the category
        is added to the set stored under ``(example1, example2, label)``.
        All three terms are stored in their ``.encode()``-ed form.
        """
        for example1, example2, category in graph.query(query):
            key = (example1.encode(), example2.encode(), label)
            categories.setdefault(key, set()).add(category.encode())
class Integrity_Assessment:
    """Assess the instance data of a graph against a taxonomy.

    Attributes set by the constructor:
        taxonomy: the taxonomy object passed in.
        g1: the original graph, left untouched.
        g2: copy of ``g1`` plus the taxonomy's "RDFS"/"OWL" graphs (if
            present), optionally expanded with the OWL-RL deductive closure.
        verbose: verbosity level used for diagnostic prints.
        assess_dict: ``{node id: {check name: ttl text}}`` with the check
            names ``"EDGE"``, ``"DISJOINT"`` and ``"MODALITY"``.
        assess_graph: graph parsed from all non-empty ttl texts.
    """

    def __init__(self, g1, taxonomy, *, inference = True, verbose = 1):
        """Build ``g2``, run ``test_INTEGRITY`` on every node, collect results.

        Args:
            g1: original graph; no taxonomy added.
            taxonomy: object offering ``rdf_graphs`` (dict of graphs),
                ``dict_of_all_gdn`` (dict of nodes) and ``ttl_namespaces``
                (prefix declarations as text).
            inference: when true (default), compute the deductive closure of
                ``g2``; otherwise ``g2`` is left as is.
            verbose: verbosity level for diagnostic prints.
        """
        self.taxonomy = taxonomy
        self.assess_dict = {}
        # g1: original graph; no other classes etc., i.e. no taxonomy added
        self.g1 = g1
        self.verbose = verbose

        # create deep copy of g1 via a serialization round trip
        self.g2 = rdflib.Graph().parse(
            data = self.g1.serialize(format = "turtle"), format = "turtle")

        # add taxonomy t-box to g2
        for language in ("RDFS", "OWL"):
            if language in taxonomy.rdf_graphs:
                self.g2 += taxonomy.rdf_graphs[language]

        # what can be inferred? compute deductive closure!
        if inference:  # default case; otherwise g2 is left untouched
            owlrl.DeductiveClosure(owlrl.OWLRL_Semantics, axiomatic_triples = False).expand(self.g2)

        if self.verbose >= 2:
            print(f"INFO 1323: {inference=}, {len(self.g1)=}, {len(self.g2)=}")

        # perform integrity test
        # NOTE(review): the loop had no body; calling test_INTEGRITY is the
        # only thing that fills assess_dict -- confirm.
        for gdn in taxonomy.dict_of_all_gdn.values():
            self.test_INTEGRITY(gdn)

        # collect ttl into the result graph
        self.assess_graph = rdflib.Graph()
        for gdn_info in self.assess_dict.values():
            for integrity_check in gdn_info.values():
                if len( integrity_check ) == 0:
                    continue
                # best effort: one unparsable snippet must not abort the run
                # NOTE(review): the try/except lines were missing; the broad
                # exception type is an assumption.
                try:
                    self.assess_graph.parse(
                        data = self.taxonomy.ttl_namespaces + integrity_check,
                        format = "turtle")
                except Exception:
                    if self.verbose >= 1:
                        print(f"ERROR 1337: RDFLIB complains about { integrity_check }")

    def test_INTEGRITY(self, gdn):
        """Run the integrity checks for one node and store their ttl text.

        Nothing is done for nodes of code class ``'TAXONOMY'`` and for nodes
        without a ``'species_list'``. Otherwise a result dict is stored in
        ``self.assess_dict`` with

        * ``"EDGE"`` (always): genus instances that belong to no species,
        * ``"DISJOINT"`` (if ``"DISJOINT"`` in ``gdn.dct``): instances shared
          by two species,
        * ``"MODALITY"`` (if ``"MODALITY"`` in both ``gdn.dct`` and
          ``gdn.augment_dict``): genus instances violating the restriction.

        Args:
            gdn: node offering ``entity_context``, ``dct``, ``augment_dict``
                and ``text``.

        Returns:
            None.
        """
        if gdn.entity_context.get('codeclass') == 'TAXONOMY':
            # we do not have integrity checks for taxonomy nodes
            return None
        if 'species_list' not in gdn.entity_context:
            # we do have a leaf node, nothing to do
            return None

        # NOTE(review): the subscript of assess_dict was empty (a syntax
        # error); the node id already used for the class names is used as key.
        node_id = gdn.entity_context['id']
        checks = self.assess_dict[node_id] = {}

        species_entities = gdn.entity_context['species_list']
        genus_entity = gdn.entity_context['genus']

        # instances of the genus
        genus_instances = { row[0].n3() for row in self.g2.query(
            f"""SELECT ?example WHERE {{?example a {genus_entity.c()} .}}""") }

        # instances of all species, keyed by the species text
        species_instances = {}
        for category in species_entities:
            query = f"""SELECT ?example WHERE {{?example a {category.c()} .}}"""
            species_instances[category.text] = { row[0].n3() for row in self.g2.query(query) }

        # edge: instances of genus, which are not an instance of at least one
        # subclass (if a subclass is given)
        # TBD: better call it the RESIDUUM?
        union_of_species_instances = set().union(*species_instances.values())
        edge = genus_instances - union_of_species_instances
        if self.verbose >= 2:
            # NOTE(review): the first replacement field was empty (a syntax
            # error); the node text is printed instead -- confirm.
            print(f"1249: {gdn.text}, {edge=})")

        edge_name = f""":EDGE_{node_id}"""
        ttl = [ f"""{edge_name} rdfs:subClassOf :EDGE ;\n rdfs:label "EDGE of {genus_entity.c()}" .""" ]
        ttl.extend( f"""{e} a {edge_name} .""" for e in edge )
        checks['EDGE'] = "\n".join( ttl )

        if "DISJOINT" in gdn.dct:
            ttl = []
            for i, first in enumerate(species_entities):
                for second in species_entities[i + 1:]:
                    ci = first.text
                    cj = second.text
                    cij_set = species_instances[ ci ] & species_instances[ cj ]
                    if len(cij_set) > 0:
                        # NOTE(review): the class name embeds the entity
                        # objects themselves, not their text -- verify that
                        # their string form is a valid local name.
                        cij = f""":INTEGRITY_{node_id}_NOT_DISJOINT_{first}_AND_{second}"""
                        ttl.append( f"""{cij} rdfs:label "{ci}_AND_{cj}";\n rdfs:subClassOf :INTEGRITY_NOT_DISJOINT . """ )
                        for example in cij_set:
                            ttl.append( f"{example} a {cij} .")
            checks['DISJOINT'] = "\n".join( ttl )

        if "MODALITY" in gdn.dct:
            if "MODALITY" not in gdn.augment_dict:
                print(f"ERROR 1311: MODALITY not yet implemented, {gdn.dct=}")
            else:
                restriction_instances = {}
                for restriction_name, restriction_class in gdn.augment_dict["MODALITY"].items():
                    query = f"""SELECT ?example WHERE {{?example a :{restriction_class} .}}"""
                    restriction_instances[restriction_name] = { row[0].n3() for row in self.g2.query(query) }
                by_owlthing = restriction_instances['restriction_technically_owlthing']
                by_from = restriction_instances['restriction_technically']

                ttl = []
                # iv ... integrity violation

                # MODALITY_MUST_property_not_given_at_all:
                # genus instances outside the owl:Thing restriction
                iv = f":MODALITY_{node_id}_MUST_property_not_given_at_all"
                iv_set = genus_instances - by_owlthing
                ttl += [ f"""{iv} rdfs:subClassOf :MODALITY . """ ]
                ttl += [ f"""{i} a {iv} .""" for i in iv_set ]

                # MODALITY_MUST_property_given_but_wrong_range:
                # genus instances inside the owl:Thing restriction but
                # outside the FROM restriction
                iv = f":MODALITY_{node_id}_MUST_property_given_but_wrong_range"
                iv_set = genus_instances & ( by_owlthing - by_from )
                ttl += [ f"""{iv} rdfs:subClassOf :MODALITY . """ ]
                ttl += [ f"""{i} a {iv} .""" for i in iv_set ]

                # MODALITY_MUSTNOT_property_not_allowed is not implemented
                checks["MODALITY"] = "\n".join( ttl )
        return None
def reverse(x, *, rev = True) -> str:
    """Return `x` unchanged.

    The keyword-only flag `rev` is accepted but has no effect: the
    implementation that used it is disabled (kept below for reference).
    """
    return x
    # Disabled implementation, kept for reference:
    # if rev and x[0].isupper():
    #     ret = x[::-1].title()
    # elif rev:
    #     ret = x[::-1].lower()
    # else:
    #     ret = x
    # return ret
def focus(focus_curie_list, ttl): # ll ... 'RDF_RDFtest_OWL_OWLtest'
    """Reduce `ttl` to the paragraphs that mention a focus CURIE.

    `ttl` is split at blank lines (``"\\n\\n"``). A paragraph is kept when at
    least one string of `focus_curie_list` occurs in it as a substring. The
    kept paragraphs are joined with blank lines again, in original order.
    """
    kept = []
    for paragraph in ttl.split("\n\n"):
        # test every CURIE (no short-circuit), then decide
        hits = [ focus_curie in paragraph for focus_curie in focus_curie_list ]
        if any(hits):
            kept.append(paragraph)
    return "\n\n".join(kept)
def string2dict(string):
    """Split a node text into facets.

    The text is split at whitespace. An all-uppercase token (``str.isupper``)
    starts a new facet named after the token; every other token is appended
    to the list of the current facet. Tokens before the first facet keyword
    go to the facet ``"default"``.

    When a facet keyword occurs more than once, a message is appended to the
    list under the key ``"warning"`` and the facet starts over with an empty
    list, i.e. the last occurrence wins.

    Args:
        string: the node text.

    Returns:
        dict mapping each facet name (and possibly ``"warning"``) to a list
        of strings; ``"default"`` is always present.
    """
    current_facet = "default"
    dct = { current_facet: [] }
    for token in string.split():
        if token.isupper():
            current_facet = token
            if current_facet in dct:
                dct.setdefault("warning", []).append(
                    f"warning from nodetext_to_dict: multiple facets {current_facet}")
            dct[current_facet] = []
        else:
            # NOTE(review): this branch was missing, so ordinary tokens were
            # never stored and every facet list stayed empty.
            dct[current_facet].append(token)
    return dct