Integrity_Assessment#
Dieses Notebook ist eine Sandbox für Regressionstests während der Entwicklung der Klasse `Integrity_Assessment`.
import gd071
import rdflib
import owlrl
tbox_GenDifS = """
TAXONOMY a
Person
ISA ID 99_a DISJOINT
Man
ISA
Young_Man
Woman
ISA
Young_Woman
TAXONOMY b
Person
BY has_gender FROM Gender ID 99_b INTEGRITY disjoint must
Man
Woman
TAXONOMY c
Person
BY has_gender FROM Gender ID 99_c
Man
Woman
"""
abox = """
# default prefixes from the respective taxonomy are added here
# interesting in taxonomy `a`:
# infer that a young man also is a man etc.
:Adam_Boy a :Young_Man .
:Eve_Girl a :Young_Woman .
:Angel_Child a :Young_Man , :Young_Woman .
# taxonomy b
:Adam a :Man .
:Eve a :Woman .
:Angel a :Man , :Woman .
# interesting in taxonomy `c`
# Donald is a Person; Donald has gender m; m is a Male gender
# thus Donald can be classified as a Man
:Donald a :Person.
:Donald :has_gender :m .
:m a :Male .
# Nelson also has gender m. However, Nelson is not a person.
# thus Nelson should not be classified as a Man
:Nelson a :Dog .
:Nelson :has_gender :m .
# Jessica is also a Person. But we do not know her gender.
:Jessica a :Person.
# Charly is a Person
"""
# Build a GenDifS model from the GenDifS source text held in tbox_GenDifS.
m = gd071.GenDifS(from_string=tbox_GenDifS)
# Disabled debugging aid: pretty-print the model's mindmap XML.
# from lxml import etree
# print(etree.tostring(m.mindmap_xml, pretty_print=True, encoding="unicode"))
# Compile the model; the cells below read the taxonomies (a, b, c) from
# m.taxonomies_by_name after this call.
m.compile()
ISA 544: species_list=[Entity(self.source_gdn.id='5', self.text='Man'), Entity(self.source_gdn.id='8', self.text='Woman')]
ISA 544: species_list=[Entity(self.source_gdn.id='7', self.text='Young_Man')]
ISA 544: species_list=[Entity(self.source_gdn.id='10', self.text='Young_Woman')]
m.taxonomies_by_name
{'a': <gd071.GenDifS_Taxonomy at 0x7bb779bdda30>,
'b': <gd071.GenDifS_Taxonomy at 0x7bb778085c40>,
'c': <gd071.GenDifS_Taxonomy at 0x7bb772195970>}
Taxonomy a
#
taxonomy_a = m.taxonomies_by_name["a"]
type( taxonomy_a )
gd071.GenDifS_Taxonomy
taxonomy_a.describe()
2 TAXONOMY a: #9 GenDifS nodes
print(taxonomy_a.ttl_namespaces)
@prefix ex: <http://example.net/namespace/ex#> .
@prefix cpt: <http://example.net/namespace/cpt#> .
@prefix sheet: <http://example.net/namespace/sheet#> .
@prefix : <http://example.net/namespace/default#> .
@prefix owl: <http://www.w3.org/2002/07/owl#> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix xml: <http://www.w3.org/XML/1998/namespace> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix skos: <http://www.w3.org/2004/02/skos/core#> .
@prefix gendifs: <http://jbusse.de/gendifs#> .
print(taxonomy_a.rdf_graphs["RDFS"].serialize() )
@prefix : <http://example.net/namespace/default#> .
@prefix owl: <http://www.w3.org/2002/07/owl#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
:Young_Man a owl:Class ;
rdfs:subClassOf :Man .
:Young_Woman a owl:Class ;
rdfs:subClassOf :Woman .
:Man a owl:Class ;
rdfs:subClassOf :Person .
:Woman a owl:Class ;
rdfs:subClassOf :Person .
:Person a owl:Class .
Knoten mit der ID 99: gdn_a99
#
Die Abkürzung gdn steht für „GenDifS node“.
gdn_a99 = taxonomy_a.dict_of_all_gdn["99_a"]
type( gdn_a99 )
gd071.GenDifS_Differentia
gdn_a99.dct
{'ISA': '', 'ID': '99_a', 'DISJOINT': ''}
gdn_a99.describe()
--- differentia ISA: {'ISA': '', 'ID': '99_a', 'DISJOINT': ''} ---
self.context={'id': '99_a', 'codeclass': 'ISA', 'genus': 'Person', 'species_list': ['Man', 'Woman']}
self.code.keys()=dict_keys(['RDFS', 'RDFStest', 'SKOS'])
self.code={'RDFS': '# 99_a RDFS\n:Person a owl:Class .\n:Man a owl:Class;\n rdfs:subClassOf :Person .\n:Woman a owl:Class;\n rdfs:subClassOf :Person .', 'RDFStest': '# 99_a RDFStest\nex:Person_99_a a :Person .\nex:Man_99_a a :Man .\nex:Woman_99_a a :Woman .\nex:Man_99_a gendifs:classify_similar ex:Person_99_a .\nex:Woman_99_a gendifs:classify_similar ex:Person_99_a .', 'SKOS': '# 99_a SKOS\ncpt:Person a skos:Concept .\ncpt:Man a skos:Concept ;\n skos:broader cpt:Person .\ncpt:Woman a skos:Concept ;\n skos:broader cpt:Person .'}
self.augment_dict={}
gdn_a99.context
{'id': '99_a',
'codeclass': 'ISA',
'genus': 'Person',
'species_list': ['Man', 'Woman']}
gdn_a99.entity_context
{'id': '99_a',
'codeclass': 'ISA',
'genus': Entity(self.source_gdn.id='3', self.text='Person'),
'species_list': [Entity(self.source_gdn.id='5', self.text='Man'),
Entity(self.source_gdn.id='8', self.text='Woman')]}
gdn_a99.entity_context['species_list']
[Entity(self.source_gdn.id='5', self.text='Man'),
Entity(self.source_gdn.id='8', self.text='Woman')]
#namespaces
ns = taxonomy_a.ttl_namespaces
print(ns)
@prefix ex: <http://example.net/namespace/ex#> .
@prefix cpt: <http://example.net/namespace/cpt#> .
@prefix sheet: <http://example.net/namespace/sheet#> .
@prefix : <http://example.net/namespace/default#> .
@prefix owl: <http://www.w3.org/2002/07/owl#> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix xml: <http://www.w3.org/XML/1998/namespace> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix skos: <http://www.w3.org/2004/02/skos/core#> .
@prefix gendifs: <http://jbusse.de/gendifs#> .
# Start from an empty graph; parsing the RDFS T-box of taxonomy a into it is
# currently disabled (see the trailing comment on the next line).
g_a = rdflib.Graph() # .parse(data=taxonomy_a.rdf_graphs_ttl["RDFS"]) # parse T-box
# Load the A-box: the taxonomy's @prefix declarations (ns) followed by the
# instance triples from abox.
g_a.parse(data=ns+abox)
# Show what was actually parsed (serialized by rdflib's default serializer).
print( g_a.serialize() )
@prefix : <http://example.net/namespace/default#> .
:Adam a :Man .
:Adam_Boy a :Young_Man .
:Angel a :Man,
:Woman .
:Angel_Child a :Young_Man,
:Young_Woman .
:Donald a :Person ;
:has_gender :m .
:Eve a :Woman .
:Eve_Girl a :Young_Woman .
:Jessica a :Person .
:Nelson a :Dog ;
:has_gender :m .
:m a :Male .
# Assess the A-box graph g_a against taxonomy a; the findings are read below
# from integrity_a.assess_dict and integrity_a.assess_graph.
integrity_a = gd071.Integrity_Assessment(g1 = g_a, taxonomy=taxonomy_a, verbose=2 ) # default: inference=True
INFO 1323: inference=True, len(self.g1)=14, len(self.g2)=187
integrity_a.assess_dict
{'99_a': {'EDGE': ':EDGE_99_a rdfs:subClassOf :EDGE ;\n rdfs:label "EDGE of :Person" .\n<http://example.net/namespace/default#Jessica> a :EDGE_99_a .\n<http://example.net/namespace/default#Donald> a :EDGE_99_a .',
'DISJOINT': ':INTEGRITY_99_a_NOT_DISJOINT_5_AND_8 rdfs:label "Man_AND_Woman";\n rdfs:subClassOf :INTEGRITY_NOT_DISJOINT . \n<http://example.net/namespace/default#Angel_Child> a :INTEGRITY_99_a_NOT_DISJOINT_5_AND_8 .\n<http://example.net/namespace/default#Angel> a :INTEGRITY_99_a_NOT_DISJOINT_5_AND_8 .'},
'6': {'EDGE': ':EDGE_6 rdfs:subClassOf :EDGE ;\n rdfs:label "EDGE of :Man" .\n<http://example.net/namespace/default#Adam> a :EDGE_6 .\n<http://example.net/namespace/default#Angel> a :EDGE_6 .'},
'9': {'EDGE': ':EDGE_9 rdfs:subClassOf :EDGE ;\n rdfs:label "EDGE of :Woman" .\n<http://example.net/namespace/default#Eve> a :EDGE_9 .\n<http://example.net/namespace/default#Angel> a :EDGE_9 .'}}
print(integrity_a.assess_graph.serialize())
@prefix : <http://example.net/namespace/default#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
:Adam a :EDGE_6 .
:Angel a :EDGE_6,
:EDGE_9,
:INTEGRITY_99_a_NOT_DISJOINT_5_AND_8 .
:Angel_Child a :INTEGRITY_99_a_NOT_DISJOINT_5_AND_8 .
:Donald a :EDGE_99_a .
:Eve a :EDGE_9 .
:Jessica a :EDGE_99_a .
:EDGE_6 rdfs:label "EDGE of :Man" ;
rdfs:subClassOf :EDGE .
:EDGE_9 rdfs:label "EDGE of :Woman" ;
rdfs:subClassOf :EDGE .
:EDGE_99_a rdfs:label "EDGE of :Person" ;
rdfs:subClassOf :EDGE .
:INTEGRITY_99_a_NOT_DISJOINT_5_AND_8 rdfs:label "Man_AND_Woman" ;
rdfs:subClassOf :INTEGRITY_NOT_DISJOINT .
Taxonomy b
#
taxonomy_b = m.taxonomies_by_name["b"]
taxonomy_b.describe()
12 TAXONOMY b: #5 GenDifS nodes
ns = taxonomy_b.ttl_namespaces
taxonomy_b
<gd071.GenDifS_Taxonomy at 0x7bb778085c40>
# Start from an empty graph; parsing the RDFS T-box of taxonomy b into it is
# currently disabled (see the trailing comment on the next line).
g_b = rdflib.Graph() # .parse(data=taxonomy_b.rdf_graphs_ttl["RDFS"]) # parse T-box
# Load the same A-box text as before, prefixed with taxonomy b's @prefix
# declarations (ns).
g_b.parse(data=ns+abox)
<Graph identifier=Nf9137dcb7ca04df688fb822a513fce04 (<class 'rdflib.graph.Graph'>)>
# Assess the A-box graph g_b against taxonomy b.
# NOTE(review): the recorded result below contains only EDGE findings and no
# NOT_DISJOINT class for :Angel (typed both :Man and :Woman), unlike taxonomy a.
# Taxonomy b's T-box spells its keywords `INTEGRITY disjoint must`, whereas the
# taxonomy d T-box uses `MODALITY must DISJOINT` -- confirm which spelling the
# GenDifS parser actually recognizes.
integrity_b = gd071.Integrity_Assessment(g1 = g_b, taxonomy=taxonomy_b, verbose=2 ) # default: inference=True
INFO 1323: inference=True, len(self.g1)=14, len(self.g2)=212
# print( integrity_b.g2.serialize() )
integrity_b.assess_dict
{'99_b': {'EDGE': ':EDGE_99_b rdfs:subClassOf :EDGE ;\n rdfs:label "EDGE of :Person" .\n<http://example.net/namespace/default#Jessica> a :EDGE_99_b .\n<http://example.net/namespace/default#Donald> a :EDGE_99_b .'}}
print( integrity_b.assess_graph.serialize() )
@prefix : <http://example.net/namespace/default#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
:Donald a :EDGE_99_b .
:Jessica a :EDGE_99_b .
:EDGE_99_b rdfs:label "EDGE of :Person" ;
rdfs:subClassOf :EDGE .
Taxonomy c#
taxonomy_c = m.taxonomies_by_name["c"]
taxonomy_c.describe()
18 TAXONOMY c: #5 GenDifS nodes
print( taxonomy_c.rdf_graphs["RDFS"].serialize() )
@prefix : <http://example.net/namespace/default#> .
@prefix owl: <http://www.w3.org/2002/07/owl#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
:Man a owl:Class ;
rdfs:subClassOf :Person .
:Woman a owl:Class ;
rdfs:subClassOf :Person .
:Person a owl:Class .
print( taxonomy_c.rdf_graphs["OWL"].serialize() )
@prefix : <http://example.net/namespace/default#> .
@prefix owl: <http://www.w3.org/2002/07/owl#> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
:BY_99_c_intersection a owl:class ;
rdfs:label "BY_ex:has_gender_FROM_ex:Gender_INTERSECT_ex:Person" ;
rdfs:subClassOf :Person ;
owl:intersectionOf ( :BY_99_c_restriction :Person ) .
:BY_99_c_restriction_owlthing a owl:Restriction,
owl:class ;
rdfs:label "BY_ex:has_gender_FROM_owlthing" ;
owl:onProperty :has_gender ;
owl:someValuesFrom owl:Thing .
:BY_99_c_restriction a owl:Restriction,
owl:class ;
rdfs:label "BY_ex:has_gender_FROM_ex:Gender" ;
owl:onProperty :has_gender ;
owl:someValuesFrom :Gender .
:has_gender a owl:ObjectProperty .
gdn_99_c = taxonomy_c.dict_of_all_gdn["99_c"]
gdn_99_c.describe()
--- differentia BY: {'BY': 'has_gender', 'FROM': 'Gender', 'ID': '99_c'} ---
self.context={'id': '99_c', 'codeclass': 'BY', 'genus': 'Person', 'by': 'has_gender', 'frm': 'Gender', 'species_list': ['Man', 'Woman']}
self.code.keys()=dict_keys(['RDFS', 'OWL', 'OWLtest', 'SKOS'])
self.code={'RDFS': ':Man a owl:Class;\n rdfs:subClassOf :Person .\n:Woman a owl:Class;\n rdfs:subClassOf :Person .', 'OWL': '# 99_c OWL\n:has_gender rdf:type owl:ObjectProperty .\n:BY_99_c_restriction_owlthing a owl:class ;\n a owl:Restriction ;\n rdfs:label "BY_ex:has_gender_FROM_owlthing" ;\n owl:onProperty :has_gender ;\n owl:someValuesFrom owl:Thing .\n:BY_99_c_restriction a owl:class ;\n a owl:Restriction ;\n rdfs:label "BY_ex:has_gender_FROM_ex:Gender" ;\n owl:onProperty :has_gender ;\n owl:someValuesFrom :Gender .\n:BY_99_c_intersection a owl:class ;\n rdfs:label "BY_ex:has_gender_FROM_ex:Gender_INTERSECT_ex:Person" ;\n rdfs:subClassOf :Person ;\n owl:intersectionOf (:BY_99_c_restriction :Person ) .', 'OWLtest': '# 99_c OWLtest\nex:BY_99_c_restriction_example a :BY_99_c_restriction .\nex:Person_99_c a :Person ;\n :has_gender ex:Gender_99_c ;\n gendifs:classify_similar ex:BY_99_c_restriction_example .\nex:Gender_99_c a :Gender .', 'SKOS': '# 99_c SKOS\ncpt:Person a skos:Concept .\ncpt:BY_99_c_intersection a skos:Collection ;\n rdfs:label "cpt:Person BY cpt:has_gender FROM cpt:Gender" .\ncpt:Man a skos:Concept ;\n skos:broader cpt:Person .\ncpt:Woman a skos:Concept ;\n skos:broader cpt:Person .\ncpt:BY_99_c_intersection skos:member cpt:Man .\ncpt:BY_99_c_intersection skos:member cpt:Woman .\ncpt:has_gender a rdfs:Property ;\n rdfs:subPropertyOf skos:related .'}
self.augment_dict={'MODALITY': {'restriction_technically_owlthing': 'BY_99_c_restriction_owlthing', 'restriction_technically': 'BY_99_c_restriction', 'intersection_technically': 'BY_99_c_intersection'}}
gdn_99_c.augment_dict
{'MODALITY': {'restriction_technically_owlthing': 'BY_99_c_restriction_owlthing',
'restriction_technically': 'BY_99_c_restriction',
'intersection_technically': 'BY_99_c_intersection'}}
# Take the @prefix declarations from taxonomy c itself rather than reusing the
# `ns` value left over from the taxonomy b section.
ns = taxonomy_c.ttl_namespaces
# Load the A-box (prefix declarations + instance triples) into a fresh graph.
g_c = rdflib.Graph()
g_c.parse(data=ns+abox)
<Graph identifier=N88663ae3ee47407881b8cfd4610dac5b (<class 'rdflib.graph.Graph'>)>
# Assess the A-box graph g_c against taxonomy c.
# NOTE(review): unlike the other sections, neither integrity_c.assess_dict nor
# integrity_c.assess_graph is inspected in the visible part of this notebook --
# confirm whether the expected findings for :Donald / :Nelson / :Jessica still
# need to be checked here.
integrity_c = gd071.Integrity_Assessment(g1 = g_c, taxonomy=taxonomy_c, verbose=2 )
INFO 1323: inference=True, len(self.g1)=14, len(self.g2)=212
Ziegenkuhmilch#
Wir erstellen zunächst eine T-Box.
import gd071
import rdflib
tbox_GenDifS = """
TAXONOMY d
Nahrungsmittel
ISA
Milch
BY hat_Naehrwert FROM Naehrwert MODALITY must DISJOINT
Ziegenmilch
SOME Guter_Naehrwert
Kuhmilch
"""
m = gd071.GenDifS(from_string=tbox_GenDifS)
m.compile()
m.taxonomies_by_name
ISA 544: species_list=[Entity(self.source_gdn.id='5', self.text='Milch')]
{'d': <gd071.GenDifS_Taxonomy at 0x7bb770c00800>}
taxonomy_d = m.taxonomies_by_name["d"]
ns = taxonomy_d.ttl_namespaces
Wir legen dann ausgewählte Instanzen an. Um Instanzen und Klassen schon an der Struktur ihrer URIs unterscheiden zu können, tragen alle Instanzen den Namen ihrer Klasse, ergänzt durch eine laufende Nummer, also z.B. `:Milch_1`.
abox_d = ns + """
:Milch_1 a :Milch .
# kein Nährwert gegeben für Ziegenmilch_1
# Integritätsverletzung, falls Naehrwert gegeben sein muss!
:Ziegenmilch_1 a :Ziegenmilch .
# Nährwert gegeben für Ziegenmilch_2
# mit korrektem FROM
:Ziegenmilch_2 a :Ziegenmilch ;
:hat_Naehrwert :Guter_Naehrwert_2 .
:Guter_Naehrwert_2 a :Guter_Naehrwert .
# Nährwert gegeben, aber falsches FROM
:Ziegenmilch_3 a :Ziegenmilch ;
:hat_Naehrwert :Schnaps_3 .
:Schnaps_3 a :Schnaps .
# eine hybride Milch gibt es nicht
:Ziegenkuhmilch_4 a :Ziegenmilch, :Kuhmilch .
"""
Wir lesen diese A-Box in den Graphen `g_d` ein.
# Parse the A-box text (abox_d already starts with the namespace prefixes)
# into a new rdflib graph.
g_d = rdflib.Graph().parse(data=abox_d)
# Disabled debugging aid: report the number of parsed triples.
#print(f"{len(g_d)=} triples")
# Assess g_d against taxonomy d, looked up by name from the compiled model.
integrity_d = gd071.Integrity_Assessment(g1 = g_d, taxonomy=m.taxonomies_by_name["d"], verbose=1 ) # default: inference=True
# Disabled alternative view of the findings as a dict.
#integrity_d.assess_dict
# Print the assessment findings as a serialized RDF graph.
print( integrity_d.assess_graph.serialize() )
@prefix : <http://example.net/namespace/default#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
:EDGE_4 rdfs:label "EDGE of :Nahrungsmittel" ;
rdfs:subClassOf :EDGE .
:Milch_1 a :EDGE_6,
:MODALITY_6_MUST_property_not_given_at_all .
:Ziegenkuhmilch_4 a :INTEGRITY_6_NOT_DISJOINT_7_AND_9,
:MODALITY_6_MUST_property_not_given_at_all .
:Ziegenmilch_1 a :MODALITY_6_MUST_property_not_given_at_all .
:Ziegenmilch_3 a :MODALITY_6_MUST_property_given_but_wrong_range .
:EDGE_6 rdfs:label "EDGE of :Milch" ;
rdfs:subClassOf :EDGE .
:INTEGRITY_6_NOT_DISJOINT_7_AND_9 rdfs:label "Ziegenmilch_AND_Kuhmilch" ;
rdfs:subClassOf :INTEGRITY_NOT_DISJOINT .
:MODALITY_6_MUST_property_given_but_wrong_range rdfs:subClassOf :MODALITY .
:MODALITY_6_MUST_property_not_given_at_all rdfs:subClassOf :MODALITY .