Integrity_Assessment#

Dieses Notebook ist eine Sandbox für Regressionstests bei der Entwicklung der Klasse Integrity_Assessment.

import gd071
import rdflib
import owlrl
tbox_GenDifS = """
TAXONOMY a
  Person
    ISA ID 99_a  DISJOINT 
      Man
        ISA
          Young_Man           
      Woman
        ISA
          Young_Woman

TAXONOMY b
  Person
    BY has_gender FROM Gender ID 99_b INTEGRITY disjoint must
      Man
      Woman

TAXONOMY c
  Person
    BY has_gender FROM Gender ID 99_c 
      Man
      Woman
"""
abox = """
# default prefixes from the respective taxonomy are added here


# interesting in taxonomy `a`:
# infer that a young man also is a man etc.
:Adam_Boy a :Young_Man .
:Eve_Girl a :Young_Woman .
:Angel_Child a :Young_Man , :Young_Woman .

# taxonomy b
:Adam a :Man .
:Eve a :Woman .
:Angel a :Man , :Woman .


# interesting in taxonomy `c`

# Donald is a Person; Donald has gender m; m is a Male gender
# thus Donald can be classified as a Man
:Donald a :Person.
:Donald :has_gender :m .
:m a :Male .

# Nelson also has gender m. However, Nelson is not a person.
# thus Nelson should not be classified as a Man
:Nelson a :Dog .
:Nelson :has_gender :m .

# Jessica is also a Person. But we do not know her gender.
:Jessica a :Person.

# Charly is a Person
"""
m = gd071.GenDifS(from_string=tbox_GenDifS)
# from lxml import etree
# print(etree.tostring(m.mindmap_xml, pretty_print=True, encoding="unicode"))
m.compile()
ISA 544: species_list=[Entity(self.source_gdn.id='5', self.text='Man'), Entity(self.source_gdn.id='8', self.text='Woman')]
ISA 544: species_list=[Entity(self.source_gdn.id='7', self.text='Young_Man')]
ISA 544: species_list=[Entity(self.source_gdn.id='10', self.text='Young_Woman')]
m.taxonomies_by_name
{'a': <gd071.GenDifS_Taxonomy at 0x7bb779bdda30>,
 'b': <gd071.GenDifS_Taxonomy at 0x7bb778085c40>,
 'c': <gd071.GenDifS_Taxonomy at 0x7bb772195970>}

Taxonomy a#

taxonomy_a = m.taxonomies_by_name["a"]
type( taxonomy_a )
gd071.GenDifS_Taxonomy
taxonomy_a.describe()
2 TAXONOMY a: #9 GenDifS nodes
print(taxonomy_a.ttl_namespaces)
@prefix ex: <http://example.net/namespace/ex#> .
@prefix cpt: <http://example.net/namespace/cpt#> .
@prefix sheet: <http://example.net/namespace/sheet#> .
@prefix : <http://example.net/namespace/default#> .
@prefix owl: <http://www.w3.org/2002/07/owl#> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix xml: <http://www.w3.org/XML/1998/namespace> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix skos: <http://www.w3.org/2004/02/skos/core#> .
@prefix gendifs: <http://jbusse.de/gendifs#> .
print(taxonomy_a.rdf_graphs["RDFS"].serialize() )
@prefix : <http://example.net/namespace/default#> .
@prefix owl: <http://www.w3.org/2002/07/owl#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .

:Young_Man a owl:Class ;
    rdfs:subClassOf :Man .

:Young_Woman a owl:Class ;
    rdfs:subClassOf :Woman .

:Man a owl:Class ;
    rdfs:subClassOf :Person .

:Woman a owl:Class ;
    rdfs:subClassOf :Person .

:Person a owl:Class .

Knoten mit der ID 99_a: gdn_a99#

gdn … GenDifS node

gdn_a99 = taxonomy_a.dict_of_all_gdn["99_a"]
type( gdn_a99 )
gd071.GenDifS_Differentia
gdn_a99.dct
{'ISA': '', 'ID': '99_a', 'DISJOINT': ''}
gdn_a99.describe()
--- differentia ISA: {'ISA': '', 'ID': '99_a', 'DISJOINT': ''} --- 

self.context={'id': '99_a', 'codeclass': 'ISA', 'genus': 'Person', 'species_list': ['Man', 'Woman']} 

self.code.keys()=dict_keys(['RDFS', 'RDFStest', 'SKOS'])

self.code={'RDFS': '# 99_a RDFS\n:Person a owl:Class .\n:Man a owl:Class;\n   rdfs:subClassOf :Person .\n:Woman a owl:Class;\n   rdfs:subClassOf :Person .', 'RDFStest': '# 99_a RDFStest\nex:Person_99_a a :Person .\nex:Man_99_a a :Man .\nex:Woman_99_a a :Woman .\nex:Man_99_a gendifs:classify_similar ex:Person_99_a .\nex:Woman_99_a gendifs:classify_similar ex:Person_99_a .', 'SKOS': '# 99_a SKOS\ncpt:Person a skos:Concept .\ncpt:Man a skos:Concept ;\n   skos:broader cpt:Person .\ncpt:Woman a skos:Concept ;\n   skos:broader cpt:Person .'} 

self.augment_dict={} 
gdn_a99.context
{'id': '99_a',
 'codeclass': 'ISA',
 'genus': 'Person',
 'species_list': ['Man', 'Woman']}
gdn_a99.entity_context
{'id': '99_a',
 'codeclass': 'ISA',
 'genus': Entity(self.source_gdn.id='3', self.text='Person'),
 'species_list': [Entity(self.source_gdn.id='5', self.text='Man'),
  Entity(self.source_gdn.id='8', self.text='Woman')]}
gdn_a99.entity_context['species_list']
[Entity(self.source_gdn.id='5', self.text='Man'),
 Entity(self.source_gdn.id='8', self.text='Woman')]
#namespaces
ns = taxonomy_a.ttl_namespaces
print(ns)
@prefix ex: <http://example.net/namespace/ex#> .
@prefix cpt: <http://example.net/namespace/cpt#> .
@prefix sheet: <http://example.net/namespace/sheet#> .
@prefix : <http://example.net/namespace/default#> .
@prefix owl: <http://www.w3.org/2002/07/owl#> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix xml: <http://www.w3.org/XML/1998/namespace> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix skos: <http://www.w3.org/2004/02/skos/core#> .
@prefix gendifs: <http://jbusse.de/gendifs#> .
# Build the graph for taxonomy a from the namespace prefixes plus the A-box triples.
# The T-box parse on the next line is commented out, so g_a holds only the A-box.
g_a = rdflib.Graph() # .parse(data=taxonomy_a.rdf_graphs_ttl["RDFS"]) # parse T-box
g_a.parse(data=ns+abox)
# Show the parsed A-box for visual inspection.
print( g_a.serialize() )
@prefix : <http://example.net/namespace/default#> .

:Adam a :Man .

:Adam_Boy a :Young_Man .

:Angel a :Man,
        :Woman .

:Angel_Child a :Young_Man,
        :Young_Woman .

:Donald a :Person ;
    :has_gender :m .

:Eve a :Woman .

:Eve_Girl a :Young_Woman .

:Jessica a :Person .

:Nelson a :Dog ;
    :has_gender :m .

:m a :Male .
integrity_a = gd071.Integrity_Assessment(g1 = g_a, taxonomy=taxonomy_a, verbose=2 ) # default: inference=True
INFO 1323: inference=True, len(self.g1)=14, len(self.g2)=187
integrity_a.assess_dict
{'99_a': {'EDGE': ':EDGE_99_a rdfs:subClassOf :EDGE ;\n   rdfs:label "EDGE of :Person"  .\n<http://example.net/namespace/default#Jessica> a :EDGE_99_a .\n<http://example.net/namespace/default#Donald> a :EDGE_99_a .',
  'DISJOINT': ':INTEGRITY_99_a_NOT_DISJOINT_5_AND_8 rdfs:label "Man_AND_Woman";\n rdfs:subClassOf :INTEGRITY_NOT_DISJOINT . \n<http://example.net/namespace/default#Angel_Child> a :INTEGRITY_99_a_NOT_DISJOINT_5_AND_8 .\n<http://example.net/namespace/default#Angel> a :INTEGRITY_99_a_NOT_DISJOINT_5_AND_8 .'},
 '6': {'EDGE': ':EDGE_6 rdfs:subClassOf :EDGE ;\n   rdfs:label "EDGE of :Man"  .\n<http://example.net/namespace/default#Adam> a :EDGE_6 .\n<http://example.net/namespace/default#Angel> a :EDGE_6 .'},
 '9': {'EDGE': ':EDGE_9 rdfs:subClassOf :EDGE ;\n   rdfs:label "EDGE of :Woman"  .\n<http://example.net/namespace/default#Eve> a :EDGE_9 .\n<http://example.net/namespace/default#Angel> a :EDGE_9 .'}}
print(integrity_a.assess_graph.serialize())
@prefix : <http://example.net/namespace/default#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .

:Adam a :EDGE_6 .

:Angel a :EDGE_6,
        :EDGE_9,
        :INTEGRITY_99_a_NOT_DISJOINT_5_AND_8 .

:Angel_Child a :INTEGRITY_99_a_NOT_DISJOINT_5_AND_8 .

:Donald a :EDGE_99_a .

:Eve a :EDGE_9 .

:Jessica a :EDGE_99_a .

:EDGE_6 rdfs:label "EDGE of :Man" ;
    rdfs:subClassOf :EDGE .

:EDGE_9 rdfs:label "EDGE of :Woman" ;
    rdfs:subClassOf :EDGE .

:EDGE_99_a rdfs:label "EDGE of :Person" ;
    rdfs:subClassOf :EDGE .

:INTEGRITY_99_a_NOT_DISJOINT_5_AND_8 rdfs:label "Man_AND_Woman" ;
    rdfs:subClassOf :INTEGRITY_NOT_DISJOINT .

Taxonomy b#

taxonomy_b = m.taxonomies_by_name["b"]
taxonomy_b.describe()
12 TAXONOMY b: #5 GenDifS nodes
ns = taxonomy_b.ttl_namespaces
taxonomy_b
<gd071.GenDifS_Taxonomy at 0x7bb778085c40>
g_b = rdflib.Graph() # .parse(data=taxonomy_b.rdf_graphs_ttl["RDFS"]) # parse T-box
g_b.parse(data=ns+abox)
<Graph identifier=Nf9137dcb7ca04df688fb822a513fce04 (<class 'rdflib.graph.Graph'>)>
integrity_b = gd071.Integrity_Assessment(g1 = g_b, taxonomy=taxonomy_b, verbose=2 ) # default: inference=True
INFO 1323: inference=True, len(self.g1)=14, len(self.g2)=212

# print( integrity_b.g2.serialize() )
integrity_b.assess_dict
{'99_b': {'EDGE': ':EDGE_99_b rdfs:subClassOf :EDGE ;\n   rdfs:label "EDGE of :Person"  .\n<http://example.net/namespace/default#Jessica> a :EDGE_99_b .\n<http://example.net/namespace/default#Donald> a :EDGE_99_b .'}}
print( integrity_b.assess_graph.serialize() )
@prefix : <http://example.net/namespace/default#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .

:Donald a :EDGE_99_b .

:Jessica a :EDGE_99_b .

:EDGE_99_b rdfs:label "EDGE of :Person" ;
    rdfs:subClassOf :EDGE .

Taxonomy c#

taxonomy_c = m.taxonomies_by_name["c"]
taxonomy_c.describe()
18 TAXONOMY c: #5 GenDifS nodes
print( taxonomy_c.rdf_graphs["RDFS"].serialize() )
@prefix : <http://example.net/namespace/default#> .
@prefix owl: <http://www.w3.org/2002/07/owl#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .

:Man a owl:Class ;
    rdfs:subClassOf :Person .

:Woman a owl:Class ;
    rdfs:subClassOf :Person .

:Person a owl:Class .
print( taxonomy_c.rdf_graphs["OWL"].serialize() )
@prefix : <http://example.net/namespace/default#> .
@prefix owl: <http://www.w3.org/2002/07/owl#> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .

:BY_99_c_intersection a owl:class ;
    rdfs:label "BY_ex:has_gender_FROM_ex:Gender_INTERSECT_ex:Person" ;
    rdfs:subClassOf :Person ;
    owl:intersectionOf ( :BY_99_c_restriction :Person ) .

:BY_99_c_restriction_owlthing a owl:Restriction,
        owl:class ;
    rdfs:label "BY_ex:has_gender_FROM_owlthing" ;
    owl:onProperty :has_gender ;
    owl:someValuesFrom owl:Thing .

:BY_99_c_restriction a owl:Restriction,
        owl:class ;
    rdfs:label "BY_ex:has_gender_FROM_ex:Gender" ;
    owl:onProperty :has_gender ;
    owl:someValuesFrom :Gender .

:has_gender a owl:ObjectProperty .
gdn_99_c = taxonomy_c.dict_of_all_gdn["99_c"]
gdn_99_c.describe()
--- differentia BY: {'BY': 'has_gender', 'FROM': 'Gender', 'ID': '99_c'} --- 

self.context={'id': '99_c', 'codeclass': 'BY', 'genus': 'Person', 'by': 'has_gender', 'frm': 'Gender', 'species_list': ['Man', 'Woman']} 

self.code.keys()=dict_keys(['RDFS', 'OWL', 'OWLtest', 'SKOS'])

self.code={'RDFS': ':Man a owl:Class;\n   rdfs:subClassOf :Person .\n:Woman a owl:Class;\n   rdfs:subClassOf :Person .', 'OWL': '# 99_c OWL\n:has_gender rdf:type owl:ObjectProperty .\n:BY_99_c_restriction_owlthing a owl:class ;\n   a owl:Restriction ;\n   rdfs:label "BY_ex:has_gender_FROM_owlthing" ;\n   owl:onProperty :has_gender ;\n   owl:someValuesFrom owl:Thing .\n:BY_99_c_restriction a owl:class ;\n   a owl:Restriction ;\n   rdfs:label "BY_ex:has_gender_FROM_ex:Gender" ;\n   owl:onProperty :has_gender ;\n   owl:someValuesFrom :Gender .\n:BY_99_c_intersection a owl:class ;\n   rdfs:label "BY_ex:has_gender_FROM_ex:Gender_INTERSECT_ex:Person" ;\n   rdfs:subClassOf :Person ;\n   owl:intersectionOf (:BY_99_c_restriction :Person ) .', 'OWLtest': '# 99_c OWLtest\nex:BY_99_c_restriction_example a :BY_99_c_restriction .\nex:Person_99_c a :Person ;\n   :has_gender ex:Gender_99_c ;\n   gendifs:classify_similar ex:BY_99_c_restriction_example .\nex:Gender_99_c a :Gender .', 'SKOS': '# 99_c SKOS\ncpt:Person a skos:Concept .\ncpt:BY_99_c_intersection a skos:Collection ;\n   rdfs:label "cpt:Person BY cpt:has_gender FROM cpt:Gender" .\ncpt:Man a skos:Concept ;\n   skos:broader cpt:Person .\ncpt:Woman a skos:Concept ;\n   skos:broader cpt:Person .\ncpt:BY_99_c_intersection skos:member cpt:Man .\ncpt:BY_99_c_intersection skos:member cpt:Woman .\ncpt:has_gender a rdfs:Property ;\n   rdfs:subPropertyOf skos:related .'} 

self.augment_dict={'MODALITY': {'restriction_technically_owlthing': 'BY_99_c_restriction_owlthing', 'restriction_technically': 'BY_99_c_restriction', 'intersection_technically': 'BY_99_c_intersection'}} 
gdn_99_c.augment_dict
{'MODALITY': {'restriction_technically_owlthing': 'BY_99_c_restriction_owlthing',
  'restriction_technically': 'BY_99_c_restriction',
  'intersection_technically': 'BY_99_c_intersection'}}
# Use taxonomy c's own namespace prefixes. Without this assignment, `ns` is
# whatever an earlier cell left behind (the taxonomy-b prefixes), which makes
# this cell depend on notebook execution order.
ns = taxonomy_c.ttl_namespaces
# Build the graph for taxonomy c from the namespace prefixes plus the A-box triples.
g_c = rdflib.Graph()
g_c.parse(data=ns+abox)
<Graph identifier=N88663ae3ee47407881b8cfd4610dac5b (<class 'rdflib.graph.Graph'>)>
integrity_c = gd071.Integrity_Assessment(g1 = g_c, taxonomy=taxonomy_c, verbose=2 )
INFO 1323: inference=True, len(self.g1)=14, len(self.g2)=212

Ziegenkuhmilch#

Wir erstellen zunächst eine T-Box.

import gd071
import rdflib
tbox_GenDifS = """
TAXONOMY d
  Nahrungsmittel
    ISA
      Milch
        BY hat_Naehrwert FROM Naehrwert MODALITY must DISJOINT
          Ziegenmilch
            SOME Guter_Naehrwert
          Kuhmilch
"""
m = gd071.GenDifS(from_string=tbox_GenDifS)
m.compile()
m.taxonomies_by_name
ISA 544: species_list=[Entity(self.source_gdn.id='5', self.text='Milch')]
{'d': <gd071.GenDifS_Taxonomy at 0x7bb770c00800>}
taxonomy_d = m.taxonomies_by_name["d"]
ns = taxonomy_d.ttl_namespaces

Wir legen dann ausgewählte Instanzen an. Um Instanzen und Klassen schon an der Struktur ihrer URIs unterscheiden zu können, haben alle Instanzen den Namen der Klasse, ergänzt durch eine laufende Nummer, also z.B. :Milch_1.

abox_d = ns + """
:Milch_1 a :Milch .

# kein Nährwert gegeben für Ziegenmilch_1
# Integritätsverletzung, falls Naehrwert gegeben sein muss!
:Ziegenmilch_1 a :Ziegenmilch .

# Nährwert gegeben für Ziegenmilch_2
# mit korrektem FROM
:Ziegenmilch_2 a :Ziegenmilch ;
   :hat_Naehrwert :Guter_Naehrwert_2 .
:Guter_Naehrwert_2 a :Guter_Naehrwert .

# Nährwert gegeben, aber falsches FROM
:Ziegenmilch_3 a :Ziegenmilch ;
   :hat_Naehrwert :Schnaps_3 .
:Schnaps_3 a :Schnaps .

# eine hybride Milch gibt es nicht
:Ziegenkuhmilch_4 a :Ziegenmilch, :Kuhmilch .
"""

Wir lesen diese A-Box in den Graphen g_d ein.

# Parse the A-box text (namespace prefixes + instance triples) into a fresh graph.
g_d = rdflib.Graph().parse(data=abox_d)
#print(f"{len(g_d)=} triples")
# Assess the A-box graph against taxonomy d.
integrity_d = gd071.Integrity_Assessment(g1 = g_d, taxonomy=m.taxonomies_by_name["d"], verbose=1 ) # default: inference=True
#integrity_d.assess_dict
# Print the serialized assessment graph for inspection.
print( integrity_d.assess_graph.serialize() )
@prefix : <http://example.net/namespace/default#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .

:EDGE_4 rdfs:label "EDGE of :Nahrungsmittel" ;
    rdfs:subClassOf :EDGE .

:Milch_1 a :EDGE_6,
        :MODALITY_6_MUST_property_not_given_at_all .

:Ziegenkuhmilch_4 a :INTEGRITY_6_NOT_DISJOINT_7_AND_9,
        :MODALITY_6_MUST_property_not_given_at_all .

:Ziegenmilch_1 a :MODALITY_6_MUST_property_not_given_at_all .

:Ziegenmilch_3 a :MODALITY_6_MUST_property_given_but_wrong_range .

:EDGE_6 rdfs:label "EDGE of :Milch" ;
    rdfs:subClassOf :EDGE .

:INTEGRITY_6_NOT_DISJOINT_7_AND_9 rdfs:label "Ziegenmilch_AND_Kuhmilch" ;
    rdfs:subClassOf :INTEGRITY_NOT_DISJOINT .

:MODALITY_6_MUST_property_given_but_wrong_range rdfs:subClassOf :MODALITY .

:MODALITY_6_MUST_property_not_given_at_all rdfs:subClassOf :MODALITY .