Data Quality Assessment Reports
From SemWebQuality.org
Data Quality Scores can be used to get a quick impression about the quality state of a data source. Below you can find queries that can calculate data quality scores based on pre-defined data requirements on the fly.
Contents |
Completeness
Scenario: | Calculate the Completeness of a property based on the ratio of all instances of a class and instances that miss a specific property or property value. |
Input: | All property completeness rules (in OWL DL Design) |
Output: | Completeness Scores per Property Completeness Requirement |
# Completeness score per property completeness rule.
# Both outer UNION branches repeat the same rule lookup:
#   branch 1 binds ?s  to each instance that violates the rule -> ?violations
#   branch 2 binds ?s2 to each instance of the tested class     -> ?total
# COUNT ignores unbound values, so each aggregate only sees rows of its own branch.
# The dqm: and xsd: prefixes are not declared here; the caller or endpoint must supply them.
SELECT ?dqr ?tclassURI ?tpropURI
       (COUNT(?s) AS ?violations)
       (COUNT(?s2) AS ?total)
       (((?total - ?violations)/?total) AS ?completeness)
WHERE {
  {
    ?dqr a dqm:PropertyCompletenessRule .
    ?dqr dqm:testedClass ?tclass .
    ?dqr dqm:testedProperty1 ?tprop .
    ?dqr dqm:requiredValue "true"^^xsd:boolean .
    ?dqr dqm:requiredProperty "true"^^xsd:boolean .
    ?tclass dqm:hasURI ?tclassreal .
    ?tprop dqm:hasURI ?tpropreal .
    # Convert the dqm:hasURI values to IRIs so they can be used in class/predicate position.
    BIND (IRI(str(?tpropreal)) AS ?tpropURI) .
    BIND (IRI(str(?tclassreal)) AS ?tclassURI) .
    ?s a ?tclassURI .
    # Violation: the property is missing, or it is present with the empty literal "".
    # The test lives in a FILTER of this group (not in a nested sub-group) so that
    # ?tpropURI, bound above, is in scope when the EXISTS patterns are evaluated.
    FILTER ( NOT EXISTS { ?s ?tpropURI ?value . } || EXISTS { ?s ?tpropURI "" . } )
  } UNION {
    ?dqr a dqm:PropertyCompletenessRule .
    ?dqr dqm:testedClass ?tclass .
    ?dqr dqm:testedProperty1 ?tprop .
    ?dqr dqm:requiredValue "true"^^xsd:boolean .
    ?dqr dqm:requiredProperty "true"^^xsd:boolean .
    ?tclass dqm:hasURI ?tclassreal .
    ?tprop dqm:hasURI ?tpropreal .
    BIND (IRI(str(?tpropreal)) AS ?tpropURI) .
    BIND (IRI(str(?tclassreal)) AS ?tclassURI) .
    ?s2 a ?tclassURI .
  }
}
GROUP BY ?dqr ?tclassURI ?tpropURI
Uniqueness in Depth
Scenario: | Calculate the uniqueness of a property based on the ratio of all instances of a class and instances that have non-unique property values. |
Input: | All unique value rules (in OWL DL Design) |
Output: | Uniqueness Scores per unique value rule |
# Uniqueness score per unique value rule.
#   branch 1 yields one row per ordered pair (?i, ?i2) of different instances whose
#            tested property values have the same string form -> basis of ?violations
#   branch 2 binds ?i3 to each instance of the tested class   -> ?total
# COUNT ignores unbound values, so each aggregate only sees rows of its own branch.
# The dqm: prefix is not declared here; the caller or endpoint must supply it.
SELECT ?dqr ?tclassURI ?tpropURI
       # Every unordered pair shows up twice (once from each side), hence the division by 2.
       ((COUNT(?i)/2) AS ?violations)
       (COUNT(?i3) AS ?total)
       (((?total - ?violations)/?total) AS ?uniquenessDepth)
WHERE {
  {
    ?dqr a dqm:UniqueValueRule .
    ?dqr dqm:testedClass ?tclass .
    ?dqr dqm:testedProperty1 ?tprop .
    ?tclass dqm:hasURI ?tclassreal .
    ?tprop dqm:hasURI ?tpropreal .
    # Convert the dqm:hasURI values to IRIs so they can be used in class/predicate position.
    BIND (IRI(str(?tpropreal)) AS ?tpropURI) .
    BIND (IRI(str(?tclassreal)) AS ?tclassURI) .
    ?i a ?tclassURI .
    ?i ?tpropURI ?uniqueValue1 .
    # The partner must belong to the tested class as well; otherwise the pair is only
    # seen from one side and the halving above would produce half-violations.
    ?i2 a ?tclassURI .
    ?i2 ?tpropURI ?uniqueValue2 .
    FILTER(?i!=?i2 && (str(?uniqueValue1) = str(?uniqueValue2)))
    # NOTE(review): when three or more instances share one value, the number of pairs
    # grows faster than the number of instances, so ?violations can exceed ?total.
    # Confirm whether pairs or instances are the intended unit.
  } UNION {
    ?dqr a dqm:UniqueValueRule .
    ?dqr dqm:testedClass ?tclass .
    ?dqr dqm:testedProperty1 ?tprop .
    ?tclass dqm:hasURI ?tclassreal .
    ?tprop dqm:hasURI ?tpropreal .
    BIND (IRI(str(?tpropreal)) AS ?tpropURI) .
    BIND (IRI(str(?tclassreal)) AS ?tclassURI) .
    ?i3 a ?tclassURI .
  }
}
GROUP BY ?dqr ?tclassURI ?tpropURI
Syntactic Accuracy
Based on Syntax Rules
Scenario: | Calculate Syntactic Accuracy of a property based on the ratio of all instances of a class and instances that have property values with syntax rule violations. |
Input: | All syntax rules (in OWL DL Design) |
Output: | Score for Syntactic Accuracy per syntax rule |
# Syntactic accuracy per syntax rule.
#   branch 1 binds ?i  to instances with a value that does not match the rule's regex -> ?violations
#   branch 2 binds ?i2 to each instance of the tested class                            -> ?total
# COUNT ignores unbound values, so each aggregate only sees rows of its own branch.
# DISTINCT makes both aggregates count instances: an instance with several offending
# values would otherwise be counted once per value and could push ?violations above ?total.
# The dqm: prefix is not declared here; the caller or endpoint must supply it.
SELECT ?dqr ?tclassURI ?tpropURI
       (COUNT(DISTINCT ?i) AS ?violations)
       (COUNT(DISTINCT ?i2) AS ?total)
       (((?total - ?violations)/?total) AS ?syntacticAccuracy)
WHERE {
  {
    ?dqr a dqm:SyntaxRule .
    ?dqr dqm:testedClass ?tclass .
    ?dqr dqm:testedProperty1 ?tprop .
    ?dqr dqm:regex ?regex .
    ?tclass dqm:hasURI ?tclassreal .
    ?tprop dqm:hasURI ?tpropreal .
    # Convert the dqm:hasURI values to IRIs so they can be used in class/predicate position.
    BIND (IRI(str(?tpropreal)) AS ?tpropURI) .
    BIND (IRI(str(?tclassreal)) AS ?tclassURI) .
    ?i a ?tclassURI .
    ?i ?tpropURI ?value .
    # Violation: the string form of the value does not match the rule's pattern.
    FILTER (!regex(str(?value), ?regex)) .
  } UNION {
    ?dqr a dqm:SyntaxRule .
    ?dqr dqm:testedClass ?tclass .
    ?dqr dqm:testedProperty1 ?tprop .
    ?dqr dqm:regex ?regex .
    ?tclass dqm:hasURI ?tclassreal .
    ?tprop dqm:hasURI ?tpropreal .
    BIND (IRI(str(?tpropreal)) AS ?tpropURI) .
    BIND (IRI(str(?tclassreal)) AS ?tclassURI) .
    ?i2 a ?tclassURI .
  }
}
GROUP BY ?dqr ?tclassURI ?tpropURI
Based on Legal Value Rules
Scenario: | Calculate Syntactic Accuracy of a property based on the ratio of all instances of a class and instances that have property values that are not listed as legal values. |
Input: | All legal value rules (in OWL DL Design) |
Output: | Score for Syntactic Accuracy per legal value rule |
# Syntactic accuracy per legal value rule.
#   branch 1 binds ?i  to instances with a value not found among the legal values -> ?violations
#   branch 2 binds ?i2 to each instance of the tested class                        -> ?total
# COUNT ignores unbound values, so each aggregate only sees rows of its own branch.
# DISTINCT makes both aggregates count instances: an instance with several illegal
# values would otherwise be counted once per value and could push ?violations above ?total.
# The dqm: prefix is not declared here; the caller or endpoint must supply it.
SELECT ?dqr ?tclassURI ?tpropURI
       (COUNT(DISTINCT ?i) AS ?violations)
       (COUNT(DISTINCT ?i2) AS ?total)
       (((?total - ?violations)/?total) AS ?syntacticAccuracy)
WHERE {
  {
    ?dqr a dqm:LegalValueRule .
    ?dqr dqm:testedClass ?tclass .
    ?dqr dqm:testedProperty1 ?tprop .
    ?tclass dqm:hasURI ?tclassreal .
    ?tprop dqm:hasURI ?tpropreal .
    ?dqr dqm:referenceClass ?rclass .
    ?dqr dqm:referenceProperty1 ?rprop .
    ?rclass dqm:hasURI ?rclassvalue .
    ?rprop dqm:hasURI ?rpropvalue .
    # Convert the dqm:hasURI values to IRIs so they can be used in class/predicate position.
    BIND (IRI(str(?tpropreal)) AS ?tpropURI) .
    BIND (IRI(str(?tclassreal)) AS ?tclassURI) .
    BIND (IRI(str(?rpropvalue)) AS ?rpropURI) .
    BIND (IRI(str(?rclassvalue)) AS ?rclassURI) .
    ?i a ?tclassURI .
    ?i ?tpropURI ?value .
    # Violation: no instance of the reference class carries a reference-property value
    # with the same string form. The reference instance has its own variable (?ref) so
    # it cannot be confused with ?i2, which the other branch uses for the total count.
    FILTER NOT EXISTS {
      ?ref a ?rclassURI .
      ?ref ?rpropURI ?value1 .
      FILTER (str(?value1) = str(?value)) .
    }
  } UNION {
    ?dqr a dqm:LegalValueRule .
    ?dqr dqm:testedClass ?tclass .
    ?dqr dqm:testedProperty1 ?tprop .
    ?tclass dqm:hasURI ?tclassreal .
    ?tprop dqm:hasURI ?tpropreal .
    BIND (IRI(str(?tpropreal)) AS ?tpropURI) .
    BIND (IRI(str(?tclassreal)) AS ?tclassURI) .
    ?i2 a ?tclassURI .
  }
}
GROUP BY ?dqr ?tclassURI ?tpropURI
Based on Legal Value Range Rules
Scenario: | Calculate Syntactic Accuracy of a property based on the ratio of all instances of a class and instances that have property values that are out of a legal range. |
Input: | All legal value range rules (in OWL DL Design) |
Output: | Score for Syntactic Accuracy per legal value range rule |
# Syntactic accuracy per legal value range rule.
#   branch 1 binds ?i  to instances with a value above the upper or below the lower limit -> ?violations
#   branch 2 binds ?i2 to each instance of the tested class                                -> ?total
# COUNT ignores unbound values, so each aggregate only sees rows of its own branch.
# DISTINCT makes both aggregates count instances: an instance with several out-of-range
# values would otherwise be counted once per value and could push ?violations above ?total.
# The dqm: and xsd: prefixes are not declared here; the caller or endpoint must supply them.
SELECT ?dqr ?tclassURI ?tpropURI
       (COUNT(DISTINCT ?i) AS ?violations)
       (COUNT(DISTINCT ?i2) AS ?total)
       (((?total - ?violations)/?total) AS ?syntacticAccuracy)
WHERE {
  {
    ?dqr a dqm:LegalValueRangeRule .
    ?dqr dqm:testedClass ?tclass .
    ?dqr dqm:testedProperty1 ?tprop .
    # Either limit may be absent from a rule, so both are optional.
    OPTIONAL { ?dqr dqm:upperLimit ?upperLimit . }
    OPTIONAL { ?dqr dqm:lowerLimit ?lowerLimit . }
    ?tclass dqm:hasURI ?tclassreal .
    ?tprop dqm:hasURI ?tpropreal .
    # Convert the dqm:hasURI values to IRIs so they can be used in class/predicate position.
    BIND (IRI(str(?tpropreal)) AS ?tpropURI) .
    BIND (IRI(str(?tclassreal)) AS ?tclassURI) .
    ?i a ?tclassURI .
    ?i ?tpropURI ?value .
    # The value is re-typed as xsd:float from its string form before comparing.
    # A comparison against an unbound limit raises an error; with || the filter still
    # passes when the other comparison is true, so one-sided rules keep working.
    FILTER (STRDT(str(?value), xsd:float) > ?upperLimit || STRDT(str(?value), xsd:float) < ?lowerLimit) .
  } UNION {
    ?dqr a dqm:LegalValueRangeRule .
    ?dqr dqm:testedClass ?tclass .
    ?dqr dqm:testedProperty1 ?tprop .
    ?tclass dqm:hasURI ?tclassreal .
    ?tprop dqm:hasURI ?tpropreal .
    BIND (IRI(str(?tpropreal)) AS ?tpropURI) .
    BIND (IRI(str(?tclassreal)) AS ?tclassURI) .
    ?i2 a ?tclassURI .
  }
}
GROUP BY ?dqr ?tclassURI ?tpropURI
Semantic Accuracy
# Semantic accuracy per functional-dependency value rule that has exactly one condition
# (rules that also carry hasCondition2..5 are removed by the MINUS clauses).
#   branch 1 binds ?i  to instances that satisfy the condition but whose tested property
#            value differs from the value the rule expects -> ?violations
#   branch 2 binds ?i2 to each instance that satisfies the condition -> ?total
# COUNT ignores unbound values, so each aggregate only sees rows of its own branch.
# DISTINCT makes both aggregates count instances: an instance with several deviating
# values would otherwise be counted once per value and could push ?violations above ?total.
# The dqm: prefix is not declared here; the caller or endpoint must supply it.
SELECT ?dqr ?tclassURI
       (COUNT(DISTINCT ?i) AS ?violations)
       (COUNT(DISTINCT ?i2) AS ?total)
       # ?syntacticAccuracy is the historical (misleading) column name and is kept so that
       # existing consumers keep working; ?semanticAccuracy carries the same score.
       (((?total - ?violations)/?total) AS ?syntacticAccuracy)
       (?syntacticAccuracy AS ?semanticAccuracy)
WHERE {
  {
    ?dqr a dqm:FuncDepValueRule .
    ?dqr dqm:testedClass ?tclass .
    ?dqr dqm:testedProperty1 ?tprop .
    ?dqr dqm:equals ?dvalue .
    ?tclass dqm:hasURI ?tclassreal .
    ?tprop dqm:hasURI ?tpropreal .
    ?dqr dqm:hasCondition1 ?cond1 .
    # Drop rules that define any further condition.
    MINUS { ?dqr dqm:hasCondition2 ?cond2 . }
    MINUS { ?dqr dqm:hasCondition3 ?cond3 . }
    MINUS { ?dqr dqm:hasCondition4 ?cond4 . }
    MINUS { ?dqr dqm:hasCondition5 ?cond5 . }
    ?cond1 dqm:conditionalProperty ?cprop1 .
    ?cprop1 dqm:hasURI ?cpropreal1 .
    ?cond1 dqm:equals ?condvalue1 .
    # Convert the dqm:hasURI values to IRIs so they can be used in class/predicate position.
    BIND (IRI(str(?tpropreal)) AS ?tpropURI) .
    BIND (IRI(str(?tclassreal)) AS ?tclassURI) .
    BIND (IRI(str(?cpropreal1)) AS ?cpropURI1) .
    ?i a ?tclassURI .
    # Condition: the instance carries the conditional property with the rule's value.
    ?i ?cpropURI1 ?condvalue1 .
    ?i ?tpropURI ?value .
    # Violation: the tested value's string form differs from the expected one.
    FILTER (str(?dvalue) != str(?value)) .
  } UNION {
    ?dqr a dqm:FuncDepValueRule .
    ?dqr dqm:testedClass ?tclass .
    ?dqr dqm:testedProperty1 ?tprop .
    ?tclass dqm:hasURI ?tclassreal .
    ?tprop dqm:hasURI ?tpropreal .
    ?dqr dqm:hasCondition1 ?cond1 .
    MINUS { ?dqr dqm:hasCondition2 ?cond2 . }
    MINUS { ?dqr dqm:hasCondition3 ?cond3 . }
    MINUS { ?dqr dqm:hasCondition4 ?cond4 . }
    MINUS { ?dqr dqm:hasCondition5 ?cond5 . }
    ?cond1 dqm:conditionalProperty ?cprop1 .
    ?cprop1 dqm:hasURI ?cpropreal1 .
    ?cond1 dqm:equals ?condvalue1 .
    BIND (IRI(str(?tpropreal)) AS ?tpropURI) .
    BIND (IRI(str(?tclassreal)) AS ?tclassURI) .
    BIND (IRI(str(?cpropreal1)) AS ?cpropURI1) .
    ?i2 a ?tclassURI .
    ?i2 ?cpropURI1 ?condvalue1 .
  }
}
GROUP BY ?dqr ?tclassURI