From SemWebQuality.org
Data Quality Scores can be used to get a quick impression about the quality state of a data source. Below you can find queries that can calculate data quality scores based on pre-defined data requirements on the fly.
Prerequisites
With the DQM-Vocabulary you can easily create Data Quality Scores. All you need is a SPARQL-Endpoint that supports SPARQL 1.1 or SPARQL extensions that cover datatype conversion and conversion of strings to URIs. Before you can automatically analyze your data for requirement violations, you need to perform the following steps:
Data Quality Score Calculations
Completeness (OWL DL)
Scenario: |
Calculate the Completeness of a property based on the ratio of all instances of a class and instances that miss a specific property or property value.
|
Input: |
All property completeness rules (in OWL DL Design)
|
Output: |
Completeness Scores per Property Completeness Requirement
|
# Completeness score per property completeness rule (OWL DL design).
#
# For every dqm:PropertyCompletenessRule that requires both the property and a
# value, this query counts
#   ?violations - instances of the tested class that either lack the tested
#                 property or carry the empty string "" as its value, and
#   ?total      - all instances of the tested class,
# and returns ?completeness = (?total - ?violations) / ?total.
#
# The rule stores class and property identifiers via dqm:hasURI; they are
# converted to IRIs with IRI(str(...)) before being matched against the data.
PREFIX dqm: <http://purl.org/dqm-vocabulary/v1.1/dqm#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>

SELECT ?dqr ?tclassURI ?tpropURI
       (COUNT(?s) AS ?violations)
       (COUNT(?s2) AS ?total)
       (((?total - ?violations) / ?total) AS ?completeness)
WHERE {
  {
    # Violation rows: only ?s is bound here, so COUNT(?s2) ignores them.
    ?dqr a dqm:PropertyCompletenessRule ;
         dqm:testedClass ?tclass ;
         dqm:testedProperty1 ?tprop ;
         dqm:requiredValue "true"^^xsd:boolean ;
         dqm:requiredProperty "true"^^xsd:boolean .
    ?tclass dqm:hasURI ?tclassreal .
    ?tprop dqm:hasURI ?tpropreal .
    BIND (IRI(str(?tpropreal)) AS ?tpropURI) .
    BIND (IRI(str(?tclassreal)) AS ?tclassURI) .
    {
      # Instance has no value at all for the tested property.
      ?s a ?tclassURI .
      FILTER NOT EXISTS {
        ?s ?tpropURI ?value .
      }
    } UNION {
      # Instance has the property, but its value is the empty string.
      ?s a ?tclassURI .
      ?s ?tpropURI "" .
    }
  } UNION {
    # Population rows: only ?s2 is bound here, so COUNT(?s) ignores them.
    ?dqr a dqm:PropertyCompletenessRule ;
         dqm:testedClass ?tclass ;
         dqm:testedProperty1 ?tprop ;
         dqm:requiredValue "true"^^xsd:boolean ;
         dqm:requiredProperty "true"^^xsd:boolean .
    ?tclass dqm:hasURI ?tclassreal .
    ?tprop dqm:hasURI ?tpropreal .
    BIND (IRI(str(?tpropreal)) AS ?tpropURI) .
    BIND (IRI(str(?tclassreal)) AS ?tclassURI) .
    ?s2 a ?tclassURI .
  }
}
GROUP BY ?dqr ?tclassURI ?tpropURI
Completeness (OWL Full)
Scenario: |
Calculate the Completeness of a property based on the ratio of all instances of a class and instances that miss a specific property or property value.
|
Input: |
All property completeness rules (in OWL Full Design)
|
Output: |
Completeness Scores per Property Completeness Requirement
|
# Completeness score per property completeness rule (OWL Full design).
#
# For every dqm:PropertyCompletenessRule that requires both the property and a
# value, this query counts
#   ?violations - instances of the tested class that either lack the tested
#                 property or carry the empty string "" as its value, and
#   ?total      - all instances of the tested class,
# and returns ?completeness = (?total - ?violations) / ?total.
#
# Here the objects of dqm:hasURI are used directly as the class and property
# terms, so no IRI(str(...)) conversion is applied. ?tclassURI must not be
# assigned again with BIND: SPARQL 1.1 rejects a BIND whose target variable is
# already used earlier in the same group graph pattern.
PREFIX dqm: <http://purl.org/dqm-vocabulary/v1.1/dqm#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>

SELECT ?dqr ?tclassURI ?tpropURI
       (COUNT(?s) AS ?violations)
       (COUNT(?s2) AS ?total)
       (((?total - ?violations) / ?total) AS ?completeness)
WHERE {
  {
    # Violation rows: only ?s is bound here, so COUNT(?s2) ignores them.
    ?dqr a dqm:PropertyCompletenessRule ;
         dqm:testedClass ?tclass ;
         dqm:testedProperty1 ?tprop ;
         dqm:requiredValue "true"^^xsd:boolean ;
         dqm:requiredProperty "true"^^xsd:boolean .
    ?tclass dqm:hasURI ?tclassURI .
    ?tprop dqm:hasURI ?tpropURI .
    {
      # Instance has no value at all for the tested property.
      ?s a ?tclassURI .
      FILTER NOT EXISTS {
        ?s ?tpropURI ?value .
      }
    } UNION {
      # Instance has the property, but its value is the empty string.
      ?s a ?tclassURI .
      ?s ?tpropURI "" .
    }
  } UNION {
    # Population rows: only ?s2 is bound here, so COUNT(?s) ignores them.
    ?dqr a dqm:PropertyCompletenessRule ;
         dqm:testedClass ?tclass ;
         dqm:testedProperty1 ?tprop ;
         dqm:requiredValue "true"^^xsd:boolean ;
         dqm:requiredProperty "true"^^xsd:boolean .
    ?tclass dqm:hasURI ?tclassURI .
    ?tprop dqm:hasURI ?tpropURI .
    ?s2 a ?tclassURI .
  }
}
GROUP BY ?dqr ?tclassURI ?tpropURI
Uniqueness in Depth
Scenario: |
Calculate the uniqueness of a property based on the ratio of all instances of a class and instances that have non-unique property values.
|
Input: |
All unique value rules (in OWL DL Design)
|
Output: |
Uniqueness Scores per unique value rule
|
# Uniqueness-in-depth score per unique value rule (OWL DL design).
#
# For every dqm:UniqueValueRule this query counts
#   ?violations - instances of the tested class whose value for the tested
#                 property (compared by its string form) also occurs on a
#                 different subject, and
#   ?total      - all instances of the tested class,
# and returns ?uniquenessDepth = (?total - ?violations) / ?total.
#
# Each offending instance is counted once via COUNT(DISTINCT ?i). Counting
# distinct instances, rather than halving the number of matching pairs, keeps
# ?violations <= ?total when more than two instances share a value, so the
# score cannot become negative.
PREFIX dqm: <http://purl.org/dqm-vocabulary/v1.1/dqm#>

SELECT ?dqr ?tclassURI ?tpropURI
       (COUNT(DISTINCT ?i) AS ?violations)
       (COUNT(?i3) AS ?total)
       (((?total - ?violations) / ?total) AS ?uniquenessDepth)
WHERE {
  {
    # Violation rows: ?i is bound, ?i3 is not.
    ?dqr a dqm:UniqueValueRule ;
         dqm:testedClass ?tclass ;
         dqm:testedProperty1 ?tprop .
    ?tclass dqm:hasURI ?tclassreal .
    ?tprop dqm:hasURI ?tpropreal .
    BIND (IRI(str(?tpropreal)) AS ?tpropURI) .
    BIND (IRI(str(?tclassreal)) AS ?tclassURI) .
    ?i a ?tclassURI .
    ?i ?tpropURI ?uniqueValue1 .
    # NOTE(review): ?i2 is not restricted to the tested class, so a value
    # shared with a subject of another class also counts - confirm intent.
    ?i2 ?tpropURI ?uniqueValue2 .
    FILTER (?i != ?i2 && (str(?uniqueValue1) = str(?uniqueValue2)))
  } UNION {
    # Population rows: ?i3 is bound, ?i is not.
    ?dqr a dqm:UniqueValueRule ;
         dqm:testedClass ?tclass ;
         dqm:testedProperty1 ?tprop .
    ?tclass dqm:hasURI ?tclassreal .
    ?tprop dqm:hasURI ?tpropreal .
    BIND (IRI(str(?tpropreal)) AS ?tpropURI) .
    BIND (IRI(str(?tclassreal)) AS ?tclassURI) .
    ?i3 a ?tclassURI .
  }
}
GROUP BY ?dqr ?tclassURI ?tpropURI
Uniqueness in Scope
Scenario: |
Calculate the uniqueness of class instances based on the ratio of all instances of a class and duplicate instances.
|
Input: |
All duplicate instances rules (Example case: 3 tested Properties, OWL DL Design)
|
Output: |
Uniqueness in scope scores for classes
|
# Uniqueness-in-scope score per duplicate instance rule
# (example with three tested properties, OWL DL design).
#
# For every dqm:DuplicateInstanceRule this query counts
#   ?violations - instances of the tested class for which a different subject
#                 has the same values (compared by string form) on all three
#                 tested properties, and
#   ?total      - all instances of the tested class,
# and returns ?uniquenessScope = (?total - ?violations) / ?total.
#
# Each offending instance is counted once via COUNT(DISTINCT ?i). Counting
# distinct instances, rather than halving the number of matching pairs, keeps
# ?violations <= ?total when more than two instances are duplicates of each
# other, so the score cannot become negative.
PREFIX dqm: <http://purl.org/dqm-vocabulary/v1.1/dqm#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>

SELECT ?dqr ?tclassURI
       (COUNT(DISTINCT ?i) AS ?violations)
       (COUNT(?i3) AS ?total)
       (((?total - ?violations) / ?total) AS ?uniquenessScope)
WHERE {
  {
    # Violation rows: ?i is bound, ?i3 is not.
    ?dqr a dqm:DuplicateInstanceRule ;
         dqm:testedClass ?tclass .
    ?tclass dqm:hasURI ?tclassreal .
    BIND (IRI(str(?tclassreal)) AS ?tclassURI) .
    ?dqr dqm:testedProperty1 ?tprop1 .
    ?tprop1 dqm:hasURI ?tpropreal1 .
    BIND (IRI(str(?tpropreal1)) AS ?tpropURI1) .
    ?dqr dqm:testedProperty2 ?tprop2 .
    ?tprop2 dqm:hasURI ?tpropreal2 .
    BIND (IRI(str(?tpropreal2)) AS ?tpropURI2) .
    ?dqr dqm:testedProperty3 ?tprop3 .
    ?tprop3 dqm:hasURI ?tpropreal3 .
    BIND (IRI(str(?tpropreal3)) AS ?tpropURI3) .
    ?i a ?tclassURI .
    # NOTE(review): ?i2 is not restricted to the tested class - confirm intent.
    ?i  ?tpropURI1 ?value11 .
    ?i2 ?tpropURI1 ?value12 .
    ?i  ?tpropURI2 ?value21 .
    ?i2 ?tpropURI2 ?value22 .
    ?i  ?tpropURI3 ?value31 .
    ?i2 ?tpropURI3 ?value32 .
    FILTER (?i != ?i2
            && str(?value11) = str(?value12)
            && str(?value21) = str(?value22)
            && str(?value31) = str(?value32))
  } UNION {
    # Population rows: ?i3 is bound, ?i is not. The three tested properties
    # are still required so that the same set of rules is matched.
    ?dqr a dqm:DuplicateInstanceRule ;
         dqm:testedClass ?tclass .
    ?tclass dqm:hasURI ?tclassreal .
    BIND (IRI(str(?tclassreal)) AS ?tclassURI) .
    ?dqr dqm:testedProperty1 ?tprop1 .
    ?tprop1 dqm:hasURI ?tpropreal1 .
    BIND (IRI(str(?tpropreal1)) AS ?tpropURI1) .
    ?dqr dqm:testedProperty2 ?tprop2 .
    ?tprop2 dqm:hasURI ?tpropreal2 .
    BIND (IRI(str(?tpropreal2)) AS ?tpropURI2) .
    ?dqr dqm:testedProperty3 ?tprop3 .
    ?tprop3 dqm:hasURI ?tpropreal3 .
    BIND (IRI(str(?tpropreal3)) AS ?tpropURI3) .
    ?i3 a ?tclassURI .
  }
}
GROUP BY ?dqr ?tclassURI
Syntactic Accuracy
Based on Syntax Rules
Scenario: |
Calculate Syntactic Accuracy of a property based on the ratio of all instances of a class and instances that have property values with syntax rule violations.
|
Input: |
All syntax rules (in OWL DL Design)
|
Output: |
Score for Syntactic Accuracy per syntax rule
|
# Syntactic accuracy score per syntax rule (OWL DL design).
#
# For every dqm:SyntaxRule this query counts
#   ?violations - instances of the tested class that have at least one value
#                 of the tested property whose string form does not match the
#                 rule's regular expression (dqm:regex), and
#   ?total      - all instances of the tested class,
# and returns ?syntacticAccuracy = (?total - ?violations) / ?total.
#
# COUNT(DISTINCT ?i) counts an instance once even when several of its values
# violate the pattern, so ?violations cannot exceed ?total.
PREFIX dqm: <http://purl.org/dqm-vocabulary/v1.1/dqm#>

SELECT ?dqr ?tclassURI ?tpropURI
       (COUNT(DISTINCT ?i) AS ?violations)
       (COUNT(?i2) AS ?total)
       (((?total - ?violations) / ?total) AS ?syntacticAccuracy)
WHERE {
  {
    # Violation rows: ?i is bound, ?i2 is not.
    ?dqr a dqm:SyntaxRule ;
         dqm:testedClass ?tclass ;
         dqm:testedProperty1 ?tprop ;
         dqm:regex ?regex .
    ?tclass dqm:hasURI ?tclassreal .
    ?tprop dqm:hasURI ?tpropreal .
    BIND (IRI(str(?tpropreal)) AS ?tpropURI) .
    BIND (IRI(str(?tclassreal)) AS ?tclassURI) .
    ?i a ?tclassURI .
    ?i ?tpropURI ?value .
    FILTER (!regex(str(?value), ?regex)) .
  } UNION {
    # Population rows: ?i2 is bound, ?i is not.
    ?dqr a dqm:SyntaxRule ;
         dqm:testedClass ?tclass ;
         dqm:testedProperty1 ?tprop ;
         dqm:regex ?regex .
    ?tclass dqm:hasURI ?tclassreal .
    ?tprop dqm:hasURI ?tpropreal .
    BIND (IRI(str(?tpropreal)) AS ?tpropURI) .
    BIND (IRI(str(?tclassreal)) AS ?tclassURI) .
    ?i2 a ?tclassURI .
  }
}
GROUP BY ?dqr ?tclassURI ?tpropURI
Based on Legal Value Rules
Scenario: |
Calculate Syntactic Accuracy of a property based on the ratio of all instances of a class and instances that have property values that are not listed as legal values.
|
Input: |
All legal value rules (in OWL DL Design)
|
Output: |
Score for Syntactic Accuracy per legal value rule
|
# Syntactic accuracy score per legal value rule (OWL DL design).
#
# For every dqm:LegalValueRule this query counts
#   ?violations - instances of the tested class that have at least one value
#                 of the tested property which does not occur (compared by
#                 string form) as a value of the reference property on any
#                 instance of the reference class, and
#   ?total      - all instances of the tested class,
# and returns ?syntacticAccuracy = (?total - ?violations) / ?total.
#
# The "no matching legal value" test is written as FILTER NOT EXISTS, and the
# reference instance has its own variable (?ref) so it cannot be confused with
# the ?i2 counter of the population branch. COUNT(DISTINCT ?i) counts an
# instance once even when several of its values are illegal.
PREFIX dqm: <http://purl.org/dqm-vocabulary/v1.1/dqm#>

SELECT ?dqr ?tclassURI ?tpropURI
       (COUNT(DISTINCT ?i) AS ?violations)
       (COUNT(?i2) AS ?total)
       (((?total - ?violations) / ?total) AS ?syntacticAccuracy)
WHERE {
  {
    # Violation rows: ?i is bound, ?i2 is not.
    ?dqr a dqm:LegalValueRule ;
         dqm:testedClass ?tclass ;
         dqm:testedProperty1 ?tprop .
    ?tclass dqm:hasURI ?tclassreal .
    ?tprop dqm:hasURI ?tpropreal .
    ?dqr dqm:referenceClass ?rclass ;
         dqm:referenceProperty1 ?rprop .
    ?rclass dqm:hasURI ?rclassvalue .
    ?rprop dqm:hasURI ?rpropvalue .
    BIND (IRI(str(?tpropreal)) AS ?tpropURI) .
    BIND (IRI(str(?tclassreal)) AS ?tclassURI) .
    BIND (IRI(str(?rpropvalue)) AS ?rpropURI) .
    BIND (IRI(str(?rclassvalue)) AS ?rclassURI) .
    ?i a ?tclassURI .
    ?i ?tpropURI ?value .
    # Keep the row only if no reference instance lists this value.
    FILTER NOT EXISTS {
      ?ref a ?rclassURI .
      ?ref ?rpropURI ?legalValue .
      FILTER (str(?legalValue) = str(?value)) .
    }
  } UNION {
    # Population rows: ?i2 is bound, ?i is not.
    ?dqr a dqm:LegalValueRule ;
         dqm:testedClass ?tclass ;
         dqm:testedProperty1 ?tprop .
    ?tclass dqm:hasURI ?tclassreal .
    ?tprop dqm:hasURI ?tpropreal .
    BIND (IRI(str(?tpropreal)) AS ?tpropURI) .
    BIND (IRI(str(?tclassreal)) AS ?tclassURI) .
    ?i2 a ?tclassURI .
  }
}
GROUP BY ?dqr ?tclassURI ?tpropURI
Based on Legal Value Range Rules
Scenario: |
Calculate Syntactic Accuracy of a property based on the ratio of all instances of a class and instances that have property values that are out of a legal range.
|
Input: |
All legal value range rules (in OWL DL Design)
|
Output: |
Score for Syntactic Accuracy per legal value range rule
|
# Syntactic accuracy score per legal value range rule (OWL DL design).
#
# For every dqm:LegalValueRangeRule this query counts
#   ?violations - instances of the tested class that have at least one value
#                 of the tested property which, read as xsd:float, is above
#                 dqm:upperLimit or below dqm:lowerLimit, and
#   ?total      - all instances of the tested class,
# and returns ?syntacticAccuracy = (?total - ?violations) / ?total.
#
# Both limits are optional. A comparison against an unbound limit raises an
# error; in SPARQL "error || true" is true and "error || false" is an error,
# which FILTER treats as false. A missing limit therefore never produces a
# violation by itself, while the other limit is still checked.
# COUNT(DISTINCT ?i) counts an instance once even when several of its values
# are out of range.
PREFIX dqm: <http://purl.org/dqm-vocabulary/v1.1/dqm#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>

SELECT ?dqr ?tclassURI ?tpropURI
       (COUNT(DISTINCT ?i) AS ?violations)
       (COUNT(?i2) AS ?total)
       (((?total - ?violations) / ?total) AS ?syntacticAccuracy)
WHERE {
  {
    # Violation rows: ?i is bound, ?i2 is not.
    ?dqr a dqm:LegalValueRangeRule ;
         dqm:testedClass ?tclass ;
         dqm:testedProperty1 ?tprop .
    OPTIONAL {
      ?dqr dqm:upperLimit ?upperLimit .
    }
    OPTIONAL {
      ?dqr dqm:lowerLimit ?lowerLimit .
    }
    ?tclass dqm:hasURI ?tclassreal .
    ?tprop dqm:hasURI ?tpropreal .
    BIND (IRI(str(?tpropreal)) AS ?tpropURI) .
    BIND (IRI(str(?tclassreal)) AS ?tclassURI) .
    ?i a ?tclassURI .
    ?i ?tpropURI ?value .
    FILTER (STRDT(str(?value), xsd:float) > ?upperLimit ||
            STRDT(str(?value), xsd:float) < ?lowerLimit) .
  } UNION {
    # Population rows: ?i2 is bound, ?i is not.
    ?dqr a dqm:LegalValueRangeRule ;
         dqm:testedClass ?tclass ;
         dqm:testedProperty1 ?tprop .
    ?tclass dqm:hasURI ?tclassreal .
    ?tprop dqm:hasURI ?tpropreal .
    BIND (IRI(str(?tpropreal)) AS ?tpropURI) .
    BIND (IRI(str(?tclassreal)) AS ?tclassURI) .
    ?i2 a ?tclassURI .
  }
}
GROUP BY ?dqr ?tclassURI ?tpropURI
Semantic Accuracy
Based on Functional Dependent Value Rule (1 Condition)
Scenario: |
Calculate an approximation of Semantic Accuracy of an instance regarding the value of a certain property based on the ratio of all instances of a class and instances that have property value combinations that are incorrect.
|
Input: |
All functional dependent value rules (in OWL DL Design)
|
Output: |
Score for Semantic Accuracy per functional dependent value rule
|
# Semantic accuracy score per functional dependent value rule with exactly
# one condition (OWL DL design).
#
# For every dqm:FuncDepValueRule that has dqm:hasCondition1 but none of
# dqm:hasCondition2 .. dqm:hasCondition5, this query counts
#   ?violations - instances of the tested class that satisfy the condition
#                 (conditional property equals the condition value) and have
#                 at least one value of the tested property whose string form
#                 differs from the rule's expected value (dqm:equals), and
#   ?total      - instances of the tested class that satisfy the condition,
# and returns ?semanticAccuracy = (?total - ?violations) / ?total.
#
# COUNT(DISTINCT ?i) counts an instance once even when several of its values
# differ from the expected value.
PREFIX dqm: <http://purl.org/dqm-vocabulary/v1.1/dqm#>

SELECT ?dqr ?tclassURI
       (COUNT(DISTINCT ?i) AS ?violations)
       (COUNT(?i2) AS ?total)
       (((?total - ?violations) / ?total) AS ?semanticAccuracy)
WHERE {
  {
    # Violation rows: ?i is bound, ?i2 is not.
    ?dqr a dqm:FuncDepValueRule ;
         dqm:testedClass ?tclass ;
         dqm:testedProperty1 ?tprop ;
         dqm:equals ?dvalue .
    ?tclass dqm:hasURI ?tclassreal .
    ?tprop dqm:hasURI ?tpropreal .
    ?dqr dqm:hasCondition1 ?cond1 .
    # Drop rules that declare further conditions; they are out of scope here.
    MINUS { ?dqr dqm:hasCondition2 ?cond2 . }
    MINUS { ?dqr dqm:hasCondition3 ?cond3 . }
    MINUS { ?dqr dqm:hasCondition4 ?cond4 . }
    MINUS { ?dqr dqm:hasCondition5 ?cond5 . }
    ?cond1 dqm:conditionalProperty ?cprop1 .
    ?cprop1 dqm:hasURI ?cpropreal1 .
    ?cond1 dqm:equals ?condvalue1 .
    BIND (IRI(str(?tpropreal)) AS ?tpropURI) .
    BIND (IRI(str(?tclassreal)) AS ?tclassURI) .
    BIND (IRI(str(?cpropreal1)) AS ?cpropURI1) .
    ?i a ?tclassURI .
    ?i ?cpropURI1 ?condvalue1 .
    ?i ?tpropURI ?value .
    FILTER (str(?dvalue) != str(?value)) .
  } UNION {
    # Population rows: ?i2 is bound, ?i is not.
    ?dqr a dqm:FuncDepValueRule ;
         dqm:testedClass ?tclass ;
         dqm:testedProperty1 ?tprop .
    ?tclass dqm:hasURI ?tclassreal .
    ?tprop dqm:hasURI ?tpropreal .
    ?dqr dqm:hasCondition1 ?cond1 .
    MINUS { ?dqr dqm:hasCondition2 ?cond2 . }
    MINUS { ?dqr dqm:hasCondition3 ?cond3 . }
    MINUS { ?dqr dqm:hasCondition4 ?cond4 . }
    MINUS { ?dqr dqm:hasCondition5 ?cond5 . }
    ?cond1 dqm:conditionalProperty ?cprop1 .
    ?cprop1 dqm:hasURI ?cpropreal1 .
    ?cond1 dqm:equals ?condvalue1 .
    BIND (IRI(str(?tpropreal)) AS ?tpropURI) .
    BIND (IRI(str(?tclassreal)) AS ?tclassURI) .
    BIND (IRI(str(?cpropreal1)) AS ?cpropURI1) .
    ?i2 a ?tclassURI .
    ?i2 ?cpropURI1 ?condvalue1 .
  }
}
GROUP BY ?dqr ?tclassURI
Timeliness
Based on Expiry Rules
Scenario: |
Calculate Timeliness of class instances based on the ratio of all instances of a class and expired instances.
|
Input: |
All expiry rules (in OWL DL Design)
|
Output: |
Score for Timeliness per expiry rule
|
# Timeliness score per expiry rule (OWL DL design).
#
# For every dqm:ExpiryRule this query counts
#   ?violations - instances of the tested class that have at least one value
#                 of the tested (expiry) property that is earlier than the
#                 current time returned by now(), and
#   ?total      - all instances of the tested class,
# and returns ?timeliness = (?total - ?violations) / ?total.
#
# COUNT(DISTINCT ?i) counts an instance once even when it carries several
# expired values.
PREFIX dqm: <http://purl.org/dqm-vocabulary/v1.1/dqm#>

SELECT ?dqr ?tclassURI
       (COUNT(DISTINCT ?i) AS ?violations)
       (COUNT(?i2) AS ?total)
       (((?total - ?violations) / ?total) AS ?timeliness)
WHERE {
  {
    # Violation rows: ?i is bound, ?i2 is not.
    ?dqr a dqm:ExpiryRule ;
         dqm:testedClass ?tclass ;
         dqm:testedProperty1 ?tprop .
    ?tclass dqm:hasURI ?tclassreal .
    ?tprop dqm:hasURI ?tpropreal .
    BIND (IRI(str(?tpropreal)) AS ?tpropURI) .
    BIND (IRI(str(?tclassreal)) AS ?tclassURI) .
    BIND ((now()) AS ?today)
    ?i a ?tclassURI .
    ?i ?tpropURI ?expiry .
    FILTER (?expiry < ?today)
  } UNION {
    # Population rows: ?i2 is bound, ?i is not.
    ?dqr a dqm:ExpiryRule ;
         dqm:testedClass ?tclass ;
         dqm:testedProperty1 ?tprop .
    ?tclass dqm:hasURI ?tclassreal .
    ?tprop dqm:hasURI ?tpropreal .
    BIND (IRI(str(?tpropreal)) AS ?tpropURI) .
    BIND (IRI(str(?tclassreal)) AS ?tclassURI) .
    ?i2 a ?tclassURI .
  }
}
GROUP BY ?dqr ?tclassURI
Based on Update Rules
Please Note: This rule requires the SPARQL extension dqf:requiredTimestamp in order to work.
Scenario: |
Calculate Timeliness of class instances based on the ratio of all instances of a class and instances that have not been updated in a specific period of time.
|
Input: |
All update rules (in OWL DL Design) and the timestamps of the tested instances
|
Output: |
Score for Timeliness per update rule
|
# Timeliness score per update rule (OWL DL design).
#
# For every dqm:UpdateRule this query counts
#   ?violations - instances of the tested class that have at least one value
#                 of the tested (timestamp) property that is earlier than the
#                 timestamp returned by dqf:requiredTimestamp for the rule's
#                 dqm:expectedUpdateInterval, and
#   ?total      - all instances of the tested class,
# and returns ?timeliness = (?total - ?violations) / ?total.
#
# COUNT(DISTINCT ?i) counts an instance once even when it carries several
# outdated timestamps.
#
# NOTE(review): dqf:requiredTimestamp is an endpoint-specific extension
# function. The dqf: prefix must also be declared (or be predefined by the
# endpoint); its namespace IRI is not known here, so it is not declared below.
PREFIX dqm: <http://purl.org/dqm-vocabulary/v1.1/dqm#>

SELECT ?dqr ?tclassURI
       (COUNT(DISTINCT ?i) AS ?violations)
       (COUNT(?i2) AS ?total)
       (((?total - ?violations) / ?total) AS ?timeliness)
WHERE {
  {
    # Violation rows: ?i is bound, ?i2 is not.
    ?dqr a dqm:UpdateRule ;
         dqm:testedClass ?tclass ;
         dqm:testedProperty1 ?tprop ;
         dqm:expectedUpdateInterval ?duration .
    ?tclass dqm:hasURI ?tclassreal .
    ?tprop dqm:hasURI ?tpropreal .
    BIND (IRI(str(?tpropreal)) AS ?tpropURI) .
    BIND (IRI(str(?tclassreal)) AS ?tclassURI) .
    BIND ((dqf:requiredTimestamp(?duration)) AS ?reqTimestamp) .
    ?i a ?tclassURI .
    ?i ?tpropURI ?timestamp .
    FILTER (?reqTimestamp > ?timestamp)
  } UNION {
    # Population rows: ?i2 is bound, ?i is not.
    ?dqr a dqm:UpdateRule ;
         dqm:testedClass ?tclass ;
         dqm:testedProperty1 ?tprop .
    ?tclass dqm:hasURI ?tclassreal .
    ?tprop dqm:hasURI ?tpropreal .
    BIND (IRI(str(?tpropreal)) AS ?tpropURI) .
    BIND (IRI(str(?tclassreal)) AS ?tclassURI) .
    ?i2 a ?tclassURI .
  }
}
GROUP BY ?dqr ?tclassURI