Help
Difference between revisions of "SPARQL for maintenance"
SPARQL for maintenance gathers SPARQL queries that help monitor errors and bring fixes to our resources.
Line 435: | Line 435: | ||
</query> | </query> | ||
|} | |} | ||
+ | |||
+ | === [?] Recordings → With corrupted information === | ||
+ | :''✅ Users linguistic profile has been corrected.'' | ||
+ | {| style="width:100%" | ||
+ | |- style="vertical-align:top;" | ||
+ | |style="padding: 0 3em;width:60%"| | ||
+ | <syntaxhighlight lang="sparql"> | ||
+ | #defaultEndpoint:Lingualibre | ||
+ | SELECT ?lang | ||
+ | (SUBSTR(STR(?speaker),32) AS ?SpeakerQid) ?speakerLabel | ||
+ | ?audio ?audioLabel | ||
+ | ?url (SUBSTR(STR(?urlLabel),52) AS ?filename) | ||
+ | WHERE { | ||
+ | # Purpose: list recordings whose P4 language is one of the obsolete language items below | ||
+ | # VALUES ?i18nLabel { "Chinese" "Mossi" "Duala" } | ||
+ | # VALUES ?i18nScript { "zho" "mos" "dua" } | ||
+ | VALUES ?i18nQid { entity:Q130 entity:Q170137 entity:Q52071 } | ||
+ | # Chinese: replacement Q113, obsolete Q130 | ||
+ | # Mossi: replacement Q359, obsolete Q170137 | ||
+ | # Duala: replacement Q73, obsolete Q52071 | ||
+ | |||
+ | { ?audio prop:P4 ?i18nQid . } # Filter: P4 language is one of Q130/Q170137/Q52071 (the VALUES list above) | ||
+ | # ?audio prop:P4 entity:Q170137 . # Filter: P4 language is Q170137 (redirect) ; | ||
+ | ?audio prop:P2 entity:Q2 . # Filter: audios | ||
+ | ?audio prop:P4 ?lang . | ||
+ | ?audio prop:P5 ?speaker . | ||
+ | ?audio prop:P3 ?url . | ||
+ | # ?lang prop:P13 ?iso . | ||
+ | # Add labels for the ...Label variables used in SELECT | ||
+ | SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" . } | ||
+ | } | ||
+ | # ORDER BY ASC(?audio) | ||
+ | # GROUP BY ?lang ?SpeakerQid ?speakerLabel | ||
+ | </syntaxhighlight> | ||
+ | || | ||
+ | <query _pagination="5"> | ||
+ | #defaultEndpoint:Lingualibre | ||
+ | SELECT ?lang | ||
+ | (SUBSTR(STR(?speaker),32) AS ?SpeakerQid) ?speakerLabel | ||
+ | ?audio ?audioLabel | ||
+ | ?url (SUBSTR(STR(?urlLabel),52) AS ?filename) | ||
+ | WHERE { | ||
+ | # Purpose: list recordings whose P4 language is one of the obsolete language items below | ||
+ | # VALUES ?i18nLabel { "Chinese" "Mossi" "Duala" } | ||
+ | # VALUES ?i18nScript { "zho" "mos" "dua" } | ||
+ | VALUES ?i18nQid { entity:Q130 entity:Q170137 entity:Q52071 } | ||
+ | # Chinese: replacement Q113, obsolete Q130 | ||
+ | # Mossi: replacement Q359, obsolete Q170137 | ||
+ | # Duala: replacement Q73, obsolete Q52071 | ||
+ | |||
+ | { ?audio prop:P4 ?i18nQid . } # Filter: P4 language is one of Q130/Q170137/Q52071 (the VALUES list above) | ||
+ | # ?audio prop:P4 entity:Q170137 . # Filter: P4 language is Q170137 (redirect) ; | ||
+ | ?audio prop:P2 entity:Q2 . # Filter: audios | ||
+ | ?audio prop:P4 ?lang . | ||
+ | ?audio prop:P5 ?speaker . | ||
+ | ?audio prop:P3 ?url . | ||
+ | # ?lang prop:P13 ?iso . | ||
+ | # Add labels for the ...Label variables used in SELECT | ||
+ | SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" . } | ||
+ | } | ||
+ | # ORDER BY ASC(?audio) | ||
+ | # GROUP BY ?lang ?SpeakerQid ?speakerLabel | ||
+ | </query> | ||
== Languages == | == Languages == |
Revision as of 08:13, 3 October 2023
Speakers
✅ Speakers → Username containing reserved sign `-`
- See also T297635: Lingualibre filename separator should be mass migrated from `-` to `—`, without breaking usernames or words.
#defaultEndpoint:Lingualibre
# Lists every speaker item (P2 = Q3) with its label and a boolean flag that is
# true when the label contains the "-" character. Flagged rows are sorted first.
SELECT *
WHERE {
  # Restrict to items typed as speakers.
  ?id prop:P2 entity:Q3 .

  # Ask the label service to bind the item's label to ?name.
  SERVICE wikibase:label {
    bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" .
    ?id rdfs:label ?name .
  }

  # true when the label contains the filename field separator.
  BIND ( REGEX( STR(?name), "-" ) AS ?has_fields_separator_in_username )
}
ORDER BY DESC(?has_fields_separator_in_username)
|
|
#defaultEndpoint:Lingualibre
# Counts speaker items (P2 = Q3) per value of the "label contains '-'" flag.
SELECT
  ?has_fields_separator_in_username
  (COUNT(?has_fields_separator_in_username) AS ?found)
WHERE {
  # Restrict to items typed as speakers.
  ?id prop:P2 entity:Q3 .

  # Ask the label service to bind the item's label to ?name.
  SERVICE wikibase:label {
    bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" .
    ?id rdfs:label ?name .
  }

  # true when the label contains the filename field separator.
  BIND ( REGEX( STR(?name), "-" ) AS ?has_fields_separator_in_username )
}
GROUP BY ?has_fields_separator_in_username
|
|
✅ Recordings → Gascon `Q930` recordings containing reserved sign `-`
- See also T297635: Lingualibre filename separator should be mass migrated from `-` to `—`, without breaking usernames or words.
#defaultEndpoint:Lingualibre
# Lists recordings (P2 = Q2) whose language (P4) is Q930, with each label and a
# boolean flag that is true when the label contains "-". Flagged rows come first.
SELECT *
WHERE {
  # Restrict to recordings in language Q930.
  ?id prop:P2 entity:Q2 ;
      prop:P4 entity:Q930 .

  # Ask the label service to bind the item's label to ?name.
  SERVICE wikibase:label {
    bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" .
    ?id rdfs:label ?name .
  }

  # true when the label contains the filename field separator.
  BIND ( REGEX( STR(?name), "-" ) AS ?has_fields_separator_in_written_word )
}
ORDER BY DESC(?has_fields_separator_in_written_word)
|
|
#defaultEndpoint:Lingualibre
# Counts recordings (P2 = Q2) in language Q930 (P4) per value of the
# "label contains '-'" flag.
# The projected and counted variable must be the one that is bound and grouped
# below (?has_fields_separator_in_written_word). Projecting a different,
# ungrouped variable is not a valid aggregate query.
SELECT
  ?has_fields_separator_in_written_word
  (COUNT(?has_fields_separator_in_written_word) AS ?found)
WHERE {
  # Restrict to recordings in language Q930.
  ?id prop:P2 entity:Q2 .
  ?id prop:P4 entity:Q930 .

  # Ask the label service to bind the item's label to ?name.
  SERVICE wikibase:label {
    bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" .
    ?id rdfs:label ?name .
  }

  # true when the label contains the filename field separator.
  BIND (regex(STR(?name),"-") AS ?has_fields_separator_in_written_word)
}
GROUP BY ?has_fields_separator_in_written_word
|
|
✅ Speakers → Speakers of `zho` to change to `cmn`
- Should be kept at 0.
#defaultEndpoint:Lingualibre
# Lists speakers (P2 = Q3) that declare language Q130 (zho) through P4, together
# with every P4 language they declare and that language's P13 value (?iso).
# The computed column is named ?SpeakerQid because a SELECT expression may not
# assign to ?speaker, which is already bound in the WHERE clause.
SELECT ?langLabel ?isoLabel (SUBSTR(STR(?speaker),32) AS ?SpeakerQid) ?speakerLabel
WHERE {
  ?speaker prop:P2 entity:Q3 .    # Filter: speakers
  ?speaker prop:P4 entity:Q130 .  # Filter: P4 language is Q130 zho
  ?speaker prop:P4 ?lang .        # Every language declared by that speaker
  ?lang prop:P13 ?iso .
  # SUBSTR(..., 32) drops the first 31 characters of the speaker IRI,
  # presumably the entity namespace, leaving the Qid (TODO confirm prefix length).
  # Add Labels
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" . }
}
ORDER BY ASC(?isoLabel)
# Helper: Chinese writing zho:Q130/wd:Q7850 ; Chinese mandarin cmn:Q113/wd:Q9192 .
|
|
✅ Speakers → Undefined place of residence
- The following entries may need to be made more precise. If the list stays empty, it means none was found.
#defaultEndpoint:Lingualibre
# Lists speakers (P2 = Q3) that have no P14 `place of residence` statement.
SELECT ?item ?itemLabel
WHERE {
  ?item prop:P2 entity:Q3 .                          # Filter: is Q3 `speaker`
  # A triple pattern needs an object, so "missing P14" is expressed as the
  # absence of any P14 statement on the item.
  FILTER NOT EXISTS { ?item prop:P14 ?residence . }  # Filter: missing P14 `place of residence`
  # Add labels to each variable used.
  SERVICE wikibase:label {
    bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" .
  }
}
|
|
✅ Speakers → Non-documented genders values
- This query returns speakers who may have willfully declined to document a gender. See also list of genders.
#defaultEndpoint:Lingualibre
# Lists speakers (P2 = Q3) that have Q710460 ('not documented') as a gender (P8)
# value, together with every gender value recorded on them.
SELECT ?item ?itemLabel ?filledGenderLabel
WHERE {
  ?item prop:P2 entity:Q3 ;
        prop:P8 entity:Q710460 ;   # Filter: 'gender' is 'not documented'
        prop:P8 ?filledGender .    # Every P8 value on the same item

  # Resolve the ...Label variables used in SELECT.
  SERVICE wikibase:label {
    bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" .
  }
}
|
|
✅ Speakers → missing gender value
- This query returns speakers whose gender value is missing. See also list of genders.
#defaultEndpoint:Lingualibre
# Counts recordings per speaker and per language for speakers (P2 = Q3) that
# have no gender (P8) statement, largest counts first.
# ?gender is not projected: it only occurs inside FILTER NOT EXISTS, so it is
# never bound in the results. The count is named ?audioCount because a SELECT
# expression may not assign to ?audio, which is already bound in WHERE.
SELECT ?speaker ?speakerLabel ?languageLabel (COUNT(?audio) AS ?audioCount)
WHERE {
  ?speaker prop:P2 entity:Q3 .
  FILTER NOT EXISTS { ?speaker prop:P8 ?gender . }
  ?audio prop:P5 ?speaker .   # P5 'speaker' links the recording to its speaker
  ?audio prop:P4 ?language .  # P4 'language' of the recording
  # Add labels
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" . }
}
GROUP BY ?speaker ?speakerLabel ?languageLabel  # One row per speaker and language
ORDER BY DESC(?audioCount)
|
|
Recordings
✅ Recordings → With language `P4` as zho `Q130` to change to cmn `Q113`
#defaultEndpoint:Lingualibre
# Lists recordings (P2 = Q2) whose language (P4) is Q130 (zho), with the language
# label and the label of its P13 value, ordered by the latter.
SELECT ?langLabel ?isoLabel ?audio ?audioLabel
WHERE {
  ?audio prop:P2 entity:Q2 ;     # Filter: audios
         prop:P4 entity:Q130 ;   # Filter: P4 language is Q130 zho
         prop:P4 ?lang .         # Every P4 value on the same recording
  ?lang prop:P13 ?iso .

  # Resolve the ...Label variables used in SELECT.
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" . }
}
ORDER BY ASC(?isoLabel)
# Helper: Chinese writing zho:Q130/wd:Q7850 ; Chinese mandarin cmn:Q113/wd:Q9192 .
|
|
#defaultEndpoint:Lingualibre
# Counts recordings (P2 = Q2) whose language (P4) is Q130 (zho), grouped by the
# label of the language's P13 value.
SELECT
  ?isoLabel
  (COUNT(?iso) AS ?found)
WHERE {
  ?audio prop:P2 entity:Q2 ;     # Filter: audios
         prop:P4 entity:Q130 ;   # Filter: P4 language is Q130 zho
         prop:P4 ?lang .         # Every P4 value on the same recording
  ?lang prop:P13 ?iso .

  # Resolve the ...Label variables used in SELECT.
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" . }
}
GROUP BY ?isoLabel
# Helper: Chinese writing zho:Q130/wd:Q7850 ; Chinese mandarin cmn:Q113/wd:Q9192
|
|
✅ Recordings → With language `P4` as Mossi `Q170137` to change to Mossi `Q359`
- ✅ Users' linguistic profiles have been corrected to Q359. The recordings' language (P4) is erroneous on Lingualibre, while Commons uses the correct Wikidata language Qid.
#defaultEndpoint:Lingualibre
# Lists recordings (P2 = Q2) whose language (P4) is Q170137, with their speaker
# (P5) and file URL (P3), ordered by recording.
SELECT
  ?lang
  ?audio ?audioLabel
  (SUBSTR(STR(?speaker), 32) AS ?SpeakerQid) ?speakerLabel
  ?url (SUBSTR(STR(?urlLabel), 52) AS ?filename)
WHERE {
  ?audio prop:P2 entity:Q2 ;        # Filter: audios
         prop:P4 entity:Q170137 ;   # Filter: P4 language is Q170137 (redirect)
         prop:P4 ?lang ;            # Every P4 value on the same recording
         prop:P5 ?speaker ;
         prop:P3 ?url .

  # The SUBSTR offsets drop a fixed-length leading part of the IRI / URL,
  # presumably the namespace prefix (TODO confirm the prefix lengths).
  # Resolve the ...Label variables used in SELECT.
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" . }
}
ORDER BY ASC(?audio)
|
|
✅ Recordings → With language `P4` as Duala `Q52071` to change to Duala `Q73`
- ✅ Users' linguistic profiles have been corrected to Q73. The recordings' language (P4) is erroneous on Lingualibre, while Commons uses the correct Wikidata language Qid.
#defaultEndpoint:Lingualibre
# Lists recordings (P2 = Q2) whose language (P4) is Q52071, with their speaker
# (P5), ordered by the shortened recording identifier.
SELECT
  ?lang
  (SUBSTR(STR(?audio), 32) AS ?Audio) ?audioLabel
  (SUBSTR(STR(?speaker), 32) AS ?SpeakerQid) ?speakerLabel
WHERE {
  ?audio prop:P2 entity:Q2 ;       # Filter: audios
         prop:P4 entity:Q52071 ;   # Filter: P4 language is Q52071 (redirect)
         prop:P4 ?lang ;           # Every P4 value on the same recording
         prop:P5 ?speaker .

  # ?Audio (capital A) is a separate variable from ?audio.
  # Resolve the ...Label variables used in SELECT.
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" . }
}
ORDER BY ASC(?Audio)
|
|
[?] Recordings → With corrupted information
- ✅ Users' linguistic profiles have been corrected.
#defaultEndpoint:Lingualibre
# Lists recordings (P2 = Q2) whose language (P4) is one of the obsolete language
# items, with their speaker (P5) and file URL (P3).
SELECT
  ?lang
  (SUBSTR(STR(?speaker), 32) AS ?SpeakerQid) ?speakerLabel
  ?audio ?audioLabel
  ?url (SUBSTR(STR(?urlLabel), 52) AS ?filename)
WHERE {
  # Obsolete language items, listed as: name, replacement Qid, obsolete Qid.
  #   Chinese  Q113  Q130
  #   Mossi    Q359  Q170137
  #   Duala    Q73   Q52071
  VALUES ?i18nQid { entity:Q130 entity:Q170137 entity:Q52071 }

  ?audio prop:P2 entity:Q2 ;    # Filter: audios
         prop:P4 ?i18nQid ;     # Filter: P4 language is Q130/Q170137/Q52071
         prop:P4 ?lang ;        # Every P4 value on the same recording
         prop:P5 ?speaker ;
         prop:P3 ?url .

  # Resolve the ...Label variables used in SELECT.
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" . }
}
|
Languages
✅ Languages → duplicates by Wikidata ID
✅ Languages → list of values used including redirects
See also
Sections above: #Recordings for corrupt data to fix.
✅ Languages → languages by type (if it exists)
✅ Languages → language with media type (P24) is video (Q88890)
✅ Languages with media type video → written word (P7), url (P3) speakers (P5)
✅ Languages iso from SignIt → relevant data
Other
All properties with their Wikidata equivalent (if it exists)
See also |