Help
SPARQL for maintenance
Revision as of 08:13, 3 October 2023 by Yug (talk | contribs) (→✅ Recordings → With language `P4` as Duala `Q52071` to change to Duala `Q73`)
SPARQL for maintenance gathers SPARQL queries that help monitor errors and fix our resources.
Revision as of 08:13, 3 October 2023 by Yug (talk | contribs) (→✅ Recordings → With language `P4` as Duala `Q52071` to change to Duala `Q73`)
Speakers
✅ Speakers → Username containing reserved sign `-`
- See also T297635: the Lingualibre filename separator should be mass-migrated from `-` to `—`, without breaking usernames or words.
#defaultEndpoint:Lingualibre
# Lists every item whose P2 is Q3 (speaker) together with its label, and flags
# the labels that contain the reserved filename separator "-".
# Flagged rows (true) sort first.
SELECT *
WHERE {
  ?id prop:P2 entity:Q3 .

  # Label service used in manual mode: the item's label is bound to ?name.
  SERVICE wikibase:label {
    bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" .
    ?id rdfs:label ?name .
  }

  # true when the label contains "-", false otherwise.
  BIND (regex(STR(?name), "-") AS ?has_fields_separator_in_username)
}
ORDER BY DESC(?has_fields_separator_in_username)
|
|
#defaultEndpoint:Lingualibre
# Counts speakers (P2 = Q3) grouped by whether their label contains the
# reserved filename separator "-": one result row per flag value.
SELECT
  ?has_fields_separator_in_username
  (COUNT(?has_fields_separator_in_username) AS ?found)
WHERE {
  ?id prop:P2 entity:Q3 .

  # Label service used in manual mode: the item's label is bound to ?name.
  SERVICE wikibase:label {
    bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" .
    ?id rdfs:label ?name .
  }

  BIND (regex(STR(?name), "-") AS ?has_fields_separator_in_username)
  # Alternative (disabled): keep only the matching rows.
  # filter( regex(?name, "-" ))
}
# Disabled ordering, kept for reference:
# ORDER BY DESC (?has_fields_separator_in_username)
GROUP BY ?has_fields_separator_in_username
|
|
✅ Recordings → Gascon `Q930` recordings containing reserved sign `-`
- See also T297635: the Lingualibre filename separator should be mass-migrated from `-` to `—`, without breaking usernames or words.
#defaultEndpoint:Lingualibre
# Lists every item with P2 = Q2 and P4 = Q930 together with its label, and
# flags the labels that contain the reserved filename separator "-".
# Flagged rows (true) sort first.
SELECT *
WHERE {
  ?id prop:P2 entity:Q2 ;
      prop:P4 entity:Q930 .

  # Label service used in manual mode: the item's label is bound to ?name.
  SERVICE wikibase:label {
    bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" .
    ?id rdfs:label ?name .
  }

  # true when the label contains "-", false otherwise.
  BIND (regex(STR(?name), "-") AS ?has_fields_separator_in_written_word)
}
ORDER BY DESC(?has_fields_separator_in_written_word)
|
|
#defaultEndpoint:Lingualibre
# Counts items with P2 = Q2 and P4 = Q930, grouped by whether their label
# contains the reserved filename separator "-": one result row per flag value.
#
# Fix: the SELECT clause previously projected and counted
# ?has_fields_separator_in_username, a variable this query never binds (the
# BIND and GROUP BY both use ?has_fields_separator_in_written_word), so the
# projection did not match the grouping key.
SELECT
  ?has_fields_separator_in_written_word
  (COUNT(?has_fields_separator_in_written_word) AS ?found)
WHERE {
  ?id prop:P2 entity:Q2 .
  ?id prop:P4 entity:Q930 .

  # Label service used in manual mode: the item's label is bound to ?name.
  SERVICE wikibase:label {
    bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" .
    ?id rdfs:label ?name .
  }

  BIND (regex(STR(?name), "-") AS ?has_fields_separator_in_written_word)
  # Alternative (disabled): keep only the matching rows.
  # filter( regex(?name, "-" ))
}
# Disabled ordering, kept for reference:
# ORDER BY DESC (?has_fields_separator_in_written_word)
GROUP BY ?has_fields_separator_in_written_word
|
|
✅ Speakers → Speakers of `zho` to change to `cmn`
- Should be maintained to 0.
#defaultEndpoint:Lingualibre
# Lists speakers that declare Q130 (zho) as a language (P4), together with
# every language (P4) of that speaker and its P13 value.
#
# Fix: the projection used to be (SUBSTR(STR(?speaker),32) AS ?speaker),
# which re-assigns a variable that is already bound in the WHERE pattern;
# SPARQL 1.1 does not allow that. The shortened value is now exposed as
# ?SpeakerQid, the alias the other queries on this page use.
SELECT
  ?langLabel
  ?isoLabel
  (SUBSTR(STR(?speaker), 32) AS ?SpeakerQid)  # presumably strips the entity URI prefix, leaving the Qid; verify prefix length
  ?speakerLabel
WHERE {
  ?speaker prop:P2 entity:Q3 .    # Filter: speakers
  ?speaker prop:P4 entity:Q130 .  # Filter: P4 language is Q130 zho
  ?speaker prop:P4 ?lang .        # All P4 languages of that speaker
  ?lang prop:P13 ?iso .
  # Add labels
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" . }
}
ORDER BY ASC(?isoLabel)
# Helper: Chinese writing zho:Q130/wd:Q7850 ; Chinese mandarin cmn:Q113/wd:Q9192 .
|
|
✅ Speakers → Undefined place of residence
- The following query may need refining. If it returns no results, it means none were found.
#defaultEndpoint:Lingualibre
# Lists speakers (P2 = Q3) that have no P14 `place of residence` statement.
#
# Fixes:
#  - `?item prop:P14 .` was a triple pattern without an object, which is a
#    syntax error; "missing P14" is now expressed with FILTER NOT EXISTS.
#  - ?filledGenderLabel was projected but never bound by this query, so it
#    has been removed from the SELECT clause.
SELECT ?item ?itemLabel
WHERE {
  ?item prop:P2 entity:Q3 .                            # Filter: is Q3 `speaker`
  FILTER NOT EXISTS { ?item prop:P14 ?residence . }    # Filter: missing P14 `place of residence`
  # Add labels to each variable used.
  SERVICE wikibase:label {
    bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" .
  }
}
|
|
✅ Speakers → Non-documented genders values
- This query returns speakers who may have willfully declined to document a gender. See also list of genders.
#defaultEndpoint:Lingualibre
# Lists speakers (P2 = Q3) having Q710460 as a P8 `gender` value, along with
# every P8 value recorded on the same speaker.
SELECT ?item ?itemLabel ?filledGenderLabel
WHERE {
  ?item prop:P2 entity:Q3 ;
        prop:P8 entity:Q710460 ;   # Filter: 'gender' is 'not documented'
        prop:P8 ?filledGender .    # Every gender value on that speaker

  # Add labels to each variable used.
  SERVICE wikibase:label {
    bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" .
  }
}
|
|
✅ Speakers → missing gender value
- This query returns speakers who have no gender value. See also list of genders.
#defaultEndpoint:Lingualibre
# For each speaker (P2 = Q3) without any P8 `gender` statement, counts the
# items that reference the speaker through P5, per P4 language of those items.
# Largest counts come first.
#
# Fixes:
#  - ?gender was projected although it is only used inside FILTER NOT EXISTS
#    (so it is never bound) and it was not a grouping key; it is removed.
#  - GROUP BY listed ?genderLabel, which is not projected; the grouping keys
#    now match the non-aggregated SELECT variables.
#  - (COUNT(?audio) AS ?audio) re-assigned a variable already bound in the
#    WHERE pattern; the count is now exposed as ?audioCount.
#  - Stale comments naming a specific speaker and language were removed: the
#    patterns below match any speaker and any language.
SELECT ?speaker ?speakerLabel ?languageLabel (COUNT(?audio) AS ?audioCount)
WHERE {
  ?speaker prop:P2 entity:Q3 .
  FILTER NOT EXISTS { ?speaker prop:P8 ?gender }
  ?audio prop:P5 ?speaker .    # Items whose P5 'speaker' is this speaker
  ?audio prop:P4 ?language .   # P4 'language' of each such item
  # Add labels
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" . }
}
GROUP BY ?speaker ?speakerLabel ?languageLabel  # One row per speaker and language
ORDER BY DESC(?audioCount)
|
|
Recordings
✅ Recordings → With language `P4` as zho `Q130` to change to cmn `Q113`
#defaultEndpoint:Lingualibre
# Lists audio items (P2 = Q2) whose P4 language includes Q130 (zho), with
# every P4 language of the item and that language's P13 value.
SELECT ?langLabel ?isoLabel ?audio ?audioLabel
WHERE {
  ?audio prop:P4 entity:Q130 ;   # Filter: P4 language is Q130 zho
         prop:P2 entity:Q2 ;     # Filter: audios
         prop:P4 ?lang .
  ?lang prop:P13 ?iso .

  # Add labels
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" . }
}
ORDER BY ASC(?isoLabel)
# Helper: Chinese writing zho:Q130/wd:Q7850 ; Chinese mandarin cmn:Q113/wd:Q9192 .
|
|
#defaultEndpoint:Lingualibre
# Counts audio items (P2 = Q2) whose P4 language includes Q130 (zho),
# grouped by the label of the P13 value of each of their P4 languages.
SELECT ?isoLabel (COUNT(?iso) AS ?found)
WHERE {
  ?audio prop:P4 entity:Q130 ;   # Filter: P4 language is Q130 zho
         prop:P2 entity:Q2 ;     # Filter: audios
         prop:P4 ?lang .
  ?lang prop:P13 ?iso .

  # Add labels (provides ?isoLabel, the grouping key)
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" . }
}
GROUP BY ?isoLabel
# Helper: Chinese writing zho:Q130/wd:Q7850 ; Chinese mandarin cmn:Q113/wd:Q9192
|
|
✅ Recordings → With language `P4` as Mossi `Q170137` to change to Mossi `Q359`
- ✅ Users linguistic profile has been corrected into Q359. Erroneous recordings language (P4) on Lingualibre, while Commons use correct Wikidata language Qid.
#defaultEndpoint:Lingualibre
# Lists audio items (P2 = Q2) whose P4 language includes Q170137, with their
# P4 language(s), P5 speaker and P3 url. Sorted by audio item.
SELECT
  ?lang
  ?audio ?audioLabel
  (SUBSTR(STR(?speaker), 32) AS ?SpeakerQid) ?speakerLabel   # presumably strips the entity URI prefix; verify prefix length
  ?url (SUBSTR(STR(?urlLabel), 52) AS ?filename)             # presumably strips the file URL prefix; verify prefix length
WHERE {
  ?audio prop:P4 entity:Q170137 ;   # Filter: P4 language is Q170137 (redirect)
         prop:P2 entity:Q2 ;        # Filter: audios
         prop:P4 ?lang ;
         prop:P5 ?speaker ;
         prop:P3 ?url .
  # Disabled: ?lang prop:P13 ?iso .

  # Add labels
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" . }
}
ORDER BY ASC(?audio)
|
|
✅ Recordings → With language `P4` as Duala `Q52071` to change to Duala `Q73`
- ✅ Users linguistic profile has been corrected into Q73. Erroneous recordings language (P4) on Lingualibre, while Commons use correct Wikidata language Qid.
#defaultEndpoint:Lingualibre
# Lists audio items (P2 = Q2) whose P4 language includes Q52071, with their
# P4 language(s) and P5 speaker. Sorted by the shortened audio identifier.
SELECT
  ?lang
  (SUBSTR(STR(?audio), 32) AS ?Audio) ?audioLabel            # presumably strips the entity URI prefix; verify prefix length
  (SUBSTR(STR(?speaker), 32) AS ?SpeakerQid) ?speakerLabel
WHERE {
  ?audio prop:P4 entity:Q52071 ;   # Filter: P4 language is Q52071 (redirect)
         prop:P2 entity:Q2 ;       # Filter: audios
         prop:P4 ?lang ;
         prop:P5 ?speaker .
  # Disabled: ?lang prop:P13 ?iso .

  # Add labels
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" . }
}
ORDER BY ASC(?Audio)
|
|
[?] Recordings → With corrupted information
- ✅ Users linguistic profile has been corrected.
#defaultEndpoint:Lingualibre
# Lists audio items (P2 = Q2) whose P4 language is one of the obsolete
# language items enumerated in VALUES, with their P4 language(s), P5 speaker
# and P3 url.
SELECT
  ?lang
  (SUBSTR(STR(?speaker), 32) AS ?SpeakerQid) ?speakerLabel   # presumably strips the entity URI prefix; verify prefix length
  ?audio ?audioLabel
  ?url (SUBSTR(STR(?urlLabel), 52) AS ?filename)             # presumably strips the file URL prefix; verify prefix length
WHERE {
  # Obsolete language items, per the notes kept from the original query:
  #   Chinese  Q113  Q130
  #   Mossi    Q359  Q170137
  #   Duala    Q73   Q52071
  # Disabled alternatives:
  #   VALUES ?i18nLabel { "Chinese" "Mossi" "Duala" }
  #   VALUES ?i18nScript { "zho" "mos" "dua" }
  VALUES ?i18nQid { entity:Q130 entity:Q170137 entity:Q52071 }

  ?audio prop:P4 ?i18nQid ;    # Filter: P4 language is Q130/Q170137/Q52071
         prop:P2 entity:Q2 ;   # Filter: audios
         prop:P4 ?lang ;
         prop:P5 ?speaker ;
         prop:P3 ?url .
  # Disabled: ?audio prop:P4 entity:Q170137 .   (P4 language is Q170137, redirect)
  # Disabled: ?lang prop:P13 ?iso .

  # Add labels
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" . }
}
# Disabled modifiers, kept for reference:
# ORDER BY ASC(?audio)
# GROUP BY ?lang ?SpeakerQid ?speakerLabel
|
Languages
✅ Languages → duplicates by Wikidata ID
✅ Languages → list of values used including redirects
See also
Sections above: #Recordings for corrupt data to fix.
✅ Languages → languages by type (if it exists)
✅ Languages → language with media type (P24) is video (Q88890)
✅ Languages with media type video → written word (P7), url (P3) speakers (P5)
✅ Languages iso from SignIt → relevant data
Other
All properties with their Wikidata equivalent (if it exists)
See also |