Speakers
✅ Speakers → Username containing reserved sign `-`
- Lists all declared speaker accounts, which is a different set from the speaker accounts having 1+ recordings.
- See also T297635: the Lingualibre filename separator should be mass-migrated from `-` to `—`, without breaking usernames or words.
#defaultEndpoint:Lingualibre
# Lists every speaker item (P2 = Q3) with its label and a boolean flag telling
# whether that label contains the reserved "-" sign. Flagged rows come first.
SELECT *
WHERE {
  ?id prop:P2 entity:Q3 .
  # Manual label-service mode: the label is bound to ?name so it can be tested below.
  SERVICE wikibase:label {
    bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" .
    ?id rdfs:label ?name .
  }
  BIND (REGEX(STR(?name), "-") AS ?has_fields_separator_in_username)
}
ORDER BY DESC(?has_fields_separator_in_username)
|
|
#defaultEndpoint:Lingualibre
# Counts speaker items (P2 = Q3) for each value of the flag telling whether
# their label contains the reserved "-" sign.
SELECT ?has_fields_separator_in_username (COUNT(?has_fields_separator_in_username) AS ?found)
WHERE {
  ?id prop:P2 entity:Q3 .
  # Manual label-service mode: the label is bound to ?name so it can be tested below.
  SERVICE wikibase:label {
    bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" .
    ?id rdfs:label ?name .
  }
  BIND (REGEX(STR(?name), "-") AS ?has_fields_separator_in_username)
}
GROUP BY ?has_fields_separator_in_username
|
|
✅ Recordings → Gascon `Q930` recordings containing reserved sign `-`
- See also T297635: the Lingualibre filename separator should be mass-migrated from `-` to `—`, without breaking usernames or words.
#defaultEndpoint:Lingualibre
# Lists audio records (P2 = Q2) whose language (P4) is Q930, with their label and
# a boolean flag telling whether that label contains the reserved "-" sign.
# Flagged rows come first.
SELECT *
WHERE {
  ?id prop:P2 entity:Q2 ;
      prop:P4 entity:Q930 .
  # Manual label-service mode: the label is bound to ?name so it can be tested below.
  SERVICE wikibase:label {
    bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" .
    ?id rdfs:label ?name .
  }
  BIND (REGEX(STR(?name), "-") AS ?has_fields_separator_in_written_word)
}
ORDER BY DESC(?has_fields_separator_in_written_word)
|
|
#defaultEndpoint:Lingualibre
# Counts audio records (P2 = Q2) in language Q930 (P4) for each value of the
# flag telling whether their label contains the reserved "-" sign.
# The projected and counted variable is the one actually bound and grouped on
# (?has_fields_separator_in_written_word).
SELECT ?has_fields_separator_in_written_word (COUNT(?has_fields_separator_in_written_word) AS ?found)
WHERE {
  ?id prop:P2 entity:Q2 .
  ?id prop:P4 entity:Q930 .
  # Manual label-service mode: the label is bound to ?name so it can be tested below.
  SERVICE wikibase:label {
    bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" .
    ?id rdfs:label ?name .
  }
  BIND (REGEX(STR(?name), "-") AS ?has_fields_separator_in_written_word)
}
GROUP BY ?has_fields_separator_in_written_word
|
|
✅ Speakers → Speakers of obsolete languages
- Should be maintained to 0.
#defaultEndpoint:Lingualibre
# Speaker items (P2 = Q3) whose language (P4) is one of the obsolete language items.
SELECT ?speaker ?speakerLabel
WHERE {
  # Obsolete language items (duplicates and redirects):
  #   Chinese Q130    → Q113
  #   Mossi   Q170137 → Q359
  #   Duala   Q52071  → Q73
  VALUES ?i18nQid { entity:Q130 entity:Q170137 entity:Q52071 }
  ?speaker prop:P2 entity:Q3 ;    # Filter: speaker items
           prop:P4 ?i18nQid .     # Filter: P4 language is Q130/Q170137/Q52071
  # Add labels
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" . }
}
|
|
✅ Speakers → Undefined place of residence
- The following entries may need more precise data. If stale, it means none was found.
#defaultEndpoint:Lingualibre
# Speaker items (P2 = Q3) without any P14 statement, with the number of
# recordings they made per language. Largest counts come first.
# The count is projected as ?audios: SPARQL 1.1 does not allow an AS alias to
# reuse a variable that is already in scope (?audio).
# ?localisation only occurs inside FILTER NOT EXISTS, so ?localisationLabel is
# never bound and that column stays empty.
SELECT ?speaker ?speakerLabel ?localisationLabel ?languageLabel (COUNT(?audio) AS ?audios)
WHERE {
  ?speaker prop:P2 entity:Q3 .
  FILTER NOT EXISTS { ?speaker prop:P14 ?localisation }
  ?audio prop:P5 ?speaker .    # P5 'speaker': recordings made by this speaker
  ?audio prop:P4 ?language .   # P4 'language': language of each recording
  # Add labels
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" }
}
GROUP BY ?speaker ?speakerLabel ?localisationLabel ?languageLabel   # one row per speaker and language
ORDER BY DESC(?audios)
|
|
#defaultEndpoint:Lingualibre
# For speaker items (P2 = Q3) lacking any P14 statement, counts the distinct
# P11 values per P2 type.
SELECT ?type (COUNT(DISTINCT ?user) AS ?users)
WHERE {
  ?item prop:P2 entity:Q3 ;    # Filter: is Q3 `speaker`
        prop:P2 ?type ;
        prop:P11 ?user .
  FILTER NOT EXISTS { ?item prop:P14 [] . }
  # Label service kept as in the original; no *Label variable is projected here.
  SERVICE wikibase:label { bd:serviceParam wikibase:language "fr,en". }
}
GROUP BY ?type
|
|
✅ Speakers → Non-documented genders values
- This query returns speakers who may have willfully declined to document a gender. See also list of genders.
#defaultEndpoint:Lingualibre
# Speaker items (P2 = Q3) having Q710460 among their P8 (gender) values,
# listed with every P8 value they carry.
SELECT ?item ?itemLabel ?filledGenderLabel
WHERE {
  ?item prop:P2 entity:Q3 ;
        prop:P8 entity:Q710460 ;   # Filter: 'gender' is 'not documented'
        prop:P8 ?filledGender .    # every P8 value of the item, Q710460 included
  # Add labels to each variable used.
  SERVICE wikibase:label {
    bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" .
  }
}
|
|
✅ Speakers → missing gender value
- This query returns speakers who have a missing gender value. See also list of genders.
#defaultEndpoint:Lingualibre
# Speaker items (P2 = Q3) without any P8 (gender) statement, with the number
# of recordings they made per language. Largest counts come first.
# The count is projected as ?audios: SPARQL 1.1 does not allow an AS alias to
# reuse a variable that is already in scope (?audio).
# ?gender only occurs inside FILTER NOT EXISTS, so ?genderLabel is never bound
# and that column stays empty.
SELECT ?speaker ?speakerLabel ?genderLabel ?languageLabel (COUNT(?audio) AS ?audios)
WHERE {
  ?speaker prop:P2 entity:Q3 .
  FILTER NOT EXISTS { ?speaker prop:P8 ?gender }
  ?audio prop:P5 ?speaker .    # P5 'speaker': recordings made by this speaker
  ?audio prop:P4 ?language .   # P4 'language': language of each recording
  # Add labels
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" }
}
GROUP BY ?speaker ?speakerLabel ?genderLabel ?languageLabel   # one row per speaker and language
ORDER BY DESC(?audios)
|
|
Recordings
✅ Recordings → With language `P4` as Chinese writing `Q130`, to change to cmn `Q113`
- ✅ Users linguistic profile has been corrected into Q359. Erroneous recordings language (P4) on Lingualibre, while Commons use correct Wikidata language Qid.
#defaultEndpoint:Lingualibre
# Audio records (P2 = Q2) whose language (P4) is Q130, with their speaker (P5)
# and the P13 code of each of their languages, sorted by that code.
# Helper: Chinese writing zho:Q130/wd:Q7850 ; Chinese mandarin cmn:Q113/wd:Q9192 .
SELECT ?langLabel ?isoLabel ?speaker ?speakerLabel ?audio ?audioLabel
WHERE {
  ?audio prop:P2 entity:Q2 ;      # Filter: audios
         prop:P4 entity:Q130 ;    # Filter: P4 language is Q130 zho
         prop:P4 ?lang ;
         prop:P5 ?speaker .
  ?lang prop:P13 ?iso .
  # Add labels
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" . }
}
ORDER BY ASC(?isoLabel)
|
|
#defaultEndpoint:Lingualibre
# Counts, per P13 code label, the audio records (P2 = Q2) whose language (P4) is Q130.
# Helper: Chinese writing zho:Q130/wd:Q7850 ; Chinese mandarin cmn:Q113/wd:Q9192
SELECT ?isoLabel (COUNT(?iso) AS ?found)
WHERE {
  ?audio prop:P2 entity:Q2 ;      # Filter: audios
         prop:P4 entity:Q130 ;    # Filter: P4 language is Q130 zho
         prop:P4 ?lang .
  ?lang prop:P13 ?iso .
  # Add labels
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" . }
}
GROUP BY ?isoLabel
|
|
[?] Recordings → With corrupted information
- ✅ Users linguistic profile has been corrected.
#defaultEndpoint:Lingualibre
# Audio records (P2 = Q2) whose language (P4) is one of the obsolete language
# items, with their speaker (P5) and their P3 url.
# ?SpeakerQid and ?filename are the tails of the speaker IRI and of the url
# label, cut at fixed character offsets (32 and 52).
SELECT ?lang
       (SUBSTR(STR(?speaker), 32) AS ?SpeakerQid) ?speakerLabel
       ?audio ?audioLabel
       ?url (SUBSTR(STR(?urlLabel), 52) AS ?filename)
WHERE {
  # Obsolete language items (duplicates and redirects):
  #   Chinese Q130    → Q113
  #   Mossi   Q170137 → Q359
  #   Duala   Q52071  → Q73
  VALUES ?i18nQid { entity:Q130 entity:Q170137 entity:Q52071 }
  { ?audio prop:P4 ?i18nQid . }   # Filter: P4 language is Q130/Q170137/Q52071
  ?audio prop:P2 entity:Q2 ;      # Filter: audios
         prop:P4 ?lang ;
         prop:P5 ?speaker ;
         prop:P3 ?url .
  # Add labels
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" . }
}
|
|
Counts
#defaultEndpoint:Lingualibre
# Counts, per language and speaker, the audio records (P2 = Q2) whose language
# (P4) is one of the obsolete language items. Only records having a P3 url
# are counted.
SELECT ?lang ?langLabel ?iso
       ?speaker ?speakerLabel
       (COUNT(?audio) AS ?found)
WHERE {
  # Obsolete language items (duplicates and redirects):
  #   Chinese Q130    → Q113
  #   Mossi   Q170137 → Q359
  #   Duala   Q52071  → Q73
  VALUES ?i18nQid { entity:Q130 entity:Q170137 entity:Q52071 }
  { ?audio prop:P4 ?i18nQid . }   # Filter: P4 language is Q130/Q170137/Q52071
  ?audio prop:P2 entity:Q2 ;      # Filter: audios
         prop:P4 ?lang ;
         prop:P5 ?speaker ;
         prop:P3 ?url .
  OPTIONAL { ?lang prop:P13 ?iso . }   # the P13 code may be absent
  # Add labels
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" . }
}
GROUP BY ?lang ?langLabel ?iso ?speaker ?speakerLabel
ORDER BY ?lang
|
|
Languages
✅ LL Languages → duplicata by Wikidata ID
- Duplicated languages can be merged. If stale, it means none was found.
#defaultEndpoint:Lingualibre
# Language items (P2 = Q4) grouped by their P12 value; keeps only the P12
# values shared by more than one item and concatenates those items.
SELECT ?idWD
       (COUNT(?item) AS ?count)
       (GROUP_CONCAT(?item) AS ?items)
WHERE {
  ?item prop:P2 entity:Q4 .
  ?item prop:P12 ?idWD .
}
GROUP BY ?idWD
HAVING (?count > 1)
|
|
✅ LL Languages → LL Qid, records, speakers
- Note: If some languageLabels display full uri, those are remains of duplicate language items now merged, one being a redirect. Erroneous data should be corrected or deleted on both Commons and Lingualibre's recordings. User:Yug has Dragons Bot for that.
#defaultEndpoint:Lingualibre
# For each language (P4) used by audio records (P2 = Q2): number of distinct
# records and of distinct speakers (P5), sorted by language label.
SELECT ?language
       ?languageLabel
       (COUNT(DISTINCT ?record) AS ?records)
       (COUNT(DISTINCT ?speaker) AS ?speakers)
WHERE {
  ?record prop:P2 entity:Q2 ;
          prop:P4 ?language .
  # Speaker is optional so that records lacking a P5 value are still counted.
  OPTIONAL { ?record prop:P5 ?speaker . }
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
}
GROUP BY ?language ?languageLabel
ORDER BY ?languageLabel
|
|
See also
Sections above : #Recordings for corrupt data to fix.
✅ LL Languages → languages by type (if it exists)
- Note: query to improve.
#defaultEndpoint:Lingualibre
# Language items (P2 = Q4) with their optional P13 code, English label and
# P24 type, sorted by type label (descending).
SELECT ?language ?languageLabel ?code ?type ?typeLabel
WHERE {
  ?language prop:P2 entity:Q4 .
  OPTIONAL { ?language prop:P13 ?code }
  # ?languageLabel is bound by hand to the English label only.
  OPTIONAL {
    ?language rdfs:label ?languageLabel .
    FILTER (LANG(?languageLabel) = "en")
  }
  OPTIONAL { ?language prop:P24 ?type }
  SERVICE wikibase:label {
    bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" .
  }
}
ORDER BY DESC(?typeLabel)
|
|
✅ LL Languages → media types
#defaultEndpoint:Lingualibre
# Items whose P2 value is Q88888, with their label.
SELECT ?id ?idLabel
WHERE {
  ?id prop:P2 entity:Q88888 .
  SERVICE wikibase:label {
    bd:serviceParam wikibase:language "fr,en".
  }
}
|
|
✅ LL Languages → language with media type (P24) is video (Q88890)
#defaultEndpoint:Lingualibre
# Language items (P2 = Q4) whose media type (P24) is video (Q88890).
SELECT ?id ?idLabel
WHERE {
  ?id prop:P2 entity:Q4 ;
      prop:P24 entity:Q88890 .
  SERVICE wikibase:label {
    bd:serviceParam wikibase:language "fr,en".
  }
}
|
|
✅ LL Languages → language with media type (P24) is missing
- The result is empty if the rotating loading animation is displayed for too long.
#defaultEndpoint:Lingualibre
# Language items (P2 = Q4) without any media type (P24) statement.
# ?type is never bound by this pattern, so the ?type / ?typeLabel columns stay empty.
SELECT ?id ?idLabel ?type ?typeLabel
WHERE {
  ?id prop:P2 entity:Q4 .
  FILTER NOT EXISTS { ?id prop:P24 [] . }
  SERVICE wikibase:label {
    bd:serviceParam wikibase:language "fr,en".
  }
}
|
|
✅ LL Languages → language with commons category (P26) is missing
- The result is empty if the rotating loading animation is displayed for too long.
#defaultEndpoint:Lingualibre
# Languages (P4) used by audio records (P2 = Q2) that have no commons category
# (P26) statement, with their P13 code and an HTML link to the expected
# Commons category built from that code.
# ?cat is a plain CONCAT string, as in the "by date of first recording" query:
# URL() is not a SPARQL function, and a projected alias must not also be a
# GROUP BY key.
SELECT ?language ?languageLabel ?iso
       (CONCAT('<a href=https://commons.wikimedia.org/wiki/Category:Lingua_Libre_pronunciation-', ?iso, '>', ?iso, '</a>') AS ?cat)
WHERE {
  ?record prop:P2 entity:Q2 .
  ?record prop:P4 ?language .
  FILTER NOT EXISTS {
    ?language prop:P26 [] .
  }
  ?language prop:P13 ?iso .
  SERVICE wikibase:label { bd:serviceParam wikibase:language "fr,en". }
}
GROUP BY ?language ?languageLabel ?iso   # collapses the per-record rows to one row per language
|
|
✅ Languages with media type video → written word (P7), url (P3) speakers (P5)
#defaultEndpoint:Lingualibre
# Records (P2 = Q2) in language Q99628 (P4): written word (P7), url (P3) and
# the English label of their speaker (P5).
SELECT ?word ?filename ?speaker
WHERE {
  ?record prop:P2 entity:Q2 ;
          prop:P4 entity:Q99628 ;
          prop:P7 ?word ;
          prop:P3 ?filename ;
          prop:P5 ?speakerItem .
  ?speakerItem rdfs:label ?speaker .
  FILTER (LANG(?speaker) = "en")
}
|
|
✅ Languages iso from SignIt → relevant data
- This is based on Translatewiki:Translating:Lingua_Libre_SignIt and helps https://github.com/lingua-libre/Signit .
# List from https://translatewiki.net/wiki/Translating:Lingua_Libre_SignIt#sortable:3=desc
# 2023.07.26
#defaultEndpoint:Wikidata
# For each code of the SignIt list, finds the Wikidata items carrying that code
# as P218, P219, P220 or P305, and returns their Qid, optional Wikimedia code
# (P424), English label and one sample capitalised native label (P1705).
# The unused `VALUES ?i18nScript` clause was removed from the active pattern:
# it joined nothing and only multiplied every solution by three before grouping.
SELECT ?wdQid ?wikimediaCode ?i18nCode ?labelEN
  (SAMPLE(?nativeLabel) as ?labelNative)
WHERE {
  # languages & scripts
  VALUES ?i18nCode {
    "anp" "ar" "bn" "br" "de" "en" "es" "fa" "fi" "fr"
    "he" "hi" "ia" "id" "it" "ja" "kk-cyrl" "ko" "krc" "lmo"
    "mk" "ms" "nb" "pnb" "pt" "pt-br" "qqq" "ru" "scn" "sl"
    "sv" "sw" "tl" "tr" "uk" "zh-hans" "zh-hant" "mnw" "hu"
    "kk" "zh"
  }
  { ?langId wdt:P218 ?i18nCode. }        # has for iso 639-1 code
  UNION { ?langId wdt:P219 ?i18nCode. }  # has for iso 639-2 code
  UNION { ?langId wdt:P220 ?i18nCode. }  # has for iso 639-3 code
  UNION { ?langId wdt:P305 ?i18nCode. }  # has for isoIETF code
  # Disabled alternatives, kept for reference:
  # Scripts, not languages
  # VALUES ?i18nScript { "kk-cyrl" "zh-hans" "zh-hant" }
  # UNION {?langId wdt:P31 wd:Q1149626 ; # has for instance written language
  # wdt:P424 ?i18nScript ; # has for wikimedia code
  # rdfs:label ?labelEN . }
  # UNION { ?langId wdt:P424 ?i18nCode. } # has for wikimedia code
  OPTIONAL { ?langId wdt:P424 ?wikimediaCode. }
  OPTIONAL { ?langId wdt:P1705 ?native. }
  # wdt:P10369 ?lingualibreQid ;
  ?langId rdfs:label ?labelEN .
  FILTER ( lang(?labelEN) = "en" )
  # Keep only the "Q…" tail of the item IRI.
  BIND(REPLACE(STR(?langId), ".*/Q", "Q") AS ?wdQid)
  # Upper-case the first character of the native label.
  BIND(CONCAT(UCASE(SUBSTR(?native, 1 , 1 )), SUBSTR(?native, 2 )) AS ?nativeLabel)
}
GROUP BY ?wdQid ?wikimediaCode ?i18nCode ?labelEN
ORDER BY ASC(?i18nCode)
|
|
❌ LL's Languages → WD: with Wikidata Lingua Libre ID (P10369)
- To create.
#defaultEndpoint:Wikidata
|
|
❌ LL's Languages → WD: without Lingua Libre ID (P10369)
- To create.
#defaultEndpoint:Wikidata
|
|
❌ LL's Languages → WD → en.wikipedia.org : article pagename
- To create.
#defaultEndpoint:Wikidata
|
|
✅ LL Languages → by date of first recording
Basic-slow query
#defaultEndpoint:Lingualibre
# For each language (P4), the earliest P6 date among its items, cut to its
# first 10 characters, sorted ascending.
# STR() is applied before SUBSTR(), as in the "Faster query" variant: SUBSTR()
# is defined on string literals only, so a typed date value has to be
# converted first.
SELECT ?lang ?langLabel (SUBSTR(STR(MIN(?date)), 1, 10) AS ?first)
WHERE {
  ?audio prop:P4 ?lang ;
         prop:P6 ?date .
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
}
GROUP BY ?lang ?langLabel
ORDER BY (?first)
|
|
Faster query
#defaultEndpoint:Lingualibre
# For each language (P4) having a P13 code: the earliest P6 date among its
# items (first 10 characters) and an HTML link to the matching Commons
# category, most recent first. The aggregation runs in a sub-select; labels
# are added in the outer query.
SELECT ?lang
       ?langLabel
       (SUBSTR(STR(?first), 1, 10) AS ?First)
       (CONCAT('<a href=https://commons.wikimedia.org/wiki/Category:Lingua_Libre_pronunciation-',?iso,'>',?iso,'</a>') AS ?cat)
WHERE {
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
  {
    SELECT ?lang ?iso (MIN(?date) AS ?first)
    WHERE {
      ?audio prop:P4 ?lang ;
             prop:P6 ?date .
      ?lang prop:P13 ?iso .
    }
    GROUP BY ?lang ?iso
  }
}
ORDER BY DESC(?First)
|
|
✅ WD Languages → language status (P3823,P1999)
#defaultEndpoint:Wikidata
# Items that are instances (P31) of Q34770, with their optional P1999 and
# P3823 status values, sorted by item label (descending).
SELECT ?item ?itemLabel ?statusUNLabel ?statusEthnoLabel
WHERE {
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
  ?item wdt:P31 wd:Q34770 .
  OPTIONAL { ?item wdt:P1999 ?statusUN . }
  OPTIONAL { ?item wdt:P3823 ?statusEthno . }
}
ORDER BY DESC(?itemLabel)
|
|
#defaultEndpoint:Wikidata
# Items that are instances (P31) of Q461183, with their optional P407, P856,
# P577 and P2043 values, sorted by language label (descending).
SELECT ?text_corpus ?languageLabel ?website ?date ?size
WHERE {
  ?text_corpus wdt:P31 wd:Q461183 .
  OPTIONAL { ?text_corpus wdt:P407  ?language . }
  OPTIONAL { ?text_corpus wdt:P856  ?website . }
  OPTIONAL { ?text_corpus wdt:P577  ?date . }
  OPTIONAL { ?text_corpus wdt:P2043 ?size . }
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
}
ORDER BY DESC(?languageLabel)
|
|
✅ Wikidata item with coordinates (d:P:P625) and pronunciation (d:P:P443)
#defaultEndpoint:Wikidata
#defaultView:Map
# Wikidata items having both a pronunciation audio (P443) and coordinates
# (P625), with an optional image (P18), shown on a map.
# The audio label is projected as ?audioLabel: the previous ?audio_ocwLabel
# had no matching ?audio_ocw variable, so that column was always empty.
SELECT ?id ?idLabel ?audio ?audioLabel ?coord ?image
WHERE {
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],oc,fr,en". }
  ?id wdt:P443 ?audio .
  ?id wdt:P625 ?coord .   # geo coordinates
  OPTIONAL { ?id wdt:P18 ?image }
}
|
|
✅ Wikidata item with coordinates (d:P:P625) and pronunciation (d:P:P443) from Lingualibre
#defaultEndpoint:Wikidata
#defaultView:Map
# Wikidata items having coordinates (P625) and a pronunciation audio (P443)
# whose value contains "LL-", with an optional image (P18), shown on a map.
SELECT ?id ?idLabel ?audio ?audioLabel ?coord ?image
WHERE {
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],oc,fr,en". }
  ?id wdt:P443 ?audio ;
      wdt:P625 ?coord .   # geo coordinates
  FILTER (CONTAINS(STR(?audio), "LL-"))
  OPTIONAL { ?id wdt:P18 ?image }
}
|
|
✅ Wikidata item with coordinates (d:P:P625) and pronunciation (d:P:P443) from Lingualibre, with qualifiers
#defaultEndpoint:Wikidata
# Wikidata items having coordinates (P625) and a pronunciation audio (P443)
# whose value contains "LL-Q117707514", with the P407 and P10894 qualifiers of
# that same audio statement when both are present.
SELECT ?id ?idLabel ?audio ?audioLabel ?audioFile ?audioLangLabel ?speakerLabel
WHERE {
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
  ?id wdt:P443 ?audio .
  FILTER(CONTAINS(STR(?audio), "LL-Q117707514"))   # occitan whistled -Q117707514
  ?id wdt:P625 ?coord .   # geocoordinates
  OPTIONAL {
    ?id p:P443 ?audioStatement .
    ?audioStatement ps:P443 ?audioFile .
    # Tie the statement to the filtered audio; without this, the qualifiers of
    # every other P443 statement of the item were returned as well.
    FILTER(?audioFile = ?audio)
    ?audioStatement pq:P407 ?audioLang .
    ?audioStatement pq:P10894 ?speaker .
  }
}
#defaultEndpoint:Wikidata
# Wikidata items with a P443 statement whose value contains "LL-Q117707514",
# together with the P407 and P10894 qualifiers of that statement (both required).
SELECT ?id ?idLabel ?audioFile ?audioLangLabel ?speakerLabel
WHERE {
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
  ?id p:P443 ?audioStatement .
  ?audioStatement ps:P443   ?audioFile ;
                  pq:P407   ?audioLang ;
                  pq:P10894 ?speaker .
  FILTER (CONTAINS(STR(?audioFile), "LL-Q117707514"))   # occitan whistled -Q117707514
  # Disabled: ?id wdt:P625 ?coord . # geocoordinates
}
|
|
✅ Wikidata toponyms from administrative entity (d:Q12703) with Occitan and French labels
- This query can be used in the Recording Studio, Step 3: « External tool », to provide a list of Wikidata toponyms. Doing so, User:Lingua Libre Bot will update their Wikidata pages with the recorded pronunciations. This will occur the next day at about 6:00 UTC.
#defaultEndpoint:Wikidata
#defaultView:Map
# Wikidata items located (P131) in the selected administrative area, being an
# instance (P31, through any P279 chain) of one of the listed settlement
# types, and having both an Occitan and a French label plus coordinates
# (P625). Population (P1082) and OSM relation ID (P402) are optional.
SELECT DISTINCT ?id ?label ?label_fr ?coord ?population ?OSM_relation_ID
WHERE {
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],oc,fr,en". }
  VALUES ?humanSettlementTypes {
    wd:Q515          # city
    wd:Q3957         # town
    # wd:Q484170     # French communes
    wd:Q486972       # human settlement
    wd:Q618123       # geographic feature
    wd:Q3257686      # locality
    wd:Q123964505    # populated item
  }
  VALUES ?administrativeArea {
    wd:Q12703        # Pyrénées-Atlantiques
    # wd:Q213763     # Béarn
    # wd:Q12538      # Haute-Garonne
  }
  ?id (wdt:P31/(wdt:P279*)) ?humanSettlementTypes ;
      wdt:P131 ?administrativeArea ;
      wdt:P625 ?coord .   # geo coordinates
  ?id rdfs:label ?label .
  FILTER (LANG(?label) = "oc")
  ?id rdfs:label ?label_fr .
  FILTER (LANG(?label_fr) = "fr")
  OPTIONAL { ?id wdt:P1082 ?population . }
  OPTIONAL { ?id wdt:P402 ?OSM_relation_ID }
}
|
|
Other
All properties with their Wikidata equivalent (if it exists)
- See also Special:ListProperties.
#defaultEndpoint:Lingualibre
# Lists every property (any entity having a wikibase directClaim predicate)
# with its label, its direct-claim IRI and, when it exists, its P12 value
# (Wikidata equivalent). Sorted by the numeric tail of the direct-claim IRI.
# Only the P12 lookup is OPTIONAL: with both patterns inside one OPTIONAL,
# properties lacking a P12 value were dropped from the result.
SELECT
  (SUBSTR(STR(?propLili), 38) AS ?QidNum)
  ?propLiliClaimLabel ?propLili ?propWD
WHERE {
  # { ?propLili rdf:type owl:ObjectProperty }
  # UNION
  # { ?propLili rdf:type owl:DatatypeProperty }
  ?propLiliClaim <http://wikiba.se/ontology#directClaim> ?propLili .
  OPTIONAL { ?propLiliClaim prop:P12 ?propWD }
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
}
ORDER BY xsd:integer(?QidNum)
|
|
See also