Help
Difference between revisions of "SPARQL (intermediate)"
Help:SPARQL 2 will explore federated queries fetching data from both Lingua Libre's and Wikidata's endpoints, then Wikidata Lexemes, an emerging source of lexicographic data. The duo can be a solid combo to provide lexicographic and multimedia data (audio recordings and images) for either Wikimedia modules or web developers.
Line 153: | Line 153: | ||
|style="padding: 0 3em;width:60%"| | |style="padding: 0 3em;width:60%"| | ||
<syntaxhighlight lang="sparql"> | <syntaxhighlight lang="sparql"> | ||
+ | SELECT ?country ?continentLabel ?ISO3 ?countryLabel (COUNT(?country) AS ?count) | ||
+ | WITH { | ||
+ | SELECT DISTINCT ?speaker { | ||
+ | ?speaker prop:P2 entity:Q3; | ||
+ | } | ||
+ | } AS %speakers | ||
+ | WITH { | ||
+ | SELECT DISTINCT | ||
+ | ?speaker | ||
+ | ?country | ||
+ | ?countryLabel | ||
+ | ?ISO3 | ||
+ | ?continentLabel | ||
+ | { | ||
+ | INCLUDE %speakers. | ||
+ | ?speaker prop:P14 ?residence. | ||
+ | # Avoids weird errors. | ||
+ | FILTER(REGEX(?residence, "^Q[0-9]+$")) | ||
+ | BIND(IRI(CONCAT('http://www.wikidata.org/entity/', ?residence)) AS ?residenceId) | ||
+ | |||
+ | # Get country from wikidata | ||
+ | SERVICE <https://query.wikidata.org/sparql> { | ||
+ | ?residenceId wdt:P17 ?country. | ||
+ | ?country rdfs:label ?countryLabel; | ||
+ | wdt:P298 ?ISO3; | ||
+ | wdt:P30 ?continent. | ||
+ | ?continent rdfs:label ?continentLabel. | ||
+ | FILTER(LANG(?countryLabel) = "en"). | ||
+ | FILTER(LANG(?continentLabel) = "en"). | ||
+ | } | ||
+ | } | ||
+ | } AS %speakersWithCountries | ||
+ | { | ||
+ | INCLUDE %speakersWithCountries. | ||
+ | } | ||
+ | GROUP BY ?country ?continentLabel ?ISO3 ?countryLabel | ||
+ | ORDER BY DESC(?count) | ||
</syntaxhighlight> | </syntaxhighlight> | ||
|| | || |
Revision as of 21:13, 8 January 2022
Tools
- Lexemes Queries Generator
Languages
✅ Language () → List of LL languages with wd speaker population
Lexemes
✅ Language (d:Q12107) → List of wd lexemes
Example: Q12107 (Breton).
✅ Language () → List of wd lexemes with LL audio
✅ Language () → List of wd lexemes with LL audio and wd translation (d:Q150)
✅ Language () → List of wd lexemes (d:Q150)
- Strange query from User:VIGNERON/common.js
# Lexemes of one language (wd:Q12107), returned as distinct (lemma, lexeme) pairs.
SELECT DISTINCT ?lexemeLabel ?lexeme
WITH {
  # Every lexical entry in the language, with its lemma and, when present,
  # its lexical category.
  SELECT ?lexeme ?lexemeLabel ?lexical_category
  WHERE {
    ?lexeme a ontolex:LexicalEntry .
    ?lexeme dct:language wd:Q12107 .
    ?lexeme wikibase:lemma ?lexemeLabel .
    OPTIONAL { ?lexeme wikibase:lexicalCategory ?lexical_category . }
  }
} AS %results
WHERE {
  INCLUDE %results
  # The English label of the lexical category is looked up here, but the
  # outer SELECT does not project it.
  OPTIONAL {
    ?lexical_category rdfs:label ?lexical_categoryLabel .
    FILTER (LANG(?lexical_categoryLabel) = "en")
  }
}
Speakers
✅ Speakers → Largest number of languages recorded and known
#Title: Speakers with recordings largest number of languages and known languages
# Returns the speaker(s) who recorded audio in the highest number of distinct
# languages, together with that number (?count) and a comma-separated list of
# the languages attached to the speaker item itself (?languages).
SELECT ?speaker ?speakerLabel ?count ?languages
# Distinct (speaker, language) pairs taken from the audio records:
# prop:P4 gives the language of an audio, prop:P5 its speaker.
WITH {
  SELECT DISTINCT ?speaker ?language {
    ?audio prop:P4 ?language;
           prop:P5 ?speaker.
  }
} AS %speakers
# Number of distinct recorded languages per speaker.
# No ORDER BY here: the only consumers are MAX and an equality FILTER,
# neither of which depends on row order.
WITH {
  SELECT ?speaker (COUNT(DISTINCT ?language) AS ?count) {
    INCLUDE %speakers.
  }
  GROUP BY ?speaker
} AS %countOfLanguagesRecordedPerSpeaker
# Highest per-speaker language count (a single row).
WITH {
  SELECT (MAX(?count) AS ?maxNumberOfLanguagesRecorded) {
    INCLUDE %countOfLanguagesRecordedPerSpeaker.
  }
} AS %maxNumberOfLanguagesRecorded
# Keep only the speakers whose count equals that maximum (ties are all kept).
WITH {
  SELECT ?speaker ?count {
    INCLUDE %countOfLanguagesRecordedPerSpeaker.
    INCLUDE %maxNumberOfLanguagesRecorded.
    FILTER(?count = ?maxNumberOfLanguagesRecorded).
  }
} AS %speakersWithMostNumberOfLanguagesRecorded
# English labels of the languages stated on each top speaker's own item
# (prop:P4 on the speaker; presumably the languages the speaker knows, as the
# title suggests). DISTINCT keeps a language from being listed twice if the
# same label is reached through more than one statement.
WITH {
  SELECT ?speaker (GROUP_CONCAT(DISTINCT ?languageLabel; SEPARATOR = ", ") AS ?languages) {
    INCLUDE %speakersWithMostNumberOfLanguagesRecorded.
    ?speaker prop:P4 [
      rdfs:label ?languageLabel
    ]
    FILTER(LANG(?languageLabel) = "en").
  }
  GROUP BY ?speaker
} AS %languagesOfSpeakersWithMostNumberOfLanguagesRecorded
{
  INCLUDE %speakersWithMostNumberOfLanguagesRecorded.
  INCLUDE %languagesOfSpeakersWithMostNumberOfLanguagesRecorded.
  # Only speakers that have an English label are returned.
  ?speaker rdfs:label ?speakerLabel.
  FILTER(LANG(?speakerLabel) = "en")
}
# Stable output when several speakers tie for the maximum.
ORDER BY ?speakerLabel ?speaker
|
|
✅ Speakers → Countries with most speakers
# Number of speakers per country of residence, largest first.
# One result row per (country, continent label, ISO3 code, country label).
SELECT ?country ?continentLabel ?ISO3 ?countryLabel (COUNT(?country) AS ?count)
# Step 1: every item carrying prop:P2 entity:Q3
# (presumably "instance of speaker"; confirm against the Lingua Libre data model).
WITH {
  SELECT DISTINCT ?speaker
  WHERE {
    ?speaker prop:P2 entity:Q3 .
  }
} AS %speakers
# Step 2: attach country data, fetched from Wikidata, to each speaker.
WITH {
  SELECT DISTINCT ?speaker ?country ?countryLabel ?ISO3 ?continentLabel
  WHERE {
    INCLUDE %speakers.
    ?speaker prop:P14 ?residence .
    # Keep only residence values shaped like a Q-identifier; anything else
    # cannot be turned into a Wikidata entity IRI below.
    FILTER(REGEX(?residence, "^Q[0-9]+$"))
    BIND(IRI(CONCAT('http://www.wikidata.org/entity/', ?residence)) AS ?residenceId)
    # Federated lookup: country of the residence, its ISO3 code and continent,
    # with English labels only.
    SERVICE <https://query.wikidata.org/sparql> {
      ?residenceId wdt:P17 ?country .
      ?country wdt:P298 ?ISO3 .
      ?country wdt:P30 ?continent .
      ?country rdfs:label ?countryLabel .
      FILTER(LANG(?countryLabel) = "en")
      ?continent rdfs:label ?continentLabel .
      FILTER(LANG(?continentLabel) = "en")
    }
  }
} AS %speakersWithCountries
# Step 3: aggregate the distinct speaker/country rows.
WHERE {
  INCLUDE %speakersWithCountries.
}
GROUP BY ?country ?continentLabel ?ISO3 ?countryLabel
ORDER BY DESC(?count)
|
|