Help
Difference between revisions of "SPARQL for lexemes"
(Moving former SPARQL to dedicated page.) |
|||
(4 intermediate revisions by the same user not shown) | |||
Line 1: | Line 1: | ||
− | == Lexemes == | + | == Tools == |
− | + | === Lexemes Queries Generator === | |
− | + | {{LexemeQueriesGenerator}} | |
+ | |||
+ | == Notable elements == | ||
:''Table of notable wikidata properties to create.'' | :''Table of notable wikidata properties to create.'' | ||
− | {|class="wikitable" | + | {| class="wikitable" |
− | ! | + | ! LinguaLibre endpoint |
− | ! | + | !colspan=2| Wikidata endpoint |
− | |- | + | |- style="vertical-align:top" |
| | | | ||
+ | For recordings: | ||
+ | * `instance of` [[Property:P2|P2]] : | ||
+ | ** is `record` [[Q2]] | ||
+ | ** is `speaker` [[Q3]] | ||
+ | ** is `language` [[Q4]] | ||
+ | * `language` [[Property:P4|P4]] | ||
+ | * `speaker` [[Property:P5|P5]] | ||
+ | * `gender` [[Property:P8|P8]] | ||
+ | * `wikidata` [[Property:P12|P12]] | ||
+ | * `iso` [[Property:P13|P13]] | ||
+ | * `media type` [[Property:P24|P24]] | ||
+ | ** media type [[Q88888]] | ||
+ | ** is `audio` [[Q88889]] | ||
+ | ** is `video` [[Q88890]] | ||
+ | ** is `written` [[Q1087276]] | ||
| | | | ||
+ | For languages: | ||
+ | * `instance of` [[:d:P:P31|P31]]/[[:d:P:P279|P279]]* | ||
+ | ** is `language` [[:d:Q34770]] (ethnic based), [[:d:Q315]] (capacity) | ||
+ | ** is `sign language` [[:d:Q34228]] | ||
+ | ** is `endangered language` [[:d:Q335214]] | ||
+ | ** is `severely endangered language` [[:d:Q83365366]] | ||
+ | ** is `dead language` [[:d:Q45762]] (no community) | ||
+ | ** is `extinct language` [[:d:Q38058796]] (no speaker) | ||
+ | * `ISO 639-1 code` [[:d:P:P218|P:P218]] | ||
+ | * `ISO 639-2 code` [[:d:P:P219|P:P219]] | ||
+ | * `ISO 639-3 code` [[:d:P:P220|P:P220]] | ||
+ | * `IETF language tag` [[:d:P:P305|P:P305]] | ||
+ | * `geographic coordinate` [[:d:P:P625|P625]] | ||
+ | * `number of speakers` [[:d:P:P1098|P1098]] | ||
+ | * `wikimedia code` [[:d:P:P424|P:P424]] | ||
+ | * `native name` [[:d:P:P1705|P:P1705]] | ||
+ | * `lingualibre ID` [[:d:P:P10369|P:P10369]] | ||
+ | | | ||
+ | For lexemes: | ||
+ | * <span style="color:orange;">TO BE COMPLETED !</span> | ||
+ | * Q82042 'part of speech' | ||
+ | * P5137 'item for this sense' | ||
+ | |||
|} | |} | ||
+ | |||
+ | == Lexemes == | ||
+ | :''[[LinguaLibre:Technical board/Reports/2021/Wikidata Lexemes & Lingua Libre coordination assessment]]''. | ||
=== Part of speech === | === Part of speech === |
Latest revision as of 15:42, 26 February 2024
Tools
Lexemes Queries Generator
Notable elements
- Table of notable wikidata properties to create.
LinguaLibre endpoint | Wikidata endpoint | |
---|---|---|
For recordings:
|
For languages:
|
For lexemes:
|
Lexemes
Part of speech
To run on WDQS.[1]
#defaultEndpoint:Wikidata
# Lists every item that is an instance of 'part of speech', sorted by label.
SELECT
  ?item
  ?itemLabel
WHERE {
  # P31 'instance of' -> Q82042 'part of speech'
  ?item wdt:P31 wd:Q82042 .

  # The label service binds ?itemLabel, preferring the UI language, then English.
  SERVICE wikibase:label {
    bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" .
  }
}
ORDER BY ?itemLabel
|
|
✅ Language (d:Q12107) → List of Wikidata lexemes for Breton
To run on WDQS.[1] Source: User:VIGNERON/common.js.
#defaultEndpoint:Wikidata
# Lexemes whose language is Q12107 (Breton), returned with their lemma.
# NOTE(review): the lexical category and its English label are matched below but
# never projected by the outer SELECT; they do not change the distinct result rows.
SELECT DISTINCT
  ?lexemeLabel
  ?lexeme
WITH {
  # Named subquery: one row per (lexeme, lemma, optional lexical category).
  SELECT
    ?lexeme
    ?lexemeLabel
    ?lexical_category
  WHERE {
    ?lexeme a ontolex:LexicalEntry ;
            dct:language wd:Q12107 ;
            wikibase:lemma ?lexemeLabel .

    # Keep lexemes that have no lexical category.
    OPTIONAL { ?lexeme wikibase:lexicalCategory ?lexical_category . }
  }
} AS %results
WHERE {
  INCLUDE %results

  # English label of the lexical category, when there is one.
  OPTIONAL {
    ?lexical_category rdfs:label ?lexical_categoryLabel .
    FILTER(LANG(?lexical_categoryLabel) = "en")
  }
}
|
||
#defaultEndpoint:Wikidata
# Minimal form: every lexeme whose language is Q12107 (Breton), with its lemma.
SELECT
  ?lexeme
  ?lemma
WHERE {
  ?lexeme dct:language wd:Q12107 .
  ?lexeme wikibase:lemma ?lemma .
}
|
✅ Language () → List of wd lexemes with LL audio
To run on WDQS.[1]
#defaultEndpoint:Wikidata
# Lexeme forms whose audio statement (P443) points to a Commons file whose name
# starts with "LL-", i.e. a Lingua Libre recording.
#   ?l     - the lexeme
#   ?x     - one of its forms
#   ?value - the Commons file URL attached to the form
SELECT
  ?l
  ?x
  ?value
WHERE {
  ?l ontolex:lexicalForm ?x .
  ?x wdt:P443 ?value .

  # A plain prefix test: STRSTARTS compares the literal text, whereas the former
  # regex() left "." unescaped (matching any character) and is slower to evaluate.
  FILTER(STRSTARTS(STR(?value), "http://commons.wikimedia.org/wiki/Special:FilePath/LL-"))
}
|
|
✅ Language () → List of wd lexemes with (LL audio and) wd translation (d:Q150)
✅ Concept (Q3142) → All lexemes whose sense is this concept
To run on WDQS.[1]
#defaultEndpoint:Wikidata
#title: Lexemes with senses linked to the item about the colour "red" (Q3142)
# For every lexeme that has a sense pointing at Q3142, returns the lexeme, the
# English name of its language, its lemma and the matching sense.
SELECT
  ?wikidataLexeme
  ?languageLabel
  ?lemma
  ?sense
WHERE {
  ?wikidataLexeme dct:language ?language ;
                  wikibase:lemma ?lemma ;
                  ontolex:sense ?sense .

  # Filter: P5137 'item for this sense' is Q3142 'red'.
  ?sense wdt:P5137 wd:Q3142 .

  # Language names are restricted to their English label.
  ?language rdfs:label ?languageLabel .
  FILTER(LANG(?languageLabel) = "en")
}
|
|
✅ Given language (d:Q5146) → List existing parts of speech
To run on WDQS.[1]
#defaultEndpoint:Wikidata
# Counts the lexemes of one language (Q5146, Portuguese) per part of speech,
# most frequent category first.
SELECT
  ?pos
  ?posLabel
  (COUNT(?wikidataLexeme) AS ?quantity)
WHERE {
  ?wikidataLexeme dct:language wd:Q5146 .                # Portuguese
  ?wikidataLexeme wikibase:lexicalCategory ?pos .        # Part of speech

  # The label service binds ?posLabel, preferring the UI language, then English.
  SERVICE wikibase:label {
    bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" .
  }
}
GROUP BY ?pos ?posLabel
ORDER BY DESC(?quantity)
|
|
✅ Given language (d:Q9192) → List wikidata lexemes
To run on WDQS.[1]
#defaultEndpoint:Wikidata
# One row per lexeme of the given language (Q9192) and part of speech, with all
# of its lemmas joined into a single " / "-separated string.
SELECT
  ?wikidataLexeme
  ?posLabel
  (GROUP_CONCAT(?lemma; SEPARATOR = " / ") AS ?lemmas)
WHERE {
  ?wikidataLexeme dct:language wd:Q9192 .               # Chinese
  ?wikidataLexeme wikibase:lemma ?lemma .               # Words
  ?wikidataLexeme wikibase:lexicalCategory ?pos .       # Part of speech

  # English label of the part of speech only.
  ?pos rdfs:label ?posLabel .
  FILTER(LANG(?posLabel) = "en")
}
GROUP BY ?wikidataLexeme ?posLabel
|
|