MediaWiki
Difference between revisions of "LexemeQueriesGenerator.js"
m |
|||
(15 intermediate revisions by the same user not shown) | |||
Line 1: | Line 1: | ||
− | + | /* ************************************************************************** */ | |
− | + | /* Lexeme Queries Generator (lqg) ******************************************* */ | |
− | + | // Description: Generates a form to search for words in Wikidata Lexeme and DBnary, a wiktionary-based wikibase. Returns a SPARQL query OR open tthat QUERY. | |
− | // | + | // Usage: [[Template:LexemeQueriesGenerator]] |
− | + | // Usage: open [[Help:SPARQL 2#Tools]] > fill the form > click "Generate" or "Run !" | |
− | + | // Hack pad:https://jsfiddle.net/hugolpz/9jqg72u8/ | |
− | + | ||
+ | /* ************************************************************************** */ | ||
+ | // Author: Sina Ahmadi @sinaahmadi | ||
+ | // Code: https://github.com/sinaahmadi/sinaahmadi.github.io/blob/master/_posts/2021-10-04-sparql-query-generator-for-lexicographical-data.md | ||
+ | // Revamp: @hugolpz | ||
+ | // LICENSE: MIT | ||
+ | |||
+ | /* ************************************************************************** */ | ||
+ | /* TODO ********************************************************************* * / | ||
+ | 1) List of languages with recordings AND number of speakers | ||
+ | See base : https://lingualibre.org/wiki/LinguaLibre:List_of_languages | ||
+ | See nbSp : https://w.wiki/4ZB3 | ||
− | // | + | Finish hacking based on https://jsfiddle.net/hugolpz/rygo9s5b/ |
− | + | [x] getData from elements | |
− | + | [x] Identify suitable query | |
− | + | [x] Edit suitable query | |
− | + | [x] Run/Open Query | |
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | + | Improve | |
+ | [ ] Query with federate SPARQL including LinguaLibre | ||
+ | [ ] Factorize SPARQL codes. | ||
− | + | Add to LinguaLibre | |
− | / | + | [x] Add to [[User:Yug/OOUI]], [[Help:SPARQL 2]] |
− | + | [x] Migrate to general space. : MediaWiki:LexemeQueriesGenerator.js ; MediaWiki:Common.js | |
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | + | Translate | |
− | + | [ ] Find a way to translate ? | |
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | + | MediaWiki Extension ? | |
− | + | - [x] Explore -> Nope. | |
− | + | */ | |
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | + | // Toolbox | |
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
/* ********************************************************************** */ | /* ********************************************************************** */ | ||
/* QUERIES BASIS ******************************************************** */ | /* QUERIES BASIS ******************************************************** */ | ||
// Current: Wikidata, Dbnary. | // Current: Wikidata, Dbnary. | ||
+ | var queries = { | ||
// Queries for Wikidata | // Queries for Wikidata | ||
− | + | wikidata : | |
− | + | { | |
− | + | values :`VALUES ?word {'book'@GLWSSA}`, | |
− | + | basic: `SELECT * WHERE { | |
− | + | VALUESTOBEADDEDHERE | |
− | + | ?l a ontolex:LexicalEntry ; | |
− | + | dct:language wd:LNGCDE ; | |
− | + | ontolex:lexicalForm ?form ; #test | |
+ | wikibase:lexicalCategory wd:POSTAG ; | ||
+ | wikibase:lemma ?lemma . | ||
+ | ?form ontolex:representation ?word . | ||
+ | }`, | ||
+ | senses: `SELECT * WHERE { | ||
+ | VALUESTOBEADDEDHERE | ||
+ | ?l a ontolex:LexicalEntry ; | ||
+ | dct:language wd:LNGCDE ; | ||
+ | ontolex:lexicalForm ?form ; | ||
+ | wikibase:lexicalCategory wd:POSTAG ; | ||
+ | wikibase:lemma ?lemma ; | ||
+ | ontolex:sense ?sense . | ||
+ | ?form ontolex:representation ?word . | ||
+ | }`, | ||
+ | sensesDefinitions : `SELECT * WHERE { | ||
+ | VALUESTOBEADDEDHERE | ||
+ | ?l a ontolex:LexicalEntry ; | ||
+ | dct:language wd:LNGCDE ; | ||
+ | wikibase:lemma ?lemma ; | ||
+ | ontolex:lexicalForm ?form ; | ||
+ | wikibase:lexicalCategory ?category ; | ||
+ | ontolex:sense ?sense . | ||
+ | ?form ontolex:representation ?word . | ||
+ | ?language wdt:P218 "GLWSSA" . | ||
+ | ?sense skos:definition ?gloss . | ||
+ | FILTER EXISTS {?l ontolex:sense ?sense } | ||
+ | FILTER(LANG(?gloss) = "GLWSSA") | ||
+ | }`, | ||
+ | sensesDefinitionsExamples: `SELECT * WHERE { | ||
+ | VALUESTOBEADDEDHERE | ||
+ | ?l a ontolex:LexicalEntry ; | ||
+ | dct:language wd:LNGCDE ; | ||
+ | wikibase:lemma ?lemma ; | ||
+ | ontolex:lexicalForm ?form ; | ||
+ | wikibase:lexicalCategory ?category ; | ||
+ | ontolex:sense ?sense . | ||
+ | ?language wdt:P218 "GLWSSA" . | ||
+ | ?form ontolex:representation ?word . | ||
+ | ?sense skos:definition ?gloss . | ||
+ | OPTIONAL{ | ||
+ | ?l p:P5831 ?statement . | ||
+ | ?statement ps:P5831 ?example . | ||
+ | } | ||
+ | FILTER EXISTS {?l ontolex:sense ?sense } | ||
+ | FILTER(LANG(?gloss) = "GLWSSA") | ||
+ | }`, | ||
+ | translation: `SELECT DISTINCT * WHERE { | ||
+ | ?source dct:language wd:LNGCDE; | ||
+ | wikibase:lemma ?sourceLemma; | ||
+ | ontolex:sense [ wdt:P5137 ?sense ]. | ||
+ | ?target dct:language wd:LNGCDETRG; | ||
+ | wikibase:lemma ?targetLemma; | ||
+ | ontolex:sense [ wdt:P5137 ?sense ]. | ||
+ | } ORDER BY ASC(UCASE(str(?sourceLemma)))`, | ||
+ | translationLemma : `SELECT DISTINCT * WHERE { | ||
+ | VALUESTOBEADDEDHERE | ||
+ | ?source dct:language wd:LNGCDE; | ||
+ | wikibase:lemma ?sourceLemma; | ||
+ | ontolex:lexicalForm ?form ; | ||
+ | wikibase:lexicalCategory wd:POSTAG ; | ||
+ | ontolex:sense [ wdt:P5137 ?sense ]. | ||
+ | ?target dct:language wd:LNGCDETRG; | ||
+ | wikibase:lemma ?targetLemma; | ||
+ | ontolex:sense [ wdt:P5137 ?sense ]. | ||
+ | ?form ontolex:representation ?word . | ||
+ | } ORDER BY ASC(UCASE(str(?sourceLemma)))` | ||
+ | }, | ||
// Queries for Dbnary | // Queries for Dbnary | ||
− | + | dbnary : { | |
− | + | values: `VALUES ?label {'book'@GLWSSA} | |
− | + | VALUES ?pos {<http://www.lexinfo.net/ontology/2.0/lexinfo#POSTAGNM>}`, | |
− | + | basic: `SELECT * WHERE {VALUESTOBEADDEDHERE | |
− | + | ?lexeme a ontolex:LexicalEntry ; | |
− | + | rdfs:label ?label ; | |
− | + | ontolex:canonicalForm ?form ; | |
− | + | lime:language ?lang ; | |
− | + | lexinfo:partOfSpeech ?pos . | |
− | + | FILTER(?lang = "GLWSSA") | |
− | + | }`, | |
− | + | senses: `SELECT * WHERE { | |
+ | VALUESTOBEADDEDHERE | ||
+ | ?lexeme a ontolex:LexicalEntry ; | ||
+ | rdfs:label ?label ; | ||
+ | ontolex:canonicalForm ?form ; | ||
+ | lime:language ?lang ; | ||
+ | lexinfo:partOfSpeech ?pos ; | ||
+ | ontolex:sense ?sense . | ||
+ | FILTER(?lang = "GLWSSA") | ||
+ | }`, | ||
+ | sensesDefinitions: `SELECT ?lexeme ?label ?pos ?sense ?definition | ||
+ | WHERE { | ||
+ | ?sense a ontolex:LexicalSense ; | ||
+ | skos:definition ?def . | ||
+ | ?def rdf:value ?definition . | ||
+ | FILTER(lang(?definition) = "GLWSSA") | ||
+ | { | ||
+ | SELECT * WHERE {VALUESTOBEADDEDHERE | ||
+ | ?lexeme a ontolex:LexicalEntry ; | ||
rdfs:label ?label ; | rdfs:label ?label ; | ||
ontolex:canonicalForm ?form ; | ontolex:canonicalForm ?form ; | ||
Line 200: | Line 158: | ||
lexinfo:partOfSpeech ?pos ; | lexinfo:partOfSpeech ?pos ; | ||
ontolex:sense ?sense . | ontolex:sense ?sense . | ||
+ | FILTER(?lang = "GLWSSA") | ||
+ | } | ||
+ | } | ||
+ | }`, | ||
+ | sensesDefinitionsExamples: ``, | ||
+ | translation: ``, | ||
+ | translationLemma: `` | ||
+ | } | ||
+ | }; | ||
+ | |||
+ | /* ********************************************************************** */ | ||
+ | /* OOJS / OOUI ********************************************************** */ | ||
+ | // Data & options | ||
+ | var baseLanguages = [ //Major languages: cmn, spa, eng, fra, ara, rus, hin, swa | ||
+ | // data https://w.wiki/4ZB3 | ||
+ | { data:'Q13955', ll:'', label:'ara — Arabic' }, | ||
+ | { data:'Q9610', ll:'', label:'ben — Bengali' }, | ||
+ | { data:'Q9192', ll:'Q113', label:'cmn — MandarinChinese' } , | ||
+ | { data:'Q188', ll:'', label:'deu — German' }, | ||
+ | { data:'Q1860', ll:'', label:'eng — English' }, | ||
+ | { data:'Q150', ll:'', label:'fra — French' }, | ||
+ | { data:'Q1568', ll:'', label:'hin — Hindi' }, | ||
+ | { data:'Q5287', ll:'', label:'jpn — Japanese' }, | ||
+ | { data:'Q1571', ll:'Q34', label:'mar — Marathi' }, | ||
+ | { data:'Q58635', ll:'', label:'pan — Punjabi' }, | ||
+ | { data:'Q5146', ll:'', label:'por — Portuguese' }, | ||
+ | { data:'Q7737', ll:'', label:'rus — Russian' }, | ||
+ | { data:'Q1321', ll:'', label:'spa — Spanish' }, | ||
+ | ], | ||
+ | availableLanguages = [ | ||
+ | { data:'Q13955',ll:'', iso1:'ar', label:'ara — Arabic' }, | ||
+ | { data:'Q9610',ll:'', iso1:'bn', label:'ben — Bengali' }, | ||
+ | { data:'Q12107', ll:'Q209', iso1:'br', label: 'bre: Breton' }, // lili | ||
+ | { data:'Q9192', ll:'Q113', iso1:'zh', label:'cmn — MandarinChinese' } , | ||
+ | { data:'Q188',ll:'', iso1:'de', label:'deu — German' }, | ||
+ | { data:'Q1860',ll:'', iso1:'en', label:'eng — English' }, | ||
+ | { data:'Q150',ll:'', iso1:'fr', label:'fra — French' }, | ||
+ | { data:'Q33454',ll:'', iso1:'ff', label:'ful — Fula' }, | ||
+ | { data:'Q56475',ll:'', iso1:'ha', label:'hau — Hausa' }, | ||
+ | { data:'Q1568',ll:'', iso1:'hi', label:'hin — Hindi' }, | ||
+ | { data:'Q9240',ll:'', iso1:'id', label:'ind — Indonesian' }, | ||
+ | { data:'Q652',ll:'', iso1:'it', label:'ita — Italian' }, | ||
+ | { data:'Q33549',ll:'', iso1:'jv', label:'jav — Javanese' }, | ||
+ | { data:'Q5287',ll:'', iso1:'ja', label:'jpn — Japanese' }, | ||
+ | { data:'Q9176',ll:'', iso1:'ko', label:'kor — Korean' }, | ||
+ | { data:'Q1571',ll:'', iso1:'mr', label:'mar — Marathi' }, | ||
+ | { data:'Q9237',ll:'', iso1:'ms', label:'msa — Malay' }, | ||
+ | { data:'Q58635',ll:'', iso1:'pa', label:'pan — Punjabi' }, | ||
+ | { data:'Q5146',ll:'', iso1:'pt', label:'por — Portuguese' }, | ||
+ | { data:'Q7737',ll:'', iso1:'ru', label:'rus — Russian' }, | ||
+ | { data:'Q1321',ll:'', iso1:'es', label:'spa — Spanish' }, | ||
+ | { data:'Q5885',ll:'', iso1:'ta', label:'tam — Tamil' }, | ||
+ | { data:'Q8097',ll:'', iso1:'te', label:'tel — Telugu' }, | ||
+ | { data:'Q256',ll:'', iso1:'tr', label:'tur — Turkish' }, | ||
+ | { data:'Q1617',ll:'', iso1:'ur', label:'urd — Urdu' }, | ||
+ | { data:'Q9199',ll:'', iso1:'vi', label:'vie — Vietnamese' }, | ||
+ | ], | ||
+ | languagesArray = [ ...availableLanguages ], | ||
+ | posArray = [ | ||
+ | { data: 'Q1084', label: 'noun' }, | ||
+ | { data: 'Q36224', label: 'pronoun' }, | ||
+ | { data: 'Q24905', label: 'verb' }, | ||
+ | { data: 'Q380057', label: 'adverb' }, | ||
+ | { data: 'Q34698', label: 'adjective' }, | ||
+ | { data: 'Q21087400', label: 'quantitative'}, | ||
+ | { data: 'Q83034', label: 'interjection' }, | ||
+ | { data: 'Q4833830', label: 'preposition' }, | ||
+ | { data: 'Q103184', label: 'article' }, | ||
+ | { data: 'Q36484', label: 'conjunction' }, | ||
+ | ].map(o => new OO.ui.MenuOptionWidget({ data: o.data, label: o.label })), | ||
+ | infoTypesArray = [ | ||
+ | { data: 'basic', label: 'Just look !' }, | ||
+ | { data: 'senses', label: 'Sense' }, | ||
+ | { data: 'sensesDefinitions', label: 'Sense, definition' }, | ||
+ | { data: 'sensesDefinitionsExamples', label: 'Sense, definition and examples'}, | ||
+ | ].map(o => new OO.ui.MenuOptionWidget({ data: o.data, label: o.label })), | ||
+ | languagesArrayT = [ ...baseLanguages, ], | ||
+ | endpointsArray = [ | ||
+ | { data: 'wikidata', label: 'Wikidata', selected: true }, | ||
+ | { data: 'Dbnary', label: 'Dbnary' }, | ||
+ | //{ data: 'LinguaLibre', label: 'LinguaLibre' }, | ||
+ | ]; | ||
+ | |||
+ | // Elements | ||
+ | var lemma = new OO.ui.TextInputWidget({ | ||
+ | id: 'lqg-lemma', | ||
+ | icon: 'search', | ||
+ | placeholder: 'books', | ||
+ | value: 'books', | ||
+ | label: 'Word to search', | ||
+ | }), | ||
+ | languages = new OO.ui.ComboBoxInputWidget({ // ComboBox | ||
+ | id: 'lqg-languages', | ||
+ | placeholder : 'Target language', | ||
+ | options: languagesArray, | ||
+ | menu: { filterFromInput: true, filterMode : 'substring' } | ||
+ | // value: 'Q34', | ||
+ | }), | ||
+ | pos = new OO.ui.DropdownWidget( { | ||
+ | id: 'lqg-pos', | ||
+ | label: 'Part-of-speech', | ||
+ | menu: { items: posArray } | ||
+ | }), | ||
+ | infoTypes = new OO.ui.DropdownWidget({ | ||
+ | id: 'lqg-infoTypes', | ||
+ | label: 'Information to fetch ... (if any)', | ||
+ | menu: { items: infoTypesArray } | ||
+ | }), | ||
+ | translations = new OO.ui.ComboBoxInputWidget({ // ComboBox | ||
+ | id: 'lqg-translations', | ||
+ | placeholder : 'Translations to fetch ... (if any)', | ||
+ | options: languagesArrayT, | ||
+ | menu: { filterFromInput: true, filterMode : 'substring' } | ||
+ | }), | ||
+ | endpoints = new OO.ui.RadioSelectInputWidget( { | ||
+ | id: 'lqg-endpoints', | ||
+ | label: 'Radios buttons', | ||
+ | options: endpointsArray, | ||
+ | } ), | ||
+ | limit = new OO.ui.CheckboxInputWidget({ id: 'lqg-limit', selected: true, }), | ||
+ | limitLabel = new OO.ui.LabelWidget( { label: 'Limit to 100 (faster)' }), | ||
+ | queryDisplay = new OO.ui.MultilineTextInputWidget( { | ||
+ | id: 'lqg-queryDisplay', | ||
+ | //value: `# Query will appear here.\n\n\n\n\n\n`, | ||
+ | placeholder: `# Query will appear here.\n\n\n\n\n\n`, | ||
+ | multiline: true, | ||
+ | autosize: true, | ||
+ | minRows: 10, | ||
+ | maxRows: 20 | ||
+ | } ), | ||
+ | generate = new OO.ui.ButtonWidget({ | ||
+ | id: 'lqg-generate', | ||
+ | label: 'Generate', | ||
+ | }), | ||
+ | run = new OO.ui.ButtonWidget({ | ||
+ | id: 'lqg-run', | ||
+ | label:'Run !', | ||
+ | }); | ||
+ | |||
+ | // An example of a fieldset with horizontal layout. | ||
+ | var fieldset = new OO.ui.FieldsetLayout( { | ||
+ | label: 'Queries Generator for Wikidata Lexemes' | ||
+ | } ); | ||
+ | fieldset.addItems( [ | ||
+ | new OO.ui.FieldLayout( | ||
+ | new OO.ui.Widget( { | ||
+ | content: [ | ||
+ | new OO.ui.HorizontalLayout( { items: [ | ||
+ | lemma, | ||
+ | pos, | ||
+ | languages, | ||
+ | infoTypes, | ||
+ | translations, | ||
+ | ]}), | ||
+ | new OO.ui.HorizontalLayout( { items: [ | ||
+ | generate, | ||
+ | run, | ||
+ | limit, | ||
+ | limitLabel, | ||
+ | endpoints, | ||
+ | ]}), | ||
+ | queryDisplay | ||
+ | ] | ||
+ | }),{ | ||
+ | label: 'Select the suitable values to build your Wikidata Lexemes Query.', | ||
+ | align: 'top' | ||
+ | }) | ||
+ | ] | ||
+ | ); | ||
+ | $( "#lqg" ).append( fieldset.$element ); | ||
+ | |||
+ | // Check data | ||
+ | var checkAvailableData = function(identity,element){ | ||
+ | // console.log('2',languages) | ||
+ | console.log(identity+'a getElementId(): ',element.getElementId()) | ||
+ | console.log(identity+'b getData(): ',element.getData()) | ||
+ | console.log(identity+'c getValue(): ',element.getValue()) | ||
+ | } | ||
+ | |||
+ | $('#lqg-lemma').on('focusout',function() {checkAvailableData(1,lemma);}) | ||
+ | $('#lqg-languages').on('focusout',function() { checkAvailableData(2,languages);}); | ||
+ | checkAvailableData(2,languages); | ||
+ | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
/* ********************************************************************** */ | /* ********************************************************************** */ | ||
/* GENERATE QUERY STRING ************************************************ */ | /* GENERATE QUERY STRING ************************************************ */ | ||
// Current: Wikidata, Dbnary. | // Current: Wikidata, Dbnary. | ||
− | function | + | var generateQuery = function () { |
− | var | + | // getData from elements |
− | + | var selectedDropdown = function (group){ | |
− | var | + | var items = group.getMenu().items.filter(item=> item.selected==true ) |
− | + | return items[0]?items[0].data:null; | |
− | + | } | |
− | var | + | var selectedRadio = function (group){ |
− | + | var items = group.getMenu().items.filter(item=> item.selected==true ) | |
− | + | return items[0]?items[0].data:null; | |
− | + | } | |
− | + | var qid2value = function (arr,qid,field){ | |
− | + | field = field || 'label' | |
− | + | var match = arr.filter(item=> item.data==qid); | |
− | + | return match[0]?match[0][field]:null; | |
− | + | } | |
− | + | var form = { | |
− | + | lemma: lemma.getValue(),//string | |
− | + | pos: selectedDropdown(pos),//Qid | |
− | + | posLabel: qid2value(posArray,selectedDropdown(pos)),//noun | |
− | + | languages: languages.getValue(),//Qid | |
− | + | languagesISO1: qid2value(availableLanguages,languages.getValue(),'iso1'),//en | |
− | + | infoTypes: selectedDropdown(infoTypes),//string | |
− | + | translations: translations.getValue(),//Qid | |
− | + | limit: limit.isSelected(),//boolean | |
− | + | endpoints: endpoints.getValue(),//wikidata|dbnary | |
− | + | queryDisplay: queryDisplay.getValue(),//string | |
− | + | }; | |
− | + | console.log(form) | |
− | + | // console.log(form); | |
− | + | var query = form.queryDisplay; | |
− | + | // Identify suitable query | |
− | + | if (form.endpoints == 'wikidata' && form.translations != "null") { | |
− | + | form.infoTypes = form.lemma? 'translationLemma': 'translation'; | |
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
} | } | ||
− | + | if (form.endpoints == 'dbnary' && form.infoTypes == 'sensesDefinitionsExamples') { | |
− | if ( | + | form.infoTypes = 'sensesDefinitions'; |
− | |||
− | |||
− | |||
− | |||
} | } | ||
− | query = | + | query = queries[form.endpoints][form.infoTypes]; |
− | query = | + | |
− | query = query. | + | // Edit suitable query |
− | + | val = form.lemma ? queries[form.endpoints].values : '# no lemma provided'; | |
− | + | ||
− | + | query = query | |
− | + | .replace("VALUESTOBEADDEDHERE", val.replace("book", form.lemma)) | |
+ | .replace("WORD", form.lemma) | ||
+ | .replace("LNGCDE", form.languages)// Q | ||
+ | .replaceAll("GLWSSA", form.languagesISO1)// en | ||
+ | .replaceAll("LNGCDETRG", form.translations)// Q | ||
+ | .replace("POSTAGNM", form.posLabel) // noun | ||
+ | .replace("POSTAG", form.pos)// Q | ||
+ | .concat(form.limit?`\nLIMIT 100`:''); | ||
− | + | // Inject query | |
− | + | queryDisplay.setValue(query); | |
− | |||
− | |||
} | } | ||
Line 296: | Line 405: | ||
/* OPENS EXTERNAL QUERY SERVICE ***************************************** */ | /* OPENS EXTERNAL QUERY SERVICE ***************************************** */ | ||
// Current: Wikidata, Dbnary. Broken: lingualibre. | // Current: Wikidata, Dbnary. Broken: lingualibre. | ||
− | + | var runQuery = function () { | |
− | + | var endpointLabel = endpoints.getValue(), | |
− | + | queryText = queryDisplay.getValue(), | |
− | + | queryEncoded = encodeURIComponent(queryText), | |
− | + | baseEndpointUrl = | |
− | + | endpointLabel == 'wikidata'? 'https://query.wikidata.org/#' | |
− | + | :'http://kaiko.getalp.org/sparql?default-graph-uri=&query='; | |
− | + | if (queryText) { | |
− | + | window.open(baseEndpointUrl.concat(queryEncoded), '_blank'); | |
− | window.open( | ||
− | |||
} | } | ||
} | } | ||
+ | $('#lqg-run').on('click',function(){ runQuery(); }); | ||
+ | $('#lqg-generate').on('click',function(){ generateQuery(); }); |
Latest revision as of 11:15, 17 January 2022
/* ************************************************************************** */
/* Lexeme Queries Generator (lqg) ******************************************* */
// Description: Generates a form to search for words in Wikidata Lexemes and DBnary, a Wiktionary-based wikibase. Returns a SPARQL query OR opens that query.
// Usage: [[Template:LexemeQueriesGenerator]]
// Usage: open [[Help:SPARQL 2#Tools]] > fill the form > click "Generate" or "Run !"
// Hack pad:https://jsfiddle.net/hugolpz/9jqg72u8/
/* ************************************************************************** */
// Author: Sina Ahmadi @sinaahmadi
// Code: https://github.com/sinaahmadi/sinaahmadi.github.io/blob/master/_posts/2021-10-04-sparql-query-generator-for-lexicographical-data.md
// Revamp: @hugolpz
// LICENSE: MIT
/* ************************************************************************** */
/* TODO ********************************************************************* * /
1) List of languages with recordings AND number of speakers
See base : https://lingualibre.org/wiki/LinguaLibre:List_of_languages
See nbSp : https://w.wiki/4ZB3
Finish hacking based on https://jsfiddle.net/hugolpz/rygo9s5b/
[x] getData from elements
[x] Identify suitable query
[x] Edit suitable query
[x] Run/Open Query
Improve
[ ] Query with federate SPARQL including LinguaLibre
[ ] Factorize SPARQL codes.
Add to LinguaLibre
[x] Add to [[User:Yug/OOUI]], [[Help:SPARQL 2]]
[x] Migrate to general space. : MediaWiki:LexemeQueriesGenerator.js ; MediaWiki:Common.js
Translate
[ ] Find a way to translate ?
MediaWiki Extension ?
- [x] Explore -> Nope.
*/
// Toolbox
/* ********************************************************************** */
/* QUERIES BASIS ******************************************************** */
// Current: Wikidata, Dbnary.
// SPARQL query templates, grouped by endpoint key and then by information type.
// The upper-case tokens inside the templates are placeholders that
// generateQuery() substitutes before the query is displayed:
//   VALUESTOBEADDEDHERE -> the endpoint's `values` clause (or a "no lemma" comment)
//   book                -> the searched lemma (only inside `values`)
//   LNGCDE / LNGCDETRG  -> Qid of the searched language / of the translation language
//   GLWSSA              -> `iso1` code of the searched language
//   POSTAG / POSTAGNM   -> part-of-speech Qid / part-of-speech label
// Everything between backticks is emitted verbatim, so do not re-indent the templates.
var queries = {
// Queries for Wikidata
wikidata :
{
// VALUES clause binding ?word to the searched lemma, tagged with the language code.
values :`VALUES ?word {'book'@GLWSSA}`,
// Lexical entries of the given language and lexical category having a form written ?word.
basic: `SELECT * WHERE {
VALUESTOBEADDEDHERE
?l a ontolex:LexicalEntry ;
dct:language wd:LNGCDE ;
ontolex:lexicalForm ?form ; #test
wikibase:lexicalCategory wd:POSTAG ;
wikibase:lemma ?lemma .
?form ontolex:representation ?word .
}`,
// Same pattern as `basic`, additionally requiring (and returning) a sense.
senses: `SELECT * WHERE {
VALUESTOBEADDEDHERE
?l a ontolex:LexicalEntry ;
dct:language wd:LNGCDE ;
ontolex:lexicalForm ?form ;
wikibase:lexicalCategory wd:POSTAG ;
wikibase:lemma ?lemma ;
ontolex:sense ?sense .
?form ontolex:representation ?word .
}`,
// Senses with their gloss, keeping only glosses whose language tag is GLWSSA.
// Note: the lexical category is left unconstrained here (?category, no POSTAG).
sensesDefinitions : `SELECT * WHERE {
VALUESTOBEADDEDHERE
?l a ontolex:LexicalEntry ;
dct:language wd:LNGCDE ;
wikibase:lemma ?lemma ;
ontolex:lexicalForm ?form ;
wikibase:lexicalCategory ?category ;
ontolex:sense ?sense .
?form ontolex:representation ?word .
?language wdt:P218 "GLWSSA" .
?sense skos:definition ?gloss .
FILTER EXISTS {?l ontolex:sense ?sense }
FILTER(LANG(?gloss) = "GLWSSA")
}`,
// Same as `sensesDefinitions`, plus optional P5831 statements bound to ?example.
sensesDefinitionsExamples: `SELECT * WHERE {
VALUESTOBEADDEDHERE
?l a ontolex:LexicalEntry ;
dct:language wd:LNGCDE ;
wikibase:lemma ?lemma ;
ontolex:lexicalForm ?form ;
wikibase:lexicalCategory ?category ;
ontolex:sense ?sense .
?language wdt:P218 "GLWSSA" .
?form ontolex:representation ?word .
?sense skos:definition ?gloss .
OPTIONAL{
?l p:P5831 ?statement .
?statement ps:P5831 ?example .
}
FILTER EXISTS {?l ontolex:sense ?sense }
FILTER(LANG(?gloss) = "GLWSSA")
}`,
// Pairs of source/target lemmas whose senses share the same P5137 value.
// No VALUES placeholder: generateQuery() picks this template when no lemma is given.
translation: `SELECT DISTINCT * WHERE {
?source dct:language wd:LNGCDE;
wikibase:lemma ?sourceLemma;
ontolex:sense [ wdt:P5137 ?sense ].
?target dct:language wd:LNGCDETRG;
wikibase:lemma ?targetLemma;
ontolex:sense [ wdt:P5137 ?sense ].
} ORDER BY ASC(UCASE(str(?sourceLemma)))`,
// Same as `translation`, restricted to the searched word and lexical category.
translationLemma : `SELECT DISTINCT * WHERE {
VALUESTOBEADDEDHERE
?source dct:language wd:LNGCDE;
wikibase:lemma ?sourceLemma;
ontolex:lexicalForm ?form ;
wikibase:lexicalCategory wd:POSTAG ;
ontolex:sense [ wdt:P5137 ?sense ].
?target dct:language wd:LNGCDETRG;
wikibase:lemma ?targetLemma;
ontolex:sense [ wdt:P5137 ?sense ].
?form ontolex:representation ?word .
} ORDER BY ASC(UCASE(str(?sourceLemma)))`
},
// Queries for Dbnary
dbnary : {
// VALUES clauses binding ?label to the searched lemma and ?pos to a lexinfo part-of-speech IRI.
values: `VALUES ?label {'book'@GLWSSA}
VALUES ?pos {<http://www.lexinfo.net/ontology/2.0/lexinfo#POSTAGNM>}`,
// Lexical entries matching the label, language code and part of speech.
basic: `SELECT * WHERE {VALUESTOBEADDEDHERE
?lexeme a ontolex:LexicalEntry ;
rdfs:label ?label ;
ontolex:canonicalForm ?form ;
lime:language ?lang ;
lexinfo:partOfSpeech ?pos .
FILTER(?lang = "GLWSSA")
}`,
// Same pattern as `basic`, additionally requiring (and returning) a sense.
senses: `SELECT * WHERE {
VALUESTOBEADDEDHERE
?lexeme a ontolex:LexicalEntry ;
rdfs:label ?label ;
ontolex:canonicalForm ?form ;
lime:language ?lang ;
lexinfo:partOfSpeech ?pos ;
ontolex:sense ?sense .
FILTER(?lang = "GLWSSA")
}`,
// Definitions of the senses found by an inner sub-select equivalent to `senses`.
sensesDefinitions: `SELECT ?lexeme ?label ?pos ?sense ?definition
WHERE {
?sense a ontolex:LexicalSense ;
skos:definition ?def .
?def rdf:value ?definition .
FILTER(lang(?definition) = "GLWSSA")
{
SELECT * WHERE {VALUESTOBEADDEDHERE
?lexeme a ontolex:LexicalEntry ;
rdfs:label ?label ;
ontolex:canonicalForm ?form ;
lime:language ?lang ;
lexinfo:partOfSpeech ?pos ;
ontolex:sense ?sense .
FILTER(?lang = "GLWSSA")
}
}
}`,
// The three templates below are empty strings: no Dbnary query is written for them yet.
// generateQuery() maps a Dbnary `sensesDefinitionsExamples` request to `sensesDefinitions`.
sensesDefinitionsExamples: ``,
translation: ``,
translationLemma: ``
}
};
/* ********************************************************************** */
/* OOJS / OOUI ********************************************************** */
// Data & options
// Option lists feeding the form widgets.
// Language entries: `data` is the language Qid substituted into the Wikidata
// query templates, `iso1` the code substituted for the GLWSSA placeholder,
// `label` the text displayed in the menus. `ll` is not read anywhere in this
// script (presumably the LinguaLibre Qid, TODO confirm).
var baseLanguages = [ // Languages offered as translation targets
// data https://w.wiki/4ZB3
{ data:'Q13955', ll:'', label:'ara — Arabic' },
{ data:'Q9610', ll:'', label:'ben — Bengali' },
{ data:'Q9192', ll:'Q113', label:'cmn — MandarinChinese' } ,
{ data:'Q188', ll:'', label:'deu — German' },
{ data:'Q1860', ll:'', label:'eng — English' },
{ data:'Q150', ll:'', label:'fra — French' },
{ data:'Q1568', ll:'', label:'hin — Hindi' },
{ data:'Q5287', ll:'', label:'jpn — Japanese' },
{ data:'Q1571', ll:'Q34', label:'mar — Marathi' },
{ data:'Q58635', ll:'', label:'pan — Punjabi' },
{ data:'Q5146', ll:'', label:'por — Portuguese' },
{ data:'Q7737', ll:'', label:'rus — Russian' },
{ data:'Q1321', ll:'', label:'spa — Spanish' },
],
// Languages offered for the searched word; generateQuery() reads `iso1` from here.
availableLanguages = [
{ data:'Q13955',ll:'', iso1:'ar', label:'ara — Arabic' },
{ data:'Q9610',ll:'', iso1:'bn', label:'ben — Bengali' },
{ data:'Q12107', ll:'Q209', iso1:'br', label: 'bre: Breton' }, // lili
{ data:'Q9192', ll:'Q113', iso1:'zh', label:'cmn — MandarinChinese' } ,
{ data:'Q188',ll:'', iso1:'de', label:'deu — German' },
{ data:'Q1860',ll:'', iso1:'en', label:'eng — English' },
{ data:'Q150',ll:'', iso1:'fr', label:'fra — French' },
{ data:'Q33454',ll:'', iso1:'ff', label:'ful — Fula' },
{ data:'Q56475',ll:'', iso1:'ha', label:'hau — Hausa' },
{ data:'Q1568',ll:'', iso1:'hi', label:'hin — Hindi' },
{ data:'Q9240',ll:'', iso1:'id', label:'ind — Indonesian' },
{ data:'Q652',ll:'', iso1:'it', label:'ita — Italian' },
{ data:'Q33549',ll:'', iso1:'jv', label:'jav — Javanese' },
{ data:'Q5287',ll:'', iso1:'ja', label:'jpn — Japanese' },
{ data:'Q9176',ll:'', iso1:'ko', label:'kor — Korean' },
{ data:'Q1571',ll:'', iso1:'mr', label:'mar — Marathi' },
{ data:'Q9237',ll:'', iso1:'ms', label:'msa — Malay' },
{ data:'Q58635',ll:'', iso1:'pa', label:'pan — Punjabi' },
{ data:'Q5146',ll:'', iso1:'pt', label:'por — Portuguese' },
{ data:'Q7737',ll:'', iso1:'ru', label:'rus — Russian' },
{ data:'Q1321',ll:'', iso1:'es', label:'spa — Spanish' },
{ data:'Q5885',ll:'', iso1:'ta', label:'tam — Tamil' },
{ data:'Q8097',ll:'', iso1:'te', label:'tel — Telugu' },
{ data:'Q256',ll:'', iso1:'tr', label:'tur — Turkish' },
{ data:'Q1617',ll:'', iso1:'ur', label:'urd — Urdu' },
{ data:'Q9199',ll:'', iso1:'vi', label:'vie — Vietnamese' },
],
// Shallow copy handed to the "languages" combo box.
languagesArray = [ ...availableLanguages ],
// Parts of speech: `data` is the Qid (POSTAG), `label` the name (POSTAGNM).
posArray = [
{ data: 'Q1084', label: 'noun' },
{ data: 'Q36224', label: 'pronoun' },
{ data: 'Q24905', label: 'verb' },
{ data: 'Q380057', label: 'adverb' },
{ data: 'Q34698', label: 'adjective' },
{ data: 'Q21087400', label: 'quantitative'},
{ data: 'Q83034', label: 'interjection' },
{ data: 'Q4833830', label: 'preposition' },
{ data: 'Q103184', label: 'article' },
{ data: 'Q36484', label: 'conjunction' },
].map(o => new OO.ui.MenuOptionWidget({ data: o.data, label: o.label })),
// Information types: `data` must match a template key of `queries[endpoint]`.
infoTypesArray = [
{ data: 'basic', label: 'Just look !' },
{ data: 'senses', label: 'Sense' },
{ data: 'sensesDefinitions', label: 'Sense, definition' },
{ data: 'sensesDefinitionsExamples', label: 'Sense, definition and examples'},
].map(o => new OO.ui.MenuOptionWidget({ data: o.data, label: o.label })),
// Shallow copy handed to the "translations" combo box.
languagesArrayT = [ ...baseLanguages, ],
// Endpoints: `data` is used verbatim as the key into `queries` and compared
// against 'wikidata' / 'dbnary' in generateQuery(), so it must be lower-case.
// (It used to be 'Dbnary', which made `queries[endpoint]` undefined.)
endpointsArray = [
{ data: 'wikidata', label: 'Wikidata', selected: true },
{ data: 'dbnary', label: 'Dbnary' },
//{ data: 'LinguaLibre', label: 'LinguaLibre' },
];
// Elements
// Form widgets, one declaration per control. The `id` values are the DOM
// hooks used by the jQuery event bindings further down.

// Free-text input for the word to look up.
var lemma = new OO.ui.TextInputWidget( {
	id: 'lqg-lemma',
	icon: 'search',
	placeholder: 'books',
	value: 'books',
	label: 'Word to search'
} );

// Language of the searched word (filterable combo box).
var languages = new OO.ui.ComboBoxInputWidget( {
	id: 'lqg-languages',
	placeholder: 'Target language',
	options: languagesArray,
	menu: {
		filterFromInput: true,
		filterMode: 'substring'
	}
} );

// Part of speech of the searched word.
var pos = new OO.ui.DropdownWidget( {
	id: 'lqg-pos',
	label: 'Part-of-speech',
	menu: { items: posArray }
} );

// Which query template to use (basic, senses, ...).
var infoTypes = new OO.ui.DropdownWidget( {
	id: 'lqg-infoTypes',
	label: 'Information to fetch ... (if any)',
	menu: { items: infoTypesArray }
} );

// Optional language to fetch translations in (filterable combo box).
var translations = new OO.ui.ComboBoxInputWidget( {
	id: 'lqg-translations',
	placeholder: 'Translations to fetch ... (if any)',
	options: languagesArrayT,
	menu: {
		filterFromInput: true,
		filterMode: 'substring'
	}
} );

// SPARQL endpoint selector.
var endpoints = new OO.ui.RadioSelectInputWidget( {
	id: 'lqg-endpoints',
	label: 'Radios buttons',
	options: endpointsArray
} );

// "LIMIT 100" toggle and its caption.
var limit = new OO.ui.CheckboxInputWidget( {
	id: 'lqg-limit',
	selected: true
} );
var limitLabel = new OO.ui.LabelWidget( {
	label: 'Limit to 100 (faster)'
} );

// Text area receiving the generated query.
var queryDisplay = new OO.ui.MultilineTextInputWidget( {
	id: 'lqg-queryDisplay',
	placeholder: `# Query will appear here.\n\n\n\n\n\n`,
	multiline: true,
	autosize: true,
	minRows: 10,
	maxRows: 20
} );

// Action buttons.
var generate = new OO.ui.ButtonWidget( {
	id: 'lqg-generate',
	label: 'Generate'
} );
var run = new OO.ui.ButtonWidget( {
	id: 'lqg-run',
	label: 'Run !'
} );
// An example of a fieldset with horizontal layout.
// Assemble the form: one row of inputs, one row of actions/options, then the
// query text area, all wrapped in a single top-aligned field of the fieldset.
var fieldset = new OO.ui.FieldsetLayout( {
	label: 'Queries Generator for Wikidata Lexemes'
} );
fieldset.addItems( [
	( function () {
		// Local scope keeps the intermediate layouts out of the global namespace.
		var inputsRow = new OO.ui.HorizontalLayout( {
			items: [ lemma, pos, languages, infoTypes, translations ]
		} );
		var actionsRow = new OO.ui.HorizontalLayout( {
			items: [ generate, run, limit, limitLabel, endpoints ]
		} );
		var formBody = new OO.ui.Widget( {
			content: [ inputsRow, actionsRow, queryDisplay ]
		} );
		return new OO.ui.FieldLayout( formBody, {
			label: 'Select the suitable values to build your Wikidata Lexemes Query.',
			align: 'top'
		} );
	}() )
] );
// Mount the form into the placeholder element with id "lqg".
$( "#lqg" ).append( fieldset.$element );
// Check data
/**
 * Debug helper: logs what a widget exposes through its three accessors.
 * Each accessor is called and logged in turn (element id, data, value).
 *
 * @param {number|string} identity Prefix used to tell the log lines apart.
 * @param {Object} element Object providing getElementId(), getData() and getValue().
 */
var checkAvailableData = function ( identity, element ) {
	var probes = [
		[ 'a', 'getElementId' ],
		[ 'b', 'getData' ],
		[ 'c', 'getValue' ]
	];
	probes.forEach( function ( probe ) {
		var tag = probe[ 0 ],
			accessor = probe[ 1 ];
		console.log( identity + tag + ' ' + accessor + '(): ', element[ accessor ]() );
	} );
};
// Debug wiring: dump the widget state whenever the lemma or language input
// loses focus, and dump the language widget once at load time.
$( '#lqg-lemma' ).on( 'focusout', () => {
	checkAvailableData( 1, lemma );
} );
$( '#lqg-languages' ).on( 'focusout', () => {
	checkAvailableData( 2, languages );
} );
checkAvailableData( 2, languages );
/* ********************************************************************** */
/* GENERATE QUERY STRING ************************************************ */
// Current: Wikidata, Dbnary.
/**
 * Reads the form widgets, picks the matching SPARQL template from `queries`,
 * fills in its placeholders and writes the result into `queryDisplay`.
 *
 * Template selection:
 *  - Wikidata + a translation language chosen -> `translationLemma` when a
 *    lemma is given, `translation` otherwise;
 *  - Dbnary + `sensesDefinitionsExamples` -> `sensesDefinitions` (the Dbnary
 *    examples template is empty);
 *  - no information type chosen -> `basic`.
 *
 * Takes no parameters and returns nothing; its only effect is
 * `queryDisplay.setValue(...)`.
 */
var generateQuery = function () {
	// `data` of the selected option of a dropdown, or null when nothing is selected.
	var selectedDropdown = function ( group ) {
		var chosen = group.getMenu().items.filter( function ( item ) {
			return item.selected == true;
		} );
		return chosen[ 0 ] ? chosen[ 0 ].data : null;
	};
	// Value of `field` (default 'label') for the entry of `arr` whose `data` equals `qid`, else null.
	var qid2value = function ( arr, qid, field ) {
		field = field || 'label';
		var match = arr.filter( function ( item ) {
			return item.data == qid;
		} );
		return match[ 0 ] ? match[ 0 ][ field ] : null;
	};
	// Escapes a lemma for use inside a single-quoted SPARQL string literal,
	// so that words such as "l'eau" do not terminate the literal early.
	var escapeLiteral = function ( text ) {
		return String( text ).replace( /\\/g, '\\\\' ).replace( /'/g, "\\'" );
	};

	// getData from elements
	var selectedPos = selectedDropdown( pos );
	var selectedLanguage = languages.getValue();
	var form = {
		lemma: lemma.getValue(), // string
		pos: selectedPos, // Qid
		posLabel: qid2value( posArray, selectedPos ), // noun
		languages: selectedLanguage, // Qid
		languagesISO1: qid2value( availableLanguages, selectedLanguage, 'iso1' ), // en
		infoTypes: selectedDropdown( infoTypes ), // string
		translations: translations.getValue(), // Qid, '' when left empty
		limit: limit.isSelected(), // boolean
		// Lower-cased so an option value such as 'Dbnary' still matches the `dbnary` key.
		endpoints: String( endpoints.getValue() || '' ).toLowerCase() // wikidata|dbnary
	};

	// Identify suitable query
	var infoType = form.infoTypes || 'basic';
	// An untouched combo box yields '', so test for emptiness; comparing with
	// the string "null" was always true and forced a translation query.
	if ( form.endpoints === 'wikidata' && form.translations ) {
		infoType = form.lemma ? 'translationLemma' : 'translation';
	}
	if ( form.endpoints === 'dbnary' && infoType === 'sensesDefinitionsExamples' ) {
		infoType = 'sensesDefinitions';
	}
	var endpointQueries = queries[ form.endpoints ];
	var template = endpointQueries ? endpointQueries[ infoType ] : null;
	if ( !template ) {
		// Unknown endpoint, unknown type or empty template: report instead of throwing.
		queryDisplay.setValue( '# No query template available for this selection.' );
		return;
	}

	// Edit suitable query
	// Function replacers are used throughout so that "$&", "$1", ... in user
	// input are inserted literally instead of being read as replacement patterns.
	// Longer placeholders go first because LNGCDE is a prefix of LNGCDETRG and
	// POSTAG a prefix of POSTAGNM.
	var fillPlaceholders = function ( text ) {
		return text
			.replaceAll( 'LNGCDETRG', function () { return String( form.translations ); } ) // Q
			.replaceAll( 'LNGCDE', function () { return String( form.languages ); } ) // Q
			.replaceAll( 'GLWSSA', function () { return String( form.languagesISO1 ); } ) // en
			.replaceAll( 'POSTAGNM', function () { return String( form.posLabel ); } ) // noun
			.replaceAll( 'POSTAG', function () { return String( form.pos ); } ); // Q
	};
	// The lemma is inserted after the placeholders are resolved, so a lemma that
	// happens to contain a placeholder name is left untouched.
	var valuesClause = form.lemma ?
		fillPlaceholders( endpointQueries.values )
			.replace( 'book', function () { return escapeLiteral( form.lemma ); } ) :
		'# no lemma provided';
	var query = fillPlaceholders( template )
		.replace( 'VALUESTOBEADDEDHERE', function () { return valuesClause; } );
	if ( form.limit ) {
		query += '\nLIMIT 100';
	}

	// Inject query
	queryDisplay.setValue( query );
};
/* ********************************************************************** */
/* OPENS EXTERNAL QUERY SERVICE ***************************************** */
// Current: Wikidata, Dbnary. Broken: lingualibre.
/**
 * Opens the query currently shown in `queryDisplay` in a new browser tab:
 * on the Wikidata Query Service when the selected endpoint is 'wikidata',
 * on the kaiko.getalp.org SPARQL endpoint for any other selection.
 * Does nothing when the query text is empty.
 */
var runQuery = function () {
	var target = endpoints.getValue();
	var sparql = queryDisplay.getValue();
	if ( !sparql ) {
		return;
	}
	var serviceUrl;
	if ( target == 'wikidata' ) {
		serviceUrl = 'https://query.wikidata.org/#';
	} else {
		serviceUrl = 'http://kaiko.getalp.org/sparql?default-graph-uri=&query=';
	}
	window.open( serviceUrl + encodeURIComponent( sparql ), '_blank' );
};
// Wire the two action buttons to their handlers.
$( '#lqg-run' ).on( 'click', () => {
	runQuery();
} );
$( '#lqg-generate' ).on( 'click', () => {
	generateQuery();
} );