Help

Difference between revisions of "SPARQL for maintenance"

SPARQL for maintenance gathers SPARQL queries that help monitor errors and fix our resources.

Line 295: Line 295:
 
     bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" .
 
     bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" .
 
   }  
 
   }  
 +
}
 +
</query>
 +
|}
 +
 +
=== ✅ Languages → language with media type (P24) is video (Q88890) ===
 +
{| style="width:100%"
 +
|- style="vertical-align:top;"
 +
|style="padding: 0 3em;width:60%"|
 +
<syntaxhighlight lang="sparql">
 +
SELECT ?id ?idLabel WHERE {
 +
  ?id prop:P2 entity:Q4 .
 +
  ?id prop:P24 entity:Q88890 .
 +
  SERVICE wikibase:label { bd:serviceParam wikibase:language "fr,en". }
 +
}
 +
</syntaxhighlight>
 +
||
 +
<query _pagination="5">
 +
SELECT ?id ?idLabel WHERE {
 +
  ?id prop:P2 entity:Q4 .
 +
  ?id prop:P24 entity:Q88890 .
 +
  SERVICE wikibase:label { bd:serviceParam wikibase:language "fr,en". }
 +
}
 +
</query>
 +
|}
 +
 +
=== ✅ Languages with media type video → written word (P7), url (P3) speakers (P5) ===
 +
{| style="width:100%"
 +
|- style="vertical-align:top;"
 +
|style="padding: 0 3em;width:60%"|
 +
<syntaxhighlight lang="sparql">
 +
SELECT ?word ?filename ?speaker WHERE {
 +
  ?record prop:P4 entity:Q99628 .
 +
  ?record prop:P2 entity:Q2 .
 +
  ?record prop:P7 ?word .
 +
  ?record prop:P3 ?filename .
 +
  ?record prop:P5 ?speakerItem .
 +
  ?speakerItem rdfs:label
 +
  ?speaker filter ( lang( ?speaker ) = "en" ) .
 +
}
 +
</syntaxhighlight>
 +
||
 +
<query _pagination="5">
 +
SELECT ?word ?filename ?speaker WHERE {
 +
  ?record prop:P4 entity:Q99628 .
 +
  ?record prop:P2 entity:Q2 .
 +
  ?record prop:P7 ?word .
 +
  ?record prop:P3 ?filename .
 +
  ?record prop:P5 ?speakerItem .
 +
  ?speakerItem rdfs:label
 +
  ?speaker filter ( lang( ?speaker ) = "en" ) .
 
}
 
}
 
</query>
 
</query>

Revision as of 10:02, 28 July 2022


Speakers

✅ Speakers → Username containing reserved sign `-`

See also T297635
# Lists every item whose P2 is Q3 (speaker) together with its label and a
# boolean flag telling whether that label contains the reserved "-" sign.
# Flagged speakers are sorted first.
SELECT * WHERE {
  ?id prop:P2 entity:Q3 .

  # The label is requested explicitly inside the label service and bound
  # to ?name so that it can be tested below.
  SERVICE wikibase:label {
    bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" .
    ?id rdfs:label ?name .
  }

  BIND( REGEX( STR(?name), "-" ) AS ?has_separator )
}
ORDER BY DESC(?has_separator)
... Loading ...
# Counts speakers (P2 is Q3) grouped by whether their label contains the
# reserved "-" sign: one row per value of ?has_separator.
SELECT ?has_separator (COUNT(?has_separator) AS ?found) WHERE {
  ?id prop:P2 entity:Q3 .

  # The label is requested explicitly inside the label service and bound
  # to ?name so that it can be tested below.
  SERVICE wikibase:label {
    bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" .
    ?id rdfs:label ?name .
  }

  BIND( REGEX( STR(?name), "-" ) AS ?has_separator )
}
GROUP BY ?has_separator
... Loading ...

✅ Speakers → Speakers of `zho` to change to `cmn`

The number of results should be kept at 0.
# Lists speakers (P2 is Q3) whose language (P4) is Q130 `zho`, so that they
# can be changed to `cmn`. Each row gives the language label, its ISO code
# (P13) label, a shortened speaker identifier and the speaker label.
#
# The shortened identifier is projected as ?speakerId rather than ?speaker:
# SPARQL 1.1 does not allow (expr AS ?var) to reuse a variable that is
# already in scope, and ?speaker is bound in the WHERE clause.
# SUBSTR(..., 32) presumably strips the entity URI prefix to keep only the
# Q-id -- TODO confirm against the endpoint's entity namespace.
SELECT ?langLabel ?isoLabel (SUBSTR(STR(?speaker), 32) AS ?speakerId) ?speakerLabel
WHERE {
  ?speaker prop:P4 entity:Q130 .   # Filter: P4 language is Q130 zho
  ?speaker prop:P2 entity:Q3 .     # Filter: speakers
  ?speaker prop:P4 ?lang .
  ?lang    prop:P13 ?iso .
  # Add labels
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" . }
}
ORDER BY ASC(?isoLabel)
# Helper: Chinese writing zho:Q130/wd:Q7850 ; Chinese mandarin cmn:Q113/wd:Q9192 .
... Loading ...

✅ Speakers → Undefined place of residence

The following speakers have no place of residence defined and may need to be completed.
# Lists speakers (P2 is Q3) that have no P14 `place of residence` statement.
SELECT ?item ?itemLabel
WHERE {
  ?item prop:P2 entity:Q3 .   # Filter: is Q3 `speaker`
  # Filter: missing P14 `place of residence`. A triple pattern needs an
  # object, so absence is expressed with FILTER NOT EXISTS.
  FILTER NOT EXISTS { ?item prop:P14 ?residence . }
  # Add labels to each variable used.
  SERVICE wikibase:label {
    bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" .
  }
}
... Loading ...


✅ Speakers → Undefined gender

The following may need a merge
# Lists speakers (P2 is Q3) whose gender (P8) is one of the "undefined"
# values, together with every gender value recorded on the speaker.
SELECT ?item ?itemLabel ?filledGenderLabel
WHERE {
  # Gender values to look for:
  #   Q608982 = 'agender', Q710460 = 'not documented'.
  # TODO: speakers with no P8 statement at all are not covered yet.
  VALUES ?undefinedGender { entity:Q608982 entity:Q710460 }

  ?item prop:P2 entity:Q3 ;
        prop:P8 ?undefinedGender ;
        prop:P8 ?filledGender .

  # Add labels to each variable used.
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" . }
}
... Loading ...

Recordings

✅ Recordings → With ISO-639-3 `zho` to change to `cmn`

# Lists audio records (P2 is Q2) whose language (P4) is Q130 `zho`, so that
# they can be changed to `cmn`. Each row gives the language label, its ISO
# code (P13) label, a shortened record identifier and the record label.
# SUBSTR(..., 32) presumably strips the entity URI prefix -- TODO confirm.
SELECT ?langLabel ?isoLabel (SUBSTR(STR(?audio), 32) AS ?Audio) ?audioLabel
WHERE {
  ?audio prop:P2 entity:Q2 ;      # audios only
         prop:P4 entity:Q130 ;    # language is Q130 zho
         prop:P4 ?lang .
  ?lang prop:P13 ?iso .

  # Add labels
  SERVICE wikibase:label {
    bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" .
  }
}
ORDER BY ASC(?isoLabel)
# Helper: Chinese writing zho:Q130/wd:Q7850 ; Chinese mandarin cmn:Q113/wd:Q9192 .
... Loading ...


# Counts audio records (P2 is Q2) whose language (P4) is Q130 `zho`,
# grouped by the label of the ISO code (P13) of their language(s).
SELECT ?isoLabel (COUNT(?iso) AS ?found)
WHERE {
  ?audio prop:P2 entity:Q2 ;      # audios only
         prop:P4 entity:Q130 ;    # language is Q130 zho
         prop:P4 ?lang .
  ?lang prop:P13 ?iso .

  # Add labels
  SERVICE wikibase:label {
    bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" .
  }
}
GROUP BY ?isoLabel
# Helper: Chinese writing zho:Q130/wd:Q7850 ; Chinese mandarin cmn:Q113/wd:Q9192
... Loading ...


Languages

✅ Languages → duplicates by Wikidata ID

Duplicated languages can be merged.
# Finds Wikidata IDs (P12) that are carried by more than one language item
# (P2 is Q4). Each row gives the Wikidata ID, how many items share it and
# the list of those items (GROUP_CONCAT with its default separator).
SELECT ?idWD (COUNT(?item) AS ?count) (GROUP_CONCAT(?item) AS ?items)
WHERE {
  ?item prop:P2 entity:Q4 ;
        prop:P12 ?idWD .
}
GROUP BY ?idWD
# The aggregate is repeated here instead of using ?count: in standard
# SPARQL 1.1, HAVING is evaluated before the SELECT aliases are assigned.
HAVING ( COUNT(?item) > 1 )
... Loading ...

✅ Languages → languages by type (if it exists)

Note: query to improve.
# Lists every language item (P2 is Q4) with, when available, its code (P13),
# its English label and the label of its media type (P24).
SELECT ?language ?languageLabel ?code ?typeLabel
WHERE {
  ?language prop:P2 entity:Q4 .

  OPTIONAL { ?language prop:P13 ?code }

  # English label bound explicitly; items without one are still listed.
  OPTIONAL {
    ?language rdfs:label ?languageLabel .
    FILTER( LANG(?languageLabel) = "en" )
  }

  OPTIONAL { ?language prop:P24 ?type }

  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" . }
}
... Loading ...

✅ Languages → language with media type (P24) is video (Q88890)

# Lists language items (P2 is Q4) whose media type (P24) is video (Q88890),
# with their label in French, falling back to English.
SELECT ?id ?idLabel
WHERE {
  ?id prop:P2  entity:Q4 ;
      prop:P24 entity:Q88890 .

  SERVICE wikibase:label {
    bd:serviceParam wikibase:language "fr,en" .
  }
}
... Loading ...

✅ Languages with media type video → written word (P7), url (P3), speakers (P5)

# Lists records (P2 is Q2) in language Q99628 with their written word (P7),
# url (P3) and the English label of their speaker (P5).
# NOTE(review): Q99628 is presumably a language whose media type is video;
# confirm against the wiki.
SELECT ?word ?filename ?speaker
WHERE {
  ?record prop:P2 entity:Q2 ;
          prop:P4 entity:Q99628 ;
          prop:P7 ?word ;
          prop:P3 ?filename ;
          prop:P5 ?speakerItem .

  # Keep only the English label of the speaker item.
  ?speakerItem rdfs:label ?speaker .
  FILTER( LANG(?speaker) = "en" )
}
... Loading ...

Other

All properties with their Wikidata equivalent (if it exists)

# Lists every property typed as owl:ObjectProperty or owl:DatatypeProperty
# and, when it exists, the entity declaring it as its direct claim together
# with that entity's Wikidata equivalent (P12).
SELECT *
WHERE {
  { ?propLili a owl:ObjectProperty }
  UNION
  { ?propLili a owl:DatatypeProperty }

  # Optional so that properties without a Wikidata equivalent are kept.
  OPTIONAL {
    ?propLiliClaim <http://wikiba.se/ontology#directClaim> ?propLili .
    ?propLiliClaim prop:P12 ?propWD .
  }
}
... Loading ...

See also

Lingua Libre technical helps
Template {{Speakers category}} • {{Recommended lists}} • {{To iso 639-2}} • {{To iso 639-3}} • {{Userbox-records}} • {{Bot steps}}
Audio files How to create a frequency list?Convert files formatsDenoise files with SoXRename and mass rename
Bots Help:BotsLinguaLibre:BotHelp:Log in to Lingua Libre with PywikibotLingua Libre Bot (gh) • OlafbotPamputtBotDragons Bot (gh)
MediaWiki MediaWiki: Help:Documentation opérationelle MediawikiHelp:Database structureHelp:CSSHelp:RenameHelp:OAuthLinguaLibre:User rights (rate limit) • Module:Lingua Libre record & {{Lingua Libre record}}JS scripts: MediaWiki:Common.jsLastAudios.jsSoundLibrary.jsItemsSugar.jsLexemeQueriesGenerator.js (pad) • Sparql2data.js (pad) • LanguagesGallery.js (pad) • Gadgets: Gadget-LinguaImporter.jsGadget-Demo.jsGadget-RecentNonAudio.jsLiLiZip.js
Queries Help:APIsHelp:SPARQLSPARQL (intermediate) (stub) • SPARQL for lexemes (stub) • SPARQL for maintenanceLingualibre:Wikidata (stub) • Help:SPARQL (HAL)
Reuses Help:Download datasetsHelp:Embed audio in HTML
Unstable & tests Help:SPARQL/test
Categories Category:Technical reports