locale: Add translations for layout names

Pulled in some Unix-specific code from https://github.com/rust-locale/locale_config to reduce dependencies.

First reason to reduce dependencies: gettext-rs is not in Debian. Copying gettext-sys might have made sense, but the interface is somewhat confusing.

For translating a couple identifiers, detection and some hand-rolled hash map is all that is needed, and the option to move to gettext later remains.

locale_config has been stripped of the lazy_static dependency, which, by interfering with the startup sequence, might be a source of debugging woes. Plus, setting the language once at the beginning is somewhat inflexible regarding runtime changes.
This commit is contained in:
Dorota Czaplejewicz
2019-11-07 18:43:02 +00:00
parent 47c4119ab7
commit 9f88660d99
10 changed files with 761 additions and 6 deletions

50
Cargo.lock generated
View File

@ -1,5 +1,13 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
[[package]]
name = "aho-corasick"
version = "0.7.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"memchr 2.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "atk-sys"
version = "0.7.0"
@ -234,6 +242,11 @@ name = "maplit"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "memchr"
version = "2.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "memmap"
version = "0.7.0"
@ -289,6 +302,23 @@ dependencies = [
"proc-macro2 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "regex"
version = "1.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"aho-corasick 0.7.6 (registry+https://github.com/rust-lang/crates.io-index)",
"memchr 2.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
"regex-syntax 0.6.12 (registry+https://github.com/rust-lang/crates.io-index)",
"thread_local 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
"utf8-ranges 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "regex-syntax"
version = "0.6.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "rs"
version = "0.1.0"
@ -300,6 +330,7 @@ dependencies = [
"gtk 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)",
"gtk-sys 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)",
"maplit 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
"regex 1.1.9 (registry+https://github.com/rust-lang/crates.io-index)",
"serde 1.0.101 (registry+https://github.com/rust-lang/crates.io-index)",
"serde_yaml 0.8.9 (registry+https://github.com/rust-lang/crates.io-index)",
"xkbcommon 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
@ -344,11 +375,24 @@ dependencies = [
"unicode-xid 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "thread_local"
version = "0.3.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "unicode-xid"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "utf8-ranges"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "winapi"
version = "0.3.8"
@ -386,6 +430,7 @@ dependencies = [
]
[metadata]
"checksum aho-corasick 0.7.6 (registry+https://github.com/rust-lang/crates.io-index)" = "58fb5e95d83b38284460a5fda7d6470aa0b8844d283a0b614b8535e880800d2d"
"checksum atk-sys 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "c7017e53393e713212aed7aea336b6553be4927f58c37070a56c2fe3d107e489"
"checksum bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "228047a76f468627ca71776ecdebd732a3423081fcf5125585bcd7c49886ce12"
"checksum cairo-rs 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "dd940f0d609699e343ef71c4af5f66423afbf30d666f796dabd8fd15229cf5b6"
@ -408,17 +453,22 @@ dependencies = [
"checksum libc 0.2.62 (registry+https://github.com/rust-lang/crates.io-index)" = "34fcd2c08d2f832f376f4173a231990fa5aef4e99fb569867318a227ef4c06ba"
"checksum linked-hash-map 0.5.2 (registry+https://github.com/rust-lang/crates.io-index)" = "ae91b68aebc4ddb91978b11a1b02ddd8602a05ec19002801c5666000e05e0f83"
"checksum maplit 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "3e2e65a1a2e43cfcb47a895c4c8b10d1f4a61097f9f254f183aee60cad9c651d"
"checksum memchr 2.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "88579771288728879b57485cc7d6b07d648c9f0141eb955f8ab7f9d45394468e"
"checksum memmap 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "6585fd95e7bb50d6cc31e20d4cf9afb4e2ba16c5846fc76793f11218da9c475b"
"checksum pango 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "4c2cb169402a3eb1ba034a7cc7d95b8b1c106e9be5ba4be79a5a93dc1a2795f4"
"checksum pango-sys 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "d6eb49268e69dd0c1da5d3001a61aac08e2e9d2bfbe4ae4b19b9963c998f6453"
"checksum pkg-config 0.3.16 (registry+https://github.com/rust-lang/crates.io-index)" = "72d5370d90f49f70bd033c3d75e87fc529fbfff9d6f7cccef07d6170079d91ea"
"checksum proc-macro2 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "afdc77cc74ec70ed262262942ebb7dac3d479e9e5cfa2da1841c0806f6cdabcc"
"checksum quote 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "053a8c8bcc71fcce321828dc897a98ab9760bef03a4fc36693c231e5b3216cfe"
"checksum regex 1.1.9 (registry+https://github.com/rust-lang/crates.io-index)" = "d9d8297cc20bbb6184f8b45ff61c8ee6a9ac56c156cec8e38c3e5084773c44ad"
"checksum regex-syntax 0.6.12 (registry+https://github.com/rust-lang/crates.io-index)" = "11a7e20d1cce64ef2fed88b66d347f88bd9babb82845b2b858f3edbf59a4f716"
"checksum serde 1.0.101 (registry+https://github.com/rust-lang/crates.io-index)" = "9796c9b7ba2ffe7a9ce53c2287dfc48080f4b2b362fcc245a259b3a7201119dd"
"checksum serde_derive 1.0.101 (registry+https://github.com/rust-lang/crates.io-index)" = "4b133a43a1ecd55d4086bd5b4dc6c1751c68b1bfbeba7a5040442022c7e7c02e"
"checksum serde_yaml 0.8.9 (registry+https://github.com/rust-lang/crates.io-index)" = "38b08a9a90e5260fe01c6480ec7c811606df6d3a660415808c3c3fa8ed95b582"
"checksum syn 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)" = "66850e97125af79138385e9b88339cbcd037e3f28ceab8c5ad98e64f0f1f80bf"
"checksum thread_local 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "c6b53e329000edc2b34dbe8545fd20e55a333362d0a321909685a19bd28c3f1b"
"checksum unicode-xid 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "826e7639553986605ec5979c7dd957c7895e93eabed50ab2ffa7f6128a75097c"
"checksum utf8-ranges 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "b4ae116fef2b7fea257ed6440d3cfcff7f190865f170cdad00bb6465bf18ecba"
"checksum winapi 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)" = "8093091eeb260906a183e6ae1abdba2ef5ef2257a21801128899c3fc699229c6"
"checksum winapi-i686-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
"checksum winapi-x86_64-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"

View File

@ -5,6 +5,7 @@ version = "0.1.0"
[dependencies]
bitflags = "1.0.*"
maplit = "1.0.*"
regex = "1.1.*"
serde = { version = "1.0.*", features = ["derive"] }
serde_yaml = "0.8.*"
xkbcommon = { version = "0.4.*", features = ["wayland"] }

8
data/langs/en-US.txt Normal file
View File

@ -0,0 +1,8 @@
us English (US)
de German
el Greek
es Spanish
it Italian
jp+kana Japanese (kana)
nb Norwegian

0
data/langs/pl-PL.txt Normal file
View File

1
debian/control vendored
View File

@ -18,6 +18,7 @@ Build-Depends:
librust-gtk+v3-22-dev (>= 0.5),
librust-gtk-sys-dev,
librust-maplit-1-dev (>= 1.0),
librust-regex-1-dev (>= 1.1),
librust-serde-derive-1-dev (>= 1.0),
librust-serde-yaml-0.8-dev (>= 0.8),
librust-xkbcommon-0.4+wayland-dev (>= 0.4),

View File

@ -8,6 +8,7 @@ extern crate gtk_sys;
#[allow(unused_imports)]
#[macro_use] // only for tests
extern crate maplit;
extern crate regex;
extern crate serde;
extern crate xkbcommon;
@ -17,6 +18,8 @@ pub mod float_ord;
pub mod imservice;
mod keyboard;
mod layout;
mod locale;
mod locale_config;
mod outputs;
mod popover;
mod resources;

39
src/locale.rs Normal file
View File

@ -0,0 +1,39 @@
/*! Locale-specific functions */
use std::cmp;
use std::ffi::CString;
/// Raw bindings to the C library functions used by this module.
mod c {
    use std::os::raw::{c_char, c_int};

    extern "C" {
        /// `strcoll` from libc: compares two NUL-terminated strings
        /// according to the collation rules of the current C locale.
        ///
        /// Returns a negative, zero or positive value
        /// when `cs` sorts before, equal to, or after `ct`.
        pub fn strcoll(cs: *const c_char, ct: *const c_char) -> c_int;
    }
}
/// Converts `s` into a NUL-terminated C string.
///
/// A string containing an interior NUL byte cannot be represented as a C string;
/// in that case an empty C string is returned instead of failing.
fn cstring_safe(s: &str) -> CString {
    // `unwrap_or_default` only builds the empty fallback when it is needed.
    CString::new(s).unwrap_or_default()
}
/// Compares two strings using libc's `strcoll`.
///
/// Inputs containing an interior NUL byte are treated as empty strings
/// (see `cstring_safe`).
pub fn compare_current_locale(a: &str, b: &str) -> cmp::Ordering {
    // Keep the owned C strings bound to names for the whole call,
    // so the raw pointers handed to libc stay valid.
    let a = cstring_safe(a);
    let b = cstring_safe(b);
    // SAFETY: both pointers come from live `CString`s,
    // so they are non-null and NUL-terminated.
    let result = unsafe { c::strcoll(a.as_ptr(), b.as_ptr()) };
    // Only the sign of the result is meaningful.
    result.cmp(&0)
}

541
src/locale_config.rs Normal file
View File

@ -0,0 +1,541 @@
/*! Locale detection and management.
* Based on https://github.com/rust-locale/locale_config
*
* Ready for deletion/replacement once Debian starts packaging this,
* although this version doesn't need lazy_static.
*
* Copyright (c) 2016–2019 Jan Hudec <bulb@ucw.cz>
Copyright (c) 2016 A.J. Gardner <aaron.j.gardner@gmail.com>
Copyright (c) 2019, Bastien Orivel <eijebong@bananium.fr>
Copyright (c) 2019, Igor Gnatenko <i.gnatenko.brain@gmail.com>
Copyright (c) 2019, Sophie Tauchert <999eagle@999eagle.moe>
*/
use regex::Regex;
use std::borrow::Cow;
use std::env;
/// Errors that may be returned by `locale_config`.
#[derive(Copy,Clone,Debug,PartialEq,Eq)]
pub enum Error {
    /// Provided definition was not well formed.
    ///
    /// This is returned when provided configuration string does not match even the rather loose
    /// definition for language range from [RFC4647] or the composition format used by `Locale`.
    ///
    /// [RFC4647]: https://www.rfc-editor.org/rfc/rfc4647.txt
    NotWellFormed,
    /// Placeholder for adding more errors in future. **Do not match!**.
    __NonExhaustive,
}

impl Error {
    /// Human-readable message shared by the `Display` and `std::error::Error` impls.
    ///
    /// # Panics
    ///
    /// Panics for the `__NonExhaustive` placeholder, which must never be instantiated.
    fn message(&self) -> &'static str {
        match *self {
            Error::NotWellFormed => "Language tag is not well-formed.",
            // This match is deliberately exhaustive (no `_` arm), so that adding a variant
            // without a message is a compile error rather than a silent omission.
            Error::__NonExhaustive => panic!("Placeholder error must not be instantiated!"),
        }
    }
}

impl ::std::fmt::Display for Error {
    /// Writes the error message; does not go through the deprecated
    /// `std::error::Error::description`.
    fn fmt(&self, out: &mut ::std::fmt::Formatter) -> ::std::fmt::Result {
        out.write_str(self.message())
    }
}

impl ::std::error::Error for Error {
    /// Kept for callers that still use `description()`; same text as `Display`.
    fn description(&self) -> &str {
        self.message()
    }
}
/// Convenience Result alias.
type Result<T> = ::std::result::Result<T, Error>;
/// Iterator over `LanguageRange`s for specific category in a `Locale`
///
/// Returns `LanguageRange`s in the `Locale` that are applicable to provided category. The tags
/// are returned in order of preference, which means the category-specific ones first and then
/// the generic ones.
///
/// The iterator is guaranteed to return at least one value.
pub struct TagsFor<'a, 'c> {
// The complete `,`-separated locale string; used to restart iteration
// from the beginning once the category-specific tags are exhausted.
src: &'a str,
// The tags not yet visited in the current pass.
tags: std::str::Split<'a, &'static str>,
// `Some(category)` while category-specific tags are still being yielded;
// reset to `None` when the iterator switches to the generic tags.
category: Option<&'c str>,
}
impl<'a, 'c> Iterator for TagsFor<'a, 'c> {
    type Item = LanguageRange<'a>;

    /// Yields the next applicable tag.
    ///
    /// First pass: entries of the form `category=tag` for the requested category.
    /// Second pass (restarted from the beginning of the locale string):
    /// every entry without a `=`, i.e. the primary tag and the fallbacks.
    fn next(&mut self) -> Option<Self::Item> {
        if let Some(cat) = self.category {
            let override_entry = self.tags
                .by_ref()
                .find(|&entry| entry.starts_with(cat) && entry[cat.len()..].starts_with("="));
            if let Some(entry) = override_entry {
                // Skip over "category=" and hand out the tag itself.
                return Some(LanguageRange {
                    language: Cow::Borrowed(&entry[cat.len()+1..]),
                });
            }
            // No more overrides: rewind and continue with the generic tags.
            self.category = None;
            self.tags = self.src.split(",");
        }
        self.tags
            .by_ref()
            .find(|&entry| !entry.contains('='))
            .map(|entry| LanguageRange { language: Cow::Borrowed(entry) })
    }
}
/// Language and culture identifier.
///
/// This object holds a [RFC4647] extended language range.
///
/// The internal data may be owned or shared from object with lifetime `'a`. The lifetime can be
/// extended using the `into_static()` method, which internally clones the data as needed.
///
/// # Syntax
///
/// The range is composed of `-`-separated alphanumeric subtags, possibly replaced by `*`s. It
/// might be empty.
///
/// In agreement with [RFC4647], this object only requires that the tag matches:
///
/// ```ebnf
/// language_tag = (alpha{1,8} | "*")
/// ("-" (alphanum{1,8} | "*"))*
/// ```
///
/// The exact interpretation is up to the downstream localization provider, but it is expected that
/// it will be matched against a normalized [RFC5646] language tag, which has the structure:
///
/// ```ebnf
/// language_tag = language
/// ("-" script)?
/// ("-" region)?
/// ("-" variant)*
/// ("-" extension)*
/// ("-" private)?
///
/// language = alpha{2,3} ("-" alpha{3}){0,3}
///
/// script = alpha{4}
///
/// region = alpha{2}
/// | digit{3}
///
/// variant = alphanum{5,8}
/// | digit alphanum{3}
///
/// extension = [0-9a-wyz] ("-" alphanum{2,8})+
///
/// private = "x" ("-" alphanum{1,8})+
/// ```
///
/// * `language` is an [ISO639] 2-letter or, where not defined, 3-letter code. A code for
/// macro-language might be followed by code of specific dialect.
/// * `script` is an [ISO15924] 4-letter code.
/// * `region` is either an [ISO3166] 2-letter code or, for areas other than countries, [UN M.49]
/// 3-digit numeric code.
/// * `variant` is a string indicating variant of the language.
/// * `extension` and `private` define additional options. The private part has same structure as
/// the Unicode [`-u-` extension][u_ext]. Available options are documented for the facets that
/// use them.
///
/// The values obtained by inspecting the system are normalized according to those rules.
///
/// The content will be case-normalized as recommended in [RFC5646] §2.1.1, namely:
///
/// * `language` is written in lowercase,
/// * `script` is written with first capital,
/// * `country` is written in uppercase and
/// * all other subtags are written in lowercase.
///
/// When detecting system configuration, additional options that may be generated under the
/// [`-u-` extension][u_ext] currently are:
///
/// * `cf` — Currency format (`account` for parenthesized negative values, `standard` for minus
/// sign).
/// * `fw` — First day of week (`mon` to `sun`).
/// * `hc` — Hour cycle (`h12` for 1–12, `h23` for 0–23).
/// * `ms` — Measurement system (`metric` or `ussystem`).
/// * `nu` — Numbering system—only decimal systems are currently used.
/// * `va` — Variant when locale is specified in Unix format and the tag after `@` does not
/// correspond to any variant defined in [Language subtag registry].
///
/// And under the `-x-` extension, following options are defined:
///
/// * `df` — Date format:
///
/// * `iso`: Short date should be in ISO format of `yyyy-MM-dd`.
///
/// For example `-df-iso`.
///
/// * `dm` — Decimal separator for monetary:
///
/// Followed by one or more Unicode codepoints in hexadecimal. For example `-dm-002c` means to
/// use comma.
///
/// * `ds` — Decimal separator for numbers:
///
/// Followed by one or more Unicode codepoints in hexadecimal. For example `-ds-002c` means to
/// use comma.
///
/// * `gm` — Group (thousand) separator for monetary:
///
/// Followed by one or more Unicode codepoints in hexadecimal. For example `-gm-00a0` means to
/// use non-breaking space.
///
/// * `gs` — Group (thousand) separator for numbers:
///
/// Followed by one or more Unicode codepoints in hexadecimal. For example `-gs-00a0` means to
/// use non-breaking space.
///
/// * `ls` — List separator:
///
/// Followed by one or more Unicode codepoints in hexadecimal. For example, `-ls-003b` means to
/// use a semicolon.
///
/// [RFC5646]: https://www.rfc-editor.org/rfc/rfc5646.txt
/// [RFC4647]: https://www.rfc-editor.org/rfc/rfc4647.txt
/// [ISO639]: https://en.wikipedia.org/wiki/ISO_639
/// [ISO15924]: https://en.wikipedia.org/wiki/ISO_15924
/// [ISO3166]: https://en.wikipedia.org/wiki/ISO_3166
/// [UN M.49]: https://en.wikipedia.org/wiki/UN_M.49
/// [u_ext]: http://www.unicode.org/reports/tr35/#u_Extension
/// [Language subtag registry]: https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry
#[derive(Clone,Debug,Eq,Hash,PartialEq)]
pub struct LanguageRange<'a> {
// Text of the language range, either borrowed or owned.
// The invariant range is represented by the empty string.
language: Cow<'a, str>
}
impl<'a> LanguageRange<'a> {
/// Return LanguageRange for the invariant locale.
///
/// Invariant language is identified simply by empty string.
pub fn invariant() -> LanguageRange<'static> {
LanguageRange { language: Cow::Borrowed("") }
}
/// Create language tag from Unix/Linux/GNU locale tag.
///
/// Unix locale tags have the form
///
/// > *language* [ `_` *region* ] [ `.` *encoding* ] [ `@` *variant* ]
///
/// The *language* and *region* have the same format as RFC5646. *Encoding* is not relevant
/// here, since Rust always uses Utf-8. That leaves *variant*, which is unfortunately rather
/// free-form. So this function will translate known variants to corresponding RFC5646 subtags
/// and represent anything else with Unicode POSIX variant (`-u-va-`) extension.
///
/// Note: This function is public here for benefit of applications that may come across this
/// kind of tags from other sources than system configuration.
///
/// # Errors
///
/// Returns `Error::NotWellFormed` when `s` matches neither the Unix locale tag pattern
/// nor the invariant (`C`/`POSIX`, optionally followed by an encoding) pattern.
pub fn from_unix(s: &str) -> Result<LanguageRange<'static>> {
// NOTE: both regexes are compiled on every call.
// Matching is case-insensitive (`i`); whitespace in the pattern is ignored (`x`).
let unix_tag_regex = Regex::new(r"(?ix) ^
(?P<language> [[:alpha:]]{2,3} )
(?: _ (?P<region> [[:alpha:]]{2} | [[:digit:]]{3} ))?
(?: \. (?P<encoding> [0-9a-zA-Z-]{1,20} ))?
(?: @ (?P<variant> [[:alnum:]]{1,20} ))?
$ ").unwrap();
let unix_invariant_regex = Regex::new(r"(?ix) ^
(?: c | posix )
(?: \. (?: [0-9a-zA-Z-]{1,20} ))?
$ ").unwrap();
if let Some(caps) = unix_tag_regex.captures(s) {
let src_variant = caps.name("variant").map(|m| m.as_str()).unwrap_or("").to_ascii_lowercase();
// `res` starts as the lowercased language and accumulates the remaining subtags.
let mut res = caps.name("language").map(|m| m.as_str()).unwrap().to_ascii_lowercase();
let region = caps.name("region").map(|m| m.as_str()).unwrap_or("");
let mut script = "";
let mut variant = "";
let mut uvariant = "";
match src_variant.as_ref() {
// Variants seen in the wild in GNU LibC (via http://lh.2xlibre.net/) or in Debian
// GNU/Linux Stretch system. Treatment of things not found in RFC5646 subtag registry
// (http://www.iana.org/assignments/language-subtag-registry/language-subtag-registry)
// or CLDR according to notes at https://wiki.openoffice.org/wiki/LocaleMapping.
// Dialects:
// aa_ER@saaho - NOTE: Can't be found under that name in RFC5646 subtag registry,
// but there is language Saho with code ssy, which is likely that thing.
"saaho" if res == "aa" => res = String::from("ssy"),
// Scripts:
// @arabic
"arabic" => script = "Arab",
// @cyrillic
"cyrl" => script = "Cyrl",
"cyrillic" => script = "Cyrl",
// @devanagari
"devanagari" => script = "Deva",
// @hebrew
"hebrew" => script = "Hebr",
// tt@iqtelif
// Neither RFC5646 subtag registry nor CLDR knows anything about this, but as best
// as I can tell it is Tatar name for Latin (default is Cyrillic).
"iqtelif" => script = "Latn",
// @Latn
"latn" => script = "Latn",
// @latin
"latin" => script = "Latn",
// en@shaw
"shaw" => script = "Shaw",
// Variants:
// sr@ijekavianlatin
"ijekavianlatin" => {
script = "Latn";
variant = "ijekavsk";
},
// sr@ije
"ije" => variant = "ijekavsk",
// sr@ijekavian
"ijekavian" => variant = "ijekavsk",
// ca@valencia
"valencia" => variant = "valencia",
// Currencies:
// @euro - NOTE: We follow suit of Java and Openoffice and ignore it, because it
// is default for all locales where it sometimes appears now, and because we use
// explicit currency in monetary formatting anyway.
"euro" => {},
// Collation:
// gez@abegede - NOTE: This is collation, but CLDR does not have any code for it,
// so we for the moment leave it fall through as -u-va- instead of -u-co-.
// Anything else:
// en@boldquot, en@quot, en@piglatin - just randomish stuff
// @cjknarrow - beware, it's gonna end up as -u-va-cjknarro due to length limit
// An absent variant is the empty string and also lands here; it is skipped below.
s if s.len() <= 8 => uvariant = &*s,
s => uvariant = &s[0..8], // the subtags are limited to 8 chars, but some are longer
};
// Assemble the tag in the order: language, script, region, variant, -u-va- extension.
if script != "" {
res.push('-');
res.push_str(script);
}
if region != "" {
res.push('-');
res.push_str(&*region.to_ascii_uppercase());
}
if variant != "" {
res.push('-');
res.push_str(variant);
}
if uvariant != "" {
res.push_str("-u-va-");
res.push_str(uvariant);
}
return Ok(LanguageRange {
language: Cow::Owned(res)
});
} else if unix_invariant_regex.is_match(s) {
return Ok(LanguageRange::invariant())
} else {
return Err(Error::NotWellFormed);
}
}
}
impl<'a> AsRef<str> for LanguageRange<'a> {
    /// Borrows the language range as plain text.
    fn as_ref(&self) -> &str {
        &*self.language
    }
}
/// Locale configuration.
///
/// Users may accept several languages in some order of preference and may want to use rules from
/// different culture for some particular aspect of the program behaviour, and operating systems
/// allow them to specify this (to various extent).
///
/// The `Locale` objects represent the user configuration. They contain:
///
/// - The primary `LanguageRange`.
/// - Optional category-specific overrides.
/// - Optional fallbacks in case data (usually translations) for the primary language are not
/// available.
///
/// The set of categories is open-ended. The `locale` crate uses five well-known categories
/// `messages`, `numeric`, `time`, `collate` and `monetary`, but some systems define additional
/// ones (GNU Linux has additionally `paper`, `name`, `address`, `telephone` and `measurement`) and
/// these are provided in the user default `Locale` and other libraries can use them.
///
/// `Locale` is represented by a `,`-separated sequence of tags in `LanguageRange` syntax, where
/// all except the first one may be preceded by category name and `=` sign.
///
/// The first tag indicates the default locale, the tags prefixed by category names indicate
/// _overrides_ for those categories and the remaining tags indicate fallbacks.
///
/// Note that a syntactically valid value of HTTP `Accept-Language` header is a valid `Locale`. Not
/// the other way around though due to the presence of category selectors.
// TODO: Interning
#[derive(Clone,Debug,Eq,Hash,PartialEq)]
pub struct Locale {
// TODO: Intern the string for performance reasons
// XXX: Store pre-split to LanguageTags?
// `,`-separated list of entries: the primary tag first,
// followed by `category=tag` overrides and plain fallback tags.
inner: String,
}
impl Locale {
    /// Build the invariant locale.
    ///
    /// It is represented by the empty string.
    pub fn invariant() -> Locale {
        LanguageRange::invariant().into()
    }

    /// Append a fallback language tag at the end of the list.
    ///
    /// Nothing happens when an identical entry is already present.
    pub fn add(&mut self, tag: &LanguageRange) {
        let wanted: &str = tag.as_ref();
        let already_listed = self.inner.split(',').any(|entry| entry == wanted);
        if already_listed {
            return; // don't add duplicates
        }
        for piece in &[",", wanted] {
            self.inner.push_str(piece);
        }
    }

    /// Append a category override.
    ///
    /// An override equal to the primary tag is useless and is not stored.
    /// An existing override for the same category is not replaced:
    /// the new one is appended after it. This might change in future.
    pub fn add_category(&mut self, category: &str, tag: &LanguageRange) {
        let wanted: &str = tag.as_ref();

        // `split` always yields at least one item: the primary tag.
        if self.inner.split(',').next() == Some(wanted) {
            return; // don't add useless override equal to the primary tag
        }

        let already_listed = self.inner.split(',').any(|entry| {
            entry.starts_with(category)
                && entry[category.len()..].starts_with("=")
                && &entry[category.len() + 1..] == wanted
        });
        if already_listed {
            return; // don't add duplicates
        }

        for piece in &[",", category, "=", wanted] {
            self.inner.push_str(piece);
        }
    }

    /// Iterate over the `LanguageRange`s of this `Locale` that apply to `category`.
    ///
    /// Category-specific overrides come first, followed by the generic tags,
    /// so the items are in order of preference.
    ///
    /// The iterator is guaranteed to return at least one value.
    pub fn tags_for<'a, 'c>(&'a self, category: &'c str) -> TagsFor<'a, 'c> {
        let mut remaining = self.inner.split(",");
        loop {
            // Peek at the next entry without consuming it from `remaining`:
            // the returned iterator must still see the first matching override.
            let mut lookahead = remaining.clone();
            match lookahead.next() {
                Some(entry)
                    if entry.starts_with(category)
                        && entry[category.len()..].starts_with("=") =>
                {
                    return TagsFor {
                        src: self.inner.as_str(),
                        tags: remaining,
                        category: Some(category),
                    };
                }
                Some(_) => remaining = lookahead,
                None => break,
            }
        }
        // No override for this category: only the generic tags apply.
        TagsFor {
            src: self.inner.as_str(),
            tags: self.inner.split(","),
            category: None,
        }
    }
}
/// Locale is specified by a string tag. This is the way to access it.
// FIXME: Do we want to provide the full string representation? We would have it as single string
// then.
impl AsRef<str> for Locale {
    /// Borrows the full `,`-separated string representation of the locale.
    fn as_ref(&self) -> &str {
        self.inner.as_str()
    }
}
impl<'a> From<LanguageRange<'a>> for Locale {
fn from(t: LanguageRange<'a>) -> Locale {
Locale {
inner: t.language.into_owned(),
}
}
}
/// Parses a Unix-style locale name into a `LanguageRange`
/// by delegating to `LanguageRange::from_unix`.
fn tag(s: &str) -> Result<LanguageRange> {
LanguageRange::from_unix(s)
}
// TODO: Read /etc/locale.alias
/// Parses a Unix-style locale name,
/// falling back to the invariant range when it is not well-formed.
fn tag_inv(s: &str) -> LanguageRange {
    match tag(s) {
        Ok(range) => range,
        Err(_) => LanguageRange::invariant(),
    }
}
/// Detects the user's locale from the environment.
///
/// `LC_ALL`, when it parses, decides everything. Otherwise `LANG` provides the primary tag,
/// the `LC_*` variables add category overrides, and `LANGUAGE` adds fallbacks.
///
/// Returns `None` when the resulting locale is the empty (invariant) one.
pub fn system_locale() -> Option<Locale> {
    // Category name paired with the environment variable that overrides it.
    const CATEGORY_VARS: &[(&str, &str)] = &[
        ("ctype", "LC_CTYPE"),
        ("numeric", "LC_NUMERIC"),
        ("time", "LC_TIME"),
        ("collate", "LC_COLLATE"),
        ("monetary", "LC_MONETARY"),
        ("messages", "LC_MESSAGES"),
        ("paper", "LC_PAPER"),
        ("name", "LC_NAME"),
        ("address", "LC_ADDRESS"),
        ("telephone", "LC_TELEPHONE"),
        ("measurement", "LC_MEASUREMENT"),
    ];

    // LC_ALL overrides everything
    let forced = env::var("LC_ALL")
        .ok()
        .and_then(|all| tag(&all).ok().map(|t| Locale::from(t)));
    if forced.is_some() {
        return forced;
    }

    // LANG is default
    let mut loc = match env::var("LANG") {
        Ok(lang) => Locale::from(tag_inv(&lang)),
        Err(_) => Locale::invariant(),
    };

    // category overrides
    let overrides = CATEGORY_VARS.iter()
        .filter_map(|&(cat, var)| env::var(var).ok().map(|val| (cat, val)));
    for (cat, val) in overrides {
        if let Ok(range) = tag(&val) {
            loc.add_category(cat, &range);
        }
    }

    // LANGUAGE defines fallbacks
    if let Ok(langs) = env::var("LANGUAGE") {
        for candidate in langs.split(':').filter(|c| !c.is_empty()) {
            if let Ok(range) = tag(candidate) {
                loc.add(&range);
            }
        }
    }

    if loc.as_ref() == "" {
        None
    } else {
        Some(loc)
    }
}
#[cfg(test)]
mod test {
use super::LanguageRange;
#[test]
fn unix_tags() {
assert_eq!("cs-CZ", LanguageRange::from_unix("cs_CZ.UTF-8").unwrap().as_ref());
assert_eq!("sr-RS-ijekavsk", LanguageRange::from_unix("sr_RS@ijekavian").unwrap().as_ref());
assert_eq!("sr-Latn-ijekavsk", LanguageRange::from_unix("sr.UTF-8@ijekavianlatin").unwrap().as_ref());
assert_eq!("en-Arab", LanguageRange::from_unix("en@arabic").unwrap().as_ref());
assert_eq!("en-Arab", LanguageRange::from_unix("en.UTF-8@arabic").unwrap().as_ref());
assert_eq!("de-DE", LanguageRange::from_unix("DE_de.UTF-8@euro").unwrap().as_ref());
assert_eq!("ssy-ER", LanguageRange::from_unix("aa_ER@saaho").unwrap().as_ref());
assert!(LanguageRange::from_unix("foo_BAR").is_err());
assert!(LanguageRange::from_unix("en@arabic.UTF-8").is_err());
assert_eq!("", LanguageRange::from_unix("C").unwrap().as_ref());
assert_eq!("", LanguageRange::from_unix("C.UTF-8").unwrap().as_ref());
assert_eq!("", LanguageRange::from_unix("C.ISO-8859-1").unwrap().as_ref());
assert_eq!("", LanguageRange::from_unix("POSIX").unwrap().as_ref());
}
}

View File

@ -3,6 +3,9 @@
use gio;
use gtk;
use ::layout::c::EekGtkKeyboard;
use ::locale::compare_current_locale;
use ::locale_config::system_locale;
use ::resources;
use gio::ActionExt;
use gio::ActionMapExt;
@ -48,7 +51,7 @@ mod variants {
}
}
fn make_menu_builder(inputs: Vec<&str>) -> gtk::Builder {
fn make_menu_builder(inputs: Vec<(&str, &str)>) -> gtk::Builder {
let mut xml: Vec<u8> = Vec::new();
writeln!(
xml,
@ -57,16 +60,17 @@ fn make_menu_builder(inputs: Vec<&str>) -> gtk::Builder {
<menu id=\"app-menu\">
<section>"
).unwrap();
for input in inputs {
for (input_name, human_name) in inputs {
writeln!(
xml,
"
<item>
<attribute name=\"label\" translatable=\"yes\">{}</attribute>
<attribute name=\"action\">layout</attribute>
<attribute name=\"target\">{0}</attribute>
<attribute name=\"target\">{}</attribute>
</item>",
input,
human_name,
input_name,
).unwrap();
}
writeln!(
@ -96,7 +100,7 @@ fn set_layout(kind: String, name: String) {
pub fn show(window: EekGtkKeyboard, position: ::layout::c::Bounds) {
unsafe { gtk::set_initialized() };
let window = unsafe { gtk::Widget::from_glib_none(window.0) };
let settings = gio::Settings::new("org.gnome.desktop.input-sources");
let inputs = settings.get_value("sources").unwrap();
let current = settings.get_uint("current") as usize;
@ -106,7 +110,35 @@ pub fn show(window: EekGtkKeyboard, position: ::layout::c::Bounds) {
.map(|(_kind, name)| name.as_str())
.collect();
let builder = make_menu_builder(input_names.clone());
let translations = system_locale()
.map(|locale|
locale.tags_for("messages")
.next().unwrap() // guaranteed to exist
.as_ref()
.to_owned()
)
.and_then(|lang| resources::get_layout_names(lang.as_str()));
// sorted collection of human and machine names
let mut human_names: Vec<(&str, &str)> = match translations {
Some(translations) => {
input_names.iter()
.map(|name| (*name, *translations.get(name).unwrap_or(name)))
.collect()
},
// display bare codes
None => {
input_names.iter()
.map(|n| (*n, *n)) // turns &&str into &str
.collect()
}
};
human_names.sort_unstable_by(|(_, human_label_a), (_, human_label_b)| {
compare_current_locale(human_label_a, human_label_b)
});
let builder = make_menu_builder(human_names);
// Much more debuggable to populate the model & menu
// from a string representation
// than add items imperatively

View File

@ -2,6 +2,13 @@
* This could be done using GResource, but that would need additional work.
*/
use std::collections::HashMap;
use std::iter::FromIterator;
// TODO: keep a list of what is a language layout,
// and what a convenience layout. "_wide" is not a layout,
// neither is "number"
const KEYBOARDS: &[(*const str, *const str)] = &[
("us", include_str!("../data/keyboards/us.yaml")),
("us_wide", include_str!("../data/keyboards/us_wide.yaml")),
@ -27,3 +34,76 @@ pub fn get_keyboard(needle: &str) -> Option<&'static str> {
unsafe { &*value }
})
}
/// Translations of the layout identifier strings
const LAYOUT_NAMES: &[(*const str, *const str)] = &[
("en-US", include_str!("../data/langs/en-US.txt")),
("pl-PL", include_str!("../data/langs/pl-PL.txt")),
];
pub fn get_layout_names(lang: &str)
-> Option<HashMap<&'static str, &'static str>>
{
let translations = LAYOUT_NAMES.iter()
.find(|(name, _data)| {
let name: *const str = *name;
(unsafe { &*name }) == lang
})
.map(|(_name, data)| {
let data: *const str = *data;
unsafe { &*data }
});
translations.map(make_mapping)
}
/// Parses one line of a layout-name translation file.
///
/// A valid line has the form `<layout id><whitespace><translated name>`.
/// Surrounding whitespace is ignored, and the translated name may itself contain spaces.
///
/// Returns `None` for blank lines, comment lines (first non-blank character is `#`),
/// and unfinished lines that carry an identifier but no translation.
fn parse_line(line: &str) -> Option<(&str, &str)> {
    // Trim first, so that indentation does not end up as an empty identifier
    // and trailing whitespace does not produce an empty translation.
    let line = line.trim();
    if line.is_empty() || line.starts_with('#') {
        return None;
    }
    // Split on the first whitespace character of any kind (space or tab).
    let mut parts = line.splitn(2, char::is_whitespace);
    match (parts.next(), parts.next().map(str::trim)) {
        (Some(name), Some(translation)) if !translation.is_empty() => {
            Some((name, translation))
        },
        // will skip unfinished lines
        _ => None,
    }
}
/// Builds the identifier → translation map from the contents of a translation file.
///
/// Every line is run through `parse_line`; lines it rejects are left out.
fn make_mapping(data: &str) -> HashMap<&str, &str> {
    let entries = data.split('\n').filter_map(parse_line);
    HashMap::from_iter(entries)
}
#[cfg(test)]
mod test {
use super::*;
#[test]
fn mapping_line() {
assert_eq!(
Some(("name", "translation")),
parse_line("name translation")
);
}
#[test]
fn mapping_bad() {
assert_eq!(None, parse_line("bad"));
}
#[test]
fn mapping_empty() {
assert_eq!(None, parse_line(""));
}
#[test]
fn mapping_comment() {
assert_eq!(None, parse_line("# comment"));
}
#[test]
fn mapping_comment_offset() {
assert_eq!(None, parse_line(" # comment"));
}
}