diff --git a/Cargo.lock b/Cargo.lock index 3302d5a8..bb1d07ef 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1,5 +1,13 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing. +[[package]] +name = "aho-corasick" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "memchr 2.2.1 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "atk-sys" version = "0.7.0" @@ -234,6 +242,11 @@ name = "maplit" version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "memchr" +version = "2.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "memmap" version = "0.7.0" @@ -289,6 +302,23 @@ dependencies = [ "proc-macro2 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "regex" +version = "1.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "aho-corasick 0.7.6 (registry+https://github.com/rust-lang/crates.io-index)", + "memchr 2.2.1 (registry+https://github.com/rust-lang/crates.io-index)", + "regex-syntax 0.6.12 (registry+https://github.com/rust-lang/crates.io-index)", + "thread_local 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", + "utf8-ranges 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "regex-syntax" +version = "0.6.12" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "rs" version = "0.1.0" @@ -300,6 +330,7 @@ dependencies = [ "gtk 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)", "gtk-sys 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)", "maplit 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", + "regex 1.1.9 (registry+https://github.com/rust-lang/crates.io-index)", "serde 1.0.101 (registry+https://github.com/rust-lang/crates.io-index)", "serde_yaml 0.8.9 
(registry+https://github.com/rust-lang/crates.io-index)", "xkbcommon 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", @@ -344,11 +375,24 @@ dependencies = [ "unicode-xid 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "thread_local" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "unicode-xid" version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "utf8-ranges" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "winapi" version = "0.3.8" @@ -386,6 +430,7 @@ dependencies = [ ] [metadata] +"checksum aho-corasick 0.7.6 (registry+https://github.com/rust-lang/crates.io-index)" = "58fb5e95d83b38284460a5fda7d6470aa0b8844d283a0b614b8535e880800d2d" "checksum atk-sys 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "c7017e53393e713212aed7aea336b6553be4927f58c37070a56c2fe3d107e489" "checksum bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "228047a76f468627ca71776ecdebd732a3423081fcf5125585bcd7c49886ce12" "checksum cairo-rs 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "dd940f0d609699e343ef71c4af5f66423afbf30d666f796dabd8fd15229cf5b6" @@ -408,17 +453,22 @@ dependencies = [ "checksum libc 0.2.62 (registry+https://github.com/rust-lang/crates.io-index)" = "34fcd2c08d2f832f376f4173a231990fa5aef4e99fb569867318a227ef4c06ba" "checksum linked-hash-map 0.5.2 (registry+https://github.com/rust-lang/crates.io-index)" = "ae91b68aebc4ddb91978b11a1b02ddd8602a05ec19002801c5666000e05e0f83" "checksum maplit 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "3e2e65a1a2e43cfcb47a895c4c8b10d1f4a61097f9f254f183aee60cad9c651d" +"checksum memchr 2.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = 
"88579771288728879b57485cc7d6b07d648c9f0141eb955f8ab7f9d45394468e" "checksum memmap 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "6585fd95e7bb50d6cc31e20d4cf9afb4e2ba16c5846fc76793f11218da9c475b" "checksum pango 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "4c2cb169402a3eb1ba034a7cc7d95b8b1c106e9be5ba4be79a5a93dc1a2795f4" "checksum pango-sys 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "d6eb49268e69dd0c1da5d3001a61aac08e2e9d2bfbe4ae4b19b9963c998f6453" "checksum pkg-config 0.3.16 (registry+https://github.com/rust-lang/crates.io-index)" = "72d5370d90f49f70bd033c3d75e87fc529fbfff9d6f7cccef07d6170079d91ea" "checksum proc-macro2 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "afdc77cc74ec70ed262262942ebb7dac3d479e9e5cfa2da1841c0806f6cdabcc" "checksum quote 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "053a8c8bcc71fcce321828dc897a98ab9760bef03a4fc36693c231e5b3216cfe" +"checksum regex 1.1.9 (registry+https://github.com/rust-lang/crates.io-index)" = "d9d8297cc20bbb6184f8b45ff61c8ee6a9ac56c156cec8e38c3e5084773c44ad" +"checksum regex-syntax 0.6.12 (registry+https://github.com/rust-lang/crates.io-index)" = "11a7e20d1cce64ef2fed88b66d347f88bd9babb82845b2b858f3edbf59a4f716" "checksum serde 1.0.101 (registry+https://github.com/rust-lang/crates.io-index)" = "9796c9b7ba2ffe7a9ce53c2287dfc48080f4b2b362fcc245a259b3a7201119dd" "checksum serde_derive 1.0.101 (registry+https://github.com/rust-lang/crates.io-index)" = "4b133a43a1ecd55d4086bd5b4dc6c1751c68b1bfbeba7a5040442022c7e7c02e" "checksum serde_yaml 0.8.9 (registry+https://github.com/rust-lang/crates.io-index)" = "38b08a9a90e5260fe01c6480ec7c811606df6d3a660415808c3c3fa8ed95b582" "checksum syn 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)" = "66850e97125af79138385e9b88339cbcd037e3f28ceab8c5ad98e64f0f1f80bf" +"checksum thread_local 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = 
"c6b53e329000edc2b34dbe8545fd20e55a333362d0a321909685a19bd28c3f1b" "checksum unicode-xid 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "826e7639553986605ec5979c7dd957c7895e93eabed50ab2ffa7f6128a75097c" +"checksum utf8-ranges 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "b4ae116fef2b7fea257ed6440d3cfcff7f190865f170cdad00bb6465bf18ecba" "checksum winapi 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)" = "8093091eeb260906a183e6ae1abdba2ef5ef2257a21801128899c3fc699229c6" "checksum winapi-i686-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" "checksum winapi-x86_64-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" diff --git a/Cargo.toml b/Cargo.toml index d7a9d835..b7fbd203 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,6 +5,7 @@ version = "0.1.0" [dependencies] bitflags = "1.0.*" maplit = "1.0.*" +regex = "1.1.*" serde = { version = "1.0.*", features = ["derive"] } serde_yaml = "0.8.*" xkbcommon = { version = "0.4.*", features = ["wayland"] } diff --git a/data/langs/en-US.txt b/data/langs/en-US.txt new file mode 100644 index 00000000..6d1ef2b1 --- /dev/null +++ b/data/langs/en-US.txt @@ -0,0 +1,8 @@ +us English (US) +de German +el Greek +es Spanish +it Italian +jp+kana Japanese (kana) +nb Norwegian + diff --git a/data/langs/pl-PL.txt b/data/langs/pl-PL.txt new file mode 100644 index 00000000..e69de29b diff --git a/debian/control b/debian/control index 7dfac5ec..b11e6a92 100644 --- a/debian/control +++ b/debian/control @@ -18,6 +18,7 @@ Build-Depends: librust-gtk+v3-22-dev (>= 0.5), librust-gtk-sys-dev, librust-maplit-1-dev (>= 1.0), + librust-regex-1-dev (>= 1.1), librust-serde-derive-1-dev (>= 1.0), librust-serde-yaml-0.8-dev (>= 0.8), librust-xkbcommon-0.4+wayland-dev (>= 0.4), diff --git a/src/lib.rs b/src/lib.rs index 
40de413d..21bfde8a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -8,6 +8,7 @@ extern crate gtk_sys; #[allow(unused_imports)] #[macro_use] // only for tests extern crate maplit; +extern crate regex; extern crate serde; extern crate xkbcommon; @@ -17,6 +18,8 @@ pub mod float_ord; pub mod imservice; mod keyboard; mod layout; +mod locale; +mod locale_config; mod outputs; mod popover; mod resources; diff --git a/src/locale.rs b/src/locale.rs new file mode 100644 index 00000000..8333a24d --- /dev/null +++ b/src/locale.rs @@ -0,0 +1,39 @@ +/*! Locale-specific functions */ + +use std::cmp; +use std::ffi::CString; + +mod c { + use std::os::raw::c_char; + + #[allow(non_camel_case_types)] + pub type c_int = i32; + + #[no_mangle] + extern "C" { + // from libc + pub fn strcoll(cs: *const c_char, ct: *const c_char) -> c_int; + } +} + +fn cstring_safe(s: &str) -> CString { + CString::new(s) + .unwrap_or(CString::new("").unwrap()) +} + +pub fn compare_current_locale(a: &str, b: &str) -> cmp::Ordering { + let a = cstring_safe(a); + let b = cstring_safe(b); + let a = a.as_ptr(); + let b = b.as_ptr(); + let result = unsafe { c::strcoll(a, b) }; + if result == 0 { + cmp::Ordering::Equal + } else if result > 0 { + cmp::Ordering::Greater + } else if result < 0 { + cmp::Ordering::Less + } else { + unreachable!() + } +} diff --git a/src/locale_config.rs b/src/locale_config.rs new file mode 100644 index 00000000..0595f2b7 --- /dev/null +++ b/src/locale_config.rs @@ -0,0 +1,541 @@ +/*! Locale detection and management. + * Based on https://github.com/rust-locale/locale_config + * + * Ready for deletion/replacement once Debian starts packaging this, + * although this version doesn't need lazy_static. + * + * Copyright (c) 2016–2019 Jan Hudec + Copyright (c) 2016 A.J. 
Gardner + Copyright (c) 2019, Bastien Orivel + Copyright (c) 2019, Igor Gnatenko + Copyright (c) 2019, Sophie Tauchert <999eagle@999eagle.moe> + */ + +use regex::Regex; +use std::borrow::Cow; +use std::env; + +/// Errors that may be returned by `locale_config`. +#[derive(Copy,Clone,Debug,PartialEq,Eq)] +pub enum Error { + /// Provided definition was not well formed. + /// + /// This is returned when provided configuration string does not match even the rather loose + /// definition for language range from [RFC4647] or the composition format used by `Locale`. + /// + /// [RFC4647]: https://www.rfc-editor.org/rfc/rfc4647.txt + NotWellFormed, + /// Placeholder for adding more errors in future. **Do not match!**. + __NonExhaustive, +} + +impl ::std::fmt::Display for Error { + fn fmt(&self, out: &mut ::std::fmt::Formatter) -> ::std::fmt::Result { + use ::std::error::Error; + out.write_str(self.description()) + } +} + +impl ::std::error::Error for Error { + fn description(&self) -> &str { + match self { + &Error::NotWellFormed => "Language tag is not well-formed.", + // this is exception: here we do want exhaustive match so we don't publish version with + // missing descriptions by mistake. + &Error::__NonExhaustive => panic!("Placeholder error must not be instantiated!"), + } + } +} + +/// Convenience Result alias. +type Result = ::std::result::Result; + +/// Iterator over `LanguageRange`s for specific category in a `Locale` +/// +/// Returns `LanguageRange`s in the `Locale` that are applicable to provided category. The tags +/// are returned in order of preference, which means the category-specific ones first and then +/// the generic ones. +/// +/// The iterator is guaranteed to return at least one value. 
+pub struct TagsFor<'a, 'c> { + src: &'a str, + tags: std::str::Split<'a, &'static str>, + category: Option<&'c str>, +} + +impl<'a, 'c> Iterator for TagsFor<'a, 'c> { + type Item = LanguageRange<'a>; + fn next(&mut self) -> Option { + if let Some(cat) = self.category { + while let Some(s) = self.tags.next() { + if s.starts_with(cat) && s[cat.len()..].starts_with("=") { + return Some( + LanguageRange { language: Cow::Borrowed(&s[cat.len()+1..]) }); + } + } + self.category = None; + self.tags = self.src.split(","); + } + while let Some(s) = self.tags.next() { + if s.find('=').is_none() { + return Some( + LanguageRange{ language: Cow::Borrowed(s) }); + } + } + return None; + } +} + +/// Language and culture identifier. +/// +/// This object holds a [RFC4647] extended language range. +/// +/// The internal data may be owned or shared from object with lifetime `'a`. The lifetime can be +/// extended using the `into_static()` method, which internally clones the data as needed. +/// +/// # Syntax +/// +/// The range is composed of `-`-separated alphanumeric subtags, possibly replaced by `*`s. It +/// might be empty. +/// +/// In agreement with [RFC4647], this object only requires that the tag matches: +/// +/// ```ebnf +/// language_tag = (alpha{1,8} | "*") +/// ("-" (alphanum{1,8} | "*"))* +/// ``` +/// +/// The exact interpretation is up to the downstream localization provider, but it is expected that +/// it will be matched against a normalized [RFC5646] language tag, which has the structure: +/// +/// ```ebnf +/// language_tag = language +/// ("-" script)? +/// ("-" region)? +/// ("-" variant)* +/// ("-" extension)* +/// ("-" private)? 
+/// +/// language = alpha{2,3} ("-" alpha{3}){0,3} +/// +/// script = alpha{4} +/// +/// region = alpha{2} +/// | digit{3} +/// +/// variant = alphanum{5,8} +/// | digit alphanum{3} +/// +/// extension = [0-9a-wyz] ("-" alphanum{2,8})+ +/// +/// private = "x" ("-" alphanum{1,8})+ +/// ``` +/// +/// * `language` is an [ISO639] 2-letter or, where not defined, 3-letter code. A code for +/// macro-language might be followed by code of specific dialect. +/// * `script` is an [ISO15924] 4-letter code. +/// * `region` is either an [ISO3166] 2-letter code or, for areas other than countries, [UN M.49] +/// 3-digit numeric code. +/// * `variant` is a string indicating variant of the language. +/// * `extension` and `private` define additional options. The private part has the same structure as +/// the Unicode [`-u-` extension][u_ext]. Available options are documented for the facets that +/// use them. +/// +/// The values obtained by inspecting the system are normalized according to those rules. +/// +/// The content will be case-normalized as recommended in [RFC5646] §2.1.1, namely: +/// +/// * `language` is written in lowercase, +/// * `script` is written with first capital, +/// * `region` is written in uppercase and +/// * all other subtags are written in lowercase. +/// +/// When detecting system configuration, additional options that may be generated under the +/// [`-u-` extension][u_ext] currently are: +/// +/// * `cf` — Currency format (`account` for parenthesized negative values, `standard` for minus +/// sign). +/// * `fw` — First day of week (`mon` to `sun`). +/// * `hc` — Hour cycle (`h12` for 1–12, `h23` for 0–23). +/// * `ms` — Measurement system (`metric` or `ussystem`). +/// * `nu` — Numbering system—only decimal systems are currently used. +/// * `va` — Variant when locale is specified in Unix format and the tag after `@` does not +/// correspond to any variant defined in [Language subtag registry]. 
+/// +/// And under the `-x-` extension, the following options are defined: +/// +/// * `df` — Date format: +/// +/// * `iso`: Short date should be in ISO format of `yyyy-MM-dd`. +/// +/// For example `-df-iso`. +/// +/// * `dm` — Decimal separator for monetary: +/// +/// Followed by one or more Unicode codepoints in hexadecimal. For example `-dm-002c` means to +/// use comma. +/// +/// * `ds` — Decimal separator for numbers: +/// +/// Followed by one or more Unicode codepoints in hexadecimal. For example `-ds-002c` means to +/// use comma. +/// +/// * `gm` — Group (thousand) separator for monetary: +/// +/// Followed by one or more Unicode codepoints in hexadecimal. For example `-gm-00a0` means to +/// use non-breaking space. +/// +/// * `gs` — Group (thousand) separator for numbers: +/// +/// Followed by one or more Unicode codepoints in hexadecimal. For example `-gs-00a0` means to +/// use non-breaking space. +/// +/// * `ls` — List separator: +/// +/// Followed by one or more Unicode codepoints in hexadecimal. For example, `-ls-003b` means to +/// use a semicolon. +/// +/// [RFC5646]: https://www.rfc-editor.org/rfc/rfc5646.txt +/// [RFC4647]: https://www.rfc-editor.org/rfc/rfc4647.txt +/// [ISO639]: https://en.wikipedia.org/wiki/ISO_639 +/// [ISO15924]: https://en.wikipedia.org/wiki/ISO_15924 +/// [ISO3166]: https://en.wikipedia.org/wiki/ISO_3166 +/// [UN M.49]: https://en.wikipedia.org/wiki/UN_M.49 +/// [u_ext]: http://www.unicode.org/reports/tr35/#u_Extension +/// [Language subtag registry]: https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry +#[derive(Clone,Debug,Eq,Hash,PartialEq)] +pub struct LanguageRange<'a> { + language: Cow<'a, str> +} + +impl<'a> LanguageRange<'a> { + /// Return LanguageRange for the invariant locale. + /// + /// Invariant language is identified simply by empty string. 
+ pub fn invariant() -> LanguageRange<'static> { + LanguageRange { language: Cow::Borrowed("") } + } + + /// Create language tag from Unix/Linux/GNU locale tag. + /// + /// Unix locale tags have the form + /// + /// > *language* [ `_` *region* ] [ `.` *encoding* ] [ `@` *variant* ] + /// + /// The *language* and *region* have the same format as RFC5646. *Encoding* is not relevant + /// here, since Rust always uses Utf-8. That leaves *variant*, which is unfortunately rather + /// free-form. So this function will translate known variants to corresponding RFC5646 subtags + /// and represent anything else with Unicode POSIX variant (`-u-va-`) extension. + /// + /// Note: This function is public here for benefit of applications that may come across this + /// kind of tags from other sources than system configuration. + pub fn from_unix(s: &str) -> Result> { + let unix_tag_regex = Regex::new(r"(?ix) ^ + (?P [[:alpha:]]{2,3} ) + (?: _ (?P [[:alpha:]]{2} | [[:digit:]]{3} ))? + (?: \. (?P [0-9a-zA-Z-]{1,20} ))? + (?: @ (?P [[:alnum:]]{1,20} ))? + $ ").unwrap(); + + let unix_invariant_regex = Regex::new(r"(?ix) ^ + (?: c | posix ) + (?: \. (?: [0-9a-zA-Z-]{1,20} ))? + $ ").unwrap(); + + if let Some(caps) = unix_tag_regex.captures(s) { + let src_variant = caps.name("variant").map(|m| m.as_str()).unwrap_or("").to_ascii_lowercase(); + let mut res = caps.name("language").map(|m| m.as_str()).unwrap().to_ascii_lowercase(); + let region = caps.name("region").map(|m| m.as_str()).unwrap_or(""); + let mut script = ""; + let mut variant = ""; + let mut uvariant = ""; + match src_variant.as_ref() { + // Variants seen in the wild in GNU LibC (via http://lh.2xlibre.net/) or in Debian + // GNU/Linux Stretch system. Treatment of things not found in RFC5646 subtag registry + // (http://www.iana.org/assignments/language-subtag-registry/language-subtag-registry) + // or CLDR according to notes at https://wiki.openoffice.org/wiki/LocaleMapping. 
+ // Dialects: + // aa_ER@saaho - NOTE: Can't be found under that name in RFC5646 subtag registry, + // but there is language Saho with code ssy, which is likely that thing. + "saaho" if res == "aa" => res = String::from("ssy"), + // Scripts: + // @arabic + "arabic" => script = "Arab", + // @cyrillic + "cyrl" => script = "Cyrl", + "cyrillic" => script = "Cyrl", + // @devanagari + "devanagari" => script = "Deva", + // @hebrew + "hebrew" => script = "Hebr", + // tt@iqtelif + // Neither RFC5646 subtag registry nor CLDR knows anything about this, but as best + // as I can tell it is Tatar name for Latin (default is Cyrillic). + "iqtelif" => script = "Latn", + // @Latn + "latn" => script = "Latn", + // @latin + "latin" => script = "Latn", + // en@shaw + "shaw" => script = "Shaw", + // Variants: + // sr@ijekavianlatin + "ijekavianlatin" => { + script = "Latn"; + variant = "ijekavsk"; + }, + // sr@ije + "ije" => variant = "ijekavsk", + // sr@ijekavian + "ijekavian" => variant = "ijekavsk", + // ca@valencia + "valencia" => variant = "valencia", + // Currencies: + // @euro - NOTE: We follow suit with Java and OpenOffice and ignore it, because it + // is default for all locales where it sometimes appears now, and because we use + // explicit currency in monetary formatting anyway. + "euro" => {}, + // Collation: + // gez@abegede - NOTE: This is collation, but CLDR does not have any code for it, + // so we for the moment let it fall through as -u-va- instead of -u-co-. 
+ // Anything else: + // en@boldquot, en@quot, en@piglatin - just randomish stuff + // @cjknarrow - beware, it's gonna end up as -u-va-cjknarro due to length limit + s if s.len() <= 8 => uvariant = &*s, + s => uvariant = &s[0..8], // the subtags are limited to 8 chars, but some are longer + }; + if script != "" { + res.push('-'); + res.push_str(script); + } + if region != "" { + res.push('-'); + res.push_str(&*region.to_ascii_uppercase()); + } + if variant != "" { + res.push('-'); + res.push_str(variant); + } + if uvariant != "" { + res.push_str("-u-va-"); + res.push_str(uvariant); + } + return Ok(LanguageRange { + language: Cow::Owned(res) + }); + } else if unix_invariant_regex.is_match(s) { + return Ok(LanguageRange::invariant()) + } else { + return Err(Error::NotWellFormed); + } + } +} + +impl<'a> AsRef for LanguageRange<'a> { + fn as_ref(&self) -> &str { + self.language.as_ref() + } +} + +/// Locale configuration. +/// +/// Users may accept several languages in some order of preference and may want to use rules from +/// different culture for some particular aspect of the program behaviour, and operating systems +/// allow them to specify this (to various extent). +/// +/// The `Locale` objects represent the user configuration. They contain: +/// +/// - The primary `LanguageRange`. +/// - Optional category-specific overrides. +/// - Optional fallbacks in case data (usually translations) for the primary language are not +/// available. +/// +/// The set of categories is open-ended. The `locale` crate uses five well-known categories +/// `messages`, `numeric`, `time`, `collate` and `monetary`, but some systems define additional +/// ones (GNU Linux has additionally `paper`, `name`, `address`, `telephone` and `measurement`) and +/// these are provided in the user default `Locale` and other libraries can use them. 
+/// +/// `Locale` is represented by a `,`-separated sequence of tags in `LanguageRange` syntax, where +/// all except the first one may be preceded by category name and `=` sign. +/// +/// The first tag indicates the default locale, the tags prefixed by category names indicate +/// _overrides_ for those categories and the remaining tags indicate fallbacks. +/// +/// Note that a syntactically valid value of HTTP `Accept-Language` header is a valid `Locale`. Not +/// the other way around though due to the presence of category selectors. +// TODO: Interning +#[derive(Clone,Debug,Eq,Hash,PartialEq)] +pub struct Locale { + // TODO: Intern the string for performance reasons + // XXX: Store pre-split to LanguageTags? + inner: String, +} + +impl Locale { + /// Construct invariant locale. + /// + /// Invariant locale is represented simply with empty string. + pub fn invariant() -> Locale { + Locale::from(LanguageRange::invariant()) + } + + /// Append fallback language tag. + /// + /// Adds fallback to the end of the list. + pub fn add(&mut self, tag: &LanguageRange) { + for i in self.inner.split(',') { + if i == tag.as_ref() { + return; // don't add duplicates + } + } + self.inner.push_str(","); + self.inner.push_str(tag.as_ref()); + } + + /// Append category override. + /// + /// Appending new override for a category that already has one will not replace the existing + /// override. This might change in future. + pub fn add_category(&mut self, category: &str, tag: &LanguageRange) { + if self.inner.split(',').next().unwrap() == tag.as_ref() { + return; // don't add useless override equal to the primary tag + } + for i in self.inner.split(',') { + if i.starts_with(category) && + i[category.len()..].starts_with("=") && + &i[category.len() + 1..] 
== tag.as_ref() { + return; // don't add duplicates + } + } + self.inner.push_str(","); + self.inner.push_str(category); + self.inner.push_str("="); + self.inner.push_str(tag.as_ref()); + } + + /// Iterate over `LanguageRange`s in this `Locale` applicable to given category. + /// + /// Returns `LanguageRange`s in the `Locale` that are applicable to provided category. The tags + /// are returned in order of preference, which means the category-specific ones first and then + /// the generic ones. + /// + /// The iterator is guaranteed to return at least one value. + pub fn tags_for<'a, 'c>(&'a self, category: &'c str) -> TagsFor<'a, 'c> { + let mut tags = self.inner.split(","); + while let Some(s) = tags.clone().next() { + if s.starts_with(category) && s[category.len()..].starts_with("=") { + return TagsFor { + src: self.inner.as_ref(), + tags: tags, + category: Some(category), + }; + } + tags.next(); + } + return TagsFor { + src: self.inner.as_ref(), + tags: self.inner.split(","), + category: None, + }; + } +} + +/// Locale is specified by a string tag. This is the way to access it. +// FIXME: Do we want to provide the full string representation? We would have it as single string +// then. 
+impl AsRef for Locale { + fn as_ref(&self) -> &str { + self.inner.as_ref() + } +} + +impl<'a> From> for Locale { + fn from(t: LanguageRange<'a>) -> Locale { + Locale { + inner: t.language.into_owned(), + } + } +} + +fn tag(s: &str) -> Result { + LanguageRange::from_unix(s) +} + +// TODO: Read /etc/locale.alias +fn tag_inv(s: &str) -> LanguageRange { + tag(s).unwrap_or(LanguageRange::invariant()) +} + +pub fn system_locale() -> Option { + // LC_ALL overrides everything + if let Ok(all) = env::var("LC_ALL") { + if let Ok(t) = tag(all.as_ref()) { + return Some(Locale::from(t)); + } + } + // LANG is default + let mut loc = + if let Ok(lang) = env::var("LANG") { + Locale::from(tag_inv(lang.as_ref())) + } else { + Locale::invariant() + }; + // category overrides + for &(cat, var) in [ + ("ctype", "LC_CTYPE"), + ("numeric", "LC_NUMERIC"), + ("time", "LC_TIME"), + ("collate", "LC_COLLATE"), + ("monetary", "LC_MONETARY"), + ("messages", "LC_MESSAGES"), + ("paper", "LC_PAPER"), + ("name", "LC_NAME"), + ("address", "LC_ADDRESS"), + ("telephone", "LC_TELEPHONE"), + ("measurement", "LC_MEASUREMENT"), + ].iter() { + if let Ok(val) = env::var(var) { + if let Ok(tag) = tag(val.as_ref()) + { + loc.add_category(cat, &tag); + } + } + } + // LANGUAGE defines fallbacks + if let Ok(langs) = env::var("LANGUAGE") { + for i in langs.split(':') { + if i != "" { + if let Ok(tag) = tag(i) { + loc.add(&tag); + } + } + } + } + if loc.as_ref() != "" { + return Some(loc); + } else { + return None; + } +} + +#[cfg(test)] +mod test { + use super::LanguageRange; + + #[test] + fn unix_tags() { + assert_eq!("cs-CZ", LanguageRange::from_unix("cs_CZ.UTF-8").unwrap().as_ref()); + assert_eq!("sr-RS-ijekavsk", LanguageRange::from_unix("sr_RS@ijekavian").unwrap().as_ref()); + assert_eq!("sr-Latn-ijekavsk", LanguageRange::from_unix("sr.UTF-8@ijekavianlatin").unwrap().as_ref()); + assert_eq!("en-Arab", LanguageRange::from_unix("en@arabic").unwrap().as_ref()); + assert_eq!("en-Arab", 
LanguageRange::from_unix("en.UTF-8@arabic").unwrap().as_ref()); + assert_eq!("de-DE", LanguageRange::from_unix("DE_de.UTF-8@euro").unwrap().as_ref()); + assert_eq!("ssy-ER", LanguageRange::from_unix("aa_ER@saaho").unwrap().as_ref()); + assert!(LanguageRange::from_unix("foo_BAR").is_err()); + assert!(LanguageRange::from_unix("en@arabic.UTF-8").is_err()); + assert_eq!("", LanguageRange::from_unix("C").unwrap().as_ref()); + assert_eq!("", LanguageRange::from_unix("C.UTF-8").unwrap().as_ref()); + assert_eq!("", LanguageRange::from_unix("C.ISO-8859-1").unwrap().as_ref()); + assert_eq!("", LanguageRange::from_unix("POSIX").unwrap().as_ref()); + } +} diff --git a/src/popover.rs b/src/popover.rs index 7f51b881..c9b60fb7 100644 --- a/src/popover.rs +++ b/src/popover.rs @@ -3,6 +3,9 @@ use gio; use gtk; use ::layout::c::EekGtkKeyboard; +use ::locale::compare_current_locale; +use ::locale_config::system_locale; +use ::resources; use gio::ActionExt; use gio::ActionMapExt; @@ -48,7 +51,7 @@ mod variants { } } -fn make_menu_builder(inputs: Vec<&str>) -> gtk::Builder { +fn make_menu_builder(inputs: Vec<(&str, &str)>) -> gtk::Builder { let mut xml: Vec = Vec::new(); writeln!( xml, @@ -57,16 +60,17 @@ fn make_menu_builder(inputs: Vec<&str>) -> gtk::Builder {
" ).unwrap(); - for input in inputs { + for (input_name, human_name) in inputs { writeln!( xml, " {} layout - {0} + {} ", - input, + human_name, + input_name, ).unwrap(); } writeln!( @@ -96,7 +100,7 @@ fn set_layout(kind: String, name: String) { pub fn show(window: EekGtkKeyboard, position: ::layout::c::Bounds) { unsafe { gtk::set_initialized() }; let window = unsafe { gtk::Widget::from_glib_none(window.0) }; - + let settings = gio::Settings::new("org.gnome.desktop.input-sources"); let inputs = settings.get_value("sources").unwrap(); let current = settings.get_uint("current") as usize; @@ -106,7 +110,35 @@ pub fn show(window: EekGtkKeyboard, position: ::layout::c::Bounds) { .map(|(_kind, name)| name.as_str()) .collect(); - let builder = make_menu_builder(input_names.clone()); + let translations = system_locale() + .map(|locale| + locale.tags_for("messages") + .next().unwrap() // guaranteed to exist + .as_ref() + .to_owned() + ) + .and_then(|lang| resources::get_layout_names(lang.as_str())); + + // sorted collection of human and machine names + let mut human_names: Vec<(&str, &str)> = match translations { + Some(translations) => { + input_names.iter() + .map(|name| (*name, *translations.get(name).unwrap_or(name))) + .collect() + }, + // display bare codes + None => { + input_names.iter() + .map(|n| (*n, *n)) // turns &&str into &str + .collect() + } + }; + + human_names.sort_unstable_by(|(_, human_label_a), (_, human_label_b)| { + compare_current_locale(human_label_a, human_label_b) + }); + + let builder = make_menu_builder(human_names); // Much more debuggable to populate the model & menu // from a string representation // than add items imperatively diff --git a/src/resources.rs b/src/resources.rs index e33ff433..e9e5d0f8 100644 --- a/src/resources.rs +++ b/src/resources.rs @@ -2,6 +2,13 @@ * This could be done using GResource, but that would need additional work. 
*/ +use std::collections::HashMap; + +use std::iter::FromIterator; + +// TODO: keep a list of what is a language layout, +// and what a convenience layout. "_wide" is not a layout, +// neither is "number" const KEYBOARDS: &[(*const str, *const str)] = &[ ("us", include_str!("../data/keyboards/us.yaml")), ("us_wide", include_str!("../data/keyboards/us_wide.yaml")), @@ -27,3 +34,76 @@ pub fn get_keyboard(needle: &str) -> Option<&'static str> { unsafe { &*value } }) } + +/// Translations of the layout identifier strings +const LAYOUT_NAMES: &[(*const str, *const str)] = &[ + ("en-US", include_str!("../data/langs/en-US.txt")), + ("pl-PL", include_str!("../data/langs/pl-PL.txt")), +]; + +pub fn get_layout_names(lang: &str) + -> Option> +{ + let translations = LAYOUT_NAMES.iter() + .find(|(name, _data)| { + let name: *const str = *name; + (unsafe { &*name }) == lang + }) + .map(|(_name, data)| { + let data: *const str = *data; + unsafe { &*data } + }); + translations.map(make_mapping) +} + +fn parse_line(line: &str) -> Option<(&str, &str)> { + let comment = line.trim().starts_with("#"); + if comment { + None + } else { + let mut iter = line.splitn(2, " "); + let name = iter.next().unwrap(); + // will skip empty and unfinished lines + iter.next().map(|tr| (name, tr.trim())) + } +} + +fn make_mapping(data: &str) -> HashMap<&str, &str> { + HashMap::from_iter( + data.split("\n") + .filter_map(parse_line) + ) +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn mapping_line() { + assert_eq!( + Some(("name", "translation")), + parse_line("name translation") + ); + } + + #[test] + fn mapping_bad() { + assert_eq!(None, parse_line("bad")); + } + + #[test] + fn mapping_empty() { + assert_eq!(None, parse_line("")); + } + + #[test] + fn mapping_comment() { + assert_eq!(None, parse_line("# comment")); + } + + #[test] + fn mapping_comment_offset() { + assert_eq!(None, parse_line(" # comment")); + } +}