1 //! Locale implementation using GNU libc
2 
3 use ::std::borrow::Cow;
4 use ::std::ffi::{CStr,CString};
5 use ::std::io::{Error,Result};
6 use ::std::sync::Arc;
7 use super::{LocaleFactory,Numeric,Time};
8 
9 pub mod ffi;
10 pub mod langinfo;
11 
/// Wrapper for libc's locale_t.
///
/// The wrapper owns the handle: dropping it releases the handle with `freelocale`.
#[derive(Debug)]
pub struct CLocale {
    // Raw libc locale handle. It may be null, in which case `Drop` does nothing.
    c_locale: ffi::locale_t,
}
17 
18 impl CLocale {
19     /// Constructs new complete locale.
20     ///
21     /// Constructs `CLocale` with all categories from locale `locale`. See
22     /// [`newlocale`](http://man7.org/linux/man-pages/man3/newlocale.3.html).
new(locale: &str) -> Result<Self>23     pub fn new(locale: &str) -> Result<Self> {
24         let cloc = try!(CString::new(locale));
25         let res = unsafe { ffi::newlocale(ffi::LC_ALL_MASK, cloc.as_ptr(), ::std::ptr::null_mut()) };
26         if res.is_null() {
27             Err(Error::last_os_error())
28         } else {
29             Ok(CLocale { c_locale: res, })
30         }
31     }
32 
33     /// Constructs new complete locale.
34     ///
35     /// Constructs `CLocale` with specified categories from locale `locale` and the rest
36     /// from `from`. `from` is destroyed in the process. See
37     /// [`newlocale`(3)](http://man7.org/linux/man-pages/man3/newlocale.3.html).
new_from(mask: ::libc::c_int, locale: &str, mut from: Self) -> Result<CLocale>38     pub fn new_from(mask: ::libc::c_int, locale: &str, mut from: Self) -> Result<CLocale> {
39         let cloc = try!(CString::new(locale));
40         let res = unsafe { ffi::newlocale(mask, cloc.as_ptr(), from.c_locale) };
41         // XXX: Is there better way to skip Drop then zeroing+check? And the associated need to
42         // have the field mut though it's otherwise not needed and not desired?
43         from.c_locale = ::std::ptr::null_mut();
44         if res.is_null() {
45             Err(Error::last_os_error())
46         } else {
47             Ok(CLocale { c_locale: res, })
48         }
49     }
50 
51     /// Returns locale ID that is in use for given category.
52     ///
53     /// As indicated by `locale_t::names[category]`.
name<'a>(&'a self, category: ::libc::c_int) -> Cow<'a, str>54     pub fn name<'a>(&'a self, category: ::libc::c_int) -> Cow<'a, str> {
55         assert!(category >= 0 && category <= 12);
56         unsafe {
57             let ptr = (*self.c_locale).__names[category as usize];
58             if ptr.is_null() {
59                 return Cow::Borrowed("C");
60             }
61             let cres: &'a CStr = CStr::from_ptr(ptr);
62             return String::from_utf8_lossy(cres.to_bytes());
63         }
64     }
65 }
66 
67 impl Drop for CLocale {
drop(&mut self)68     fn drop(&mut self) {
69         if !self.c_locale.is_null() {
70             unsafe { ffi::freelocale(self.c_locale) };
71         }
72     }
73 }
74 
75 impl Clone for CLocale {
clone(&self) -> Self76     fn clone(&self) -> Self {
77         CLocale {
78             c_locale: unsafe { ffi::duplocale(self.c_locale) },
79         }
80     }
81 }
82 
/// Owned `iconv` conversion descriptor.
///
/// Obtained from `iconv_open` by `IConv::new` and closed with `iconv_close` on drop.
#[derive(Debug)]
pub struct IConv {
    // Descriptor returned by `iconv_open`.
    iconv: ffi::iconv_t,
}
87 
88 /// Wrapper for iconv.
89 ///
90 /// See [`iconv`(3)](http://man7.org/linux/man-pages/man3/iconv.3.html).
91 ///
/// On Linux this is part of the standard C library and should always be able to convert any
/// charset that a locale component presents, so we can conveniently use it for translating
/// locale data to Rust's internal UTF-8 encoding.
95 impl IConv {
96     /// Construct iconv converter.
97     ///
98     /// See [`iconv_open`(3)](http://man7.org/linux/man-pages/man3/iconv_open.3.html).
new(to: &str, from: &str) -> Result<Self>99     pub fn new(to: &str, from: &str) -> Result<Self> {
100         let cto = try!(::std::ffi::CString::new(to));
101         let cfrom = try!(::std::ffi::CString::new(from));
102         let res = unsafe { ffi::iconv_open(cto.as_ptr(), cfrom.as_ptr()) };
103         if res.is_null() {
104             Err(Error::last_os_error())
105         } else {
106             Ok(IConv { iconv: res, })
107         }
108     }
109 
110     /// Convert data with iconv
111     ///
112     /// See [`iconv`(3)](http://man7.org/linux/man-pages/man3/iconv.3.html). The parameters are
113     ///
114     ///  1. `src`: The input buffer.
115     ///  2. `dst`: The output buffer.
116     ///
117     /// Return values are:
118     ///
119     ///  1. Result of `iconv`. If -1, the reason can be read from `errno` (unfortunately
120     ///     `::std::io::Error::last_os_error()` does not seem to be able to distinguish them at the
121     ///     moment).
122     ///  2. Number of bytes processed from `src`.
123     ///  3. Number of bytes written to `dst`.
124     ///
125     /// The C interface returns the remaining buffers instead, but that is actually hard to work
126     /// with in Rust.
convert(&self, src: &[u8], dst: &mut [u8]) -> (isize, usize, usize)127     pub fn convert(&self, src: &[u8], dst: &mut [u8]) -> (isize, usize, usize) {
128         let mut inptr: *const ::libc::c_char = src.as_ptr() as *const ::libc::c_char;
129         let mut insize: ::libc::size_t = src.len() as ::libc::size_t;
130         let mut outptr: *mut ::libc::c_char = dst.as_ptr() as *mut ::libc::c_char;
131         let mut outsize: ::libc::size_t = dst.len() as ::libc::size_t;
132         // XXX: Do we need error handling? We don't expect errors and can't do much about them here.
133         let res = unsafe {
134             ffi::iconv(self.iconv,
135                 &mut inptr, &mut insize,
136                 &mut outptr, &mut outsize)
137         };
138         (res as isize, src.len() - (insize as usize), dst.len() - (outsize as usize))
139     }
140 }
141 
142 impl Drop for IConv {
drop(&mut self)143     fn drop(&mut self) {
144         if !self.iconv.is_null() {
145             unsafe { ffi::iconv_close(self.iconv); }
146         }
147     }
148 }
149 
// FIXME FIXME FIXME #[derive(Clone)]
/// Locale factory that reads its data from a libc `CLocale`.
#[derive(Debug)]
pub struct LibCLocaleFactory {
    /// The libc locale all queries are made against.
    locale: Arc<CLocale>,
    /// One optional converter to UTF-8 per codeset item, in the slot given by `codeset_index`.
    /// A slot is `None` when that codeset already is UTF-8 or no converter could be opened.
    iconv: [Option<Arc<IConv>>; 12],
}
156 
157 impl LibCLocaleFactory {
    /// Maps a codeset item to its slot in the `iconv` array.
    ///
    /// The numbering has to stay in step with the order in which `new_from_c_locale` fills
    /// that array.
    fn codeset_index(item: langinfo::CodesetItems) -> usize {
        match item {
            langinfo::_NL_COLLATE_CODESET => 0,
            langinfo::_NL_CTYPE_CODESET_NAME => 1,
            langinfo::_NL_MONETARY_CODESET => 2,
            langinfo::_NL_NUMERIC_CODESET => 3,
            langinfo::_NL_TIME_CODESET => 4,
            langinfo::_NL_MESSAGES_CODESET => 5,
            langinfo::_NL_PAPER_CODESET => 6,
            langinfo::_NL_NAME_CODESET => 7,
            langinfo::_NL_ADDRESS_CODESET => 8,
            langinfo::_NL_TELEPHONE_CODESET => 9,
            langinfo::_NL_MEASUREMENT_CODESET => 10,
            langinfo::_NL_IDENTIFICATION_CODESET => 11,
        }
    }
174 
175     // TODO TODO: Could also try overriding all components to their corresponding UTF-8 variants,
176     // though that's quite a bit more work.
new_from_c_locale(c_locale: CLocale) -> Self177     pub fn new_from_c_locale(c_locale: CLocale) -> Self {
178         fn get_iconv(codeset: langinfo::CodesetItems, locale: &CLocale) -> Option<Arc<IConv>> {
179             let cs = unsafe {
180                 ::std::str::from_utf8_unchecked(
181                     ::std::ffi::CStr::from_ptr(
182                         ffi::nl_langinfo_l(codeset as ::libc::c_uint, locale.c_locale)).to_bytes())
183             };
184             if cs != "UTF-8" {
185                 if let Ok(i) = IConv::new("UTF-8", cs) {
186                     return Some(Arc::new(i));
187                 }
188             }
189             return None;
190         }
191         return LibCLocaleFactory{
192             iconv: [
193                 get_iconv(langinfo::_NL_COLLATE_CODESET, &c_locale),
194                 get_iconv(langinfo::_NL_CTYPE_CODESET_NAME, &c_locale),
195                 get_iconv(langinfo::_NL_MONETARY_CODESET, &c_locale),
196                 get_iconv(langinfo::_NL_NUMERIC_CODESET, &c_locale),
197                 get_iconv(langinfo::_NL_TIME_CODESET, &c_locale),
198                 get_iconv(langinfo::_NL_MESSAGES_CODESET, &c_locale),
199                 get_iconv(langinfo::_NL_PAPER_CODESET, &c_locale),
200                 get_iconv(langinfo::_NL_NAME_CODESET, &c_locale),
201                 get_iconv(langinfo::_NL_ADDRESS_CODESET, &c_locale),
202                 get_iconv(langinfo::_NL_TELEPHONE_CODESET, &c_locale),
203                 get_iconv(langinfo::_NL_MEASUREMENT_CODESET, &c_locale),
204                 get_iconv(langinfo::_NL_IDENTIFICATION_CODESET, &c_locale),
205             ],
206             locale: Arc::new(c_locale),
207         };
208     }
209 
new(locale: &str) -> Result<Self>210     pub fn new(locale: &str) -> Result<Self> {
211         let loc = try!(CLocale::new(locale));
212 
213         return Ok(LibCLocaleFactory::new_from_c_locale(loc));
214     }
215 
langinfo<'a, I>(&'a self, item: I) -> I::Type where I: langinfo::LanginfoItem<'a>216     pub fn langinfo<'a, I>(&'a self, item: I) -> I::Type
217         where I: langinfo::LanginfoItem<'a>
218     {
219         let mut conv = None;
220         if let Some(cs) = I::needs_iconv() {
221             if let Some(ref iconv) = self.iconv[LibCLocaleFactory::codeset_index(cs)] {
222                 conv = Some(&**iconv);
223             }
224         }
225         unsafe {
226             item.decode(ffi::nl_langinfo_l(item.to_ffi(), self.locale.c_locale), conv)
227         }
228     }
229 }
230 
231 impl LocaleFactory for LibCLocaleFactory {
get_numeric(&mut self) -> Option<Box<Numeric>>232     fn get_numeric(&mut self) -> Option<Box<Numeric>> {
233         return Some(
234             Box::new(
235                 Numeric::new(
236                     &self.langinfo(langinfo::RADIXCHAR),
237                     &self.langinfo(langinfo::THOUSEP))));
238     }
239 
    /// Builds the time conventions by querying month and day names from the locale.
    ///
    /// Every name is fetched through `langinfo` and copied into an owned `String`. This
    /// implementation always returns `Some`.
    fn get_time(&mut self) -> Option<Box<Time>> {
        return Some(
            Box::new(
                Time {
                    // `ABMON_1`..`ABMON_12`: the abbreviated month names of POSIX langinfo.
                    month_names: vec![
                        self.langinfo(langinfo::ABMON_1).into_owned(),
                        self.langinfo(langinfo::ABMON_2).into_owned(),
                        self.langinfo(langinfo::ABMON_3).into_owned(),
                        self.langinfo(langinfo::ABMON_4).into_owned(),
                        self.langinfo(langinfo::ABMON_5).into_owned(),
                        self.langinfo(langinfo::ABMON_6).into_owned(),
                        self.langinfo(langinfo::ABMON_7).into_owned(),
                        self.langinfo(langinfo::ABMON_8).into_owned(),
                        self.langinfo(langinfo::ABMON_9).into_owned(),
                        self.langinfo(langinfo::ABMON_10).into_owned(),
                        self.langinfo(langinfo::ABMON_11).into_owned(),
                        self.langinfo(langinfo::ABMON_12).into_owned(),
                    ],
                    // `MON_1`..`MON_12`: the full month names.
                    long_month_names: vec![
                        self.langinfo(langinfo::MON_1).into_owned(),
                        self.langinfo(langinfo::MON_2).into_owned(),
                        self.langinfo(langinfo::MON_3).into_owned(),
                        self.langinfo(langinfo::MON_4).into_owned(),
                        self.langinfo(langinfo::MON_5).into_owned(),
                        self.langinfo(langinfo::MON_6).into_owned(),
                        self.langinfo(langinfo::MON_7).into_owned(),
                        self.langinfo(langinfo::MON_8).into_owned(),
                        self.langinfo(langinfo::MON_9).into_owned(),
                        self.langinfo(langinfo::MON_10).into_owned(),
                        self.langinfo(langinfo::MON_11).into_owned(),
                        self.langinfo(langinfo::MON_12).into_owned(),
                    ],
                    // `ABDAY_1`..`ABDAY_7`: the abbreviated day names.
                    day_names: vec![
                        self.langinfo(langinfo::ABDAY_1).into_owned(),
                        self.langinfo(langinfo::ABDAY_2).into_owned(),
                        self.langinfo(langinfo::ABDAY_3).into_owned(),
                        self.langinfo(langinfo::ABDAY_4).into_owned(),
                        self.langinfo(langinfo::ABDAY_5).into_owned(),
                        self.langinfo(langinfo::ABDAY_6).into_owned(),
                        self.langinfo(langinfo::ABDAY_7).into_owned(),
                    ],
                    // `DAY_1`..`DAY_7`: the full day names.
                    long_day_names: vec![
                        self.langinfo(langinfo::DAY_1).into_owned(),
                        self.langinfo(langinfo::DAY_2).into_owned(),
                        self.langinfo(langinfo::DAY_3).into_owned(),
                        self.langinfo(langinfo::DAY_4).into_owned(),
                        self.langinfo(langinfo::DAY_5).into_owned(),
                        self.langinfo(langinfo::DAY_6).into_owned(),
                        self.langinfo(langinfo::DAY_7).into_owned(),
                    ],
                }));
    }
292 }
293 
// Tests for the libc-backed locale wrappers. Most of them depend on locales installed on the
// host and silently do nothing when a required locale is missing.
#[cfg(test)]
mod test {
    use ::std::ffi::CStr;
    use super::*;

    // Reports whether the host can construct the named locale.
    fn has_locale(locale: &str) -> bool {
        CLocale::new(locale).is_ok()
    }

    // A UTF-8 locale must report the `UTF-8` codeset.
    #[test]
    fn c_locale() {
        if has_locale("C.UTF-8") {
            let l = LibCLocaleFactory::new("C.UTF-8").unwrap();
            assert_eq!("UTF-8", l.langinfo(langinfo::CODESET));
        } else {
            println!("Skipped!");
        }
    }

    // The plain `en_GB` locale is expected to use ISO-8859-1.
    #[test]
    fn en_locale() {
        if has_locale("en_GB") {
            let l = LibCLocaleFactory::new("en_GB").unwrap();
            assert_eq!("ISO-8859-1", l.langinfo(langinfo::CODESET));
        } else {
            println!("Skipped!");
        }
    }

    // An unknown locale name must be reported as an error.
    #[test]
    fn bad_locale() {
        let l = LibCLocaleFactory::new("wrong");
        assert!(l.is_err());
    }

    // Combining categories from several locales with `new_from` must change only the selected
    // category and leave a cloned source locale untouched.
    #[test]
    fn mixed_locale() {
        // Raw query that bypasses the factory (and therefore any charset conversion).
        fn langinfo(loc: &CLocale, item: ::libc::c_uint) -> &str {
            let res = unsafe { CStr::from_ptr(ffi::nl_langinfo_l(item, loc.c_locale)) };
            ::std::str::from_utf8(res.to_bytes()).unwrap()
        }

        if let Ok(l) = CLocale::new("cs_CZ") {
            // only test if the host has these locales (travis boxen don't)
            assert_eq!(",", langinfo(&l, ffi::RADIXCHAR));
            assert_eq!("Po", langinfo(&l, ffi::ABDAY_2));
            // `l` is consumed here; only the numeric category switches to en_GB.
            if let Ok(m) = CLocale::new_from(ffi::LC_NUMERIC_MASK, "en_GB", l) {
                assert_eq!(".", langinfo(&m, ffi::RADIXCHAR));
                assert_eq!("Po", langinfo(&m, ffi::ABDAY_2));
                // A clone is passed because `new_from` consumes its base and `m` is still
                // checked below.
                if let Ok(n) = CLocale::new_from(ffi::LC_TIME_MASK, "de_DE", m.clone()) {
                    assert_eq!(".", langinfo(&n, ffi::RADIXCHAR));
                    assert_eq!("Mi", langinfo(&n, ffi::ABDAY_4));
                    // `m` must be unaffected by the changes made to its clone.
                    assert_eq!(".", langinfo(&m, ffi::RADIXCHAR));
                    assert_eq!("Po", langinfo(&m, ffi::ABDAY_2));
                    assert_eq!("cs_CZ", n.name(ffi::LC_CTYPE));
                    assert_eq!("en_GB", n.name(ffi::LC_NUMERIC));
                    assert_eq!("de_DE", n.name(ffi::LC_TIME));
                }
            }
        }
    }

    // Text from a non-UTF-8 locale must come out of the factory converted to UTF-8.
    #[test]
    fn locale_with_convert() {
        if let Ok(lf) = LibCLocaleFactory::new("cs_CZ") {
            // only test if the host has cs_CZ (non-unicode) locale (travis boxen don't)
            assert_eq!("ISO-8859-2", lf.langinfo(langinfo::CODESET));
            assert_eq!("Út", lf.langinfo(langinfo::ABDAY_3));
        }
    }
}
365