1 //! Locale implementation using GNU libc 2 3 use ::std::borrow::Cow; 4 use ::std::ffi::{CStr,CString}; 5 use ::std::io::{Error,Result}; 6 use ::std::sync::Arc; 7 use super::{LocaleFactory,Numeric,Time}; 8 9 pub mod ffi; 10 pub mod langinfo; 11 12 /// Wrapper for libc's locale_t. 13 #[derive(Debug)] 14 pub struct CLocale { 15 c_locale: ffi::locale_t, 16 } 17 18 impl CLocale { 19 /// Constructs new complete locale. 20 /// 21 /// Constructs `CLocale` with all categories from locale `locale`. See 22 /// [`newlocale`](http://man7.org/linux/man-pages/man3/newlocale.3.html). new(locale: &str) -> Result<Self>23 pub fn new(locale: &str) -> Result<Self> { 24 let cloc = try!(CString::new(locale)); 25 let res = unsafe { ffi::newlocale(ffi::LC_ALL_MASK, cloc.as_ptr(), ::std::ptr::null_mut()) }; 26 if res.is_null() { 27 Err(Error::last_os_error()) 28 } else { 29 Ok(CLocale { c_locale: res, }) 30 } 31 } 32 33 /// Constructs new complete locale. 34 /// 35 /// Constructs `CLocale` with specified categories from locale `locale` and the rest 36 /// from `from`. `from` is destroyed in the process. See 37 /// [`newlocale`(3)](http://man7.org/linux/man-pages/man3/newlocale.3.html). new_from(mask: ::libc::c_int, locale: &str, mut from: Self) -> Result<CLocale>38 pub fn new_from(mask: ::libc::c_int, locale: &str, mut from: Self) -> Result<CLocale> { 39 let cloc = try!(CString::new(locale)); 40 let res = unsafe { ffi::newlocale(mask, cloc.as_ptr(), from.c_locale) }; 41 // XXX: Is there better way to skip Drop then zeroing+check? And the associated need to 42 // have the field mut though it's otherwise not needed and not desired? 43 from.c_locale = ::std::ptr::null_mut(); 44 if res.is_null() { 45 Err(Error::last_os_error()) 46 } else { 47 Ok(CLocale { c_locale: res, }) 48 } 49 } 50 51 /// Returns locale ID that is in use for given category. 52 /// 53 /// As indicated by `locale_t::names[category]`. name<'a>(&'a self, category: ::libc::c_int) -> Cow<'a, str>54 pub fn name<'a>(&'a self, category: ::libc::c_int) -> Cow<'a, str> { 55 assert!(category >= 0 && category <= 12); 56 unsafe { 57 let ptr = (*self.c_locale).__names[category as usize]; 58 if ptr.is_null() { 59 return Cow::Borrowed("C"); 60 } 61 let cres: &'a CStr = CStr::from_ptr(ptr); 62 return String::from_utf8_lossy(cres.to_bytes()); 63 } 64 } 65 } 66 67 impl Drop for CLocale { drop(&mut self)68 fn drop(&mut self) { 69 if !self.c_locale.is_null() { 70 unsafe { ffi::freelocale(self.c_locale) }; 71 } 72 } 73 } 74 75 impl Clone for CLocale { clone(&self) -> Self76 fn clone(&self) -> Self { 77 CLocale { 78 c_locale: unsafe { ffi::duplocale(self.c_locale) }, 79 } 80 } 81 } 82 83 #[derive(Debug)] 84 pub struct IConv { 85 iconv: ffi::iconv_t, 86 } 87 88 /// Wrapper for iconv. 89 /// 90 /// See [`iconv`(3)](http://man7.org/linux/man-pages/man3/iconv.3.html). 91 /// 92 /// On Linux this is part of standard C library and should always be able to convert any charset 93 /// that the locale component presents, so we can conveniently use it for translating that to the 94 /// Rust's internal utf-8 encoding there. 95 impl IConv { 96 /// Construct iconv converter. 97 /// 98 /// See [`iconv_open`(3)](http://man7.org/linux/man-pages/man3/iconv_open.3.html). new(to: &str, from: &str) -> Result<Self>99 pub fn new(to: &str, from: &str) -> Result<Self> { 100 let cto = try!(::std::ffi::CString::new(to)); 101 let cfrom = try!(::std::ffi::CString::new(from)); 102 let res = unsafe { ffi::iconv_open(cto.as_ptr(), cfrom.as_ptr()) }; 103 if res.is_null() { 104 Err(Error::last_os_error()) 105 } else { 106 Ok(IConv { iconv: res, }) 107 } 108 } 109 110 /// Convert data with iconv 111 /// 112 /// See [`iconv`(3)](http://man7.org/linux/man-pages/man3/iconv.3.html). The parameters are 113 /// 114 /// 1. `src`: The input buffer. 115 /// 2. `dst`: The output buffer. 116 /// 117 /// Return values are: 118 /// 119 /// 1. Result of `iconv`. If -1, the reason can be read from `errno` (unfortunately 120 /// `::std::io::Error::last_os_error()` does not seem to be able to distinguish them at the 121 /// moment). 122 /// 2. Number of bytes processed from `src`. 123 /// 3. Number of bytes written to `dst`. 124 /// 125 /// The C interface returns the remaining buffers instead, but that is actually hard to work 126 /// with in Rust. convert(&self, src: &[u8], dst: &mut [u8]) -> (isize, usize, usize)127 pub fn convert(&self, src: &[u8], dst: &mut [u8]) -> (isize, usize, usize) { 128 let mut inptr: *const ::libc::c_char = src.as_ptr() as *const ::libc::c_char; 129 let mut insize: ::libc::size_t = src.len() as ::libc::size_t; 130 let mut outptr: *mut ::libc::c_char = dst.as_ptr() as *mut ::libc::c_char; 131 let mut outsize: ::libc::size_t = dst.len() as ::libc::size_t; 132 // XXX: Do we need error handling? We don't expect errors and can't do much about them here. 133 let res = unsafe { 134 ffi::iconv(self.iconv, 135 &mut inptr, &mut insize, 136 &mut outptr, &mut outsize) 137 }; 138 (res as isize, src.len() - (insize as usize), dst.len() - (outsize as usize)) 139 } 140 } 141 142 impl Drop for IConv { drop(&mut self)143 fn drop(&mut self) { 144 if !self.iconv.is_null() { 145 unsafe { ffi::iconv_close(self.iconv); } 146 } 147 } 148 } 149 150 // FIXME FIXME FIXME #[derive(Clone)] 151 #[derive(Debug)] 152 pub struct LibCLocaleFactory { 153 locale: Arc<CLocale>, 154 iconv: [Option<Arc<IConv>>; 12], 155 } 156 157 impl LibCLocaleFactory { codeset_index(item: langinfo::CodesetItems) -> usize158 fn codeset_index(item: langinfo::CodesetItems) -> usize { 159 match item { 160 langinfo::_NL_COLLATE_CODESET => 0, 161 langinfo::_NL_CTYPE_CODESET_NAME => 1, 162 langinfo::_NL_MONETARY_CODESET => 2, 163 langinfo::_NL_NUMERIC_CODESET => 3, 164 langinfo::_NL_TIME_CODESET => 4, 165 langinfo::_NL_MESSAGES_CODESET => 5, 166 langinfo::_NL_PAPER_CODESET => 6, 167 langinfo::_NL_NAME_CODESET => 7, 168 langinfo::_NL_ADDRESS_CODESET => 8, 169 langinfo::_NL_TELEPHONE_CODESET => 9, 170 langinfo::_NL_MEASUREMENT_CODESET => 10, 171 langinfo::_NL_IDENTIFICATION_CODESET => 11, 172 } 173 } 174 175 // TODO TODO: Could also try overriding all components to their corresponding UTF-8 variants, 176 // though that's quite a bit more work. new_from_c_locale(c_locale: CLocale) -> Self177 pub fn new_from_c_locale(c_locale: CLocale) -> Self { 178 fn get_iconv(codeset: langinfo::CodesetItems, locale: &CLocale) -> Option<Arc<IConv>> { 179 let cs = unsafe { 180 ::std::str::from_utf8_unchecked( 181 ::std::ffi::CStr::from_ptr( 182 ffi::nl_langinfo_l(codeset as ::libc::c_uint, locale.c_locale)).to_bytes()) 183 }; 184 if cs != "UTF-8" { 185 if let Ok(i) = IConv::new("UTF-8", cs) { 186 return Some(Arc::new(i)); 187 } 188 } 189 return None; 190 } 191 return LibCLocaleFactory{ 192 iconv: [ 193 get_iconv(langinfo::_NL_COLLATE_CODESET, &c_locale), 194 get_iconv(langinfo::_NL_CTYPE_CODESET_NAME, &c_locale), 195 get_iconv(langinfo::_NL_MONETARY_CODESET, &c_locale), 196 get_iconv(langinfo::_NL_NUMERIC_CODESET, &c_locale), 197 get_iconv(langinfo::_NL_TIME_CODESET, &c_locale), 198 get_iconv(langinfo::_NL_MESSAGES_CODESET, &c_locale), 199 get_iconv(langinfo::_NL_PAPER_CODESET, &c_locale), 200 get_iconv(langinfo::_NL_NAME_CODESET, &c_locale), 201 get_iconv(langinfo::_NL_ADDRESS_CODESET, &c_locale), 202 get_iconv(langinfo::_NL_TELEPHONE_CODESET, &c_locale), 203 get_iconv(langinfo::_NL_MEASUREMENT_CODESET, &c_locale), 204 get_iconv(langinfo::_NL_IDENTIFICATION_CODESET, &c_locale), 205 ], 206 locale: Arc::new(c_locale), 207 }; 208 } 209 new(locale: &str) -> Result<Self>210 pub fn new(locale: &str) -> Result<Self> { 211 let loc = try!(CLocale::new(locale)); 212 213 return Ok(LibCLocaleFactory::new_from_c_locale(loc)); 214 } 215 langinfo<'a, I>(&'a self, item: I) -> I::Type where I: langinfo::LanginfoItem<'a>216 pub fn langinfo<'a, I>(&'a self, item: I) -> I::Type 217 where I: langinfo::LanginfoItem<'a> 218 { 219 let mut conv = None; 220 if let Some(cs) = I::needs_iconv() { 221 if let Some(ref iconv) = self.iconv[LibCLocaleFactory::codeset_index(cs)] { 222 conv = Some(&**iconv); 223 } 224 } 225 unsafe { 226 item.decode(ffi::nl_langinfo_l(item.to_ffi(), self.locale.c_locale), conv) 227 } 228 } 229 } 230 231 impl LocaleFactory for LibCLocaleFactory { get_numeric(&mut self) -> Option<Box<Numeric>>232 fn get_numeric(&mut self) -> Option<Box<Numeric>> { 233 return Some( 234 Box::new( 235 Numeric::new( 236 &self.langinfo(langinfo::RADIXCHAR), 237 &self.langinfo(langinfo::THOUSEP)))); 238 } 239 get_time(&mut self) -> Option<Box<Time>>240 fn get_time(&mut self) -> Option<Box<Time>> { 241 return Some( 242 Box::new( 243 Time { 244 month_names: vec![ 245 self.langinfo(langinfo::ABMON_1).into_owned(), 246 self.langinfo(langinfo::ABMON_2).into_owned(), 247 self.langinfo(langinfo::ABMON_3).into_owned(), 248 self.langinfo(langinfo::ABMON_4).into_owned(), 249 self.langinfo(langinfo::ABMON_5).into_owned(), 250 self.langinfo(langinfo::ABMON_6).into_owned(), 251 self.langinfo(langinfo::ABMON_7).into_owned(), 252 self.langinfo(langinfo::ABMON_8).into_owned(), 253 self.langinfo(langinfo::ABMON_9).into_owned(), 254 self.langinfo(langinfo::ABMON_10).into_owned(), 255 self.langinfo(langinfo::ABMON_11).into_owned(), 256 self.langinfo(langinfo::ABMON_12).into_owned(), 257 ], 258 long_month_names: vec![ 259 self.langinfo(langinfo::MON_1).into_owned(), 260 self.langinfo(langinfo::MON_2).into_owned(), 261 self.langinfo(langinfo::MON_3).into_owned(), 262 self.langinfo(langinfo::MON_4).into_owned(), 263 self.langinfo(langinfo::MON_5).into_owned(), 264 self.langinfo(langinfo::MON_6).into_owned(), 265 self.langinfo(langinfo::MON_7).into_owned(), 266 self.langinfo(langinfo::MON_8).into_owned(), 267 self.langinfo(langinfo::MON_9).into_owned(), 268 self.langinfo(langinfo::MON_10).into_owned(), 269 self.langinfo(langinfo::MON_11).into_owned(), 270 self.langinfo(langinfo::MON_12).into_owned(), 271 ], 272 day_names: vec![ 273 self.langinfo(langinfo::ABDAY_1).into_owned(), 274 self.langinfo(langinfo::ABDAY_2).into_owned(), 275 self.langinfo(langinfo::ABDAY_3).into_owned(), 276 self.langinfo(langinfo::ABDAY_4).into_owned(), 277 self.langinfo(langinfo::ABDAY_5).into_owned(), 278 self.langinfo(langinfo::ABDAY_6).into_owned(), 279 self.langinfo(langinfo::ABDAY_7).into_owned(), 280 ], 281 long_day_names: vec![ 282 self.langinfo(langinfo::DAY_1).into_owned(), 283 self.langinfo(langinfo::DAY_2).into_owned(), 284 self.langinfo(langinfo::DAY_3).into_owned(), 285 self.langinfo(langinfo::DAY_4).into_owned(), 286 self.langinfo(langinfo::DAY_5).into_owned(), 287 self.langinfo(langinfo::DAY_6).into_owned(), 288 self.langinfo(langinfo::DAY_7).into_owned(), 289 ], 290 })); 291 } 292 } 293 294 #[cfg(test)] 295 mod test { 296 use ::std::ffi::CStr; 297 use super::*; 298 has_locale(locale: &str) -> bool299 fn has_locale(locale: &str) -> bool { 300 CLocale::new(locale).is_ok() 301 } 302 303 #[test] c_locale()304 fn c_locale() { 305 if has_locale("C.UTF-8") { 306 let l = LibCLocaleFactory::new("C.UTF-8").unwrap(); 307 assert_eq!("UTF-8", l.langinfo(langinfo::CODESET)); 308 } else { 309 println!("Skipped!"); 310 } 311 } 312 313 #[test] en_locale()314 fn en_locale() { 315 if has_locale("en_GB") { 316 let l = LibCLocaleFactory::new("en_GB").unwrap(); 317 assert_eq!("ISO-8859-1", l.langinfo(langinfo::CODESET)); 318 } else { 319 println!("Skipped!"); 320 } 321 } 322 323 #[test] bad_locale()324 fn bad_locale() { 325 let l = LibCLocaleFactory::new("wrong"); 326 assert!(l.is_err()); 327 } 328 329 #[test] mixed_locale()330 fn mixed_locale() { 331 fn langinfo(loc: &CLocale, item: ::libc::c_uint) -> &str { 332 let res = unsafe { CStr::from_ptr(ffi::nl_langinfo_l(item, loc.c_locale)) }; 333 ::std::str::from_utf8(res.to_bytes()).unwrap() 334 } 335 336 if let Ok(l) = CLocale::new("cs_CZ") { 337 // only test if the host has these locales (travis boxen don't) 338 assert_eq!(",", langinfo(&l, ffi::RADIXCHAR)); 339 assert_eq!("Po", langinfo(&l, ffi::ABDAY_2)); 340 if let Ok(m) = CLocale::new_from(ffi::LC_NUMERIC_MASK, "en_GB", l) { 341 assert_eq!(".", langinfo(&m, ffi::RADIXCHAR)); 342 assert_eq!("Po", langinfo(&m, ffi::ABDAY_2)); 343 if let Ok(n) = CLocale::new_from(ffi::LC_TIME_MASK, "de_DE", m.clone()) { 344 assert_eq!(".", langinfo(&n, ffi::RADIXCHAR)); 345 assert_eq!("Mi", langinfo(&n, ffi::ABDAY_4)); 346 assert_eq!(".", langinfo(&m, ffi::RADIXCHAR)); 347 assert_eq!("Po", langinfo(&m, ffi::ABDAY_2)); 348 assert_eq!("cs_CZ", n.name(ffi::LC_CTYPE)); 349 assert_eq!("en_GB", n.name(ffi::LC_NUMERIC)); 350 assert_eq!("de_DE", n.name(ffi::LC_TIME)); 351 } 352 } 353 } 354 } 355 356 #[test] locale_with_convert()357 fn locale_with_convert() { 358 if let Ok(lf) = LibCLocaleFactory::new("cs_CZ") { 359 // only test if the host has cs_CZ (non-unicode) locale (travis boxen don't) 360 assert_eq!("ISO-8859-2", lf.langinfo(langinfo::CODESET)); 361 assert_eq!("Út", lf.langinfo(langinfo::ABDAY_3)); 362 } 363 } 364 } 365