locale.py 76 KB


  1. """Locale support module.
  2. The module provides low-level access to the C lib's locale APIs and adds high
  3. level number formatting APIs as well as a locale aliasing engine to complement
  4. these.
  5. The aliasing engine includes support for many commonly used locale names and
  6. maps them to values suitable for passing to the C lib's setlocale() function. It
  7. also includes default encodings for all supported locale names.
  8. """
  9. import sys
  10. import encodings
  11. import encodings.aliases
  12. import re
  13. import _collections_abc
  14. from builtins import str as _builtin_str
  15. import functools
  16. # Try importing the _locale module.
  17. #
  18. # If this fails, fall back on a basic 'C' locale emulation.
  19. # Yuck: LC_MESSAGES is non-standard: can't tell whether it exists before
  20. # trying the import. So __all__ is also fiddled at the end of the file.
# Names exported by "from locale import *".
__all__ = ["getlocale", "getdefaultlocale", "getpreferredencoding", "Error",
           "setlocale", "resetlocale", "localeconv", "strcoll", "strxfrm",
           "str", "atof", "atoi", "format", "format_string", "currency",
           "normalize", "LC_CTYPE", "LC_COLLATE", "LC_TIME", "LC_MONETARY",
           "LC_NUMERIC", "LC_ALL", "CHAR_MAX"]
  26. def _strcoll(a,b):
  27. """ strcoll(string,string) -> int.
  28. Compares two strings according to the locale.
  29. """
  30. return (a > b) - (a < b)
  31. def _strxfrm(s):
  32. """ strxfrm(string) -> string.
  33. Returns a string that behaves for cmp locale-aware.
  34. """
  35. return s
try:
    # Prefer the C implementation of the locale primitives.
    from _locale import *
except ImportError:

    # Locale emulation: _locale could not be imported, so define just
    # enough names here to behave like a fixed 'C' locale.

    CHAR_MAX = 127
    LC_ALL = 6
    LC_COLLATE = 3
    LC_CTYPE = 0
    LC_MESSAGES = 5
    LC_MONETARY = 4
    LC_NUMERIC = 1
    LC_TIME = 2
    Error = ValueError

    def localeconv():
        """ localeconv() -> dict.
            Returns numeric and monetary locale-specific parameters.

            This emulation always returns the same 'C' locale values.
            Numeric fields are set to 127, the CHAR_MAX value defined
            above.  A fresh dict is built on every call.
        """
        # 'C' locale default values
        return {'grouping': [127],
                'currency_symbol': '',
                'n_sign_posn': 127,
                'p_cs_precedes': 127,
                'n_cs_precedes': 127,
                'mon_grouping': [],
                'n_sep_by_space': 127,
                'decimal_point': '.',
                'negative_sign': '',
                'positive_sign': '',
                'p_sep_by_space': 127,
                'int_curr_symbol': '',
                'p_sign_posn': 127,
                'thousands_sep': '',
                'mon_thousands_sep': '',
                'frac_digits': 127,
                'mon_decimal_point': '',
                'int_frac_digits': 127}

    def setlocale(category, value=None):
        """ setlocale(integer,string=None) -> string.
            Activates/queries locale processing.

            The emulation accepts only None, '' or 'C' as value and
            always returns 'C'; anything else raises Error.  The
            category argument is not inspected.
        """
        if value not in (None, '', 'C'):
            raise Error('_locale emulation only supports "C" locale')
        return 'C'
  79. # These may or may not exist in _locale, so be sure to set them.
  80. if 'strxfrm' not in globals():
  81. strxfrm = _strxfrm
  82. if 'strcoll' not in globals():
  83. strcoll = _strcoll
  84. _localeconv = localeconv
  85. # With this dict, you can override some items of localeconv's return value.
  86. # This is useful for testing purposes.
  87. _override_localeconv = {}
  88. @functools.wraps(_localeconv)
  89. def localeconv():
  90. d = _localeconv()
  91. if _override_localeconv:
  92. d.update(_override_localeconv)
  93. return d
  94. ### Number formatting APIs
  95. # Author: Martin von Loewis
  96. # improved by Georg Brandl
  97. # Iterate over grouping intervals
  98. def _grouping_intervals(grouping):
  99. last_interval = None
  100. for interval in grouping:
  101. # if grouping is -1, we are done
  102. if interval == CHAR_MAX:
  103. return
  104. # 0: re-use last group ad infinitum
  105. if interval == 0:
  106. if last_interval is None:
  107. raise ValueError("invalid grouping")
  108. while True:
  109. yield last_interval
  110. yield interval
  111. last_interval = interval
  112. #perform the grouping from right to left
  113. def _group(s, monetary=False):
  114. conv = localeconv()
  115. thousands_sep = conv[monetary and 'mon_thousands_sep' or 'thousands_sep']
  116. grouping = conv[monetary and 'mon_grouping' or 'grouping']
  117. if not grouping:
  118. return (s, 0)
  119. if s[-1] == ' ':
  120. stripped = s.rstrip()
  121. right_spaces = s[len(stripped):]
  122. s = stripped
  123. else:
  124. right_spaces = ''
  125. left_spaces = ''
  126. groups = []
  127. for interval in _grouping_intervals(grouping):
  128. if not s or s[-1] not in "0123456789":
  129. # only non-digit characters remain (sign, spaces)
  130. left_spaces = s
  131. s = ''
  132. break
  133. groups.append(s[-interval:])
  134. s = s[:-interval]
  135. if s:
  136. groups.append(s)
  137. groups.reverse()
  138. return (
  139. left_spaces + thousands_sep.join(groups) + right_spaces,
  140. len(thousands_sep) * (len(groups) - 1)
  141. )
  142. # Strip a given amount of excess padding from the given string
  143. def _strip_padding(s, amount):
  144. lpos = 0
  145. while amount and s[lpos] == ' ':
  146. lpos += 1
  147. amount -= 1
  148. rpos = len(s) - 1
  149. while amount and s[rpos] == ' ':
  150. rpos -= 1
  151. amount -= 1
  152. return s[lpos:rpos+1]
# Matches a single %-style conversion specifier: '%', an optional
# parenthesised mapping key (group 'key'), optional flag/width/precision/
# length characters (group 'modifiers') and the conversion character,
# which may itself be '%' for a literal percent sign.
_percent_re = re.compile(r'%(?:\((?P<key>.*?)\))?'
                         r'(?P<modifiers>[-#0-9 +*.hlL]*?)[eEfFgGdiouxXcrs%]')
  155. def _format(percent, value, grouping=False, monetary=False, *additional):
  156. if additional:
  157. formatted = percent % ((value,) + additional)
  158. else:
  159. formatted = percent % value
  160. # floats and decimal ints need special action!
  161. if percent[-1] in 'eEfFgG':
  162. seps = 0
  163. parts = formatted.split('.')
  164. if grouping:
  165. parts[0], seps = _group(parts[0], monetary=monetary)
  166. decimal_point = localeconv()[monetary and 'mon_decimal_point'
  167. or 'decimal_point']
  168. formatted = decimal_point.join(parts)
  169. if seps:
  170. formatted = _strip_padding(formatted, seps)
  171. elif percent[-1] in 'diu':
  172. seps = 0
  173. if grouping:
  174. formatted, seps = _group(formatted, monetary=monetary)
  175. if seps:
  176. formatted = _strip_padding(formatted, seps)
  177. return formatted
  178. def format_string(f, val, grouping=False, monetary=False):
  179. """Formats a string in the same way that the % formatting would use,
  180. but takes the current locale into account.
  181. Grouping is applied if the third parameter is true.
  182. Conversion uses monetary thousands separator and grouping strings if
  183. forth parameter monetary is true."""
  184. percents = list(_percent_re.finditer(f))
  185. new_f = _percent_re.sub('%s', f)
  186. if isinstance(val, _collections_abc.Mapping):
  187. new_val = []
  188. for perc in percents:
  189. if perc.group()[-1]=='%':
  190. new_val.append('%')
  191. else:
  192. new_val.append(_format(perc.group(), val, grouping, monetary))
  193. else:
  194. if not isinstance(val, tuple):
  195. val = (val,)
  196. new_val = []
  197. i = 0
  198. for perc in percents:
  199. if perc.group()[-1]=='%':
  200. new_val.append('%')
  201. else:
  202. starcount = perc.group('modifiers').count('*')
  203. new_val.append(_format(perc.group(),
  204. val[i],
  205. grouping,
  206. monetary,
  207. *val[i+1:i+1+starcount]))
  208. i += (1 + starcount)
  209. val = tuple(new_val)
  210. return new_f % val
  211. def format(percent, value, grouping=False, monetary=False, *additional):
  212. """Deprecated, use format_string instead."""
  213. import warnings
  214. warnings.warn(
  215. "This method will be removed in a future version of Python. "
  216. "Use 'locale.format_string()' instead.",
  217. DeprecationWarning, stacklevel=2
  218. )
  219. match = _percent_re.match(percent)
  220. if not match or len(match.group())!= len(percent):
  221. raise ValueError(("format() must be given exactly one %%char "
  222. "format specifier, %s not valid") % repr(percent))
  223. return _format(percent, value, grouping, monetary, *additional)
  224. def currency(val, symbol=True, grouping=False, international=False):
  225. """Formats val according to the currency settings
  226. in the current locale."""
  227. conv = localeconv()
  228. # check for illegal values
  229. digits = conv[international and 'int_frac_digits' or 'frac_digits']
  230. if digits == 127:
  231. raise ValueError("Currency formatting is not possible using "
  232. "the 'C' locale.")
  233. s = _format('%%.%if' % digits, abs(val), grouping, monetary=True)
  234. # '<' and '>' are markers if the sign must be inserted between symbol and value
  235. s = '<' + s + '>'
  236. if symbol:
  237. smb = conv[international and 'int_curr_symbol' or 'currency_symbol']
  238. precedes = conv[val<0 and 'n_cs_precedes' or 'p_cs_precedes']
  239. separated = conv[val<0 and 'n_sep_by_space' or 'p_sep_by_space']
  240. if precedes:
  241. s = smb + (separated and ' ' or '') + s
  242. else:
  243. s = s + (separated and ' ' or '') + smb
  244. sign_pos = conv[val<0 and 'n_sign_posn' or 'p_sign_posn']
  245. sign = conv[val<0 and 'negative_sign' or 'positive_sign']
  246. if sign_pos == 0:
  247. s = '(' + s + ')'
  248. elif sign_pos == 1:
  249. s = sign + s
  250. elif sign_pos == 2:
  251. s = s + sign
  252. elif sign_pos == 3:
  253. s = s.replace('<', sign)
  254. elif sign_pos == 4:
  255. s = s.replace('>', sign)
  256. else:
  257. # the default if nothing specified;
  258. # this should be the most fitting sign position
  259. s = sign + s
  260. return s.replace('<', '').replace('>', '')
  261. def str(val):
  262. """Convert float to string, taking the locale into account."""
  263. return _format("%.12g", val)
  264. def delocalize(string):
  265. "Parses a string as a normalized number according to the locale settings."
  266. conv = localeconv()
  267. #First, get rid of the grouping
  268. ts = conv['thousands_sep']
  269. if ts:
  270. string = string.replace(ts, '')
  271. #next, replace the decimal point with a dot
  272. dd = conv['decimal_point']
  273. if dd:
  274. string = string.replace(dd, '.')
  275. return string
  276. def atof(string, func=float):
  277. "Parses a string as a float according to the locale settings."
  278. return func(delocalize(string))
  279. def atoi(string):
  280. "Converts a string to an integer according to the locale settings."
  281. return int(delocalize(string))
  282. def _test():
  283. setlocale(LC_ALL, "")
  284. #do grouping
  285. s1 = format_string("%d", 123456789,1)
  286. print(s1, "is", atoi(s1))
  287. #standard formatting
  288. s1 = str(3.14)
  289. print(s1, "is", atof(s1))
  290. ### Locale name aliasing engine
  291. # Author: Marc-Andre Lemburg, mal@lemburg.com
  292. # Various tweaks by Fredrik Lundh <fredrik@pythonware.com>
# Store away the low-level version of setlocale: the name is rebound
# below to a wrapper that also accepts (language code, encoding)
# iterables, and that wrapper calls this one.
_setlocale = setlocale
  296. def _replace_encoding(code, encoding):
  297. if '.' in code:
  298. langname = code[:code.index('.')]
  299. else:
  300. langname = code
  301. # Convert the encoding to a C lib compatible encoding string
  302. norm_encoding = encodings.normalize_encoding(encoding)
  303. #print('norm encoding: %r' % norm_encoding)
  304. norm_encoding = encodings.aliases.aliases.get(norm_encoding.lower(),
  305. norm_encoding)
  306. #print('aliased encoding: %r' % norm_encoding)
  307. encoding = norm_encoding
  308. norm_encoding = norm_encoding.lower()
  309. if norm_encoding in locale_encoding_alias:
  310. encoding = locale_encoding_alias[norm_encoding]
  311. else:
  312. norm_encoding = norm_encoding.replace('_', '')
  313. norm_encoding = norm_encoding.replace('-', '')
  314. if norm_encoding in locale_encoding_alias:
  315. encoding = locale_encoding_alias[norm_encoding]
  316. #print('found encoding %r' % encoding)
  317. return langname + '.' + encoding
  318. def _append_modifier(code, modifier):
  319. if modifier == 'euro':
  320. if '.' not in code:
  321. return code + '.ISO8859-15'
  322. _, _, encoding = code.partition('.')
  323. if encoding in ('ISO8859-15', 'UTF-8'):
  324. return code
  325. if encoding == 'ISO8859-1':
  326. return _replace_encoding(code, 'ISO8859-15')
  327. return code + '@' + modifier
  328. def normalize(localename):
  329. """ Returns a normalized locale code for the given locale
  330. name.
  331. The returned locale code is formatted for use with
  332. setlocale().
  333. If normalization fails, the original name is returned
  334. unchanged.
  335. If the given encoding is not known, the function defaults to
  336. the default encoding for the locale code just like setlocale()
  337. does.
  338. """
  339. # Normalize the locale name and extract the encoding and modifier
  340. code = localename.lower()
  341. if ':' in code:
  342. # ':' is sometimes used as encoding delimiter.
  343. code = code.replace(':', '.')
  344. if '@' in code:
  345. code, modifier = code.split('@', 1)
  346. else:
  347. modifier = ''
  348. if '.' in code:
  349. langname, encoding = code.split('.')[:2]
  350. else:
  351. langname = code
  352. encoding = ''
  353. # First lookup: fullname (possibly with encoding and modifier)
  354. lang_enc = langname
  355. if encoding:
  356. norm_encoding = encoding.replace('-', '')
  357. norm_encoding = norm_encoding.replace('_', '')
  358. lang_enc += '.' + norm_encoding
  359. lookup_name = lang_enc
  360. if modifier:
  361. lookup_name += '@' + modifier
  362. code = locale_alias.get(lookup_name, None)
  363. if code is not None:
  364. return code
  365. #print('first lookup failed')
  366. if modifier:
  367. # Second try: fullname without modifier (possibly with encoding)
  368. code = locale_alias.get(lang_enc, None)
  369. if code is not None:
  370. #print('lookup without modifier succeeded')
  371. if '@' not in code:
  372. return _append_modifier(code, modifier)
  373. if code.split('@', 1)[1].lower() == modifier:
  374. return code
  375. #print('second lookup failed')
  376. if encoding:
  377. # Third try: langname (without encoding, possibly with modifier)
  378. lookup_name = langname
  379. if modifier:
  380. lookup_name += '@' + modifier
  381. code = locale_alias.get(lookup_name, None)
  382. if code is not None:
  383. #print('lookup without encoding succeeded')
  384. if '@' not in code:
  385. return _replace_encoding(code, encoding)
  386. code, modifier = code.split('@', 1)
  387. return _replace_encoding(code, encoding) + '@' + modifier
  388. if modifier:
  389. # Fourth try: langname (without encoding and modifier)
  390. code = locale_alias.get(langname, None)
  391. if code is not None:
  392. #print('lookup without modifier and encoding succeeded')
  393. if '@' not in code:
  394. code = _replace_encoding(code, encoding)
  395. return _append_modifier(code, modifier)
  396. code, defmod = code.split('@', 1)
  397. if defmod.lower() == modifier:
  398. return _replace_encoding(code, encoding) + '@' + defmod
  399. return localename
  400. def _parse_localename(localename):
  401. """ Parses the locale code for localename and returns the
  402. result as tuple (language code, encoding).
  403. The localename is normalized and passed through the locale
  404. alias engine. A ValueError is raised in case the locale name
  405. cannot be parsed.
  406. The language code corresponds to RFC 1766. code and encoding
  407. can be None in case the values cannot be determined or are
  408. unknown to this implementation.
  409. """
  410. code = normalize(localename)
  411. if '@' in code:
  412. # Deal with locale modifiers
  413. code, modifier = code.split('@', 1)
  414. if modifier == 'euro' and '.' not in code:
  415. # Assume Latin-9 for @euro locales. This is bogus,
  416. # since some systems may use other encodings for these
  417. # locales. Also, we ignore other modifiers.
  418. return code, 'iso-8859-15'
  419. if '.' in code:
  420. return tuple(code.split('.')[:2])
  421. elif code == 'C':
  422. return None, None
  423. elif code == 'UTF-8':
  424. # On macOS "LC_CTYPE=UTF-8" is a valid locale setting
  425. # for getting UTF-8 handling for text.
  426. return None, 'UTF-8'
  427. raise ValueError('unknown locale: %s' % localename)
  428. def _build_localename(localetuple):
  429. """ Builds a locale code from the given tuple (language code,
  430. encoding).
  431. No aliasing or normalizing takes place.
  432. """
  433. try:
  434. language, encoding = localetuple
  435. if language is None:
  436. language = 'C'
  437. if encoding is None:
  438. return language
  439. else:
  440. return language + '.' + encoding
  441. except (TypeError, ValueError):
  442. raise TypeError('Locale must be None, a string, or an iterable of '
  443. 'two strings -- language code, encoding.') from None
  444. def getdefaultlocale(envvars=('LC_ALL', 'LC_CTYPE', 'LANG', 'LANGUAGE')):
  445. """ Tries to determine the default locale settings and returns
  446. them as tuple (language code, encoding).
  447. According to POSIX, a program which has not called
  448. setlocale(LC_ALL, "") runs using the portable 'C' locale.
  449. Calling setlocale(LC_ALL, "") lets it use the default locale as
  450. defined by the LANG variable. Since we don't want to interfere
  451. with the current locale setting we thus emulate the behavior
  452. in the way described above.
  453. To maintain compatibility with other platforms, not only the
  454. LANG variable is tested, but a list of variables given as
  455. envvars parameter. The first found to be defined will be
  456. used. envvars defaults to the search path used in GNU gettext;
  457. it must always contain the variable name 'LANG'.
  458. Except for the code 'C', the language code corresponds to RFC
  459. 1766. code and encoding can be None in case the values cannot
  460. be determined.
  461. """
  462. try:
  463. # check if it's supported by the _locale module
  464. import _locale
  465. code, encoding = _locale._getdefaultlocale()
  466. except (ImportError, AttributeError):
  467. pass
  468. else:
  469. # make sure the code/encoding values are valid
  470. if sys.platform == "win32" and code and code[:2] == "0x":
  471. # map windows language identifier to language name
  472. code = windows_locale.get(int(code, 0))
  473. # ...add other platform-specific processing here, if
  474. # necessary...
  475. return code, encoding
  476. # fall back on POSIX behaviour
  477. import os
  478. lookup = os.environ.get
  479. for variable in envvars:
  480. localename = lookup(variable,None)
  481. if localename:
  482. if variable == 'LANGUAGE':
  483. localename = localename.split(':')[0]
  484. break
  485. else:
  486. localename = 'C'
  487. return _parse_localename(localename)
  488. def getlocale(category=LC_CTYPE):
  489. """ Returns the current setting for the given locale category as
  490. tuple (language code, encoding).
  491. category may be one of the LC_* value except LC_ALL. It
  492. defaults to LC_CTYPE.
  493. Except for the code 'C', the language code corresponds to RFC
  494. 1766. code and encoding can be None in case the values cannot
  495. be determined.
  496. """
  497. localename = _setlocale(category)
  498. if category == LC_ALL and ';' in localename:
  499. raise TypeError('category LC_ALL is not supported')
  500. return _parse_localename(localename)
  501. def setlocale(category, locale=None):
  502. """ Set the locale for the given category. The locale can be
  503. a string, an iterable of two strings (language code and encoding),
  504. or None.
  505. Iterables are converted to strings using the locale aliasing
  506. engine. Locale strings are passed directly to the C lib.
  507. category may be given as one of the LC_* values.
  508. """
  509. if locale and not isinstance(locale, _builtin_str):
  510. # convert to string
  511. locale = normalize(_build_localename(locale))
  512. return _setlocale(category, locale)
  513. def resetlocale(category=LC_ALL):
  514. """ Sets the locale for category to the default setting.
  515. The default setting is determined by calling
  516. getdefaultlocale(). category defaults to LC_ALL.
  517. """
  518. _setlocale(category, _build_localename(getdefaultlocale()))
  519. if sys.platform.startswith("win"):
  520. # On Win32, this will return the ANSI code page
  521. def getpreferredencoding(do_setlocale = True):
  522. """Return the charset that the user is likely using."""
  523. if sys.flags.utf8_mode:
  524. return 'UTF-8'
  525. import _bootlocale
  526. return _bootlocale.getpreferredencoding(False)
  527. else:
  528. # On Unix, if CODESET is available, use that.
  529. try:
  530. CODESET
  531. except NameError:
  532. if hasattr(sys, 'getandroidapilevel'):
  533. # On Android langinfo.h and CODESET are missing, and UTF-8 is
  534. # always used in mbstowcs() and wcstombs().
  535. def getpreferredencoding(do_setlocale = True):
  536. return 'UTF-8'
  537. else:
  538. # Fall back to parsing environment variables :-(
  539. def getpreferredencoding(do_setlocale = True):
  540. """Return the charset that the user is likely using,
  541. by looking at environment variables."""
  542. if sys.flags.utf8_mode:
  543. return 'UTF-8'
  544. res = getdefaultlocale()[1]
  545. if res is None:
  546. # LANG not set, default conservatively to ASCII
  547. res = 'ascii'
  548. return res
  549. else:
  550. def getpreferredencoding(do_setlocale = True):
  551. """Return the charset that the user is likely using,
  552. according to the system configuration."""
  553. if sys.flags.utf8_mode:
  554. return 'UTF-8'
  555. import _bootlocale
  556. if do_setlocale:
  557. oldloc = setlocale(LC_CTYPE)
  558. try:
  559. setlocale(LC_CTYPE, "")
  560. except Error:
  561. pass
  562. result = _bootlocale.getpreferredencoding(False)
  563. if do_setlocale:
  564. setlocale(LC_CTYPE, oldloc)
  565. return result
  566. ### Database
  567. #
  568. # The following data was extracted from the locale.alias file which
  569. # comes with X11 and then hand edited removing the explicit encoding
  570. # definitions and adding some more aliases. The file is usually
  571. # available as /usr/lib/X11/locale/locale.alias.
  572. #
  573. #
  574. # The locale_encoding_alias table maps lowercase encoding alias names
  575. # to C locale encoding names (case-sensitive). Note that normalize()
  576. # first looks up the encoding in the encodings.aliases dictionary and
  577. # then applies this mapping to find the correct C lib name for the
  578. # encoding.
  579. #
  580. locale_encoding_alias = {
  581. # Mappings for non-standard encoding names used in locale names
  582. '437': 'C',
  583. 'c': 'C',
  584. 'en': 'ISO8859-1',
  585. 'jis': 'JIS7',
  586. 'jis7': 'JIS7',
  587. 'ajec': 'eucJP',
  588. 'koi8c': 'KOI8-C',
  589. 'microsoftcp1251': 'CP1251',
  590. 'microsoftcp1255': 'CP1255',
  591. 'microsoftcp1256': 'CP1256',
  592. '88591': 'ISO8859-1',
  593. '88592': 'ISO8859-2',
  594. '88595': 'ISO8859-5',
  595. '885915': 'ISO8859-15',
  596. # Mappings from Python codec names to C lib encoding names
  597. 'ascii': 'ISO8859-1',
  598. 'latin_1': 'ISO8859-1',
  599. 'iso8859_1': 'ISO8859-1',
  600. 'iso8859_10': 'ISO8859-10',
  601. 'iso8859_11': 'ISO8859-11',
  602. 'iso8859_13': 'ISO8859-13',
  603. 'iso8859_14': 'ISO8859-14',
  604. 'iso8859_15': 'ISO8859-15',
  605. 'iso8859_16': 'ISO8859-16',
  606. 'iso8859_2': 'ISO8859-2',
  607. 'iso8859_3': 'ISO8859-3',
  608. 'iso8859_4': 'ISO8859-4',
  609. 'iso8859_5': 'ISO8859-5',
  610. 'iso8859_6': 'ISO8859-6',
  611. 'iso8859_7': 'ISO8859-7',
  612. 'iso8859_8': 'ISO8859-8',
  613. 'iso8859_9': 'ISO8859-9',
  614. 'iso2022_jp': 'JIS7',
  615. 'shift_jis': 'SJIS',
  616. 'tactis': 'TACTIS',
  617. 'euc_jp': 'eucJP',
  618. 'euc_kr': 'eucKR',
  619. 'utf_8': 'UTF-8',
  620. 'koi8_r': 'KOI8-R',
  621. 'koi8_t': 'KOI8-T',
  622. 'koi8_u': 'KOI8-U',
  623. 'kz1048': 'RK1048',
  624. 'cp1251': 'CP1251',
  625. 'cp1255': 'CP1255',
  626. 'cp1256': 'CP1256',
  627. # XXX This list is still incomplete. If you know more
  628. # mappings, please file a bug report. Thanks.
  629. }
  630. for k, v in sorted(locale_encoding_alias.items()):
  631. k = k.replace('_', '')
  632. locale_encoding_alias.setdefault(k, v)
  633. #
  634. # The locale_alias table maps lowercase alias names to C locale names
  635. # (case-sensitive). Encodings are always separated from the locale
  636. # name using a dot ('.'); they should only be given in case the
  637. # language name is needed to interpret the given encoding alias
  638. # correctly (CJK codes often have this need).
  639. #
  640. # Note that the normalize() function which uses this table
  641. # removes '_' and '-' characters from the encoding part of the
  642. # locale name before doing the lookup. This saves a lot of
  643. # space in the table.
  644. #
  645. # MAL 2004-12-10:
  646. # Updated alias mapping to most recent locale.alias file
  647. # from X.org distribution using makelocalealias.py.
  648. #
  649. # These are the differences compared to the old mapping (Python 2.4
  650. # and older):
  651. #
  652. # updated 'bg' -> 'bg_BG.ISO8859-5' to 'bg_BG.CP1251'
  653. # updated 'bg_bg' -> 'bg_BG.ISO8859-5' to 'bg_BG.CP1251'
  654. # updated 'bulgarian' -> 'bg_BG.ISO8859-5' to 'bg_BG.CP1251'
  655. # updated 'cz' -> 'cz_CZ.ISO8859-2' to 'cs_CZ.ISO8859-2'
  656. # updated 'cz_cz' -> 'cz_CZ.ISO8859-2' to 'cs_CZ.ISO8859-2'
  657. # updated 'czech' -> 'cs_CS.ISO8859-2' to 'cs_CZ.ISO8859-2'
  658. # updated 'dutch' -> 'nl_BE.ISO8859-1' to 'nl_NL.ISO8859-1'
  659. # updated 'et' -> 'et_EE.ISO8859-4' to 'et_EE.ISO8859-15'
  660. # updated 'et_ee' -> 'et_EE.ISO8859-4' to 'et_EE.ISO8859-15'
  661. # updated 'fi' -> 'fi_FI.ISO8859-1' to 'fi_FI.ISO8859-15'
  662. # updated 'fi_fi' -> 'fi_FI.ISO8859-1' to 'fi_FI.ISO8859-15'
  663. # updated 'iw' -> 'iw_IL.ISO8859-8' to 'he_IL.ISO8859-8'
  664. # updated 'iw_il' -> 'iw_IL.ISO8859-8' to 'he_IL.ISO8859-8'
  665. # updated 'japanese' -> 'ja_JP.SJIS' to 'ja_JP.eucJP'
  666. # updated 'lt' -> 'lt_LT.ISO8859-4' to 'lt_LT.ISO8859-13'
  667. # updated 'lv' -> 'lv_LV.ISO8859-4' to 'lv_LV.ISO8859-13'
  668. # updated 'sl' -> 'sl_CS.ISO8859-2' to 'sl_SI.ISO8859-2'
  669. # updated 'slovene' -> 'sl_CS.ISO8859-2' to 'sl_SI.ISO8859-2'
  670. # updated 'th_th' -> 'th_TH.TACTIS' to 'th_TH.ISO8859-11'
  671. # updated 'zh_cn' -> 'zh_CN.eucCN' to 'zh_CN.gb2312'
  672. # updated 'zh_cn.big5' -> 'zh_TW.eucTW' to 'zh_TW.big5'
  673. # updated 'zh_tw' -> 'zh_TW.eucTW' to 'zh_TW.big5'
  674. #
  675. # MAL 2008-05-30:
  676. # Updated alias mapping to most recent locale.alias file
  677. # from X.org distribution using makelocalealias.py.
  678. #
  679. # These are the differences compared to the old mapping (Python 2.5
  680. # and older):
  681. #
  682. # updated 'cs_cs.iso88592' -> 'cs_CZ.ISO8859-2' to 'cs_CS.ISO8859-2'
  683. # updated 'serbocroatian' -> 'sh_YU.ISO8859-2' to 'sr_CS.ISO8859-2'
  684. # updated 'sh' -> 'sh_YU.ISO8859-2' to 'sr_CS.ISO8859-2'
  685. # updated 'sh_hr.iso88592' -> 'sh_HR.ISO8859-2' to 'hr_HR.ISO8859-2'
  686. # updated 'sh_sp' -> 'sh_YU.ISO8859-2' to 'sr_CS.ISO8859-2'
  687. # updated 'sh_yu' -> 'sh_YU.ISO8859-2' to 'sr_CS.ISO8859-2'
  688. # updated 'sp' -> 'sp_YU.ISO8859-5' to 'sr_CS.ISO8859-5'
  689. # updated 'sp_yu' -> 'sp_YU.ISO8859-5' to 'sr_CS.ISO8859-5'
  690. # updated 'sr' -> 'sr_YU.ISO8859-5' to 'sr_CS.ISO8859-5'
  691. # updated 'sr@cyrillic' -> 'sr_YU.ISO8859-5' to 'sr_CS.ISO8859-5'
  692. # updated 'sr_sp' -> 'sr_SP.ISO8859-2' to 'sr_CS.ISO8859-2'
  693. # updated 'sr_yu' -> 'sr_YU.ISO8859-5' to 'sr_CS.ISO8859-5'
  694. # updated 'sr_yu.cp1251@cyrillic' -> 'sr_YU.CP1251' to 'sr_CS.CP1251'
  695. # updated 'sr_yu.iso88592' -> 'sr_YU.ISO8859-2' to 'sr_CS.ISO8859-2'
  696. # updated 'sr_yu.iso88595' -> 'sr_YU.ISO8859-5' to 'sr_CS.ISO8859-5'
  697. # updated 'sr_yu.iso88595@cyrillic' -> 'sr_YU.ISO8859-5' to 'sr_CS.ISO8859-5'
  698. # updated 'sr_yu.microsoftcp1251@cyrillic' -> 'sr_YU.CP1251' to 'sr_CS.CP1251'
  699. # updated 'sr_yu.utf8@cyrillic' -> 'sr_YU.UTF-8' to 'sr_CS.UTF-8'
  700. # updated 'sr_yu@cyrillic' -> 'sr_YU.ISO8859-5' to 'sr_CS.ISO8859-5'
  701. #
  702. # AP 2010-04-12:
  703. # Updated alias mapping to most recent locale.alias file
  704. # from X.org distribution using makelocalealias.py.
  705. #
  706. # These are the differences compared to the old mapping (Python 2.6.5
  707. # and older):
  708. #
  709. # updated 'ru' -> 'ru_RU.ISO8859-5' to 'ru_RU.UTF-8'
  710. # updated 'ru_ru' -> 'ru_RU.ISO8859-5' to 'ru_RU.UTF-8'
  711. # updated 'serbocroatian' -> 'sr_CS.ISO8859-2' to 'sr_RS.UTF-8@latin'
  712. # updated 'sh' -> 'sr_CS.ISO8859-2' to 'sr_RS.UTF-8@latin'
  713. # updated 'sh_yu' -> 'sr_CS.ISO8859-2' to 'sr_RS.UTF-8@latin'
  714. # updated 'sr' -> 'sr_CS.ISO8859-5' to 'sr_RS.UTF-8'
  715. # updated 'sr@cyrillic' -> 'sr_CS.ISO8859-5' to 'sr_RS.UTF-8'
  716. # updated 'sr@latn' -> 'sr_CS.ISO8859-2' to 'sr_RS.UTF-8@latin'
  717. # updated 'sr_cs.utf8@latn' -> 'sr_CS.UTF-8' to 'sr_RS.UTF-8@latin'
  718. # updated 'sr_cs@latn' -> 'sr_CS.ISO8859-2' to 'sr_RS.UTF-8@latin'
  719. # updated 'sr_yu' -> 'sr_CS.ISO8859-5' to 'sr_RS.UTF-8@latin'
  720. # updated 'sr_yu.utf8@cyrillic' -> 'sr_CS.UTF-8' to 'sr_RS.UTF-8'
  721. # updated 'sr_yu@cyrillic' -> 'sr_CS.ISO8859-5' to 'sr_RS.UTF-8'
  722. #
  723. # SS 2013-12-20:
  724. # Updated alias mapping to most recent locale.alias file
  725. # from X.org distribution using makelocalealias.py.
  726. #
  727. # These are the differences compared to the old mapping (Python 3.3.3
  728. # and older):
  729. #
  730. # updated 'a3' -> 'a3_AZ.KOI8-C' to 'az_AZ.KOI8-C'
  731. # updated 'a3_az' -> 'a3_AZ.KOI8-C' to 'az_AZ.KOI8-C'
  732. # updated 'a3_az.koi8c' -> 'a3_AZ.KOI8-C' to 'az_AZ.KOI8-C'
  733. # updated 'cs_cs.iso88592' -> 'cs_CS.ISO8859-2' to 'cs_CZ.ISO8859-2'
  734. # updated 'hebrew' -> 'iw_IL.ISO8859-8' to 'he_IL.ISO8859-8'
  735. # updated 'hebrew.iso88598' -> 'iw_IL.ISO8859-8' to 'he_IL.ISO8859-8'
  736. # updated 'sd' -> 'sd_IN@devanagari.UTF-8' to 'sd_IN.UTF-8'
  737. # updated 'sr@latn' -> 'sr_RS.UTF-8@latin' to 'sr_CS.UTF-8@latin'
  738. # updated 'sr_cs' -> 'sr_RS.UTF-8' to 'sr_CS.UTF-8'
  739. # updated 'sr_cs.utf8@latn' -> 'sr_RS.UTF-8@latin' to 'sr_CS.UTF-8@latin'
  740. # updated 'sr_cs@latn' -> 'sr_RS.UTF-8@latin' to 'sr_CS.UTF-8@latin'
  741. #
  742. # SS 2014-10-01:
  743. # Updated alias mapping with glibc 2.19 supported locales.
  744. #
  745. # SS 2018-05-05:
  746. # Updated alias mapping with glibc 2.27 supported locales.
  747. #
  748. # These are the differences compared to the old mapping (Python 3.6.5
  749. # and older):
  750. #
  751. # updated 'ca_es@valencia' -> 'ca_ES.ISO8859-15@valencia' to 'ca_ES.UTF-8@valencia'
  752. # updated 'kk_kz' -> 'kk_KZ.RK1048' to 'kk_KZ.ptcp154'
  753. # updated 'russian' -> 'ru_RU.ISO8859-5' to 'ru_RU.KOI8-R'
  754. locale_alias = {
  755. 'a3': 'az_AZ.KOI8-C',
  756. 'a3_az': 'az_AZ.KOI8-C',
  757. 'a3_az.koic': 'az_AZ.KOI8-C',
  758. 'aa_dj': 'aa_DJ.ISO8859-1',
  759. 'aa_er': 'aa_ER.UTF-8',
  760. 'aa_et': 'aa_ET.UTF-8',
  761. 'af': 'af_ZA.ISO8859-1',
  762. 'af_za': 'af_ZA.ISO8859-1',
  763. 'agr_pe': 'agr_PE.UTF-8',
  764. 'ak_gh': 'ak_GH.UTF-8',
  765. 'am': 'am_ET.UTF-8',
  766. 'am_et': 'am_ET.UTF-8',
  767. 'american': 'en_US.ISO8859-1',
  768. 'an_es': 'an_ES.ISO8859-15',
  769. 'anp_in': 'anp_IN.UTF-8',
  770. 'ar': 'ar_AA.ISO8859-6',
  771. 'ar_aa': 'ar_AA.ISO8859-6',
  772. 'ar_ae': 'ar_AE.ISO8859-6',
  773. 'ar_bh': 'ar_BH.ISO8859-6',
  774. 'ar_dz': 'ar_DZ.ISO8859-6',
  775. 'ar_eg': 'ar_EG.ISO8859-6',
  776. 'ar_in': 'ar_IN.UTF-8',
  777. 'ar_iq': 'ar_IQ.ISO8859-6',
  778. 'ar_jo': 'ar_JO.ISO8859-6',
  779. 'ar_kw': 'ar_KW.ISO8859-6',
  780. 'ar_lb': 'ar_LB.ISO8859-6',
  781. 'ar_ly': 'ar_LY.ISO8859-6',
  782. 'ar_ma': 'ar_MA.ISO8859-6',
  783. 'ar_om': 'ar_OM.ISO8859-6',
  784. 'ar_qa': 'ar_QA.ISO8859-6',
  785. 'ar_sa': 'ar_SA.ISO8859-6',
  786. 'ar_sd': 'ar_SD.ISO8859-6',
  787. 'ar_ss': 'ar_SS.UTF-8',
  788. 'ar_sy': 'ar_SY.ISO8859-6',
  789. 'ar_tn': 'ar_TN.ISO8859-6',
  790. 'ar_ye': 'ar_YE.ISO8859-6',
  791. 'arabic': 'ar_AA.ISO8859-6',
  792. 'as': 'as_IN.UTF-8',
  793. 'as_in': 'as_IN.UTF-8',
  794. 'ast_es': 'ast_ES.ISO8859-15',
  795. 'ayc_pe': 'ayc_PE.UTF-8',
  796. 'az': 'az_AZ.ISO8859-9E',
  797. 'az_az': 'az_AZ.ISO8859-9E',
  798. 'az_az.iso88599e': 'az_AZ.ISO8859-9E',
  799. 'az_ir': 'az_IR.UTF-8',
  800. 'be': 'be_BY.CP1251',
  801. 'be@latin': 'be_BY.UTF-8@latin',
  802. 'be_bg.utf8': 'bg_BG.UTF-8',
  803. 'be_by': 'be_BY.CP1251',
  804. 'be_by@latin': 'be_BY.UTF-8@latin',
  805. 'bem_zm': 'bem_ZM.UTF-8',
  806. 'ber_dz': 'ber_DZ.UTF-8',
  807. 'ber_ma': 'ber_MA.UTF-8',
  808. 'bg': 'bg_BG.CP1251',
  809. 'bg_bg': 'bg_BG.CP1251',
  810. 'bhb_in.utf8': 'bhb_IN.UTF-8',
  811. 'bho_in': 'bho_IN.UTF-8',
  812. 'bho_np': 'bho_NP.UTF-8',
  813. 'bi_vu': 'bi_VU.UTF-8',
  814. 'bn_bd': 'bn_BD.UTF-8',
  815. 'bn_in': 'bn_IN.UTF-8',
  816. 'bo_cn': 'bo_CN.UTF-8',
  817. 'bo_in': 'bo_IN.UTF-8',
  818. 'bokmal': 'nb_NO.ISO8859-1',
  819. 'bokm\xe5l': 'nb_NO.ISO8859-1',
  820. 'br': 'br_FR.ISO8859-1',
  821. 'br_fr': 'br_FR.ISO8859-1',
  822. 'brx_in': 'brx_IN.UTF-8',
  823. 'bs': 'bs_BA.ISO8859-2',
  824. 'bs_ba': 'bs_BA.ISO8859-2',
  825. 'bulgarian': 'bg_BG.CP1251',
  826. 'byn_er': 'byn_ER.UTF-8',
  827. 'c': 'C',
  828. 'c-french': 'fr_CA.ISO8859-1',
  829. 'c.ascii': 'C',
  830. 'c.en': 'C',
  831. 'c.iso88591': 'en_US.ISO8859-1',
  832. 'c.utf8': 'en_US.UTF-8',
  833. 'c_c': 'C',
  834. 'c_c.c': 'C',
  835. 'ca': 'ca_ES.ISO8859-1',
  836. 'ca_ad': 'ca_AD.ISO8859-1',
  837. 'ca_es': 'ca_ES.ISO8859-1',
  838. 'ca_es@valencia': 'ca_ES.UTF-8@valencia',
  839. 'ca_fr': 'ca_FR.ISO8859-1',
  840. 'ca_it': 'ca_IT.ISO8859-1',
  841. 'catalan': 'ca_ES.ISO8859-1',
  842. 'ce_ru': 'ce_RU.UTF-8',
  843. 'cextend': 'en_US.ISO8859-1',
  844. 'chinese-s': 'zh_CN.eucCN',
  845. 'chinese-t': 'zh_TW.eucTW',
  846. 'chr_us': 'chr_US.UTF-8',
  847. 'ckb_iq': 'ckb_IQ.UTF-8',
  848. 'cmn_tw': 'cmn_TW.UTF-8',
  849. 'crh_ua': 'crh_UA.UTF-8',
  850. 'croatian': 'hr_HR.ISO8859-2',
  851. 'cs': 'cs_CZ.ISO8859-2',
  852. 'cs_cs': 'cs_CZ.ISO8859-2',
  853. 'cs_cz': 'cs_CZ.ISO8859-2',
  854. 'csb_pl': 'csb_PL.UTF-8',
  855. 'cv_ru': 'cv_RU.UTF-8',
  856. 'cy': 'cy_GB.ISO8859-1',
  857. 'cy_gb': 'cy_GB.ISO8859-1',
  858. 'cz': 'cs_CZ.ISO8859-2',
  859. 'cz_cz': 'cs_CZ.ISO8859-2',
  860. 'czech': 'cs_CZ.ISO8859-2',
  861. 'da': 'da_DK.ISO8859-1',
  862. 'da_dk': 'da_DK.ISO8859-1',
  863. 'danish': 'da_DK.ISO8859-1',
  864. 'dansk': 'da_DK.ISO8859-1',
  865. 'de': 'de_DE.ISO8859-1',
  866. 'de_at': 'de_AT.ISO8859-1',
  867. 'de_be': 'de_BE.ISO8859-1',
  868. 'de_ch': 'de_CH.ISO8859-1',
  869. 'de_de': 'de_DE.ISO8859-1',
  870. 'de_it': 'de_IT.ISO8859-1',
  871. 'de_li.utf8': 'de_LI.UTF-8',
  872. 'de_lu': 'de_LU.ISO8859-1',
  873. 'deutsch': 'de_DE.ISO8859-1',
  874. 'doi_in': 'doi_IN.UTF-8',
  875. 'dutch': 'nl_NL.ISO8859-1',
  876. 'dutch.iso88591': 'nl_BE.ISO8859-1',
  877. 'dv_mv': 'dv_MV.UTF-8',
  878. 'dz_bt': 'dz_BT.UTF-8',
  879. 'ee': 'ee_EE.ISO8859-4',
  880. 'ee_ee': 'ee_EE.ISO8859-4',
  881. 'eesti': 'et_EE.ISO8859-1',
  882. 'el': 'el_GR.ISO8859-7',
  883. 'el_cy': 'el_CY.ISO8859-7',
  884. 'el_gr': 'el_GR.ISO8859-7',
  885. 'el_gr@euro': 'el_GR.ISO8859-15',
  886. 'en': 'en_US.ISO8859-1',
  887. 'en_ag': 'en_AG.UTF-8',
  888. 'en_au': 'en_AU.ISO8859-1',
  889. 'en_be': 'en_BE.ISO8859-1',
  890. 'en_bw': 'en_BW.ISO8859-1',
  891. 'en_ca': 'en_CA.ISO8859-1',
  892. 'en_dk': 'en_DK.ISO8859-1',
  893. 'en_dl.utf8': 'en_DL.UTF-8',
  894. 'en_gb': 'en_GB.ISO8859-1',
  895. 'en_hk': 'en_HK.ISO8859-1',
  896. 'en_ie': 'en_IE.ISO8859-1',
  897. 'en_il': 'en_IL.UTF-8',
  898. 'en_in': 'en_IN.ISO8859-1',
  899. 'en_ng': 'en_NG.UTF-8',
  900. 'en_nz': 'en_NZ.ISO8859-1',
  901. 'en_ph': 'en_PH.ISO8859-1',
  902. 'en_sc.utf8': 'en_SC.UTF-8',
  903. 'en_sg': 'en_SG.ISO8859-1',
  904. 'en_uk': 'en_GB.ISO8859-1',
  905. 'en_us': 'en_US.ISO8859-1',
  906. 'en_us@euro@euro': 'en_US.ISO8859-15',
  907. 'en_za': 'en_ZA.ISO8859-1',
  908. 'en_zm': 'en_ZM.UTF-8',
  909. 'en_zw': 'en_ZW.ISO8859-1',
  910. 'en_zw.utf8': 'en_ZS.UTF-8',
  911. 'eng_gb': 'en_GB.ISO8859-1',
  912. 'english': 'en_EN.ISO8859-1',
  913. 'english.iso88591': 'en_US.ISO8859-1',
  914. 'english_uk': 'en_GB.ISO8859-1',
  915. 'english_united-states': 'en_US.ISO8859-1',
  916. 'english_united-states.437': 'C',
  917. 'english_us': 'en_US.ISO8859-1',
  918. 'eo': 'eo_XX.ISO8859-3',
  919. 'eo.utf8': 'eo.UTF-8',
  920. 'eo_eo': 'eo_EO.ISO8859-3',
  921. 'eo_us.utf8': 'eo_US.UTF-8',
  922. 'eo_xx': 'eo_XX.ISO8859-3',
  923. 'es': 'es_ES.ISO8859-1',
  924. 'es_ar': 'es_AR.ISO8859-1',
  925. 'es_bo': 'es_BO.ISO8859-1',
  926. 'es_cl': 'es_CL.ISO8859-1',
  927. 'es_co': 'es_CO.ISO8859-1',
  928. 'es_cr': 'es_CR.ISO8859-1',
  929. 'es_cu': 'es_CU.UTF-8',
  930. 'es_do': 'es_DO.ISO8859-1',
  931. 'es_ec': 'es_EC.ISO8859-1',
  932. 'es_es': 'es_ES.ISO8859-1',
  933. 'es_gt': 'es_GT.ISO8859-1',
  934. 'es_hn': 'es_HN.ISO8859-1',
  935. 'es_mx': 'es_MX.ISO8859-1',
  936. 'es_ni': 'es_NI.ISO8859-1',
  937. 'es_pa': 'es_PA.ISO8859-1',
  938. 'es_pe': 'es_PE.ISO8859-1',
  939. 'es_pr': 'es_PR.ISO8859-1',
  940. 'es_py': 'es_PY.ISO8859-1',
  941. 'es_sv': 'es_SV.ISO8859-1',
  942. 'es_us': 'es_US.ISO8859-1',
  943. 'es_uy': 'es_UY.ISO8859-1',
  944. 'es_ve': 'es_VE.ISO8859-1',
  945. 'estonian': 'et_EE.ISO8859-1',
  946. 'et': 'et_EE.ISO8859-15',
  947. 'et_ee': 'et_EE.ISO8859-15',
  948. 'eu': 'eu_ES.ISO8859-1',
  949. 'eu_es': 'eu_ES.ISO8859-1',
  950. 'eu_fr': 'eu_FR.ISO8859-1',
  951. 'fa': 'fa_IR.UTF-8',
  952. 'fa_ir': 'fa_IR.UTF-8',
  953. 'fa_ir.isiri3342': 'fa_IR.ISIRI-3342',
  954. 'ff_sn': 'ff_SN.UTF-8',
  955. 'fi': 'fi_FI.ISO8859-15',
  956. 'fi_fi': 'fi_FI.ISO8859-15',
  957. 'fil_ph': 'fil_PH.UTF-8',
  958. 'finnish': 'fi_FI.ISO8859-1',
  959. 'fo': 'fo_FO.ISO8859-1',
  960. 'fo_fo': 'fo_FO.ISO8859-1',
  961. 'fr': 'fr_FR.ISO8859-1',
  962. 'fr_be': 'fr_BE.ISO8859-1',
  963. 'fr_ca': 'fr_CA.ISO8859-1',
  964. 'fr_ch': 'fr_CH.ISO8859-1',
  965. 'fr_fr': 'fr_FR.ISO8859-1',
  966. 'fr_lu': 'fr_LU.ISO8859-1',
  967. 'fran\xe7ais': 'fr_FR.ISO8859-1',
  968. 'fre_fr': 'fr_FR.ISO8859-1',
  969. 'french': 'fr_FR.ISO8859-1',
  970. 'french.iso88591': 'fr_CH.ISO8859-1',
  971. 'french_france': 'fr_FR.ISO8859-1',
  972. 'fur_it': 'fur_IT.UTF-8',
  973. 'fy_de': 'fy_DE.UTF-8',
  974. 'fy_nl': 'fy_NL.UTF-8',
  975. 'ga': 'ga_IE.ISO8859-1',
  976. 'ga_ie': 'ga_IE.ISO8859-1',
  977. 'galego': 'gl_ES.ISO8859-1',
  978. 'galician': 'gl_ES.ISO8859-1',
  979. 'gd': 'gd_GB.ISO8859-1',
  980. 'gd_gb': 'gd_GB.ISO8859-1',
  981. 'ger_de': 'de_DE.ISO8859-1',
  982. 'german': 'de_DE.ISO8859-1',
  983. 'german.iso88591': 'de_CH.ISO8859-1',
  984. 'german_germany': 'de_DE.ISO8859-1',
  985. 'gez_er': 'gez_ER.UTF-8',
  986. 'gez_et': 'gez_ET.UTF-8',
  987. 'gl': 'gl_ES.ISO8859-1',
  988. 'gl_es': 'gl_ES.ISO8859-1',
  989. 'greek': 'el_GR.ISO8859-7',
  990. 'gu_in': 'gu_IN.UTF-8',
  991. 'gv': 'gv_GB.ISO8859-1',
  992. 'gv_gb': 'gv_GB.ISO8859-1',
  993. 'ha_ng': 'ha_NG.UTF-8',
  994. 'hak_tw': 'hak_TW.UTF-8',
  995. 'he': 'he_IL.ISO8859-8',
  996. 'he_il': 'he_IL.ISO8859-8',
  997. 'hebrew': 'he_IL.ISO8859-8',
  998. 'hi': 'hi_IN.ISCII-DEV',
  999. 'hi_in': 'hi_IN.ISCII-DEV',
  1000. 'hi_in.isciidev': 'hi_IN.ISCII-DEV',
  1001. 'hif_fj': 'hif_FJ.UTF-8',
  1002. 'hne': 'hne_IN.UTF-8',
  1003. 'hne_in': 'hne_IN.UTF-8',
  1004. 'hr': 'hr_HR.ISO8859-2',
  1005. 'hr_hr': 'hr_HR.ISO8859-2',
  1006. 'hrvatski': 'hr_HR.ISO8859-2',
  1007. 'hsb_de': 'hsb_DE.ISO8859-2',
  1008. 'ht_ht': 'ht_HT.UTF-8',
  1009. 'hu': 'hu_HU.ISO8859-2',
  1010. 'hu_hu': 'hu_HU.ISO8859-2',
  1011. 'hungarian': 'hu_HU.ISO8859-2',
  1012. 'hy_am': 'hy_AM.UTF-8',
  1013. 'hy_am.armscii8': 'hy_AM.ARMSCII_8',
  1014. 'ia': 'ia.UTF-8',
  1015. 'ia_fr': 'ia_FR.UTF-8',
  1016. 'icelandic': 'is_IS.ISO8859-1',
  1017. 'id': 'id_ID.ISO8859-1',
  1018. 'id_id': 'id_ID.ISO8859-1',
  1019. 'ig_ng': 'ig_NG.UTF-8',
  1020. 'ik_ca': 'ik_CA.UTF-8',
  1021. 'in': 'id_ID.ISO8859-1',
  1022. 'in_id': 'id_ID.ISO8859-1',
  1023. 'is': 'is_IS.ISO8859-1',
  1024. 'is_is': 'is_IS.ISO8859-1',
  1025. 'iso-8859-1': 'en_US.ISO8859-1',
  1026. 'iso-8859-15': 'en_US.ISO8859-15',
  1027. 'iso8859-1': 'en_US.ISO8859-1',
  1028. 'iso8859-15': 'en_US.ISO8859-15',
  1029. 'iso_8859_1': 'en_US.ISO8859-1',
  1030. 'iso_8859_15': 'en_US.ISO8859-15',
  1031. 'it': 'it_IT.ISO8859-1',
  1032. 'it_ch': 'it_CH.ISO8859-1',
  1033. 'it_it': 'it_IT.ISO8859-1',
  1034. 'italian': 'it_IT.ISO8859-1',
  1035. 'iu': 'iu_CA.NUNACOM-8',
  1036. 'iu_ca': 'iu_CA.NUNACOM-8',
  1037. 'iu_ca.nunacom8': 'iu_CA.NUNACOM-8',
  1038. 'iw': 'he_IL.ISO8859-8',
  1039. 'iw_il': 'he_IL.ISO8859-8',
  1040. 'iw_il.utf8': 'iw_IL.UTF-8',
  1041. 'ja': 'ja_JP.eucJP',
  1042. 'ja_jp': 'ja_JP.eucJP',
  1043. 'ja_jp.euc': 'ja_JP.eucJP',
  1044. 'ja_jp.mscode': 'ja_JP.SJIS',
  1045. 'ja_jp.pck': 'ja_JP.SJIS',
  1046. 'japan': 'ja_JP.eucJP',
  1047. 'japanese': 'ja_JP.eucJP',
  1048. 'japanese-euc': 'ja_JP.eucJP',
  1049. 'japanese.euc': 'ja_JP.eucJP',
  1050. 'jp_jp': 'ja_JP.eucJP',
  1051. 'ka': 'ka_GE.GEORGIAN-ACADEMY',
  1052. 'ka_ge': 'ka_GE.GEORGIAN-ACADEMY',
  1053. 'ka_ge.georgianacademy': 'ka_GE.GEORGIAN-ACADEMY',
  1054. 'ka_ge.georgianps': 'ka_GE.GEORGIAN-PS',
  1055. 'ka_ge.georgianrs': 'ka_GE.GEORGIAN-ACADEMY',
  1056. 'kab_dz': 'kab_DZ.UTF-8',
  1057. 'kk_kz': 'kk_KZ.ptcp154',
  1058. 'kl': 'kl_GL.ISO8859-1',
  1059. 'kl_gl': 'kl_GL.ISO8859-1',
  1060. 'km_kh': 'km_KH.UTF-8',
  1061. 'kn': 'kn_IN.UTF-8',
  1062. 'kn_in': 'kn_IN.UTF-8',
  1063. 'ko': 'ko_KR.eucKR',
  1064. 'ko_kr': 'ko_KR.eucKR',
  1065. 'ko_kr.euc': 'ko_KR.eucKR',
  1066. 'kok_in': 'kok_IN.UTF-8',
  1067. 'korean': 'ko_KR.eucKR',
  1068. 'korean.euc': 'ko_KR.eucKR',
  1069. 'ks': 'ks_IN.UTF-8',
  1070. 'ks_in': 'ks_IN.UTF-8',
  1071. 'ks_in@devanagari.utf8': 'ks_IN.UTF-8@devanagari',
  1072. 'ku_tr': 'ku_TR.ISO8859-9',
  1073. 'kw': 'kw_GB.ISO8859-1',
  1074. 'kw_gb': 'kw_GB.ISO8859-1',
  1075. 'ky': 'ky_KG.UTF-8',
  1076. 'ky_kg': 'ky_KG.UTF-8',
  1077. 'lb_lu': 'lb_LU.UTF-8',
  1078. 'lg_ug': 'lg_UG.ISO8859-10',
  1079. 'li_be': 'li_BE.UTF-8',
  1080. 'li_nl': 'li_NL.UTF-8',
  1081. 'lij_it': 'lij_IT.UTF-8',
  1082. 'lithuanian': 'lt_LT.ISO8859-13',
  1083. 'ln_cd': 'ln_CD.UTF-8',
  1084. 'lo': 'lo_LA.MULELAO-1',
  1085. 'lo_la': 'lo_LA.MULELAO-1',
  1086. 'lo_la.cp1133': 'lo_LA.IBM-CP1133',
  1087. 'lo_la.ibmcp1133': 'lo_LA.IBM-CP1133',
  1088. 'lo_la.mulelao1': 'lo_LA.MULELAO-1',
  1089. 'lt': 'lt_LT.ISO8859-13',
  1090. 'lt_lt': 'lt_LT.ISO8859-13',
  1091. 'lv': 'lv_LV.ISO8859-13',
  1092. 'lv_lv': 'lv_LV.ISO8859-13',
  1093. 'lzh_tw': 'lzh_TW.UTF-8',
  1094. 'mag_in': 'mag_IN.UTF-8',
  1095. 'mai': 'mai_IN.UTF-8',
  1096. 'mai_in': 'mai_IN.UTF-8',
  1097. 'mai_np': 'mai_NP.UTF-8',
  1098. 'mfe_mu': 'mfe_MU.UTF-8',
  1099. 'mg_mg': 'mg_MG.ISO8859-15',
  1100. 'mhr_ru': 'mhr_RU.UTF-8',
  1101. 'mi': 'mi_NZ.ISO8859-1',
  1102. 'mi_nz': 'mi_NZ.ISO8859-1',
  1103. 'miq_ni': 'miq_NI.UTF-8',
  1104. 'mjw_in': 'mjw_IN.UTF-8',
  1105. 'mk': 'mk_MK.ISO8859-5',
  1106. 'mk_mk': 'mk_MK.ISO8859-5',
  1107. 'ml': 'ml_IN.UTF-8',
  1108. 'ml_in': 'ml_IN.UTF-8',
  1109. 'mn_mn': 'mn_MN.UTF-8',
  1110. 'mni_in': 'mni_IN.UTF-8',
  1111. 'mr': 'mr_IN.UTF-8',
  1112. 'mr_in': 'mr_IN.UTF-8',
  1113. 'ms': 'ms_MY.ISO8859-1',
  1114. 'ms_my': 'ms_MY.ISO8859-1',
  1115. 'mt': 'mt_MT.ISO8859-3',
  1116. 'mt_mt': 'mt_MT.ISO8859-3',
  1117. 'my_mm': 'my_MM.UTF-8',
  1118. 'nan_tw': 'nan_TW.UTF-8',
  1119. 'nb': 'nb_NO.ISO8859-1',
  1120. 'nb_no': 'nb_NO.ISO8859-1',
  1121. 'nds_de': 'nds_DE.UTF-8',
  1122. 'nds_nl': 'nds_NL.UTF-8',
  1123. 'ne_np': 'ne_NP.UTF-8',
  1124. 'nhn_mx': 'nhn_MX.UTF-8',
  1125. 'niu_nu': 'niu_NU.UTF-8',
  1126. 'niu_nz': 'niu_NZ.UTF-8',
  1127. 'nl': 'nl_NL.ISO8859-1',
  1128. 'nl_aw': 'nl_AW.UTF-8',
  1129. 'nl_be': 'nl_BE.ISO8859-1',
  1130. 'nl_nl': 'nl_NL.ISO8859-1',
  1131. 'nn': 'nn_NO.ISO8859-1',
  1132. 'nn_no': 'nn_NO.ISO8859-1',
  1133. 'no': 'no_NO.ISO8859-1',
  1134. 'no@nynorsk': 'ny_NO.ISO8859-1',
  1135. 'no_no': 'no_NO.ISO8859-1',
  1136. 'no_no.iso88591@bokmal': 'no_NO.ISO8859-1',
  1137. 'no_no.iso88591@nynorsk': 'no_NO.ISO8859-1',
  1138. 'norwegian': 'no_NO.ISO8859-1',
  1139. 'nr': 'nr_ZA.ISO8859-1',
  1140. 'nr_za': 'nr_ZA.ISO8859-1',
  1141. 'nso': 'nso_ZA.ISO8859-15',
  1142. 'nso_za': 'nso_ZA.ISO8859-15',
  1143. 'ny': 'ny_NO.ISO8859-1',
  1144. 'ny_no': 'ny_NO.ISO8859-1',
  1145. 'nynorsk': 'nn_NO.ISO8859-1',
  1146. 'oc': 'oc_FR.ISO8859-1',
  1147. 'oc_fr': 'oc_FR.ISO8859-1',
  1148. 'om_et': 'om_ET.UTF-8',
  1149. 'om_ke': 'om_KE.ISO8859-1',
  1150. 'or': 'or_IN.UTF-8',
  1151. 'or_in': 'or_IN.UTF-8',
  1152. 'os_ru': 'os_RU.UTF-8',
  1153. 'pa': 'pa_IN.UTF-8',
  1154. 'pa_in': 'pa_IN.UTF-8',
  1155. 'pa_pk': 'pa_PK.UTF-8',
  1156. 'pap_an': 'pap_AN.UTF-8',
  1157. 'pap_aw': 'pap_AW.UTF-8',
  1158. 'pap_cw': 'pap_CW.UTF-8',
  1159. 'pd': 'pd_US.ISO8859-1',
  1160. 'pd_de': 'pd_DE.ISO8859-1',
  1161. 'pd_us': 'pd_US.ISO8859-1',
  1162. 'ph': 'ph_PH.ISO8859-1',
  1163. 'ph_ph': 'ph_PH.ISO8859-1',
  1164. 'pl': 'pl_PL.ISO8859-2',
  1165. 'pl_pl': 'pl_PL.ISO8859-2',
  1166. 'polish': 'pl_PL.ISO8859-2',
  1167. 'portuguese': 'pt_PT.ISO8859-1',
  1168. 'portuguese_brazil': 'pt_BR.ISO8859-1',
  1169. 'posix': 'C',
  1170. 'posix-utf2': 'C',
  1171. 'pp': 'pp_AN.ISO8859-1',
  1172. 'pp_an': 'pp_AN.ISO8859-1',
  1173. 'ps_af': 'ps_AF.UTF-8',
  1174. 'pt': 'pt_PT.ISO8859-1',
  1175. 'pt_br': 'pt_BR.ISO8859-1',
  1176. 'pt_pt': 'pt_PT.ISO8859-1',
  1177. 'quz_pe': 'quz_PE.UTF-8',
  1178. 'raj_in': 'raj_IN.UTF-8',
  1179. 'ro': 'ro_RO.ISO8859-2',
  1180. 'ro_ro': 'ro_RO.ISO8859-2',
  1181. 'romanian': 'ro_RO.ISO8859-2',
  1182. 'ru': 'ru_RU.UTF-8',
  1183. 'ru_ru': 'ru_RU.UTF-8',
  1184. 'ru_ua': 'ru_UA.KOI8-U',
  1185. 'rumanian': 'ro_RO.ISO8859-2',
  1186. 'russian': 'ru_RU.KOI8-R',
  1187. 'rw': 'rw_RW.ISO8859-1',
  1188. 'rw_rw': 'rw_RW.ISO8859-1',
  1189. 'sa_in': 'sa_IN.UTF-8',
  1190. 'sat_in': 'sat_IN.UTF-8',
  1191. 'sc_it': 'sc_IT.UTF-8',
  1192. 'sd': 'sd_IN.UTF-8',
  1193. 'sd_in': 'sd_IN.UTF-8',
  1194. 'sd_in@devanagari.utf8': 'sd_IN.UTF-8@devanagari',
  1195. 'sd_pk': 'sd_PK.UTF-8',
  1196. 'se_no': 'se_NO.UTF-8',
  1197. 'serbocroatian': 'sr_RS.UTF-8@latin',
  1198. 'sgs_lt': 'sgs_LT.UTF-8',
  1199. 'sh': 'sr_RS.UTF-8@latin',
  1200. 'sh_ba.iso88592@bosnia': 'sr_CS.ISO8859-2',
  1201. 'sh_hr': 'sh_HR.ISO8859-2',
  1202. 'sh_hr.iso88592': 'hr_HR.ISO8859-2',
  1203. 'sh_sp': 'sr_CS.ISO8859-2',
  1204. 'sh_yu': 'sr_RS.UTF-8@latin',
  1205. 'shn_mm': 'shn_MM.UTF-8',
  1206. 'shs_ca': 'shs_CA.UTF-8',
  1207. 'si': 'si_LK.UTF-8',
  1208. 'si_lk': 'si_LK.UTF-8',
  1209. 'sid_et': 'sid_ET.UTF-8',
  1210. 'sinhala': 'si_LK.UTF-8',
  1211. 'sk': 'sk_SK.ISO8859-2',
  1212. 'sk_sk': 'sk_SK.ISO8859-2',
  1213. 'sl': 'sl_SI.ISO8859-2',
  1214. 'sl_cs': 'sl_CS.ISO8859-2',
  1215. 'sl_si': 'sl_SI.ISO8859-2',
  1216. 'slovak': 'sk_SK.ISO8859-2',
  1217. 'slovene': 'sl_SI.ISO8859-2',
  1218. 'slovenian': 'sl_SI.ISO8859-2',
  1219. 'sm_ws': 'sm_WS.UTF-8',
  1220. 'so_dj': 'so_DJ.ISO8859-1',
  1221. 'so_et': 'so_ET.UTF-8',
  1222. 'so_ke': 'so_KE.ISO8859-1',
  1223. 'so_so': 'so_SO.ISO8859-1',
  1224. 'sp': 'sr_CS.ISO8859-5',
  1225. 'sp_yu': 'sr_CS.ISO8859-5',
  1226. 'spanish': 'es_ES.ISO8859-1',
  1227. 'spanish_spain': 'es_ES.ISO8859-1',
  1228. 'sq': 'sq_AL.ISO8859-2',
  1229. 'sq_al': 'sq_AL.ISO8859-2',
  1230. 'sq_mk': 'sq_MK.UTF-8',
  1231. 'sr': 'sr_RS.UTF-8',
  1232. 'sr@cyrillic': 'sr_RS.UTF-8',
  1233. 'sr@latn': 'sr_CS.UTF-8@latin',
  1234. 'sr_cs': 'sr_CS.UTF-8',
  1235. 'sr_cs.iso88592@latn': 'sr_CS.ISO8859-2',
  1236. 'sr_cs@latn': 'sr_CS.UTF-8@latin',
  1237. 'sr_me': 'sr_ME.UTF-8',
  1238. 'sr_rs': 'sr_RS.UTF-8',
  1239. 'sr_rs@latn': 'sr_RS.UTF-8@latin',
  1240. 'sr_sp': 'sr_CS.ISO8859-2',
  1241. 'sr_yu': 'sr_RS.UTF-8@latin',
  1242. 'sr_yu.cp1251@cyrillic': 'sr_CS.CP1251',
  1243. 'sr_yu.iso88592': 'sr_CS.ISO8859-2',
  1244. 'sr_yu.iso88595': 'sr_CS.ISO8859-5',
  1245. 'sr_yu.iso88595@cyrillic': 'sr_CS.ISO8859-5',
  1246. 'sr_yu.microsoftcp1251@cyrillic': 'sr_CS.CP1251',
  1247. 'sr_yu.utf8': 'sr_RS.UTF-8',
  1248. 'sr_yu.utf8@cyrillic': 'sr_RS.UTF-8',
  1249. 'sr_yu@cyrillic': 'sr_RS.UTF-8',
  1250. 'ss': 'ss_ZA.ISO8859-1',
  1251. 'ss_za': 'ss_ZA.ISO8859-1',
  1252. 'st': 'st_ZA.ISO8859-1',
  1253. 'st_za': 'st_ZA.ISO8859-1',
  1254. 'sv': 'sv_SE.ISO8859-1',
  1255. 'sv_fi': 'sv_FI.ISO8859-1',
  1256. 'sv_se': 'sv_SE.ISO8859-1',
  1257. 'sw_ke': 'sw_KE.UTF-8',
  1258. 'sw_tz': 'sw_TZ.UTF-8',
  1259. 'swedish': 'sv_SE.ISO8859-1',
  1260. 'szl_pl': 'szl_PL.UTF-8',
  1261. 'ta': 'ta_IN.TSCII-0',
  1262. 'ta_in': 'ta_IN.TSCII-0',
  1263. 'ta_in.tscii': 'ta_IN.TSCII-0',
  1264. 'ta_in.tscii0': 'ta_IN.TSCII-0',
  1265. 'ta_lk': 'ta_LK.UTF-8',
  1266. 'tcy_in.utf8': 'tcy_IN.UTF-8',
  1267. 'te': 'te_IN.UTF-8',
  1268. 'te_in': 'te_IN.UTF-8',
  1269. 'tg': 'tg_TJ.KOI8-C',
  1270. 'tg_tj': 'tg_TJ.KOI8-C',
  1271. 'th': 'th_TH.ISO8859-11',
  1272. 'th_th': 'th_TH.ISO8859-11',
  1273. 'th_th.tactis': 'th_TH.TIS620',
  1274. 'th_th.tis620': 'th_TH.TIS620',
  1275. 'thai': 'th_TH.ISO8859-11',
  1276. 'the_np': 'the_NP.UTF-8',
  1277. 'ti_er': 'ti_ER.UTF-8',
  1278. 'ti_et': 'ti_ET.UTF-8',
  1279. 'tig_er': 'tig_ER.UTF-8',
  1280. 'tk_tm': 'tk_TM.UTF-8',
  1281. 'tl': 'tl_PH.ISO8859-1',
  1282. 'tl_ph': 'tl_PH.ISO8859-1',
  1283. 'tn': 'tn_ZA.ISO8859-15',
  1284. 'tn_za': 'tn_ZA.ISO8859-15',
  1285. 'to_to': 'to_TO.UTF-8',
  1286. 'tpi_pg': 'tpi_PG.UTF-8',
  1287. 'tr': 'tr_TR.ISO8859-9',
  1288. 'tr_cy': 'tr_CY.ISO8859-9',
  1289. 'tr_tr': 'tr_TR.ISO8859-9',
  1290. 'ts': 'ts_ZA.ISO8859-1',
  1291. 'ts_za': 'ts_ZA.ISO8859-1',
  1292. 'tt': 'tt_RU.TATAR-CYR',
  1293. 'tt_ru': 'tt_RU.TATAR-CYR',
  1294. 'tt_ru.tatarcyr': 'tt_RU.TATAR-CYR',
  1295. 'tt_ru@iqtelif': 'tt_RU.UTF-8@iqtelif',
  1296. 'turkish': 'tr_TR.ISO8859-9',
  1297. 'ug_cn': 'ug_CN.UTF-8',
  1298. 'uk': 'uk_UA.KOI8-U',
  1299. 'uk_ua': 'uk_UA.KOI8-U',
  1300. 'univ': 'en_US.utf',
  1301. 'universal': 'en_US.utf',
  1302. 'universal.utf8@ucs4': 'en_US.UTF-8',
  1303. 'unm_us': 'unm_US.UTF-8',
  1304. 'ur': 'ur_PK.CP1256',
  1305. 'ur_in': 'ur_IN.UTF-8',
  1306. 'ur_pk': 'ur_PK.CP1256',
  1307. 'uz': 'uz_UZ.UTF-8',
  1308. 'uz_uz': 'uz_UZ.UTF-8',
  1309. 'uz_uz@cyrillic': 'uz_UZ.UTF-8',
  1310. 've': 've_ZA.UTF-8',
  1311. 've_za': 've_ZA.UTF-8',
  1312. 'vi': 'vi_VN.TCVN',
  1313. 'vi_vn': 'vi_VN.TCVN',
  1314. 'vi_vn.tcvn': 'vi_VN.TCVN',
  1315. 'vi_vn.tcvn5712': 'vi_VN.TCVN',
  1316. 'vi_vn.viscii': 'vi_VN.VISCII',
  1317. 'vi_vn.viscii111': 'vi_VN.VISCII',
  1318. 'wa': 'wa_BE.ISO8859-1',
  1319. 'wa_be': 'wa_BE.ISO8859-1',
  1320. 'wae_ch': 'wae_CH.UTF-8',
  1321. 'wal_et': 'wal_ET.UTF-8',
  1322. 'wo_sn': 'wo_SN.UTF-8',
  1323. 'xh': 'xh_ZA.ISO8859-1',
  1324. 'xh_za': 'xh_ZA.ISO8859-1',
  1325. 'yi': 'yi_US.CP1255',
  1326. 'yi_us': 'yi_US.CP1255',
  1327. 'yo_ng': 'yo_NG.UTF-8',
  1328. 'yue_hk': 'yue_HK.UTF-8',
  1329. 'yuw_pg': 'yuw_PG.UTF-8',
  1330. 'zh': 'zh_CN.eucCN',
  1331. 'zh_cn': 'zh_CN.gb2312',
  1332. 'zh_cn.big5': 'zh_TW.big5',
  1333. 'zh_cn.euc': 'zh_CN.eucCN',
  1334. 'zh_hk': 'zh_HK.big5hkscs',
  1335. 'zh_hk.big5hk': 'zh_HK.big5hkscs',
  1336. 'zh_sg': 'zh_SG.GB2312',
  1337. 'zh_sg.gbk': 'zh_SG.GBK',
  1338. 'zh_tw': 'zh_TW.big5',
  1339. 'zh_tw.euc': 'zh_TW.eucTW',
  1340. 'zh_tw.euctw': 'zh_TW.eucTW',
  1341. 'zu': 'zu_ZA.ISO8859-1',
  1342. 'zu_za': 'zu_ZA.ISO8859-1',
  1343. }
  1344. #
  1345. # This maps Windows language identifiers to locale strings.
  1346. #
  1347. # This list has been updated from
  1348. # http://msdn.microsoft.com/library/default.asp?url=/library/en-us/intl/nls_238z.asp
  1349. # to include every locale up to Windows Vista.
  1350. #
  1351. # NOTE: this mapping is incomplete. If your language is missing, please
  1352. # submit a bug report to the Python bug tracker at http://bugs.python.org/
  1353. # Make sure you include the missing language identifier and the suggested
  1354. # locale code.
  1355. #
  1356. windows_locale = {
  1357. 0x0436: "af_ZA", # Afrikaans
  1358. 0x041c: "sq_AL", # Albanian
  1359. 0x0484: "gsw_FR",# Alsatian - France
  1360. 0x045e: "am_ET", # Amharic - Ethiopia
  1361. 0x0401: "ar_SA", # Arabic - Saudi Arabia
  1362. 0x0801: "ar_IQ", # Arabic - Iraq
  1363. 0x0c01: "ar_EG", # Arabic - Egypt
  1364. 0x1001: "ar_LY", # Arabic - Libya
  1365. 0x1401: "ar_DZ", # Arabic - Algeria
  1366. 0x1801: "ar_MA", # Arabic - Morocco
  1367. 0x1c01: "ar_TN", # Arabic - Tunisia
  1368. 0x2001: "ar_OM", # Arabic - Oman
  1369. 0x2401: "ar_YE", # Arabic - Yemen
  1370. 0x2801: "ar_SY", # Arabic - Syria
  1371. 0x2c01: "ar_JO", # Arabic - Jordan
  1372. 0x3001: "ar_LB", # Arabic - Lebanon
  1373. 0x3401: "ar_KW", # Arabic - Kuwait
  1374. 0x3801: "ar_AE", # Arabic - United Arab Emirates
  1375. 0x3c01: "ar_BH", # Arabic - Bahrain
  1376. 0x4001: "ar_QA", # Arabic - Qatar
  1377. 0x042b: "hy_AM", # Armenian
  1378. 0x044d: "as_IN", # Assamese - India
  1379. 0x042c: "az_AZ", # Azeri - Latin
  1380. 0x082c: "az_AZ", # Azeri - Cyrillic
  1381. 0x046d: "ba_RU", # Bashkir
  1382. 0x042d: "eu_ES", # Basque - Russia
  1383. 0x0423: "be_BY", # Belarusian
  1384. 0x0445: "bn_IN", # Begali
  1385. 0x201a: "bs_BA", # Bosnian - Cyrillic
  1386. 0x141a: "bs_BA", # Bosnian - Latin
  1387. 0x047e: "br_FR", # Breton - France
  1388. 0x0402: "bg_BG", # Bulgarian
  1389. # 0x0455: "my_MM", # Burmese - Not supported
  1390. 0x0403: "ca_ES", # Catalan
  1391. 0x0004: "zh_CHS",# Chinese - Simplified
  1392. 0x0404: "zh_TW", # Chinese - Taiwan
  1393. 0x0804: "zh_CN", # Chinese - PRC
  1394. 0x0c04: "zh_HK", # Chinese - Hong Kong S.A.R.
  1395. 0x1004: "zh_SG", # Chinese - Singapore
  1396. 0x1404: "zh_MO", # Chinese - Macao S.A.R.
  1397. 0x7c04: "zh_CHT",# Chinese - Traditional
  1398. 0x0483: "co_FR", # Corsican - France
  1399. 0x041a: "hr_HR", # Croatian
  1400. 0x101a: "hr_BA", # Croatian - Bosnia
  1401. 0x0405: "cs_CZ", # Czech
  1402. 0x0406: "da_DK", # Danish
  1403. 0x048c: "gbz_AF",# Dari - Afghanistan
  1404. 0x0465: "div_MV",# Divehi - Maldives
  1405. 0x0413: "nl_NL", # Dutch - The Netherlands
  1406. 0x0813: "nl_BE", # Dutch - Belgium
  1407. 0x0409: "en_US", # English - United States
  1408. 0x0809: "en_GB", # English - United Kingdom
  1409. 0x0c09: "en_AU", # English - Australia
  1410. 0x1009: "en_CA", # English - Canada
  1411. 0x1409: "en_NZ", # English - New Zealand
  1412. 0x1809: "en_IE", # English - Ireland
  1413. 0x1c09: "en_ZA", # English - South Africa
  1414. 0x2009: "en_JA", # English - Jamaica
  1415. 0x2409: "en_CB", # English - Caribbean
  1416. 0x2809: "en_BZ", # English - Belize
  1417. 0x2c09: "en_TT", # English - Trinidad
  1418. 0x3009: "en_ZW", # English - Zimbabwe
  1419. 0x3409: "en_PH", # English - Philippines
  1420. 0x4009: "en_IN", # English - India
  1421. 0x4409: "en_MY", # English - Malaysia
  1422. 0x4809: "en_IN", # English - Singapore
  1423. 0x0425: "et_EE", # Estonian
  1424. 0x0438: "fo_FO", # Faroese
  1425. 0x0464: "fil_PH",# Filipino
  1426. 0x040b: "fi_FI", # Finnish
  1427. 0x040c: "fr_FR", # French - France
  1428. 0x080c: "fr_BE", # French - Belgium
  1429. 0x0c0c: "fr_CA", # French - Canada
  1430. 0x100c: "fr_CH", # French - Switzerland
  1431. 0x140c: "fr_LU", # French - Luxembourg
  1432. 0x180c: "fr_MC", # French - Monaco
  1433. 0x0462: "fy_NL", # Frisian - Netherlands
  1434. 0x0456: "gl_ES", # Galician
  1435. 0x0437: "ka_GE", # Georgian
  1436. 0x0407: "de_DE", # German - Germany
  1437. 0x0807: "de_CH", # German - Switzerland
  1438. 0x0c07: "de_AT", # German - Austria
  1439. 0x1007: "de_LU", # German - Luxembourg
  1440. 0x1407: "de_LI", # German - Liechtenstein
  1441. 0x0408: "el_GR", # Greek
  1442. 0x046f: "kl_GL", # Greenlandic - Greenland
  1443. 0x0447: "gu_IN", # Gujarati
  1444. 0x0468: "ha_NG", # Hausa - Latin
  1445. 0x040d: "he_IL", # Hebrew
  1446. 0x0439: "hi_IN", # Hindi
  1447. 0x040e: "hu_HU", # Hungarian
  1448. 0x040f: "is_IS", # Icelandic
  1449. 0x0421: "id_ID", # Indonesian
  1450. 0x045d: "iu_CA", # Inuktitut - Syllabics
  1451. 0x085d: "iu_CA", # Inuktitut - Latin
  1452. 0x083c: "ga_IE", # Irish - Ireland
  1453. 0x0410: "it_IT", # Italian - Italy
  1454. 0x0810: "it_CH", # Italian - Switzerland
  1455. 0x0411: "ja_JP", # Japanese
  1456. 0x044b: "kn_IN", # Kannada - India
  1457. 0x043f: "kk_KZ", # Kazakh
  1458. 0x0453: "kh_KH", # Khmer - Cambodia
  1459. 0x0486: "qut_GT",# K'iche - Guatemala
  1460. 0x0487: "rw_RW", # Kinyarwanda - Rwanda
  1461. 0x0457: "kok_IN",# Konkani
  1462. 0x0412: "ko_KR", # Korean
  1463. 0x0440: "ky_KG", # Kyrgyz
  1464. 0x0454: "lo_LA", # Lao - Lao PDR
  1465. 0x0426: "lv_LV", # Latvian
  1466. 0x0427: "lt_LT", # Lithuanian
  1467. 0x082e: "dsb_DE",# Lower Sorbian - Germany
  1468. 0x046e: "lb_LU", # Luxembourgish
  1469. 0x042f: "mk_MK", # FYROM Macedonian
  1470. 0x043e: "ms_MY", # Malay - Malaysia
  1471. 0x083e: "ms_BN", # Malay - Brunei Darussalam
  1472. 0x044c: "ml_IN", # Malayalam - India
  1473. 0x043a: "mt_MT", # Maltese
  1474. 0x0481: "mi_NZ", # Maori
  1475. 0x047a: "arn_CL",# Mapudungun
  1476. 0x044e: "mr_IN", # Marathi
  1477. 0x047c: "moh_CA",# Mohawk - Canada
  1478. 0x0450: "mn_MN", # Mongolian - Cyrillic
  1479. 0x0850: "mn_CN", # Mongolian - PRC
  1480. 0x0461: "ne_NP", # Nepali
  1481. 0x0414: "nb_NO", # Norwegian - Bokmal
  1482. 0x0814: "nn_NO", # Norwegian - Nynorsk
  1483. 0x0482: "oc_FR", # Occitan - France
  1484. 0x0448: "or_IN", # Oriya - India
  1485. 0x0463: "ps_AF", # Pashto - Afghanistan
  1486. 0x0429: "fa_IR", # Persian
  1487. 0x0415: "pl_PL", # Polish
  1488. 0x0416: "pt_BR", # Portuguese - Brazil
  1489. 0x0816: "pt_PT", # Portuguese - Portugal
  1490. 0x0446: "pa_IN", # Punjabi
  1491. 0x046b: "quz_BO",# Quechua (Bolivia)
  1492. 0x086b: "quz_EC",# Quechua (Ecuador)
  1493. 0x0c6b: "quz_PE",# Quechua (Peru)
  1494. 0x0418: "ro_RO", # Romanian - Romania
  1495. 0x0417: "rm_CH", # Romansh
  1496. 0x0419: "ru_RU", # Russian
  1497. 0x243b: "smn_FI",# Sami Finland
  1498. 0x103b: "smj_NO",# Sami Norway
  1499. 0x143b: "smj_SE",# Sami Sweden
  1500. 0x043b: "se_NO", # Sami Northern Norway
  1501. 0x083b: "se_SE", # Sami Northern Sweden
  1502. 0x0c3b: "se_FI", # Sami Northern Finland
  1503. 0x203b: "sms_FI",# Sami Skolt
  1504. 0x183b: "sma_NO",# Sami Southern Norway
  1505. 0x1c3b: "sma_SE",# Sami Southern Sweden
  1506. 0x044f: "sa_IN", # Sanskrit
  1507. 0x0c1a: "sr_SP", # Serbian - Cyrillic
  1508. 0x1c1a: "sr_BA", # Serbian - Bosnia Cyrillic
  1509. 0x081a: "sr_SP", # Serbian - Latin
  1510. 0x181a: "sr_BA", # Serbian - Bosnia Latin
  1511. 0x045b: "si_LK", # Sinhala - Sri Lanka
  1512. 0x046c: "ns_ZA", # Northern Sotho
  1513. 0x0432: "tn_ZA", # Setswana - Southern Africa
  1514. 0x041b: "sk_SK", # Slovak
  1515. 0x0424: "sl_SI", # Slovenian
  1516. 0x040a: "es_ES", # Spanish - Spain
  1517. 0x080a: "es_MX", # Spanish - Mexico
  1518. 0x0c0a: "es_ES", # Spanish - Spain (Modern)
  1519. 0x100a: "es_GT", # Spanish - Guatemala
  1520. 0x140a: "es_CR", # Spanish - Costa Rica
  1521. 0x180a: "es_PA", # Spanish - Panama
  1522. 0x1c0a: "es_DO", # Spanish - Dominican Republic
  1523. 0x200a: "es_VE", # Spanish - Venezuela
  1524. 0x240a: "es_CO", # Spanish - Colombia
  1525. 0x280a: "es_PE", # Spanish - Peru
  1526. 0x2c0a: "es_AR", # Spanish - Argentina
  1527. 0x300a: "es_EC", # Spanish - Ecuador
  1528. 0x340a: "es_CL", # Spanish - Chile
  1529. 0x380a: "es_UR", # Spanish - Uruguay
  1530. 0x3c0a: "es_PY", # Spanish - Paraguay
  1531. 0x400a: "es_BO", # Spanish - Bolivia
  1532. 0x440a: "es_SV", # Spanish - El Salvador
  1533. 0x480a: "es_HN", # Spanish - Honduras
  1534. 0x4c0a: "es_NI", # Spanish - Nicaragua
  1535. 0x500a: "es_PR", # Spanish - Puerto Rico
  1536. 0x540a: "es_US", # Spanish - United States
  1537. # 0x0430: "", # Sutu - Not supported
  1538. 0x0441: "sw_KE", # Swahili
  1539. 0x041d: "sv_SE", # Swedish - Sweden
  1540. 0x081d: "sv_FI", # Swedish - Finland
  1541. 0x045a: "syr_SY",# Syriac
  1542. 0x0428: "tg_TJ", # Tajik - Cyrillic
  1543. 0x085f: "tmz_DZ",# Tamazight - Latin
  1544. 0x0449: "ta_IN", # Tamil
  1545. 0x0444: "tt_RU", # Tatar
  1546. 0x044a: "te_IN", # Telugu
  1547. 0x041e: "th_TH", # Thai
  1548. 0x0851: "bo_BT", # Tibetan - Bhutan
  1549. 0x0451: "bo_CN", # Tibetan - PRC
  1550. 0x041f: "tr_TR", # Turkish
  1551. 0x0442: "tk_TM", # Turkmen - Cyrillic
  1552. 0x0480: "ug_CN", # Uighur - Arabic
  1553. 0x0422: "uk_UA", # Ukrainian
  1554. 0x042e: "wen_DE",# Upper Sorbian - Germany
  1555. 0x0420: "ur_PK", # Urdu
  1556. 0x0820: "ur_IN", # Urdu - India
  1557. 0x0443: "uz_UZ", # Uzbek - Latin
  1558. 0x0843: "uz_UZ", # Uzbek - Cyrillic
  1559. 0x042a: "vi_VN", # Vietnamese
  1560. 0x0452: "cy_GB", # Welsh
  1561. 0x0488: "wo_SN", # Wolof - Senegal
  1562. 0x0434: "xh_ZA", # Xhosa - South Africa
  1563. 0x0485: "sah_RU",# Yakut - Cyrillic
  1564. 0x0478: "ii_CN", # Yi - PRC
  1565. 0x046a: "yo_NG", # Yoruba - Nigeria
  1566. 0x0435: "zu_ZA", # Zulu
  1567. }
  1568. def _print_locale():
  1569. """ Test function.
  1570. """
  1571. categories = {}
  1572. def _init_categories(categories=categories):
  1573. for k,v in globals().items():
  1574. if k[:3] == 'LC_':
  1575. categories[k] = v
  1576. _init_categories()
  1577. del categories['LC_ALL']
  1578. print('Locale defaults as determined by getdefaultlocale():')
  1579. print('-'*72)
  1580. lang, enc = getdefaultlocale()
  1581. print('Language: ', lang or '(undefined)')
  1582. print('Encoding: ', enc or '(undefined)')
  1583. print()
  1584. print('Locale settings on startup:')
  1585. print('-'*72)
  1586. for name,category in categories.items():
  1587. print(name, '...')
  1588. lang, enc = getlocale(category)
  1589. print(' Language: ', lang or '(undefined)')
  1590. print(' Encoding: ', enc or '(undefined)')
  1591. print()
  1592. print()
  1593. print('Locale settings after calling resetlocale():')
  1594. print('-'*72)
  1595. resetlocale()
  1596. for name,category in categories.items():
  1597. print(name, '...')
  1598. lang, enc = getlocale(category)
  1599. print(' Language: ', lang or '(undefined)')
  1600. print(' Encoding: ', enc or '(undefined)')
  1601. print()
  1602. try:
  1603. setlocale(LC_ALL, "")
  1604. except:
  1605. print('NOTE:')
  1606. print('setlocale(LC_ALL, "") does not support the default locale')
  1607. print('given in the OS environment variables.')
  1608. else:
  1609. print()
  1610. print('Locale settings after calling setlocale(LC_ALL, ""):')
  1611. print('-'*72)
  1612. for name,category in categories.items():
  1613. print(name, '...')
  1614. lang, enc = getlocale(category)
  1615. print(' Language: ', lang or '(undefined)')
  1616. print(' Encoding: ', enc or '(undefined)')
  1617. print()
  1618. ###
  1619. try:
  1620. LC_MESSAGES
  1621. except NameError:
  1622. pass
  1623. else:
  1624. __all__.append("LC_MESSAGES")
  1625. if __name__=='__main__':
  1626. print('Locale aliasing:')
  1627. print()
  1628. _print_locale()
  1629. print()
  1630. print('Number formatting:')
  1631. print()
  1632. _test()