2002-04-03 07:39:33 +02:00
|
|
|
/*-----------------------------------------------------------------------
|
2000-01-07 18:22:47 +01:00
|
|
|
*
|
2002-04-03 07:39:33 +02:00
|
|
|
* PostgreSQL locale utilities
|
2000-01-07 18:22:47 +01:00
|
|
|
*
|
2010-09-20 22:08:53 +02:00
|
|
|
* src/include/utils/pg_locale.h
|
2000-01-07 18:22:47 +01:00
|
|
|
*
|
2023-01-02 21:00:37 +01:00
|
|
|
* Copyright (c) 2002-2023, PostgreSQL Global Development Group
|
2000-02-08 16:57:01 +01:00
|
|
|
*
|
2002-04-03 07:39:33 +02:00
|
|
|
*-----------------------------------------------------------------------
|
2000-02-08 16:57:01 +01:00
|
|
|
*/
|
2000-04-12 19:17:23 +02:00
|
|
|
|
2000-01-07 18:22:47 +01:00
|
|
|
#ifndef _PG_LOCALE_
|
|
|
|
#define _PG_LOCALE_
|
2000-04-12 19:17:23 +02:00
|
|
|
|
Cope if platform declares mbstowcs_l(), but not locale_t, in <xlocale.h>.
Previously, we included <xlocale.h> only if necessary to get the definition
of type locale_t. According to notes in PGAC_TYPE_LOCALE_T, this is
important because on some versions of glibc that file supplies an
incompatible declaration of locale_t. (This info may be obsolete, because
on my RHEL6 box that seems to be the *only* definition of locale_t; but
there may still be glibc's in the wild for which it's a live concern.)
It turns out though that on FreeBSD and maybe other BSDen, you can get
locale_t from stdlib.h or locale.h but mbstowcs_l() and friends only from
<xlocale.h>. This was leaving us compiling calls to mbstowcs_l() and
friends with no visible prototype, which causes a warning and could
possibly cause actual trouble, since it's not declared to return int.
Hence, adjust the configure checks so that we'll include <xlocale.h>
either if it's necessary to get type locale_t or if it's necessary to
get a declaration of mbstowcs_l().
Report and patch by Aleksander Alekseev, somewhat whacked around by me.
Back-patch to all supported branches, since we have been using
mbstowcs_l() since 9.1.
2016-03-15 18:19:57 +01:00
|
|
|
#if defined(LOCALE_T_IN_XLOCALE) || defined(WCSTOMBS_L_IN_XLOCALE)
|
2011-02-08 22:04:18 +01:00
|
|
|
#include <xlocale.h>
|
|
|
|
#endif
|
2017-03-23 20:25:34 +01:00
|
|
|
#ifdef USE_ICU
|
|
|
|
#include <unicode/ucol.h>
|
|
|
|
#endif
|
2000-04-12 19:17:23 +02:00
|
|
|
|
2017-08-10 02:34:51 +02:00
|
|
|
#ifdef USE_ICU
|
|
|
|
/*
|
|
|
|
* ucol_strcollUTF8() was introduced in ICU 50, but it is buggy before ICU 53.
|
|
|
|
* (see
|
|
|
|
* <https://www.postgresql.org/message-id/flat/f1438ec6-22aa-4029-9a3b-26f79d330e72%40manitou-mail.org>)
|
|
|
|
*/
|
|
|
|
#if U_ICU_VERSION_MAJOR_NUM >= 53
|
|
|
|
#define HAVE_UCOL_STRCOLLUTF8 1
|
|
|
|
#else
|
|
|
|
#undef HAVE_UCOL_STRCOLLUTF8
|
|
|
|
#endif
|
|
|
|
#endif
|
|
|
|
|
2022-01-27 08:44:31 +01:00
|
|
|
/* use for libc locale names */
|
|
|
|
#define LOCALE_NAME_BUFLEN 128
|
2004-01-19 20:04:40 +01:00
|
|
|
|
2008-05-19 20:08:16 +02:00
|
|
|
/* GUC settings */
|
2022-04-08 14:16:38 +02:00
|
|
|
extern PGDLLIMPORT char *locale_messages;
|
|
|
|
extern PGDLLIMPORT char *locale_monetary;
|
|
|
|
extern PGDLLIMPORT char *locale_numeric;
|
|
|
|
extern PGDLLIMPORT char *locale_time;
|
2023-03-29 01:15:59 +02:00
|
|
|
extern PGDLLIMPORT int icu_validation_level;
|
2002-05-17 03:19:19 +02:00
|
|
|
|
2008-05-19 20:08:16 +02:00
|
|
|
/* lc_time localization cache */
|
2022-04-08 14:16:38 +02:00
|
|
|
extern PGDLLIMPORT char *localized_abbrev_days[];
|
|
|
|
extern PGDLLIMPORT char *localized_full_days[];
|
|
|
|
extern PGDLLIMPORT char *localized_abbrev_months[];
|
|
|
|
extern PGDLLIMPORT char *localized_full_months[];
|
2008-05-19 20:08:16 +02:00
|
|
|
|
2023-03-17 19:47:35 +01:00
|
|
|
/* is the database's LC_CTYPE the C locale? */
|
|
|
|
extern PGDLLIMPORT bool database_ctype_is_c;
|
2008-05-19 20:08:16 +02:00
|
|
|
|
Replace empty locale name with implied value in CREATE DATABASE and initdb.
setlocale() accepts locale name "" as meaning "the locale specified by the
process's environment variables". Historically we've accepted that for
Postgres' locale settings, too. However, it's fairly unsafe to store an
empty string in a new database's pg_database.datcollate or datctype fields,
because then the interpretation could vary across postmaster restarts,
possibly resulting in index corruption and other unpleasantness.
Instead, we should expand "" to whatever it means at the moment of calling
CREATE DATABASE, which we can do by saving the value returned by
setlocale().
For consistency, make initdb set up the initial lc_xxx parameter values the
same way. initdb was already doing the right thing for empty locale names,
but it did not replace non-empty names with setlocale results. On a
platform where setlocale chooses to canonicalize the spellings of locale
names, this would result in annoying inconsistency. (It seems that popular
implementations of setlocale don't do such canonicalization, which is a
pity, but the POSIX spec certainly allows it to be done.) The same risk
of inconsistency leads me to not venture back-patching this, although it
could certainly be seen as a longstanding bug.
Per report from Jeff Davis, though this is not his proposed patch.
2012-03-26 03:47:22 +02:00
|
|
|
extern bool check_locale(int category, const char *locale, char **canonname);
|
2005-12-29 00:22:51 +01:00
|
|
|
extern char *pg_perm_setlocale(int category, const char *locale);
|
|
|
|
|
2011-02-08 22:04:18 +01:00
|
|
|
extern bool lc_collate_is_c(Oid collation);
|
|
|
|
extern bool lc_ctype_is_c(Oid collation);
|
2001-10-25 07:50:21 +02:00
|
|
|
|
2001-09-29 23:16:30 +02:00
|
|
|
/*
|
2002-04-03 07:39:33 +02:00
|
|
|
* Return the POSIX lconv struct (contains number/money formatting
|
|
|
|
* information) with locale information for all categories.
|
2000-01-07 18:22:47 +01:00
|
|
|
*/
|
2000-03-18 19:57:16 +01:00
|
|
|
extern struct lconv *PGLC_localeconv(void);
|
2001-10-28 07:26:15 +01:00
|
|
|
|
2008-05-19 20:08:16 +02:00
|
|
|
extern void cache_locale_time(void);
|
|
|
|
|
2011-02-08 22:04:18 +01:00
|
|
|
|
|
|
|
/*
|
2023-07-09 01:55:03 +02:00
|
|
|
* We use a discriminated union to hold either a locale_t or an ICU collator.
|
2017-03-23 20:25:34 +01:00
|
|
|
* pg_locale_t is occasionally checked for truth, so make it a pointer.
|
2011-02-08 22:04:18 +01:00
|
|
|
*/
|
2017-03-27 00:31:05 +02:00
|
|
|
/*
 * Backing structure for pg_locale_t: a provider tag, a determinism flag,
 * and a union ("info") holding either a locale_t or, when built with
 * USE_ICU, an ICU locale string plus collator.
 */
struct pg_locale_struct
|
2017-03-23 20:25:34 +01:00
|
|
|
{
|
|
|
|
/* provider tag; presumably selects which member of "info" is valid -- TODO confirm */
char provider;
|
2019-03-22 12:09:32 +01:00
|
|
|
/* NOTE(review): presumably the value reported by pg_locale_deterministic() -- confirm */
bool deterministic;
|
2017-03-23 20:25:34 +01:00
|
|
|
union
|
|
|
|
{
|
|
|
|
/* locale_t handle */
locale_t lt;
|
|
|
|
/* the ICU member exists only when USE_ICU is defined */
#ifdef USE_ICU
|
|
|
|
struct
|
|
|
|
{
|
|
|
|
/* locale string; NOTE(review): ownership/lifetime not visible here -- confirm */
const char *locale;
|
|
|
|
/* ICU collator handle */
UCollator *ucol;
|
|
|
|
} icu;
|
2011-02-08 22:04:18 +01:00
|
|
|
#endif
|
2017-03-23 20:25:34 +01:00
|
|
|
} info;
|
|
|
|
};
|
|
|
|
|
2017-03-27 00:31:05 +02:00
|
|
|
/* Pointer to a pg_locale_struct; a pointer type so that values can be tested for truth. */
typedef struct pg_locale_struct *pg_locale_t;
|
2011-02-08 22:04:18 +01:00
|
|
|
|
2022-04-08 14:16:38 +02:00
|
|
|
extern PGDLLIMPORT struct pg_locale_struct default_locale;
|
2022-03-17 11:11:21 +01:00
|
|
|
|
|
|
|
extern void make_icu_collator(const char *iculocstr,
|
2023-03-08 16:35:42 +01:00
|
|
|
const char *icurules,
|
2022-03-17 11:11:21 +01:00
|
|
|
struct pg_locale_struct *resultp);
|
|
|
|
|
2023-02-23 20:17:41 +01:00
|
|
|
extern bool pg_locale_deterministic(pg_locale_t locale);
|
2011-02-08 22:04:18 +01:00
|
|
|
extern pg_locale_t pg_newlocale_from_collation(Oid collid);
|
|
|
|
|
2021-05-07 10:17:42 +02:00
|
|
|
extern char *get_collation_actual_version(char collprovider, const char *collcollate);
|
2023-02-23 19:55:20 +01:00
|
|
|
extern int pg_strcoll(const char *arg1, const char *arg2, pg_locale_t locale);
|
|
|
|
extern int pg_strncoll(const char *arg1, size_t len1,
|
|
|
|
const char *arg2, size_t len2, pg_locale_t locale);
|
|
|
|
extern bool pg_strxfrm_enabled(pg_locale_t locale);
|
|
|
|
extern size_t pg_strxfrm(char *dest, const char *src, size_t destsize,
|
|
|
|
pg_locale_t locale);
|
|
|
|
extern size_t pg_strnxfrm(char *dest, size_t destsize, const char *src,
|
|
|
|
size_t srclen, pg_locale_t locale);
|
|
|
|
extern bool pg_strxfrm_prefix_enabled(pg_locale_t locale);
|
|
|
|
extern size_t pg_strxfrm_prefix(char *dest, const char *src, size_t destsize,
|
|
|
|
pg_locale_t locale);
|
|
|
|
extern size_t pg_strnxfrm_prefix(char *dest, size_t destsize, const char *src,
|
|
|
|
size_t srclen, pg_locale_t locale);
|
2017-03-23 20:25:34 +01:00
|
|
|
|
2023-03-29 01:15:59 +02:00
|
|
|
extern void icu_validate_locale(const char *loc_str);
|
Canonicalize ICU locale names to language tags.
Convert to BCP47 language tags before storing in the catalog, except
during binary upgrade or when the locale comes from an existing
collation or template database.
The resulting language tags can vary slightly between ICU
versions. For instance, "@colBackwards=yes" is converted to
"und-u-kb-true" in older versions of ICU, and to the simpler (but
equivalent) "und-u-kb" in newer versions.
The process of canonicalizing to a language tag also understands more
input locale string formats than ucol_open(). For instance,
"fr_CA.UTF-8" is misinterpreted by ucol_open() and the region is
ignored; effectively treating it the same as the locale "fr" and
opening the wrong collator. Canonicalization properly interprets the
language and region, resulting in the language tag "fr-CA", which can
then be understood by ucol_open().
This commit fixes a problem in prior versions due to ucol_open()
misinterpreting locale strings as described above. For instance,
creating an ICU collation with locale "fr_CA.UTF-8" would store that
string directly in the catalog, which would later be passed to (and
misinterpreted by) ucol_open(). After this commit, the locale string
will be canonicalized to language tag "fr-CA" in the catalog, which
will be properly understood by ucol_open(). Because this fix affects
the resulting collator, we cannot change the locale string stored in
the catalog for existing databases or collations; otherwise we'd risk
corrupting indexes. Therefore, only canonicalize locales for
newly-created (not upgraded) collations/databases. For similar
reasons, do not backport.
Discussion: https://postgr.es/m/8c7af6820aed94dc7bc259d2aa7f9663518e6137.camel@j-davis.com
Reviewed-by: Peter Eisentraut
2023-04-04 19:28:08 +02:00
|
|
|
extern char *icu_language_tag(const char *loc_str, int elevel);
|
2023-03-29 01:15:59 +02:00
|
|
|
|
2017-03-23 20:25:34 +01:00
|
|
|
#ifdef USE_ICU
|
|
|
|
extern int32_t icu_to_uchar(UChar **buff_uchar, const char *buff, size_t nbytes);
|
Fix memory leakage in ICU encoding conversion, and other code review.
Callers of icu_to_uchar() neglected to pfree the result string when done
with it. This results in catastrophic memory leaks in varstr_cmp(),
because of our prevailing assumption that btree comparison functions don't
leak memory. For safety, make all the call sites clean up leaks, though
I suspect that we could get away without it in formatting.c. I audited
callers of icu_from_uchar() as well, but found no places that seemed to
have a comparable issue.
Add function API specifications for icu_to_uchar() and icu_from_uchar();
the lack of any thought-through specification is perhaps not unrelated
to the existence of this bug in the first place. Fix icu_to_uchar()
to guarantee a nul-terminated result; although no existing caller appears
to care, the fact that it would have been nul-terminated except in
extreme corner cases seems ideally designed to bite someone on the rear
someday. Fix ucnv_fromUChars() destCapacity argument --- in the worst
case, that could perhaps have led to a non-nul-terminated result, too.
Fix icu_from_uchar() to have a more reasonable definition of the function
result --- no callers are actually paying attention, so this isn't a live
bug, but it's certainly sloppily designed. Const-ify icu_from_uchar()'s
input string for consistency.
That is not the end of what needs to be done to these functions, but
it's as much as I have the patience for right now.
Discussion: https://postgr.es/m/1955.1498181798@sss.pgh.pa.us
2017-06-23 18:22:06 +02:00
|
|
|
extern int32_t icu_from_uchar(char **result, const UChar *buff_uchar, int32_t len_uchar);
|
2017-03-23 20:25:34 +01:00
|
|
|
#endif
|
|
|
|
|
2011-04-23 18:35:41 +02:00
|
|
|
/* These functions convert from/to libc's wchar_t, *not* pg_wchar_t */
|
|
|
|
extern size_t wchar2char(char *to, const wchar_t *from, size_t tolen,
|
|
|
|
pg_locale_t locale);
|
|
|
|
extern size_t char2wchar(wchar_t *to, size_t tolen,
|
|
|
|
const char *from, size_t fromlen, pg_locale_t locale);
|
|
|
|
|
2000-01-07 18:22:47 +01:00
|
|
|
#endif /* _PG_LOCALE_ */
|