postgresql/src/common/unicode/category_test.c

109 lines
3.0 KiB
C

/*-------------------------------------------------------------------------
* category_test.c
* Program to test Unicode general category functions.
*
* Portions Copyright (c) 2017-2024, PostgreSQL Global Development Group
*
* IDENTIFICATION
* src/common/unicode/category_test.c
*
*-------------------------------------------------------------------------
*/
#include "postgres_fe.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#ifdef USE_ICU
#include <unicode/uchar.h>
#endif
#include "common/unicode_category.h"
#include "common/unicode_version.h"
/*
* Parse version into integer for easy comparison.
*/
#ifdef USE_ICU
/*
 * Convert a "major.minor" Unicode version string into the single integer
 * major * 100 + minor, so that two versions can be compared numerically.
 *
 * Any text following the minor number (such as a third version component)
 * is ignored.
 *
 * The parse is validated at run time, independent of whether assertions are
 * enabled: if the string does not start with two dot-separated integers, or
 * a component is out of range, a message is printed and the program exits
 * with status 1.  This avoids computing a result from uninitialized
 * variables when sscanf() fails to convert both fields.
 */
static int
parse_unicode_version(const char *version)
{
	int			major;
	int			minor;

	/* sscanf() returns the number of fields converted; both are required */
	if (sscanf(version, "%d.%d", &major, &minor) != 2 ||
		major < 0 ||
		minor < 0 || minor >= 100)
	{
		printf("category_test: could not parse Unicode version \"%s\"\n",
			   version);
		exit(1);
	}

	/* minor is known to be in [0, 99], so it cannot spill into major */
	return major * 100 + minor;
}
#endif
/*
 * Walk every codepoint from 0 through 0x10ffff and check that the general
 * category reported by unicode_category() equals the one reported by ICU's
 * u_charType().
 *
 * The first unexplained disagreement is printed and the program exits with
 * status 1.  When built without ICU there is nothing to compare against, so
 * the test reports that it is skipped and exits with status 0.
 */
int
main(int argc, char **argv)
{
#ifndef USE_ICU
	printf("category_test: ICU support required for test; skipping\n");
	exit(0);
#else
	const int	pg_version = parse_unicode_version(PG_UNICODE_VERSION);
	const int	icu_version = parse_unicode_version(U_UNICODE_VERSION);
	int			unassigned_in_pg = 0;
	int			unassigned_in_icu = 0;
	UChar32		cp;

	printf("category_test: Postgres Unicode version:\t%s\n", PG_UNICODE_VERSION);
	printf("category_test: ICU Unicode version:\t\t%s\n", U_UNICODE_VERSION);

	for (cp = 0; cp <= 0x10ffff; cp++)
	{
		uint8_t		pg_cat = unicode_category(cp);
		uint8_t		icu_cat = u_charType(cp);

		/* Agreement is the common case; nothing more to do. */
		if (pg_cat == icu_cat)
			continue;

		/*
		 * When the two Unicode versions differ, a codepoint assigned in the
		 * newer version may still be unassigned in the older one.  Such a
		 * disagreement is tolerated and merely counted; the price is that
		 * those codepoints go unverified, so the test is no longer
		 * exhaustive.
		 */
		if (pg_cat == PG_U_UNASSIGNED && pg_version < icu_version)
		{
			unassigned_in_pg++;
			continue;
		}

		if (icu_cat == PG_U_UNASSIGNED && icu_version < pg_version)
		{
			unassigned_in_icu++;
			continue;
		}

		/* A genuine mismatch: report both categories and give up. */
		printf("category_test: FAILURE for codepoint 0x%06x\n", cp);
		printf("category_test: Postgres category: %02d %s %s\n", pg_cat,
			   unicode_category_abbrev(pg_cat),
			   unicode_category_string(pg_cat));
		printf("category_test: ICU category: %02d %s %s\n", icu_cat,
			   unicode_category_abbrev(icu_cat),
			   unicode_category_string(icu_cat));
		printf("\n");
		exit(1);
	}

	/* Mention any codepoints that were skipped because of version skew. */
	if (unassigned_in_pg > 0)
		printf("category_test: skipped %d codepoints unassigned in Postgres due to Unicode version mismatch\n",
			   unassigned_in_pg);

	if (unassigned_in_icu > 0)
		printf("category_test: skipped %d codepoints unassigned in ICU due to Unicode version mismatch\n",
			   unassigned_in_icu);

	printf("category_test: success\n");
	exit(0);
#endif
}