2021-12-07 06:26:05 +01:00
|
|
|
/* Generated by Snowball 2.2.0 - https://snowballstem.org/ */
|
2007-08-21 03:11:32 +02:00
|
|
|
|
|
|
|
#include "header.h"
|
|
|
|
|
|
|
|
/* Public entry point of the Finnish (ISO-8859-1) stemmer.  The declaration
 * is wrapped in an extern "C" guard so C++ translation units see C linkage. */
#ifdef __cplusplus
extern "C" {
#endif
extern int finnish_ISO_8859_1_stem(struct SN_env * z);
#ifdef __cplusplus
}
#endif
|
|
|
|
/* Forward declarations of the file-local stemming routines.  They are needed
 * because some routines are referenced before they are defined (for example
 * r_VI and r_LONG appear in the a_6 table). */
static int r_tidy(struct SN_env * z);
static int r_other_endings(struct SN_env * z);
static int r_t_plural(struct SN_env * z);
static int r_i_plural(struct SN_env * z);
static int r_case_ending(struct SN_env * z);
static int r_VI(struct SN_env * z);
static int r_LONG(struct SN_env * z);
static int r_possessive(struct SN_env * z);
static int r_particle_etc(struct SN_env * z);
static int r_R2(struct SN_env * z);
static int r_mark_regions(struct SN_env * z);
|
|
|
|
/* Environment constructor/destructor for this stemmer, declared with C
 * linkage.  Their definitions are not part of this section of the file. */
#ifdef __cplusplus
extern "C" {
#endif

extern struct SN_env * finnish_ISO_8859_1_create_env(void);
extern void finnish_ISO_8859_1_close_env(struct SN_env * z);

#ifdef __cplusplus
}
#endif
|
|
|
|
static const symbol s_0_0[2] = { 'p', 'a' };
|
|
|
|
static const symbol s_0_1[3] = { 's', 't', 'i' };
|
|
|
|
static const symbol s_0_2[4] = { 'k', 'a', 'a', 'n' };
|
|
|
|
static const symbol s_0_3[3] = { 'h', 'a', 'n' };
|
|
|
|
static const symbol s_0_4[3] = { 'k', 'i', 'n' };
|
|
|
|
static const symbol s_0_5[3] = { 'h', 0xE4, 'n' };
|
|
|
|
static const symbol s_0_6[4] = { 'k', 0xE4, 0xE4, 'n' };
|
|
|
|
static const symbol s_0_7[2] = { 'k', 'o' };
|
|
|
|
static const symbol s_0_8[2] = { 'p', 0xE4 };
|
|
|
|
static const symbol s_0_9[2] = { 'k', 0xF6 };
|
|
|
|
|
|
|
|
static const struct among a_0[10] =
|
|
|
|
{
|
2021-02-19 07:57:42 +01:00
|
|
|
{ 2, s_0_0, -1, 1, 0},
|
|
|
|
{ 3, s_0_1, -1, 2, 0},
|
|
|
|
{ 4, s_0_2, -1, 1, 0},
|
|
|
|
{ 3, s_0_3, -1, 1, 0},
|
|
|
|
{ 3, s_0_4, -1, 1, 0},
|
|
|
|
{ 3, s_0_5, -1, 1, 0},
|
|
|
|
{ 4, s_0_6, -1, 1, 0},
|
|
|
|
{ 2, s_0_7, -1, 1, 0},
|
|
|
|
{ 2, s_0_8, -1, 1, 0},
|
|
|
|
{ 2, s_0_9, -1, 1, 0}
|
2007-08-21 03:11:32 +02:00
|
|
|
};
|
|
|
|
|
|
|
|
static const symbol s_1_0[3] = { 'l', 'l', 'a' };
|
|
|
|
static const symbol s_1_1[2] = { 'n', 'a' };
|
|
|
|
static const symbol s_1_2[3] = { 's', 's', 'a' };
|
|
|
|
static const symbol s_1_3[2] = { 't', 'a' };
|
|
|
|
static const symbol s_1_4[3] = { 'l', 't', 'a' };
|
|
|
|
static const symbol s_1_5[3] = { 's', 't', 'a' };
|
|
|
|
|
|
|
|
static const struct among a_1[6] =
|
|
|
|
{
|
2021-02-19 07:57:42 +01:00
|
|
|
{ 3, s_1_0, -1, -1, 0},
|
|
|
|
{ 2, s_1_1, -1, -1, 0},
|
|
|
|
{ 3, s_1_2, -1, -1, 0},
|
|
|
|
{ 2, s_1_3, -1, -1, 0},
|
|
|
|
{ 3, s_1_4, 3, -1, 0},
|
|
|
|
{ 3, s_1_5, 3, -1, 0}
|
2007-08-21 03:11:32 +02:00
|
|
|
};
|
|
|
|
|
|
|
|
static const symbol s_2_0[3] = { 'l', 'l', 0xE4 };
|
|
|
|
static const symbol s_2_1[2] = { 'n', 0xE4 };
|
|
|
|
static const symbol s_2_2[3] = { 's', 's', 0xE4 };
|
|
|
|
static const symbol s_2_3[2] = { 't', 0xE4 };
|
|
|
|
static const symbol s_2_4[3] = { 'l', 't', 0xE4 };
|
|
|
|
static const symbol s_2_5[3] = { 's', 't', 0xE4 };
|
|
|
|
|
|
|
|
static const struct among a_2[6] =
|
|
|
|
{
|
2021-02-19 07:57:42 +01:00
|
|
|
{ 3, s_2_0, -1, -1, 0},
|
|
|
|
{ 2, s_2_1, -1, -1, 0},
|
|
|
|
{ 3, s_2_2, -1, -1, 0},
|
|
|
|
{ 2, s_2_3, -1, -1, 0},
|
|
|
|
{ 3, s_2_4, 3, -1, 0},
|
|
|
|
{ 3, s_2_5, 3, -1, 0}
|
2007-08-21 03:11:32 +02:00
|
|
|
};
|
|
|
|
|
|
|
|
static const symbol s_3_0[3] = { 'l', 'l', 'e' };
|
|
|
|
static const symbol s_3_1[3] = { 'i', 'n', 'e' };
|
|
|
|
|
|
|
|
static const struct among a_3[2] =
|
|
|
|
{
|
2021-02-19 07:57:42 +01:00
|
|
|
{ 3, s_3_0, -1, -1, 0},
|
|
|
|
{ 3, s_3_1, -1, -1, 0}
|
2007-08-21 03:11:32 +02:00
|
|
|
};
|
|
|
|
|
|
|
|
static const symbol s_4_0[3] = { 'n', 's', 'a' };
|
|
|
|
static const symbol s_4_1[3] = { 'm', 'm', 'e' };
|
|
|
|
static const symbol s_4_2[3] = { 'n', 'n', 'e' };
|
|
|
|
static const symbol s_4_3[2] = { 'n', 'i' };
|
|
|
|
static const symbol s_4_4[2] = { 's', 'i' };
|
|
|
|
static const symbol s_4_5[2] = { 'a', 'n' };
|
|
|
|
static const symbol s_4_6[2] = { 'e', 'n' };
|
|
|
|
static const symbol s_4_7[2] = { 0xE4, 'n' };
|
|
|
|
static const symbol s_4_8[3] = { 'n', 's', 0xE4 };
|
|
|
|
|
|
|
|
static const struct among a_4[9] =
|
|
|
|
{
|
2021-02-19 07:57:42 +01:00
|
|
|
{ 3, s_4_0, -1, 3, 0},
|
|
|
|
{ 3, s_4_1, -1, 3, 0},
|
|
|
|
{ 3, s_4_2, -1, 3, 0},
|
|
|
|
{ 2, s_4_3, -1, 2, 0},
|
|
|
|
{ 2, s_4_4, -1, 1, 0},
|
|
|
|
{ 2, s_4_5, -1, 4, 0},
|
|
|
|
{ 2, s_4_6, -1, 6, 0},
|
|
|
|
{ 2, s_4_7, -1, 5, 0},
|
|
|
|
{ 3, s_4_8, -1, 3, 0}
|
2007-08-21 03:11:32 +02:00
|
|
|
};
|
|
|
|
|
|
|
|
static const symbol s_5_0[2] = { 'a', 'a' };
|
|
|
|
static const symbol s_5_1[2] = { 'e', 'e' };
|
|
|
|
static const symbol s_5_2[2] = { 'i', 'i' };
|
|
|
|
static const symbol s_5_3[2] = { 'o', 'o' };
|
|
|
|
static const symbol s_5_4[2] = { 'u', 'u' };
|
|
|
|
static const symbol s_5_5[2] = { 0xE4, 0xE4 };
|
|
|
|
static const symbol s_5_6[2] = { 0xF6, 0xF6 };
|
|
|
|
|
|
|
|
static const struct among a_5[7] =
|
|
|
|
{
|
2021-02-19 07:57:42 +01:00
|
|
|
{ 2, s_5_0, -1, -1, 0},
|
|
|
|
{ 2, s_5_1, -1, -1, 0},
|
|
|
|
{ 2, s_5_2, -1, -1, 0},
|
|
|
|
{ 2, s_5_3, -1, -1, 0},
|
|
|
|
{ 2, s_5_4, -1, -1, 0},
|
|
|
|
{ 2, s_5_5, -1, -1, 0},
|
|
|
|
{ 2, s_5_6, -1, -1, 0}
|
2007-08-21 03:11:32 +02:00
|
|
|
};
|
|
|
|
|
|
|
|
static const symbol s_6_0[1] = { 'a' };
|
|
|
|
static const symbol s_6_1[3] = { 'l', 'l', 'a' };
|
|
|
|
static const symbol s_6_2[2] = { 'n', 'a' };
|
|
|
|
static const symbol s_6_3[3] = { 's', 's', 'a' };
|
|
|
|
static const symbol s_6_4[2] = { 't', 'a' };
|
|
|
|
static const symbol s_6_5[3] = { 'l', 't', 'a' };
|
|
|
|
static const symbol s_6_6[3] = { 's', 't', 'a' };
|
|
|
|
static const symbol s_6_7[3] = { 't', 't', 'a' };
|
|
|
|
static const symbol s_6_8[3] = { 'l', 'l', 'e' };
|
|
|
|
static const symbol s_6_9[3] = { 'i', 'n', 'e' };
|
|
|
|
static const symbol s_6_10[3] = { 'k', 's', 'i' };
|
|
|
|
static const symbol s_6_11[1] = { 'n' };
|
|
|
|
static const symbol s_6_12[3] = { 'h', 'a', 'n' };
|
|
|
|
static const symbol s_6_13[3] = { 'd', 'e', 'n' };
|
|
|
|
static const symbol s_6_14[4] = { 's', 'e', 'e', 'n' };
|
|
|
|
static const symbol s_6_15[3] = { 'h', 'e', 'n' };
|
|
|
|
static const symbol s_6_16[4] = { 't', 't', 'e', 'n' };
|
|
|
|
static const symbol s_6_17[3] = { 'h', 'i', 'n' };
|
|
|
|
static const symbol s_6_18[4] = { 's', 'i', 'i', 'n' };
|
|
|
|
static const symbol s_6_19[3] = { 'h', 'o', 'n' };
|
|
|
|
static const symbol s_6_20[3] = { 'h', 0xE4, 'n' };
|
|
|
|
static const symbol s_6_21[3] = { 'h', 0xF6, 'n' };
|
|
|
|
static const symbol s_6_22[1] = { 0xE4 };
|
|
|
|
static const symbol s_6_23[3] = { 'l', 'l', 0xE4 };
|
|
|
|
static const symbol s_6_24[2] = { 'n', 0xE4 };
|
|
|
|
static const symbol s_6_25[3] = { 's', 's', 0xE4 };
|
|
|
|
static const symbol s_6_26[2] = { 't', 0xE4 };
|
|
|
|
static const symbol s_6_27[3] = { 'l', 't', 0xE4 };
|
|
|
|
static const symbol s_6_28[3] = { 's', 't', 0xE4 };
|
|
|
|
static const symbol s_6_29[3] = { 't', 't', 0xE4 };
|
|
|
|
|
|
|
|
static const struct among a_6[30] =
|
|
|
|
{
|
2021-02-19 07:57:42 +01:00
|
|
|
{ 1, s_6_0, -1, 8, 0},
|
|
|
|
{ 3, s_6_1, 0, -1, 0},
|
|
|
|
{ 2, s_6_2, 0, -1, 0},
|
|
|
|
{ 3, s_6_3, 0, -1, 0},
|
|
|
|
{ 2, s_6_4, 0, -1, 0},
|
|
|
|
{ 3, s_6_5, 4, -1, 0},
|
|
|
|
{ 3, s_6_6, 4, -1, 0},
|
|
|
|
{ 3, s_6_7, 4, 2, 0},
|
|
|
|
{ 3, s_6_8, -1, -1, 0},
|
|
|
|
{ 3, s_6_9, -1, -1, 0},
|
|
|
|
{ 3, s_6_10, -1, -1, 0},
|
|
|
|
{ 1, s_6_11, -1, 7, 0},
|
|
|
|
{ 3, s_6_12, 11, 1, 0},
|
|
|
|
{ 3, s_6_13, 11, -1, r_VI},
|
|
|
|
{ 4, s_6_14, 11, -1, r_LONG},
|
|
|
|
{ 3, s_6_15, 11, 2, 0},
|
|
|
|
{ 4, s_6_16, 11, -1, r_VI},
|
|
|
|
{ 3, s_6_17, 11, 3, 0},
|
|
|
|
{ 4, s_6_18, 11, -1, r_VI},
|
|
|
|
{ 3, s_6_19, 11, 4, 0},
|
|
|
|
{ 3, s_6_20, 11, 5, 0},
|
|
|
|
{ 3, s_6_21, 11, 6, 0},
|
|
|
|
{ 1, s_6_22, -1, 8, 0},
|
|
|
|
{ 3, s_6_23, 22, -1, 0},
|
|
|
|
{ 2, s_6_24, 22, -1, 0},
|
|
|
|
{ 3, s_6_25, 22, -1, 0},
|
|
|
|
{ 2, s_6_26, 22, -1, 0},
|
|
|
|
{ 3, s_6_27, 26, -1, 0},
|
|
|
|
{ 3, s_6_28, 26, -1, 0},
|
|
|
|
{ 3, s_6_29, 26, 2, 0}
|
2007-08-21 03:11:32 +02:00
|
|
|
};
|
|
|
|
|
|
|
|
static const symbol s_7_0[3] = { 'e', 'j', 'a' };
|
|
|
|
static const symbol s_7_1[3] = { 'm', 'm', 'a' };
|
|
|
|
static const symbol s_7_2[4] = { 'i', 'm', 'm', 'a' };
|
|
|
|
static const symbol s_7_3[3] = { 'm', 'p', 'a' };
|
|
|
|
static const symbol s_7_4[4] = { 'i', 'm', 'p', 'a' };
|
|
|
|
static const symbol s_7_5[3] = { 'm', 'm', 'i' };
|
|
|
|
static const symbol s_7_6[4] = { 'i', 'm', 'm', 'i' };
|
|
|
|
static const symbol s_7_7[3] = { 'm', 'p', 'i' };
|
|
|
|
static const symbol s_7_8[4] = { 'i', 'm', 'p', 'i' };
|
|
|
|
static const symbol s_7_9[3] = { 'e', 'j', 0xE4 };
|
|
|
|
static const symbol s_7_10[3] = { 'm', 'm', 0xE4 };
|
|
|
|
static const symbol s_7_11[4] = { 'i', 'm', 'm', 0xE4 };
|
|
|
|
static const symbol s_7_12[3] = { 'm', 'p', 0xE4 };
|
|
|
|
static const symbol s_7_13[4] = { 'i', 'm', 'p', 0xE4 };
|
|
|
|
|
|
|
|
static const struct among a_7[14] =
|
|
|
|
{
|
2021-02-19 07:57:42 +01:00
|
|
|
{ 3, s_7_0, -1, -1, 0},
|
|
|
|
{ 3, s_7_1, -1, 1, 0},
|
|
|
|
{ 4, s_7_2, 1, -1, 0},
|
|
|
|
{ 3, s_7_3, -1, 1, 0},
|
|
|
|
{ 4, s_7_4, 3, -1, 0},
|
|
|
|
{ 3, s_7_5, -1, 1, 0},
|
|
|
|
{ 4, s_7_6, 5, -1, 0},
|
|
|
|
{ 3, s_7_7, -1, 1, 0},
|
|
|
|
{ 4, s_7_8, 7, -1, 0},
|
|
|
|
{ 3, s_7_9, -1, -1, 0},
|
|
|
|
{ 3, s_7_10, -1, 1, 0},
|
|
|
|
{ 4, s_7_11, 10, -1, 0},
|
|
|
|
{ 3, s_7_12, -1, 1, 0},
|
|
|
|
{ 4, s_7_13, 12, -1, 0}
|
2007-08-21 03:11:32 +02:00
|
|
|
};
|
|
|
|
|
|
|
|
static const symbol s_8_0[1] = { 'i' };
|
|
|
|
static const symbol s_8_1[1] = { 'j' };
|
|
|
|
|
|
|
|
static const struct among a_8[2] =
|
|
|
|
{
|
2021-02-19 07:57:42 +01:00
|
|
|
{ 1, s_8_0, -1, -1, 0},
|
|
|
|
{ 1, s_8_1, -1, -1, 0}
|
2007-08-21 03:11:32 +02:00
|
|
|
};
|
|
|
|
|
|
|
|
static const symbol s_9_0[3] = { 'm', 'm', 'a' };
|
|
|
|
static const symbol s_9_1[4] = { 'i', 'm', 'm', 'a' };
|
|
|
|
|
|
|
|
static const struct among a_9[2] =
|
|
|
|
{
|
2021-02-19 07:57:42 +01:00
|
|
|
{ 3, s_9_0, -1, 1, 0},
|
|
|
|
{ 4, s_9_1, 0, -1, 0}
|
2007-08-21 03:11:32 +02:00
|
|
|
};
|
|
|
|
|
|
|
|
/*
 * Character-set tables handed to the in_grouping / out_grouping helpers.
 * Within this file g_V1, g_V2 and g_particle_end are always passed together
 * with the bounds 97 and 246.
 * NOTE(review): these look like bitmaps indexed by (character - lower bound);
 * the helpers are defined in the Snowball runtime -- confirm there.
 */
static const unsigned char g_AEI[] = { 17, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8 };

static const unsigned char g_C[] = { 119, 223, 119, 1 };

static const unsigned char g_V1[] = { 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 32 };

static const unsigned char g_V2[] = { 17, 65, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 32 };

static const unsigned char g_particle_end[] = { 17, 97, 24, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 32 };
|
|
|
|
|
Sync our Snowball stemmer dictionaries with current upstream.
We haven't touched these since text search functionality landed in core
in 2007 :-(. While the upstream project isn't a beehive of activity,
they do make additions and bug fixes from time to time. Update our
copies of these files.
Also update our documentation about how to keep things in sync, since
they're not making distribution tarballs these days. Fortunately,
their source code turns out to be a breeze to build.
Notable changes:
* The non-UTF8 version of the hungarian stemmer now works in LATIN2
not LATIN1.
* New stemmers have appeared for arabic, indonesian, irish, lithuanian,
nepali, and tamil. These all work in UTF8, and the indonesian and
irish ones also work in LATIN1.
(There are some new stemmers that I did not incorporate, mainly because
their names don't match the underlying languages, suggesting that they're
not to be considered mainstream.)
Worth noting: the upstream Nepali dictionary was contributed by
Arthur Zakirov.
initdb forced because the contents of snowball_create.sql have
changed.
Still TODO: see about updating the stopword lists.
Arthur Zakirov, minor mods and doc work by me
Discussion: https://postgr.es/m/20180626122025.GA12647@zakirov.localdomain
Discussion: https://postgr.es/m/20180219140849.GA9050@zakirov.localdomain
2018-09-24 23:29:08 +02:00
|
|
|
/* Literal strings used by the stemming routines.  In the visible part of
 * the file, r_possessive() tests for s_0 ("kse") with eq_s_b() and replaces
 * it with s_1 ("ksi"); the users of s_2..s_4 are outside this section. */
static const symbol s_0[] = { 'k', 's', 'e' };
static const symbol s_1[] = { 'k', 's', 'i' };
static const symbol s_2[] = { 'i', 'e' };
static const symbol s_3[] = { 'p', 'o' };
static const symbol s_4[] = { 'p', 'o' };
|
|
|
|
|
2021-02-19 07:57:42 +01:00
|
|
|
static int r_mark_regions(struct SN_env * z) {
|
|
|
|
z->I[1] = z->l;
|
|
|
|
z->I[0] = z->l;
|
|
|
|
if (out_grouping(z, g_V1, 97, 246, 1) < 0) return 0;
|
|
|
|
{
|
2007-08-21 03:11:32 +02:00
|
|
|
int ret = in_grouping(z, g_V1, 97, 246, 1);
|
|
|
|
if (ret < 0) return 0;
|
|
|
|
z->c += ret;
|
|
|
|
}
|
2021-02-19 07:57:42 +01:00
|
|
|
z->I[1] = z->c;
|
|
|
|
if (out_grouping(z, g_V1, 97, 246, 1) < 0) return 0;
|
|
|
|
{
|
2007-08-21 03:11:32 +02:00
|
|
|
int ret = in_grouping(z, g_V1, 97, 246, 1);
|
|
|
|
if (ret < 0) return 0;
|
|
|
|
z->c += ret;
|
|
|
|
}
|
2021-02-19 07:57:42 +01:00
|
|
|
z->I[0] = z->c;
|
2007-08-21 03:11:32 +02:00
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
2021-02-19 07:57:42 +01:00
|
|
|
static int r_R2(struct SN_env * z) {
|
|
|
|
if (!(z->I[0] <= z->c)) return 0;
|
2007-08-21 03:11:32 +02:00
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
2021-02-19 07:57:42 +01:00
|
|
|
static int r_particle_etc(struct SN_env * z) {
|
2007-08-21 03:11:32 +02:00
|
|
|
int among_var;
|
Sync our Snowball stemmer dictionaries with current upstream.
We haven't touched these since text search functionality landed in core
in 2007 :-(. While the upstream project isn't a beehive of activity,
they do make additions and bug fixes from time to time. Update our
copies of these files.
Also update our documentation about how to keep things in sync, since
they're not making distribution tarballs these days. Fortunately,
their source code turns out to be a breeze to build.
Notable changes:
* The non-UTF8 version of the hungarian stemmer now works in LATIN2
not LATIN1.
* New stemmers have appeared for arabic, indonesian, irish, lithuanian,
nepali, and tamil. These all work in UTF8, and the indonesian and
irish ones also work in LATIN1.
(There are some new stemmers that I did not incorporate, mainly because
their names don't match the underlying languages, suggesting that they're
not to be considered mainstream.)
Worth noting: the upstream Nepali dictionary was contributed by
Arthur Zakirov.
initdb forced because the contents of snowball_create.sql have
changed.
Still TODO: see about updating the stopword lists.
Arthur Zakirov, minor mods and doc work by me
Discussion: https://postgr.es/m/20180626122025.GA12647@zakirov.localdomain
Discussion: https://postgr.es/m/20180219140849.GA9050@zakirov.localdomain
2018-09-24 23:29:08 +02:00
|
|
|
|
2021-02-19 07:57:42 +01:00
|
|
|
{ int mlimit1;
|
|
|
|
if (z->c < z->I[1]) return 0;
|
|
|
|
mlimit1 = z->lb; z->lb = z->I[1];
|
|
|
|
z->ket = z->c;
|
|
|
|
among_var = find_among_b(z, a_0, 10);
|
Sync our Snowball stemmer dictionaries with current upstream.
We haven't touched these since text search functionality landed in core
in 2007 :-(. While the upstream project isn't a beehive of activity,
they do make additions and bug fixes from time to time. Update our
copies of these files.
Also update our documentation about how to keep things in sync, since
they're not making distribution tarballs these days. Fortunately,
their source code turns out to be a breeze to build.
Notable changes:
* The non-UTF8 version of the hungarian stemmer now works in LATIN2
not LATIN1.
* New stemmers have appeared for arabic, indonesian, irish, lithuanian,
nepali, and tamil. These all work in UTF8, and the indonesian and
irish ones also work in LATIN1.
(There are some new stemmers that I did not incorporate, mainly because
their names don't match the underlying languages, suggesting that they're
not to be considered mainstream.)
Worth noting: the upstream Nepali dictionary was contributed by
Arthur Zakirov.
initdb forced because the contents of snowball_create.sql have
changed.
Still TODO: see about updating the stopword lists.
Arthur Zakirov, minor mods and doc work by me
Discussion: https://postgr.es/m/20180626122025.GA12647@zakirov.localdomain
Discussion: https://postgr.es/m/20180219140849.GA9050@zakirov.localdomain
2018-09-24 23:29:08 +02:00
|
|
|
if (!(among_var)) { z->lb = mlimit1; return 0; }
|
2021-02-19 07:57:42 +01:00
|
|
|
z->bra = z->c;
|
Sync our Snowball stemmer dictionaries with current upstream.
We haven't touched these since text search functionality landed in core
in 2007 :-(. While the upstream project isn't a beehive of activity,
they do make additions and bug fixes from time to time. Update our
copies of these files.
Also update our documentation about how to keep things in sync, since
they're not making distribution tarballs these days. Fortunately,
their source code turns out to be a breeze to build.
Notable changes:
* The non-UTF8 version of the hungarian stemmer now works in LATIN2
not LATIN1.
* New stemmers have appeared for arabic, indonesian, irish, lithuanian,
nepali, and tamil. These all work in UTF8, and the indonesian and
irish ones also work in LATIN1.
(There are some new stemmers that I did not incorporate, mainly because
their names don't match the underlying languages, suggesting that they're
not to be considered mainstream.)
Worth noting: the upstream Nepali dictionary was contributed by
Arthur Zakirov.
initdb forced because the contents of snowball_create.sql have
changed.
Still TODO: see about updating the stopword lists.
Arthur Zakirov, minor mods and doc work by me
Discussion: https://postgr.es/m/20180626122025.GA12647@zakirov.localdomain
Discussion: https://postgr.es/m/20180219140849.GA9050@zakirov.localdomain
2018-09-24 23:29:08 +02:00
|
|
|
z->lb = mlimit1;
|
2007-08-21 03:11:32 +02:00
|
|
|
}
|
2021-02-19 07:57:42 +01:00
|
|
|
switch (among_var) {
|
2007-08-21 03:11:32 +02:00
|
|
|
case 1:
|
2021-02-19 07:57:42 +01:00
|
|
|
if (in_grouping_b(z, g_particle_end, 97, 246, 0)) return 0;
|
2007-08-21 03:11:32 +02:00
|
|
|
break;
|
|
|
|
case 2:
|
2021-02-19 07:57:42 +01:00
|
|
|
{ int ret = r_R2(z);
|
Sync our Snowball stemmer dictionaries with current upstream.
We haven't touched these since text search functionality landed in core
in 2007 :-(. While the upstream project isn't a beehive of activity,
they do make additions and bug fixes from time to time. Update our
copies of these files.
Also update our documentation about how to keep things in sync, since
they're not making distribution tarballs these days. Fortunately,
their source code turns out to be a breeze to build.
Notable changes:
* The non-UTF8 version of the hungarian stemmer now works in LATIN2
not LATIN1.
* New stemmers have appeared for arabic, indonesian, irish, lithuanian,
nepali, and tamil. These all work in UTF8, and the indonesian and
irish ones also work in LATIN1.
(There are some new stemmers that I did not incorporate, mainly because
their names don't match the underlying languages, suggesting that they're
not to be considered mainstream.)
Worth noting: the upstream Nepali dictionary was contributed by
Arthur Zakirov.
initdb forced because the contents of snowball_create.sql have
changed.
Still TODO: see about updating the stopword lists.
Arthur Zakirov, minor mods and doc work by me
Discussion: https://postgr.es/m/20180626122025.GA12647@zakirov.localdomain
Discussion: https://postgr.es/m/20180219140849.GA9050@zakirov.localdomain
2018-09-24 23:29:08 +02:00
|
|
|
if (ret <= 0) return ret;
|
2007-08-21 03:11:32 +02:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
2021-02-19 07:57:42 +01:00
|
|
|
{ int ret = slice_del(z);
|
2007-08-21 03:11:32 +02:00
|
|
|
if (ret < 0) return ret;
|
|
|
|
}
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
2021-02-19 07:57:42 +01:00
|
|
|
static int r_possessive(struct SN_env * z) {
|
2007-08-21 03:11:32 +02:00
|
|
|
int among_var;
|
Sync our Snowball stemmer dictionaries with current upstream.
We haven't touched these since text search functionality landed in core
in 2007 :-(. While the upstream project isn't a beehive of activity,
they do make additions and bug fixes from time to time. Update our
copies of these files.
Also update our documentation about how to keep things in sync, since
they're not making distribution tarballs these days. Fortunately,
their source code turns out to be a breeze to build.
Notable changes:
* The non-UTF8 version of the hungarian stemmer now works in LATIN2
not LATIN1.
* New stemmers have appeared for arabic, indonesian, irish, lithuanian,
nepali, and tamil. These all work in UTF8, and the indonesian and
irish ones also work in LATIN1.
(There are some new stemmers that I did not incorporate, mainly because
their names don't match the underlying languages, suggesting that they're
not to be considered mainstream.)
Worth noting: the upstream Nepali dictionary was contributed by
Arthur Zakirov.
initdb forced because the contents of snowball_create.sql have
changed.
Still TODO: see about updating the stopword lists.
Arthur Zakirov, minor mods and doc work by me
Discussion: https://postgr.es/m/20180626122025.GA12647@zakirov.localdomain
Discussion: https://postgr.es/m/20180219140849.GA9050@zakirov.localdomain
2018-09-24 23:29:08 +02:00
|
|
|
|
2021-02-19 07:57:42 +01:00
|
|
|
{ int mlimit1;
|
|
|
|
if (z->c < z->I[1]) return 0;
|
|
|
|
mlimit1 = z->lb; z->lb = z->I[1];
|
|
|
|
z->ket = z->c;
|
|
|
|
among_var = find_among_b(z, a_4, 9);
|
Sync our Snowball stemmer dictionaries with current upstream.
We haven't touched these since text search functionality landed in core
in 2007 :-(. While the upstream project isn't a beehive of activity,
they do make additions and bug fixes from time to time. Update our
copies of these files.
Also update our documentation about how to keep things in sync, since
they're not making distribution tarballs these days. Fortunately,
their source code turns out to be a breeze to build.
Notable changes:
* The non-UTF8 version of the hungarian stemmer now works in LATIN2
not LATIN1.
* New stemmers have appeared for arabic, indonesian, irish, lithuanian,
nepali, and tamil. These all work in UTF8, and the indonesian and
irish ones also work in LATIN1.
(There are some new stemmers that I did not incorporate, mainly because
their names don't match the underlying languages, suggesting that they're
not to be considered mainstream.)
Worth noting: the upstream Nepali dictionary was contributed by
Arthur Zakirov.
initdb forced because the contents of snowball_create.sql have
changed.
Still TODO: see about updating the stopword lists.
Arthur Zakirov, minor mods and doc work by me
Discussion: https://postgr.es/m/20180626122025.GA12647@zakirov.localdomain
Discussion: https://postgr.es/m/20180219140849.GA9050@zakirov.localdomain
2018-09-24 23:29:08 +02:00
|
|
|
if (!(among_var)) { z->lb = mlimit1; return 0; }
|
2021-02-19 07:57:42 +01:00
|
|
|
z->bra = z->c;
|
Sync our Snowball stemmer dictionaries with current upstream.
We haven't touched these since text search functionality landed in core
in 2007 :-(. While the upstream project isn't a beehive of activity,
they do make additions and bug fixes from time to time. Update our
copies of these files.
Also update our documentation about how to keep things in sync, since
they're not making distribution tarballs these days. Fortunately,
their source code turns out to be a breeze to build.
Notable changes:
* The non-UTF8 version of the hungarian stemmer now works in LATIN2
not LATIN1.
* New stemmers have appeared for arabic, indonesian, irish, lithuanian,
nepali, and tamil. These all work in UTF8, and the indonesian and
irish ones also work in LATIN1.
(There are some new stemmers that I did not incorporate, mainly because
their names don't match the underlying languages, suggesting that they're
not to be considered mainstream.)
Worth noting: the upstream Nepali dictionary was contributed by
Arthur Zakirov.
initdb forced because the contents of snowball_create.sql have
changed.
Still TODO: see about updating the stopword lists.
Arthur Zakirov, minor mods and doc work by me
Discussion: https://postgr.es/m/20180626122025.GA12647@zakirov.localdomain
Discussion: https://postgr.es/m/20180219140849.GA9050@zakirov.localdomain
2018-09-24 23:29:08 +02:00
|
|
|
z->lb = mlimit1;
|
2007-08-21 03:11:32 +02:00
|
|
|
}
|
2021-02-19 07:57:42 +01:00
|
|
|
switch (among_var) {
|
2007-08-21 03:11:32 +02:00
|
|
|
case 1:
|
2021-02-19 07:57:42 +01:00
|
|
|
{ int m2 = z->l - z->c; (void)m2;
|
|
|
|
if (z->c <= z->lb || z->p[z->c - 1] != 'k') goto lab0;
|
Sync our Snowball stemmer dictionaries with current upstream.
We haven't touched these since text search functionality landed in core
in 2007 :-(. While the upstream project isn't a beehive of activity,
they do make additions and bug fixes from time to time. Update our
copies of these files.
Also update our documentation about how to keep things in sync, since
they're not making distribution tarballs these days. Fortunately,
their source code turns out to be a breeze to build.
Notable changes:
* The non-UTF8 version of the hungarian stemmer now works in LATIN2
not LATIN1.
* New stemmers have appeared for arabic, indonesian, irish, lithuanian,
nepali, and tamil. These all work in UTF8, and the indonesian and
irish ones also work in LATIN1.
(There are some new stemmers that I did not incorporate, mainly because
their names don't match the underlying languages, suggesting that they're
not to be considered mainstream.)
Worth noting: the upstream Nepali dictionary was contributed by
Arthur Zakirov.
initdb forced because the contents of snowball_create.sql have
changed.
Still TODO: see about updating the stopword lists.
Arthur Zakirov, minor mods and doc work by me
Discussion: https://postgr.es/m/20180626122025.GA12647@zakirov.localdomain
Discussion: https://postgr.es/m/20180219140849.GA9050@zakirov.localdomain
2018-09-24 23:29:08 +02:00
|
|
|
z->c--;
|
2007-08-21 03:11:32 +02:00
|
|
|
return 0;
|
|
|
|
lab0:
|
|
|
|
z->c = z->l - m2;
|
|
|
|
}
|
2021-02-19 07:57:42 +01:00
|
|
|
{ int ret = slice_del(z);
|
2007-08-21 03:11:32 +02:00
|
|
|
if (ret < 0) return ret;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case 2:
|
2021-02-19 07:57:42 +01:00
|
|
|
{ int ret = slice_del(z);
|
2007-08-21 03:11:32 +02:00
|
|
|
if (ret < 0) return ret;
|
|
|
|
}
|
2021-02-19 07:57:42 +01:00
|
|
|
z->ket = z->c;
|
|
|
|
if (!(eq_s_b(z, 3, s_0))) return 0;
|
|
|
|
z->bra = z->c;
|
|
|
|
{ int ret = slice_from_s(z, 3, s_1);
|
2007-08-21 03:11:32 +02:00
|
|
|
if (ret < 0) return ret;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case 3:
|
2021-02-19 07:57:42 +01:00
|
|
|
{ int ret = slice_del(z);
|
2007-08-21 03:11:32 +02:00
|
|
|
if (ret < 0) return ret;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case 4:
|
2021-02-19 07:57:42 +01:00
|
|
|
if (z->c - 1 <= z->lb || z->p[z->c - 1] != 97) return 0;
|
Sync our Snowball stemmer dictionaries with current upstream.
We haven't touched these since text search functionality landed in core
in 2007 :-(. While the upstream project isn't a beehive of activity,
they do make additions and bug fixes from time to time. Update our
copies of these files.
Also update our documentation about how to keep things in sync, since
they're not making distribution tarballs these days. Fortunately,
their source code turns out to be a breeze to build.
Notable changes:
* The non-UTF8 version of the hungarian stemmer now works in LATIN2
not LATIN1.
* New stemmers have appeared for arabic, indonesian, irish, lithuanian,
nepali, and tamil. These all work in UTF8, and the indonesian and
irish ones also work in LATIN1.
(There are some new stemmers that I did not incorporate, mainly because
their names don't match the underlying languages, suggesting that they're
not to be considered mainstream.)
Worth noting: the upstream Nepali dictionary was contributed by
Arthur Zakirov.
initdb forced because the contents of snowball_create.sql have
changed.
Still TODO: see about updating the stopword lists.
Arthur Zakirov, minor mods and doc work by me
Discussion: https://postgr.es/m/20180626122025.GA12647@zakirov.localdomain
Discussion: https://postgr.es/m/20180219140849.GA9050@zakirov.localdomain
2018-09-24 23:29:08 +02:00
|
|
|
if (!(find_among_b(z, a_1, 6))) return 0;
|
2021-02-19 07:57:42 +01:00
|
|
|
{ int ret = slice_del(z);
|
2007-08-21 03:11:32 +02:00
|
|
|
if (ret < 0) return ret;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case 5:
|
2021-02-19 07:57:42 +01:00
|
|
|
if (z->c - 1 <= z->lb || z->p[z->c - 1] != 228) return 0;
|
Sync our Snowball stemmer dictionaries with current upstream.
We haven't touched these since text search functionality landed in core
in 2007 :-(. While the upstream project isn't a beehive of activity,
they do make additions and bug fixes from time to time. Update our
copies of these files.
Also update our documentation about how to keep things in sync, since
they're not making distribution tarballs these days. Fortunately,
their source code turns out to be a breeze to build.
Notable changes:
* The non-UTF8 version of the hungarian stemmer now works in LATIN2
not LATIN1.
* New stemmers have appeared for arabic, indonesian, irish, lithuanian,
nepali, and tamil. These all work in UTF8, and the indonesian and
irish ones also work in LATIN1.
(There are some new stemmers that I did not incorporate, mainly because
their names don't match the underlying languages, suggesting that they're
not to be considered mainstream.)
Worth noting: the upstream Nepali dictionary was contributed by
Arthur Zakirov.
initdb forced because the contents of snowball_create.sql have
changed.
Still TODO: see about updating the stopword lists.
Arthur Zakirov, minor mods and doc work by me
Discussion: https://postgr.es/m/20180626122025.GA12647@zakirov.localdomain
Discussion: https://postgr.es/m/20180219140849.GA9050@zakirov.localdomain
2018-09-24 23:29:08 +02:00
|
|
|
if (!(find_among_b(z, a_2, 6))) return 0;
|
2021-02-19 07:57:42 +01:00
|
|
|
{ int ret = slice_del(z);
|
2007-08-21 03:11:32 +02:00
|
|
|
if (ret < 0) return ret;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case 6:
|
2021-02-19 07:57:42 +01:00
|
|
|
if (z->c - 2 <= z->lb || z->p[z->c - 1] != 101) return 0;
|
Sync our Snowball stemmer dictionaries with current upstream.
We haven't touched these since text search functionality landed in core
in 2007 :-(. While the upstream project isn't a beehive of activity,
they do make additions and bug fixes from time to time. Update our
copies of these files.
Also update our documentation about how to keep things in sync, since
they're not making distribution tarballs these days. Fortunately,
their source code turns out to be a breeze to build.
Notable changes:
* The non-UTF8 version of the hungarian stemmer now works in LATIN2
not LATIN1.
* New stemmers have appeared for arabic, indonesian, irish, lithuanian,
nepali, and tamil. These all work in UTF8, and the indonesian and
irish ones also work in LATIN1.
(There are some new stemmers that I did not incorporate, mainly because
their names don't match the underlying languages, suggesting that they're
not to be considered mainstream.)
Worth noting: the upstream Nepali dictionary was contributed by
Arthur Zakirov.
initdb forced because the contents of snowball_create.sql have
changed.
Still TODO: see about updating the stopword lists.
Arthur Zakirov, minor mods and doc work by me
Discussion: https://postgr.es/m/20180626122025.GA12647@zakirov.localdomain
Discussion: https://postgr.es/m/20180219140849.GA9050@zakirov.localdomain
2018-09-24 23:29:08 +02:00
|
|
|
if (!(find_among_b(z, a_3, 2))) return 0;
|
2021-02-19 07:57:42 +01:00
|
|
|
{ int ret = slice_del(z);
|
2007-08-21 03:11:32 +02:00
|
|
|
if (ret < 0) return ret;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
2021-02-19 07:57:42 +01:00
|
|
|
static int r_LONG(struct SN_env * z) {
|
|
|
|
if (!(find_among_b(z, a_5, 7))) return 0;
|
2007-08-21 03:11:32 +02:00
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
2021-02-19 07:57:42 +01:00
|
|
|
static int r_VI(struct SN_env * z) {
|
|
|
|
if (z->c <= z->lb || z->p[z->c - 1] != 'i') return 0;
|
Sync our Snowball stemmer dictionaries with current upstream.
We haven't touched these since text search functionality landed in core
in 2007 :-(. While the upstream project isn't a beehive of activity,
they do make additions and bug fixes from time to time. Update our
copies of these files.
Also update our documentation about how to keep things in sync, since
they're not making distribution tarballs these days. Fortunately,
their source code turns out to be a breeze to build.
Notable changes:
* The non-UTF8 version of the hungarian stemmer now works in LATIN2
not LATIN1.
* New stemmers have appeared for arabic, indonesian, irish, lithuanian,
nepali, and tamil. These all work in UTF8, and the indonesian and
irish ones also work in LATIN1.
(There are some new stemmers that I did not incorporate, mainly because
their names don't match the underlying languages, suggesting that they're
not to be considered mainstream.)
Worth noting: the upstream Nepali dictionary was contributed by
Arthur Zakirov.
initdb forced because the contents of snowball_create.sql have
changed.
Still TODO: see about updating the stopword lists.
Arthur Zakirov, minor mods and doc work by me
Discussion: https://postgr.es/m/20180626122025.GA12647@zakirov.localdomain
Discussion: https://postgr.es/m/20180219140849.GA9050@zakirov.localdomain
2018-09-24 23:29:08 +02:00
|
|
|
z->c--;
|
2021-02-19 07:57:42 +01:00
|
|
|
if (in_grouping_b(z, g_V2, 97, 246, 0)) return 0;
|
2007-08-21 03:11:32 +02:00
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
2021-02-19 07:57:42 +01:00
|
|
|
static int r_case_ending(struct SN_env * z) {
|
2007-08-21 03:11:32 +02:00
|
|
|
int among_var;
|
Sync our Snowball stemmer dictionaries with current upstream.
We haven't touched these since text search functionality landed in core
in 2007 :-(. While the upstream project isn't a beehive of activity,
they do make additions and bug fixes from time to time. Update our
copies of these files.
Also update our documentation about how to keep things in sync, since
they're not making distribution tarballs these days. Fortunately,
their source code turns out to be a breeze to build.
Notable changes:
* The non-UTF8 version of the hungarian stemmer now works in LATIN2
not LATIN1.
* New stemmers have appeared for arabic, indonesian, irish, lithuanian,
nepali, and tamil. These all work in UTF8, and the indonesian and
irish ones also work in LATIN1.
(There are some new stemmers that I did not incorporate, mainly because
their names don't match the underlying languages, suggesting that they're
not to be considered mainstream.)
Worth noting: the upstream Nepali dictionary was contributed by
Arthur Zakirov.
initdb forced because the contents of snowball_create.sql have
changed.
Still TODO: see about updating the stopword lists.
Arthur Zakirov, minor mods and doc work by me
Discussion: https://postgr.es/m/20180626122025.GA12647@zakirov.localdomain
Discussion: https://postgr.es/m/20180219140849.GA9050@zakirov.localdomain
2018-09-24 23:29:08 +02:00
|
|
|
|
2021-02-19 07:57:42 +01:00
|
|
|
{ int mlimit1;
|
|
|
|
if (z->c < z->I[1]) return 0;
|
|
|
|
mlimit1 = z->lb; z->lb = z->I[1];
|
|
|
|
z->ket = z->c;
|
|
|
|
among_var = find_among_b(z, a_6, 30);
|
Sync our Snowball stemmer dictionaries with current upstream.
We haven't touched these since text search functionality landed in core
in 2007 :-(. While the upstream project isn't a beehive of activity,
they do make additions and bug fixes from time to time. Update our
copies of these files.
Also update our documentation about how to keep things in sync, since
they're not making distribution tarballs these days. Fortunately,
their source code turns out to be a breeze to build.
Notable changes:
* The non-UTF8 version of the hungarian stemmer now works in LATIN2
not LATIN1.
* New stemmers have appeared for arabic, indonesian, irish, lithuanian,
nepali, and tamil. These all work in UTF8, and the indonesian and
irish ones also work in LATIN1.
(There are some new stemmers that I did not incorporate, mainly because
their names don't match the underlying languages, suggesting that they're
not to be considered mainstream.)
Worth noting: the upstream Nepali dictionary was contributed by
Arthur Zakirov.
initdb forced because the contents of snowball_create.sql have
changed.
Still TODO: see about updating the stopword lists.
Arthur Zakirov, minor mods and doc work by me
Discussion: https://postgr.es/m/20180626122025.GA12647@zakirov.localdomain
Discussion: https://postgr.es/m/20180219140849.GA9050@zakirov.localdomain
2018-09-24 23:29:08 +02:00
|
|
|
if (!(among_var)) { z->lb = mlimit1; return 0; }
|
2021-02-19 07:57:42 +01:00
|
|
|
z->bra = z->c;
|
Sync our Snowball stemmer dictionaries with current upstream.
We haven't touched these since text search functionality landed in core
in 2007 :-(. While the upstream project isn't a beehive of activity,
they do make additions and bug fixes from time to time. Update our
copies of these files.
Also update our documentation about how to keep things in sync, since
they're not making distribution tarballs these days. Fortunately,
their source code turns out to be a breeze to build.
Notable changes:
* The non-UTF8 version of the hungarian stemmer now works in LATIN2
not LATIN1.
* New stemmers have appeared for arabic, indonesian, irish, lithuanian,
nepali, and tamil. These all work in UTF8, and the indonesian and
irish ones also work in LATIN1.
(There are some new stemmers that I did not incorporate, mainly because
their names don't match the underlying languages, suggesting that they're
not to be considered mainstream.)
Worth noting: the upstream Nepali dictionary was contributed by
Arthur Zakirov.
initdb forced because the contents of snowball_create.sql have
changed.
Still TODO: see about updating the stopword lists.
Arthur Zakirov, minor mods and doc work by me
Discussion: https://postgr.es/m/20180626122025.GA12647@zakirov.localdomain
Discussion: https://postgr.es/m/20180219140849.GA9050@zakirov.localdomain
2018-09-24 23:29:08 +02:00
|
|
|
z->lb = mlimit1;
|
2007-08-21 03:11:32 +02:00
|
|
|
}
|
2021-02-19 07:57:42 +01:00
|
|
|
switch (among_var) {
|
2007-08-21 03:11:32 +02:00
|
|
|
case 1:
|
2021-02-19 07:57:42 +01:00
|
|
|
if (z->c <= z->lb || z->p[z->c - 1] != 'a') return 0;
|
Sync our Snowball stemmer dictionaries with current upstream.
We haven't touched these since text search functionality landed in core
in 2007 :-(. While the upstream project isn't a beehive of activity,
they do make additions and bug fixes from time to time. Update our
copies of these files.
Also update our documentation about how to keep things in sync, since
they're not making distribution tarballs these days. Fortunately,
their source code turns out to be a breeze to build.
Notable changes:
* The non-UTF8 version of the hungarian stemmer now works in LATIN2
not LATIN1.
* New stemmers have appeared for arabic, indonesian, irish, lithuanian,
nepali, and tamil. These all work in UTF8, and the indonesian and
irish ones also work in LATIN1.
(There are some new stemmers that I did not incorporate, mainly because
their names don't match the underlying languages, suggesting that they're
not to be considered mainstream.)
Worth noting: the upstream Nepali dictionary was contributed by
Arthur Zakirov.
initdb forced because the contents of snowball_create.sql have
changed.
Still TODO: see about updating the stopword lists.
Arthur Zakirov, minor mods and doc work by me
Discussion: https://postgr.es/m/20180626122025.GA12647@zakirov.localdomain
Discussion: https://postgr.es/m/20180219140849.GA9050@zakirov.localdomain
2018-09-24 23:29:08 +02:00
|
|
|
z->c--;
|
2007-08-21 03:11:32 +02:00
|
|
|
break;
|
|
|
|
case 2:
|
2021-02-19 07:57:42 +01:00
|
|
|
if (z->c <= z->lb || z->p[z->c - 1] != 'e') return 0;
|
Sync our Snowball stemmer dictionaries with current upstream.
We haven't touched these since text search functionality landed in core
in 2007 :-(. While the upstream project isn't a beehive of activity,
they do make additions and bug fixes from time to time. Update our
copies of these files.
Also update our documentation about how to keep things in sync, since
they're not making distribution tarballs these days. Fortunately,
their source code turns out to be a breeze to build.
Notable changes:
* The non-UTF8 version of the hungarian stemmer now works in LATIN2
not LATIN1.
* New stemmers have appeared for arabic, indonesian, irish, lithuanian,
nepali, and tamil. These all work in UTF8, and the indonesian and
irish ones also work in LATIN1.
(There are some new stemmers that I did not incorporate, mainly because
their names don't match the underlying languages, suggesting that they're
not to be considered mainstream.)
Worth noting: the upstream Nepali dictionary was contributed by
Arthur Zakirov.
initdb forced because the contents of snowball_create.sql have
changed.
Still TODO: see about updating the stopword lists.
Arthur Zakirov, minor mods and doc work by me
Discussion: https://postgr.es/m/20180626122025.GA12647@zakirov.localdomain
Discussion: https://postgr.es/m/20180219140849.GA9050@zakirov.localdomain
2018-09-24 23:29:08 +02:00
|
|
|
z->c--;
|
2007-08-21 03:11:32 +02:00
|
|
|
break;
|
|
|
|
case 3:
|
2021-02-19 07:57:42 +01:00
|
|
|
if (z->c <= z->lb || z->p[z->c - 1] != 'i') return 0;
|
Sync our Snowball stemmer dictionaries with current upstream.
We haven't touched these since text search functionality landed in core
in 2007 :-(. While the upstream project isn't a beehive of activity,
they do make additions and bug fixes from time to time. Update our
copies of these files.
Also update our documentation about how to keep things in sync, since
they're not making distribution tarballs these days. Fortunately,
their source code turns out to be a breeze to build.
Notable changes:
* The non-UTF8 version of the hungarian stemmer now works in LATIN2
not LATIN1.
* New stemmers have appeared for arabic, indonesian, irish, lithuanian,
nepali, and tamil. These all work in UTF8, and the indonesian and
irish ones also work in LATIN1.
(There are some new stemmers that I did not incorporate, mainly because
their names don't match the underlying languages, suggesting that they're
not to be considered mainstream.)
Worth noting: the upstream Nepali dictionary was contributed by
Arthur Zakirov.
initdb forced because the contents of snowball_create.sql have
changed.
Still TODO: see about updating the stopword lists.
Arthur Zakirov, minor mods and doc work by me
Discussion: https://postgr.es/m/20180626122025.GA12647@zakirov.localdomain
Discussion: https://postgr.es/m/20180219140849.GA9050@zakirov.localdomain
2018-09-24 23:29:08 +02:00
|
|
|
z->c--;
|
2007-08-21 03:11:32 +02:00
|
|
|
break;
|
|
|
|
case 4:
|
2021-02-19 07:57:42 +01:00
|
|
|
if (z->c <= z->lb || z->p[z->c - 1] != 'o') return 0;
|
Sync our Snowball stemmer dictionaries with current upstream.
We haven't touched these since text search functionality landed in core
in 2007 :-(. While the upstream project isn't a beehive of activity,
they do make additions and bug fixes from time to time. Update our
copies of these files.
Also update our documentation about how to keep things in sync, since
they're not making distribution tarballs these days. Fortunately,
their source code turns out to be a breeze to build.
Notable changes:
* The non-UTF8 version of the hungarian stemmer now works in LATIN2
not LATIN1.
* New stemmers have appeared for arabic, indonesian, irish, lithuanian,
nepali, and tamil. These all work in UTF8, and the indonesian and
irish ones also work in LATIN1.
(There are some new stemmers that I did not incorporate, mainly because
their names don't match the underlying languages, suggesting that they're
not to be considered mainstream.)
Worth noting: the upstream Nepali dictionary was contributed by
Arthur Zakirov.
initdb forced because the contents of snowball_create.sql have
changed.
Still TODO: see about updating the stopword lists.
Arthur Zakirov, minor mods and doc work by me
Discussion: https://postgr.es/m/20180626122025.GA12647@zakirov.localdomain
Discussion: https://postgr.es/m/20180219140849.GA9050@zakirov.localdomain
2018-09-24 23:29:08 +02:00
|
|
|
z->c--;
|
2007-08-21 03:11:32 +02:00
|
|
|
break;
|
|
|
|
case 5:
|
2021-02-19 07:57:42 +01:00
|
|
|
if (z->c <= z->lb || z->p[z->c - 1] != 0xE4) return 0;
|
Sync our Snowball stemmer dictionaries with current upstream.
We haven't touched these since text search functionality landed in core
in 2007 :-(. While the upstream project isn't a beehive of activity,
they do make additions and bug fixes from time to time. Update our
copies of these files.
Also update our documentation about how to keep things in sync, since
they're not making distribution tarballs these days. Fortunately,
their source code turns out to be a breeze to build.
Notable changes:
* The non-UTF8 version of the hungarian stemmer now works in LATIN2
not LATIN1.
* New stemmers have appeared for arabic, indonesian, irish, lithuanian,
nepali, and tamil. These all work in UTF8, and the indonesian and
irish ones also work in LATIN1.
(There are some new stemmers that I did not incorporate, mainly because
their names don't match the underlying languages, suggesting that they're
not to be considered mainstream.)
Worth noting: the upstream Nepali dictionary was contributed by
Arthur Zakirov.
initdb forced because the contents of snowball_create.sql have
changed.
Still TODO: see about updating the stopword lists.
Arthur Zakirov, minor mods and doc work by me
Discussion: https://postgr.es/m/20180626122025.GA12647@zakirov.localdomain
Discussion: https://postgr.es/m/20180219140849.GA9050@zakirov.localdomain
2018-09-24 23:29:08 +02:00
|
|
|
z->c--;
|
2007-08-21 03:11:32 +02:00
|
|
|
break;
|
|
|
|
case 6:
|
2021-02-19 07:57:42 +01:00
|
|
|
if (z->c <= z->lb || z->p[z->c - 1] != 0xF6) return 0;
|
Sync our Snowball stemmer dictionaries with current upstream.
We haven't touched these since text search functionality landed in core
in 2007 :-(. While the upstream project isn't a beehive of activity,
they do make additions and bug fixes from time to time. Update our
copies of these files.
Also update our documentation about how to keep things in sync, since
they're not making distribution tarballs these days. Fortunately,
their source code turns out to be a breeze to build.
Notable changes:
* The non-UTF8 version of the hungarian stemmer now works in LATIN2
not LATIN1.
* New stemmers have appeared for arabic, indonesian, irish, lithuanian,
nepali, and tamil. These all work in UTF8, and the indonesian and
irish ones also work in LATIN1.
(There are some new stemmers that I did not incorporate, mainly because
their names don't match the underlying languages, suggesting that they're
not to be considered mainstream.)
Worth noting: the upstream Nepali dictionary was contributed by
Arthur Zakirov.
initdb forced because the contents of snowball_create.sql have
changed.
Still TODO: see about updating the stopword lists.
Arthur Zakirov, minor mods and doc work by me
Discussion: https://postgr.es/m/20180626122025.GA12647@zakirov.localdomain
Discussion: https://postgr.es/m/20180219140849.GA9050@zakirov.localdomain
2018-09-24 23:29:08 +02:00
|
|
|
z->c--;
|
2007-08-21 03:11:32 +02:00
|
|
|
break;
|
|
|
|
case 7:
|
2021-02-19 07:57:42 +01:00
|
|
|
{ int m2 = z->l - z->c; (void)m2;
|
|
|
|
{ int m3 = z->l - z->c; (void)m3;
|
|
|
|
{ int m4 = z->l - z->c; (void)m4;
|
|
|
|
{ int ret = r_LONG(z);
|
Sync our Snowball stemmer dictionaries with current upstream.
We haven't touched these since text search functionality landed in core
in 2007 :-(. While the upstream project isn't a beehive of activity,
they do make additions and bug fixes from time to time. Update our
copies of these files.
Also update our documentation about how to keep things in sync, since
they're not making distribution tarballs these days. Fortunately,
their source code turns out to be a breeze to build.
Notable changes:
* The non-UTF8 version of the hungarian stemmer now works in LATIN2
not LATIN1.
* New stemmers have appeared for arabic, indonesian, irish, lithuanian,
nepali, and tamil. These all work in UTF8, and the indonesian and
irish ones also work in LATIN1.
(There are some new stemmers that I did not incorporate, mainly because
their names don't match the underlying languages, suggesting that they're
not to be considered mainstream.)
Worth noting: the upstream Nepali dictionary was contributed by
Arthur Zakirov.
initdb forced because the contents of snowball_create.sql have
changed.
Still TODO: see about updating the stopword lists.
Arthur Zakirov, minor mods and doc work by me
Discussion: https://postgr.es/m/20180626122025.GA12647@zakirov.localdomain
Discussion: https://postgr.es/m/20180219140849.GA9050@zakirov.localdomain
2018-09-24 23:29:08 +02:00
|
|
|
if (ret == 0) goto lab2;
|
2007-08-21 03:11:32 +02:00
|
|
|
if (ret < 0) return ret;
|
|
|
|
}
|
|
|
|
goto lab1;
|
|
|
|
lab2:
|
Sync our Snowball stemmer dictionaries with current upstream.
We haven't touched these since text search functionality landed in core
in 2007 :-(. While the upstream project isn't a beehive of activity,
they do make additions and bug fixes from time to time. Update our
copies of these files.
Also update our documentation about how to keep things in sync, since
they're not making distribution tarballs these days. Fortunately,
their source code turns out to be a breeze to build.
Notable changes:
* The non-UTF8 version of the hungarian stemmer now works in LATIN2
not LATIN1.
* New stemmers have appeared for arabic, indonesian, irish, lithuanian,
nepali, and tamil. These all work in UTF8, and the indonesian and
irish ones also work in LATIN1.
(There are some new stemmers that I did not incorporate, mainly because
their names don't match the underlying languages, suggesting that they're
not to be considered mainstream.)
Worth noting: the upstream Nepali dictionary was contributed by
Arthur Zakirov.
initdb forced because the contents of snowball_create.sql have
changed.
Still TODO: see about updating the stopword lists.
Arthur Zakirov, minor mods and doc work by me
Discussion: https://postgr.es/m/20180626122025.GA12647@zakirov.localdomain
Discussion: https://postgr.es/m/20180219140849.GA9050@zakirov.localdomain
2018-09-24 23:29:08 +02:00
|
|
|
z->c = z->l - m4;
|
2021-02-19 07:57:42 +01:00
|
|
|
if (!(eq_s_b(z, 2, s_2))) { z->c = z->l - m2; goto lab0; }
|
2007-08-21 03:11:32 +02:00
|
|
|
}
|
|
|
|
lab1:
|
Sync our Snowball stemmer dictionaries with current upstream.
We haven't touched these since text search functionality landed in core
in 2007 :-(. While the upstream project isn't a beehive of activity,
they do make additions and bug fixes from time to time. Update our
copies of these files.
Also update our documentation about how to keep things in sync, since
they're not making distribution tarballs these days. Fortunately,
their source code turns out to be a breeze to build.
Notable changes:
* The non-UTF8 version of the hungarian stemmer now works in LATIN2
not LATIN1.
* New stemmers have appeared for arabic, indonesian, irish, lithuanian,
nepali, and tamil. These all work in UTF8, and the indonesian and
irish ones also work in LATIN1.
(There are some new stemmers that I did not incorporate, mainly because
their names don't match the underlying languages, suggesting that they're
not to be considered mainstream.)
Worth noting: the upstream Nepali dictionary was contributed by
Arthur Zakirov.
initdb forced because the contents of snowball_create.sql have
changed.
Still TODO: see about updating the stopword lists.
Arthur Zakirov, minor mods and doc work by me
Discussion: https://postgr.es/m/20180626122025.GA12647@zakirov.localdomain
Discussion: https://postgr.es/m/20180219140849.GA9050@zakirov.localdomain
2018-09-24 23:29:08 +02:00
|
|
|
z->c = z->l - m3;
|
|
|
|
if (z->c <= z->lb) { z->c = z->l - m2; goto lab0; }
|
2021-02-19 07:57:42 +01:00
|
|
|
z->c--;
|
2007-08-21 03:11:32 +02:00
|
|
|
}
|
2021-02-19 07:57:42 +01:00
|
|
|
z->bra = z->c;
|
2007-08-21 03:11:32 +02:00
|
|
|
lab0:
|
|
|
|
;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case 8:
|
2021-02-19 07:57:42 +01:00
|
|
|
if (in_grouping_b(z, g_V1, 97, 246, 0)) return 0;
|
|
|
|
if (in_grouping_b(z, g_C, 98, 122, 0)) return 0;
|
2007-08-21 03:11:32 +02:00
|
|
|
break;
|
|
|
|
}
|
2021-02-19 07:57:42 +01:00
|
|
|
{ int ret = slice_del(z);
|
2007-08-21 03:11:32 +02:00
|
|
|
if (ret < 0) return ret;
|
|
|
|
}
|
2021-02-19 07:57:42 +01:00
|
|
|
z->I[2] = 1;
|
2007-08-21 03:11:32 +02:00
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
2021-02-19 07:57:42 +01:00
|
|
|
static int r_other_endings(struct SN_env * z) {
|
2007-08-21 03:11:32 +02:00
|
|
|
int among_var;
|
Sync our Snowball stemmer dictionaries with current upstream.
We haven't touched these since text search functionality landed in core
in 2007 :-(. While the upstream project isn't a beehive of activity,
they do make additions and bug fixes from time to time. Update our
copies of these files.
Also update our documentation about how to keep things in sync, since
they're not making distribution tarballs these days. Fortunately,
their source code turns out to be a breeze to build.
Notable changes:
* The non-UTF8 version of the hungarian stemmer now works in LATIN2
not LATIN1.
* New stemmers have appeared for arabic, indonesian, irish, lithuanian,
nepali, and tamil. These all work in UTF8, and the indonesian and
irish ones also work in LATIN1.
(There are some new stemmers that I did not incorporate, mainly because
their names don't match the underlying languages, suggesting that they're
not to be considered mainstream.)
Worth noting: the upstream Nepali dictionary was contributed by
Arthur Zakirov.
initdb forced because the contents of snowball_create.sql have
changed.
Still TODO: see about updating the stopword lists.
Arthur Zakirov, minor mods and doc work by me
Discussion: https://postgr.es/m/20180626122025.GA12647@zakirov.localdomain
Discussion: https://postgr.es/m/20180219140849.GA9050@zakirov.localdomain
2018-09-24 23:29:08 +02:00
|
|
|
|
2021-02-19 07:57:42 +01:00
|
|
|
{ int mlimit1;
|
|
|
|
if (z->c < z->I[0]) return 0;
|
|
|
|
mlimit1 = z->lb; z->lb = z->I[0];
|
|
|
|
z->ket = z->c;
|
|
|
|
among_var = find_among_b(z, a_7, 14);
|
Sync our Snowball stemmer dictionaries with current upstream.
We haven't touched these since text search functionality landed in core
in 2007 :-(. While the upstream project isn't a beehive of activity,
they do make additions and bug fixes from time to time. Update our
copies of these files.
Also update our documentation about how to keep things in sync, since
they're not making distribution tarballs these days. Fortunately,
their source code turns out to be a breeze to build.
Notable changes:
* The non-UTF8 version of the hungarian stemmer now works in LATIN2
not LATIN1.
* New stemmers have appeared for arabic, indonesian, irish, lithuanian,
nepali, and tamil. These all work in UTF8, and the indonesian and
irish ones also work in LATIN1.
(There are some new stemmers that I did not incorporate, mainly because
their names don't match the underlying languages, suggesting that they're
not to be considered mainstream.)
Worth noting: the upstream Nepali dictionary was contributed by
Arthur Zakirov.
initdb forced because the contents of snowball_create.sql have
changed.
Still TODO: see about updating the stopword lists.
Arthur Zakirov, minor mods and doc work by me
Discussion: https://postgr.es/m/20180626122025.GA12647@zakirov.localdomain
Discussion: https://postgr.es/m/20180219140849.GA9050@zakirov.localdomain
2018-09-24 23:29:08 +02:00
|
|
|
if (!(among_var)) { z->lb = mlimit1; return 0; }
|
2021-02-19 07:57:42 +01:00
|
|
|
z->bra = z->c;
|
Sync our Snowball stemmer dictionaries with current upstream.
We haven't touched these since text search functionality landed in core
in 2007 :-(. While the upstream project isn't a beehive of activity,
they do make additions and bug fixes from time to time. Update our
copies of these files.
Also update our documentation about how to keep things in sync, since
they're not making distribution tarballs these days. Fortunately,
their source code turns out to be a breeze to build.
Notable changes:
* The non-UTF8 version of the hungarian stemmer now works in LATIN2
not LATIN1.
* New stemmers have appeared for arabic, indonesian, irish, lithuanian,
nepali, and tamil. These all work in UTF8, and the indonesian and
irish ones also work in LATIN1.
(There are some new stemmers that I did not incorporate, mainly because
their names don't match the underlying languages, suggesting that they're
not to be considered mainstream.)
Worth noting: the upstream Nepali dictionary was contributed by
Arthur Zakirov.
initdb forced because the contents of snowball_create.sql have
changed.
Still TODO: see about updating the stopword lists.
Arthur Zakirov, minor mods and doc work by me
Discussion: https://postgr.es/m/20180626122025.GA12647@zakirov.localdomain
Discussion: https://postgr.es/m/20180219140849.GA9050@zakirov.localdomain
2018-09-24 23:29:08 +02:00
|
|
|
z->lb = mlimit1;
|
2007-08-21 03:11:32 +02:00
|
|
|
}
|
2021-02-19 07:57:42 +01:00
|
|
|
switch (among_var) {
|
2007-08-21 03:11:32 +02:00
|
|
|
case 1:
|
2021-02-19 07:57:42 +01:00
|
|
|
{ int m2 = z->l - z->c; (void)m2;
|
|
|
|
if (!(eq_s_b(z, 2, s_3))) goto lab0;
|
2007-08-21 03:11:32 +02:00
|
|
|
return 0;
|
|
|
|
lab0:
|
|
|
|
z->c = z->l - m2;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
2021-02-19 07:57:42 +01:00
|
|
|
{ int ret = slice_del(z);
|
2007-08-21 03:11:32 +02:00
|
|
|
if (ret < 0) return ret;
|
|
|
|
}
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
2021-02-19 07:57:42 +01:00
|
|
|
static int r_i_plural(struct SN_env * z) {
|
Sync our Snowball stemmer dictionaries with current upstream.
We haven't touched these since text search functionality landed in core
in 2007 :-(. While the upstream project isn't a beehive of activity,
they do make additions and bug fixes from time to time. Update our
copies of these files.
Also update our documentation about how to keep things in sync, since
they're not making distribution tarballs these days. Fortunately,
their source code turns out to be a breeze to build.
Notable changes:
* The non-UTF8 version of the hungarian stemmer now works in LATIN2
not LATIN1.
* New stemmers have appeared for arabic, indonesian, irish, lithuanian,
nepali, and tamil. These all work in UTF8, and the indonesian and
irish ones also work in LATIN1.
(There are some new stemmers that I did not incorporate, mainly because
their names don't match the underlying languages, suggesting that they're
not to be considered mainstream.)
Worth noting: the upstream Nepali dictionary was contributed by
Arthur Zakirov.
initdb forced because the contents of snowball_create.sql have
changed.
Still TODO: see about updating the stopword lists.
Arthur Zakirov, minor mods and doc work by me
Discussion: https://postgr.es/m/20180626122025.GA12647@zakirov.localdomain
Discussion: https://postgr.es/m/20180219140849.GA9050@zakirov.localdomain
2018-09-24 23:29:08 +02:00
|
|
|
|
2021-02-19 07:57:42 +01:00
|
|
|
{ int mlimit1;
|
|
|
|
if (z->c < z->I[1]) return 0;
|
|
|
|
mlimit1 = z->lb; z->lb = z->I[1];
|
|
|
|
z->ket = z->c;
|
|
|
|
if (z->c <= z->lb || (z->p[z->c - 1] != 105 && z->p[z->c - 1] != 106)) { z->lb = mlimit1; return 0; }
|
Sync our Snowball stemmer dictionaries with current upstream.
We haven't touched these since text search functionality landed in core
in 2007 :-(. While the upstream project isn't a beehive of activity,
they do make additions and bug fixes from time to time. Update our
copies of these files.
Also update our documentation about how to keep things in sync, since
they're not making distribution tarballs these days. Fortunately,
their source code turns out to be a breeze to build.
Notable changes:
* The non-UTF8 version of the hungarian stemmer now works in LATIN2
not LATIN1.
* New stemmers have appeared for arabic, indonesian, irish, lithuanian,
nepali, and tamil. These all work in UTF8, and the indonesian and
irish ones also work in LATIN1.
(There are some new stemmers that I did not incorporate, mainly because
their names don't match the underlying languages, suggesting that they're
not to be considered mainstream.)
Worth noting: the upstream Nepali dictionary was contributed by
Arthur Zakirov.
initdb forced because the contents of snowball_create.sql have
changed.
Still TODO: see about updating the stopword lists.
Arthur Zakirov, minor mods and doc work by me
Discussion: https://postgr.es/m/20180626122025.GA12647@zakirov.localdomain
Discussion: https://postgr.es/m/20180219140849.GA9050@zakirov.localdomain
2018-09-24 23:29:08 +02:00
|
|
|
if (!(find_among_b(z, a_8, 2))) { z->lb = mlimit1; return 0; }
|
2021-02-19 07:57:42 +01:00
|
|
|
z->bra = z->c;
|
Sync our Snowball stemmer dictionaries with current upstream.
We haven't touched these since text search functionality landed in core
in 2007 :-(. While the upstream project isn't a beehive of activity,
they do make additions and bug fixes from time to time. Update our
copies of these files.
Also update our documentation about how to keep things in sync, since
they're not making distribution tarballs these days. Fortunately,
their source code turns out to be a breeze to build.
Notable changes:
* The non-UTF8 version of the hungarian stemmer now works in LATIN2
not LATIN1.
* New stemmers have appeared for arabic, indonesian, irish, lithuanian,
nepali, and tamil. These all work in UTF8, and the indonesian and
irish ones also work in LATIN1.
(There are some new stemmers that I did not incorporate, mainly because
their names don't match the underlying languages, suggesting that they're
not to be considered mainstream.)
Worth noting: the upstream Nepali dictionary was contributed by
Arthur Zakirov.
initdb forced because the contents of snowball_create.sql have
changed.
Still TODO: see about updating the stopword lists.
Arthur Zakirov, minor mods and doc work by me
Discussion: https://postgr.es/m/20180626122025.GA12647@zakirov.localdomain
Discussion: https://postgr.es/m/20180219140849.GA9050@zakirov.localdomain
2018-09-24 23:29:08 +02:00
|
|
|
z->lb = mlimit1;
|
2007-08-21 03:11:32 +02:00
|
|
|
}
|
2021-02-19 07:57:42 +01:00
|
|
|
{ int ret = slice_del(z);
|
2007-08-21 03:11:32 +02:00
|
|
|
if (ret < 0) return ret;
|
|
|
|
}
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
2021-02-19 07:57:42 +01:00
|
|
|
static int r_t_plural(struct SN_env * z) {
|
2007-08-21 03:11:32 +02:00
|
|
|
int among_var;
|
Sync our Snowball stemmer dictionaries with current upstream.
We haven't touched these since text search functionality landed in core
in 2007 :-(. While the upstream project isn't a beehive of activity,
they do make additions and bug fixes from time to time. Update our
copies of these files.
Also update our documentation about how to keep things in sync, since
they're not making distribution tarballs these days. Fortunately,
their source code turns out to be a breeze to build.
Notable changes:
* The non-UTF8 version of the hungarian stemmer now works in LATIN2
not LATIN1.
* New stemmers have appeared for arabic, indonesian, irish, lithuanian,
nepali, and tamil. These all work in UTF8, and the indonesian and
irish ones also work in LATIN1.
(There are some new stemmers that I did not incorporate, mainly because
their names don't match the underlying languages, suggesting that they're
not to be considered mainstream.)
Worth noting: the upstream Nepali dictionary was contributed by
Arthur Zakirov.
initdb forced because the contents of snowball_create.sql have
changed.
Still TODO: see about updating the stopword lists.
Arthur Zakirov, minor mods and doc work by me
Discussion: https://postgr.es/m/20180626122025.GA12647@zakirov.localdomain
Discussion: https://postgr.es/m/20180219140849.GA9050@zakirov.localdomain
2018-09-24 23:29:08 +02:00
|
|
|
|
2021-02-19 07:57:42 +01:00
|
|
|
{ int mlimit1;
|
|
|
|
if (z->c < z->I[1]) return 0;
|
|
|
|
mlimit1 = z->lb; z->lb = z->I[1];
|
|
|
|
z->ket = z->c;
|
|
|
|
if (z->c <= z->lb || z->p[z->c - 1] != 't') { z->lb = mlimit1; return 0; }
|
Sync our Snowball stemmer dictionaries with current upstream.
We haven't touched these since text search functionality landed in core
in 2007 :-(. While the upstream project isn't a beehive of activity,
they do make additions and bug fixes from time to time. Update our
copies of these files.
Also update our documentation about how to keep things in sync, since
they're not making distribution tarballs these days. Fortunately,
their source code turns out to be a breeze to build.
Notable changes:
* The non-UTF8 version of the hungarian stemmer now works in LATIN2
not LATIN1.
* New stemmers have appeared for arabic, indonesian, irish, lithuanian,
nepali, and tamil. These all work in UTF8, and the indonesian and
irish ones also work in LATIN1.
(There are some new stemmers that I did not incorporate, mainly because
their names don't match the underlying languages, suggesting that they're
not to be considered mainstream.)
Worth noting: the upstream Nepali dictionary was contributed by
Arthur Zakirov.
initdb forced because the contents of snowball_create.sql have
changed.
Still TODO: see about updating the stopword lists.
Arthur Zakirov, minor mods and doc work by me
Discussion: https://postgr.es/m/20180626122025.GA12647@zakirov.localdomain
Discussion: https://postgr.es/m/20180219140849.GA9050@zakirov.localdomain
2018-09-24 23:29:08 +02:00
|
|
|
z->c--;
|
2021-02-19 07:57:42 +01:00
|
|
|
z->bra = z->c;
|
|
|
|
{ int m_test2 = z->l - z->c;
|
|
|
|
if (in_grouping_b(z, g_V1, 97, 246, 0)) { z->lb = mlimit1; return 0; }
|
Sync our Snowball stemmer dictionaries with current upstream.
We haven't touched these since text search functionality landed in core
in 2007 :-(. While the upstream project isn't a beehive of activity,
they do make additions and bug fixes from time to time. Update our
copies of these files.
Also update our documentation about how to keep things in sync, since
they're not making distribution tarballs these days. Fortunately,
their source code turns out to be a breeze to build.
Notable changes:
* The non-UTF8 version of the hungarian stemmer now works in LATIN2
not LATIN1.
* New stemmers have appeared for arabic, indonesian, irish, lithuanian,
nepali, and tamil. These all work in UTF8, and the indonesian and
irish ones also work in LATIN1.
(There are some new stemmers that I did not incorporate, mainly because
their names don't match the underlying languages, suggesting that they're
not to be considered mainstream.)
Worth noting: the upstream Nepali dictionary was contributed by
Arthur Zakirov.
initdb forced because the contents of snowball_create.sql have
changed.
Still TODO: see about updating the stopword lists.
Arthur Zakirov, minor mods and doc work by me
Discussion: https://postgr.es/m/20180626122025.GA12647@zakirov.localdomain
Discussion: https://postgr.es/m/20180219140849.GA9050@zakirov.localdomain
2018-09-24 23:29:08 +02:00
|
|
|
z->c = z->l - m_test2;
|
2007-08-21 03:11:32 +02:00
|
|
|
}
|
2021-02-19 07:57:42 +01:00
|
|
|
{ int ret = slice_del(z);
|
2007-08-21 03:11:32 +02:00
|
|
|
if (ret < 0) return ret;
|
|
|
|
}
|
Sync our Snowball stemmer dictionaries with current upstream.
We haven't touched these since text search functionality landed in core
in 2007 :-(. While the upstream project isn't a beehive of activity,
they do make additions and bug fixes from time to time. Update our
copies of these files.
Also update our documentation about how to keep things in sync, since
they're not making distribution tarballs these days. Fortunately,
their source code turns out to be a breeze to build.
Notable changes:
* The non-UTF8 version of the hungarian stemmer now works in LATIN2
not LATIN1.
* New stemmers have appeared for arabic, indonesian, irish, lithuanian,
nepali, and tamil. These all work in UTF8, and the indonesian and
irish ones also work in LATIN1.
(There are some new stemmers that I did not incorporate, mainly because
their names don't match the underlying languages, suggesting that they're
not to be considered mainstream.)
Worth noting: the upstream Nepali dictionary was contributed by
Arthur Zakirov.
initdb forced because the contents of snowball_create.sql have
changed.
Still TODO: see about updating the stopword lists.
Arthur Zakirov, minor mods and doc work by me
Discussion: https://postgr.es/m/20180626122025.GA12647@zakirov.localdomain
Discussion: https://postgr.es/m/20180219140849.GA9050@zakirov.localdomain
2018-09-24 23:29:08 +02:00
|
|
|
z->lb = mlimit1;
|
2007-08-21 03:11:32 +02:00
|
|
|
}
|
Sync our Snowball stemmer dictionaries with current upstream.
We haven't touched these since text search functionality landed in core
in 2007 :-(. While the upstream project isn't a beehive of activity,
they do make additions and bug fixes from time to time. Update our
copies of these files.
Also update our documentation about how to keep things in sync, since
they're not making distribution tarballs these days. Fortunately,
their source code turns out to be a breeze to build.
Notable changes:
* The non-UTF8 version of the hungarian stemmer now works in LATIN2
not LATIN1.
* New stemmers have appeared for arabic, indonesian, irish, lithuanian,
nepali, and tamil. These all work in UTF8, and the indonesian and
irish ones also work in LATIN1.
(There are some new stemmers that I did not incorporate, mainly because
their names don't match the underlying languages, suggesting that they're
not to be considered mainstream.)
Worth noting: the upstream Nepali dictionary was contributed by
Arthur Zakirov.
initdb forced because the contents of snowball_create.sql have
changed.
Still TODO: see about updating the stopword lists.
Arthur Zakirov, minor mods and doc work by me
Discussion: https://postgr.es/m/20180626122025.GA12647@zakirov.localdomain
Discussion: https://postgr.es/m/20180219140849.GA9050@zakirov.localdomain
2018-09-24 23:29:08 +02:00
|
|
|
|
2021-02-19 07:57:42 +01:00
|
|
|
{ int mlimit3;
|
|
|
|
if (z->c < z->I[0]) return 0;
|
|
|
|
mlimit3 = z->lb; z->lb = z->I[0];
|
|
|
|
z->ket = z->c;
|
|
|
|
if (z->c - 2 <= z->lb || z->p[z->c - 1] != 97) { z->lb = mlimit3; return 0; }
|
Sync our Snowball stemmer dictionaries with current upstream.
We haven't touched these since text search functionality landed in core
in 2007 :-(. While the upstream project isn't a beehive of activity,
they do make additions and bug fixes from time to time. Update our
copies of these files.
Also update our documentation about how to keep things in sync, since
they're not making distribution tarballs these days. Fortunately,
their source code turns out to be a breeze to build.
Notable changes:
* The non-UTF8 version of the hungarian stemmer now works in LATIN2
not LATIN1.
* New stemmers have appeared for arabic, indonesian, irish, lithuanian,
nepali, and tamil. These all work in UTF8, and the indonesian and
irish ones also work in LATIN1.
(There are some new stemmers that I did not incorporate, mainly because
their names don't match the underlying languages, suggesting that they're
not to be considered mainstream.)
Worth noting: the upstream Nepali dictionary was contributed by
Arthur Zakirov.
initdb forced because the contents of snowball_create.sql have
changed.
Still TODO: see about updating the stopword lists.
Arthur Zakirov, minor mods and doc work by me
Discussion: https://postgr.es/m/20180626122025.GA12647@zakirov.localdomain
Discussion: https://postgr.es/m/20180219140849.GA9050@zakirov.localdomain
2018-09-24 23:29:08 +02:00
|
|
|
among_var = find_among_b(z, a_9, 2);
|
|
|
|
if (!(among_var)) { z->lb = mlimit3; return 0; }
|
2021-02-19 07:57:42 +01:00
|
|
|
z->bra = z->c;
|
Sync our Snowball stemmer dictionaries with current upstream.
We haven't touched these since text search functionality landed in core
in 2007 :-(. While the upstream project isn't a beehive of activity,
they do make additions and bug fixes from time to time. Update our
copies of these files.
Also update our documentation about how to keep things in sync, since
they're not making distribution tarballs these days. Fortunately,
their source code turns out to be a breeze to build.
Notable changes:
* The non-UTF8 version of the hungarian stemmer now works in LATIN2
not LATIN1.
* New stemmers have appeared for arabic, indonesian, irish, lithuanian,
nepali, and tamil. These all work in UTF8, and the indonesian and
irish ones also work in LATIN1.
(There are some new stemmers that I did not incorporate, mainly because
their names don't match the underlying languages, suggesting that they're
not to be considered mainstream.)
Worth noting: the upstream Nepali dictionary was contributed by
Arthur Zakirov.
initdb forced because the contents of snowball_create.sql have
changed.
Still TODO: see about updating the stopword lists.
Arthur Zakirov, minor mods and doc work by me
Discussion: https://postgr.es/m/20180626122025.GA12647@zakirov.localdomain
Discussion: https://postgr.es/m/20180219140849.GA9050@zakirov.localdomain
2018-09-24 23:29:08 +02:00
|
|
|
z->lb = mlimit3;
|
2007-08-21 03:11:32 +02:00
|
|
|
}
|
2021-02-19 07:57:42 +01:00
|
|
|
switch (among_var) {
|
2007-08-21 03:11:32 +02:00
|
|
|
case 1:
|
2021-02-19 07:57:42 +01:00
|
|
|
{ int m4 = z->l - z->c; (void)m4;
|
|
|
|
if (!(eq_s_b(z, 2, s_4))) goto lab0;
|
2007-08-21 03:11:32 +02:00
|
|
|
return 0;
|
|
|
|
lab0:
|
Sync our Snowball stemmer dictionaries with current upstream.
We haven't touched these since text search functionality landed in core
in 2007 :-(. While the upstream project isn't a beehive of activity,
they do make additions and bug fixes from time to time. Update our
copies of these files.
Also update our documentation about how to keep things in sync, since
they're not making distribution tarballs these days. Fortunately,
their source code turns out to be a breeze to build.
Notable changes:
* The non-UTF8 version of the hungarian stemmer now works in LATIN2
not LATIN1.
* New stemmers have appeared for arabic, indonesian, irish, lithuanian,
nepali, and tamil. These all work in UTF8, and the indonesian and
irish ones also work in LATIN1.
(There are some new stemmers that I did not incorporate, mainly because
their names don't match the underlying languages, suggesting that they're
not to be considered mainstream.)
Worth noting: the upstream Nepali dictionary was contributed by
Arthur Zakirov.
initdb forced because the contents of snowball_create.sql have
changed.
Still TODO: see about updating the stopword lists.
Arthur Zakirov, minor mods and doc work by me
Discussion: https://postgr.es/m/20180626122025.GA12647@zakirov.localdomain
Discussion: https://postgr.es/m/20180219140849.GA9050@zakirov.localdomain
2018-09-24 23:29:08 +02:00
|
|
|
z->c = z->l - m4;
|
2007-08-21 03:11:32 +02:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
2021-02-19 07:57:42 +01:00
|
|
|
{ int ret = slice_del(z);
|
2007-08-21 03:11:32 +02:00
|
|
|
if (ret < 0) return ret;
|
|
|
|
}
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
2021-02-19 07:57:42 +01:00
|
|
|
static int r_tidy(struct SN_env * z) {
|
Sync our Snowball stemmer dictionaries with current upstream.
We haven't touched these since text search functionality landed in core
in 2007 :-(. While the upstream project isn't a beehive of activity,
they do make additions and bug fixes from time to time. Update our
copies of these files.
Also update our documentation about how to keep things in sync, since
they're not making distribution tarballs these days. Fortunately,
their source code turns out to be a breeze to build.
Notable changes:
* The non-UTF8 version of the hungarian stemmer now works in LATIN2
not LATIN1.
* New stemmers have appeared for arabic, indonesian, irish, lithuanian,
nepali, and tamil. These all work in UTF8, and the indonesian and
irish ones also work in LATIN1.
(There are some new stemmers that I did not incorporate, mainly because
their names don't match the underlying languages, suggesting that they're
not to be considered mainstream.)
Worth noting: the upstream Nepali dictionary was contributed by
Arthur Zakirov.
initdb forced because the contents of snowball_create.sql have
changed.
Still TODO: see about updating the stopword lists.
Arthur Zakirov, minor mods and doc work by me
Discussion: https://postgr.es/m/20180626122025.GA12647@zakirov.localdomain
Discussion: https://postgr.es/m/20180219140849.GA9050@zakirov.localdomain
2018-09-24 23:29:08 +02:00
|
|
|
|
2021-02-19 07:57:42 +01:00
|
|
|
{ int mlimit1;
|
|
|
|
if (z->c < z->I[1]) return 0;
|
|
|
|
mlimit1 = z->lb; z->lb = z->I[1];
|
|
|
|
{ int m2 = z->l - z->c; (void)m2;
|
|
|
|
{ int m3 = z->l - z->c; (void)m3;
|
|
|
|
{ int ret = r_LONG(z);
|
Sync our Snowball stemmer dictionaries with current upstream.
We haven't touched these since text search functionality landed in core
in 2007 :-(. While the upstream project isn't a beehive of activity,
they do make additions and bug fixes from time to time. Update our
copies of these files.
Also update our documentation about how to keep things in sync, since
they're not making distribution tarballs these days. Fortunately,
their source code turns out to be a breeze to build.
Notable changes:
* The non-UTF8 version of the hungarian stemmer now works in LATIN2
not LATIN1.
* New stemmers have appeared for arabic, indonesian, irish, lithuanian,
nepali, and tamil. These all work in UTF8, and the indonesian and
irish ones also work in LATIN1.
(There are some new stemmers that I did not incorporate, mainly because
their names don't match the underlying languages, suggesting that they're
not to be considered mainstream.)
Worth noting: the upstream Nepali dictionary was contributed by
Arthur Zakirov.
initdb forced because the contents of snowball_create.sql have
changed.
Still TODO: see about updating the stopword lists.
Arthur Zakirov, minor mods and doc work by me
Discussion: https://postgr.es/m/20180626122025.GA12647@zakirov.localdomain
Discussion: https://postgr.es/m/20180219140849.GA9050@zakirov.localdomain
2018-09-24 23:29:08 +02:00
|
|
|
if (ret == 0) goto lab0;
|
2007-08-21 03:11:32 +02:00
|
|
|
if (ret < 0) return ret;
|
|
|
|
}
|
|
|
|
z->c = z->l - m3;
|
2021-02-19 07:57:42 +01:00
|
|
|
z->ket = z->c;
|
2007-08-21 03:11:32 +02:00
|
|
|
if (z->c <= z->lb) goto lab0;
|
2021-02-19 07:57:42 +01:00
|
|
|
z->c--;
|
|
|
|
z->bra = z->c;
|
|
|
|
{ int ret = slice_del(z);
|
2007-08-21 03:11:32 +02:00
|
|
|
if (ret < 0) return ret;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
lab0:
|
|
|
|
z->c = z->l - m2;
|
|
|
|
}
|
2021-02-19 07:57:42 +01:00
|
|
|
{ int m4 = z->l - z->c; (void)m4;
|
|
|
|
z->ket = z->c;
|
|
|
|
if (in_grouping_b(z, g_AEI, 97, 228, 0)) goto lab1;
|
|
|
|
z->bra = z->c;
|
|
|
|
if (in_grouping_b(z, g_C, 98, 122, 0)) goto lab1;
|
|
|
|
{ int ret = slice_del(z);
|
2007-08-21 03:11:32 +02:00
|
|
|
if (ret < 0) return ret;
|
|
|
|
}
|
|
|
|
lab1:
|
|
|
|
z->c = z->l - m4;
|
|
|
|
}
|
2021-02-19 07:57:42 +01:00
|
|
|
{ int m5 = z->l - z->c; (void)m5;
|
|
|
|
z->ket = z->c;
|
|
|
|
if (z->c <= z->lb || z->p[z->c - 1] != 'j') goto lab2;
|
Sync our Snowball stemmer dictionaries with current upstream.
We haven't touched these since text search functionality landed in core
in 2007 :-(. While the upstream project isn't a beehive of activity,
they do make additions and bug fixes from time to time. Update our
copies of these files.
Also update our documentation about how to keep things in sync, since
they're not making distribution tarballs these days. Fortunately,
their source code turns out to be a breeze to build.
Notable changes:
* The non-UTF8 version of the hungarian stemmer now works in LATIN2
not LATIN1.
* New stemmers have appeared for arabic, indonesian, irish, lithuanian,
nepali, and tamil. These all work in UTF8, and the indonesian and
irish ones also work in LATIN1.
(There are some new stemmers that I did not incorporate, mainly because
their names don't match the underlying languages, suggesting that they're
not to be considered mainstream.)
Worth noting: the upstream Nepali dictionary was contributed by
Arthur Zakirov.
initdb forced because the contents of snowball_create.sql have
changed.
Still TODO: see about updating the stopword lists.
Arthur Zakirov, minor mods and doc work by me
Discussion: https://postgr.es/m/20180626122025.GA12647@zakirov.localdomain
Discussion: https://postgr.es/m/20180219140849.GA9050@zakirov.localdomain
2018-09-24 23:29:08 +02:00
|
|
|
z->c--;
|
2021-02-19 07:57:42 +01:00
|
|
|
z->bra = z->c;
|
|
|
|
{ int m6 = z->l - z->c; (void)m6;
|
|
|
|
if (z->c <= z->lb || z->p[z->c - 1] != 'o') goto lab4;
|
Sync our Snowball stemmer dictionaries with current upstream.
We haven't touched these since text search functionality landed in core
in 2007 :-(. While the upstream project isn't a beehive of activity,
they do make additions and bug fixes from time to time. Update our
copies of these files.
Also update our documentation about how to keep things in sync, since
they're not making distribution tarballs these days. Fortunately,
their source code turns out to be a breeze to build.
Notable changes:
* The non-UTF8 version of the hungarian stemmer now works in LATIN2
not LATIN1.
* New stemmers have appeared for arabic, indonesian, irish, lithuanian,
nepali, and tamil. These all work in UTF8, and the indonesian and
irish ones also work in LATIN1.
(There are some new stemmers that I did not incorporate, mainly because
their names don't match the underlying languages, suggesting that they're
not to be considered mainstream.)
Worth noting: the upstream Nepali dictionary was contributed by
Arthur Zakirov.
initdb forced because the contents of snowball_create.sql have
changed.
Still TODO: see about updating the stopword lists.
Arthur Zakirov, minor mods and doc work by me
Discussion: https://postgr.es/m/20180626122025.GA12647@zakirov.localdomain
Discussion: https://postgr.es/m/20180219140849.GA9050@zakirov.localdomain
2018-09-24 23:29:08 +02:00
|
|
|
z->c--;
|
2007-08-21 03:11:32 +02:00
|
|
|
goto lab3;
|
|
|
|
lab4:
|
|
|
|
z->c = z->l - m6;
|
2021-02-19 07:57:42 +01:00
|
|
|
if (z->c <= z->lb || z->p[z->c - 1] != 'u') goto lab2;
|
Sync our Snowball stemmer dictionaries with current upstream.
We haven't touched these since text search functionality landed in core
in 2007 :-(. While the upstream project isn't a beehive of activity,
they do make additions and bug fixes from time to time. Update our
copies of these files.
Also update our documentation about how to keep things in sync, since
they're not making distribution tarballs these days. Fortunately,
their source code turns out to be a breeze to build.
Notable changes:
* The non-UTF8 version of the hungarian stemmer now works in LATIN2
not LATIN1.
* New stemmers have appeared for arabic, indonesian, irish, lithuanian,
nepali, and tamil. These all work in UTF8, and the indonesian and
irish ones also work in LATIN1.
(There are some new stemmers that I did not incorporate, mainly because
their names don't match the underlying languages, suggesting that they're
not to be considered mainstream.)
Worth noting: the upstream Nepali dictionary was contributed by
Arthur Zakirov.
initdb forced because the contents of snowball_create.sql have
changed.
Still TODO: see about updating the stopword lists.
Arthur Zakirov, minor mods and doc work by me
Discussion: https://postgr.es/m/20180626122025.GA12647@zakirov.localdomain
Discussion: https://postgr.es/m/20180219140849.GA9050@zakirov.localdomain
2018-09-24 23:29:08 +02:00
|
|
|
z->c--;
|
2007-08-21 03:11:32 +02:00
|
|
|
}
|
|
|
|
lab3:
|
2021-02-19 07:57:42 +01:00
|
|
|
{ int ret = slice_del(z);
|
2007-08-21 03:11:32 +02:00
|
|
|
if (ret < 0) return ret;
|
|
|
|
}
|
|
|
|
lab2:
|
|
|
|
z->c = z->l - m5;
|
|
|
|
}
|
2021-02-19 07:57:42 +01:00
|
|
|
{ int m7 = z->l - z->c; (void)m7;
|
|
|
|
z->ket = z->c;
|
|
|
|
if (z->c <= z->lb || z->p[z->c - 1] != 'o') goto lab5;
|
Sync our Snowball stemmer dictionaries with current upstream.
We haven't touched these since text search functionality landed in core
in 2007 :-(. While the upstream project isn't a beehive of activity,
they do make additions and bug fixes from time to time. Update our
copies of these files.
Also update our documentation about how to keep things in sync, since
they're not making distribution tarballs these days. Fortunately,
their source code turns out to be a breeze to build.
Notable changes:
* The non-UTF8 version of the hungarian stemmer now works in LATIN2
not LATIN1.
* New stemmers have appeared for arabic, indonesian, irish, lithuanian,
nepali, and tamil. These all work in UTF8, and the indonesian and
irish ones also work in LATIN1.
(There are some new stemmers that I did not incorporate, mainly because
their names don't match the underlying languages, suggesting that they're
not to be considered mainstream.)
Worth noting: the upstream Nepali dictionary was contributed by
Arthur Zakirov.
initdb forced because the contents of snowball_create.sql have
changed.
Still TODO: see about updating the stopword lists.
Arthur Zakirov, minor mods and doc work by me
Discussion: https://postgr.es/m/20180626122025.GA12647@zakirov.localdomain
Discussion: https://postgr.es/m/20180219140849.GA9050@zakirov.localdomain
2018-09-24 23:29:08 +02:00
|
|
|
z->c--;
|
2021-02-19 07:57:42 +01:00
|
|
|
z->bra = z->c;
|
|
|
|
if (z->c <= z->lb || z->p[z->c - 1] != 'j') goto lab5;
|
Sync our Snowball stemmer dictionaries with current upstream.
We haven't touched these since text search functionality landed in core
in 2007 :-(. While the upstream project isn't a beehive of activity,
they do make additions and bug fixes from time to time. Update our
copies of these files.
Also update our documentation about how to keep things in sync, since
they're not making distribution tarballs these days. Fortunately,
their source code turns out to be a breeze to build.
Notable changes:
* The non-UTF8 version of the hungarian stemmer now works in LATIN2
not LATIN1.
* New stemmers have appeared for arabic, indonesian, irish, lithuanian,
nepali, and tamil. These all work in UTF8, and the indonesian and
irish ones also work in LATIN1.
(There are some new stemmers that I did not incorporate, mainly because
their names don't match the underlying languages, suggesting that they're
not to be considered mainstream.)
Worth noting: the upstream Nepali dictionary was contributed by
Arthur Zakirov.
initdb forced because the contents of snowball_create.sql have
changed.
Still TODO: see about updating the stopword lists.
Arthur Zakirov, minor mods and doc work by me
Discussion: https://postgr.es/m/20180626122025.GA12647@zakirov.localdomain
Discussion: https://postgr.es/m/20180219140849.GA9050@zakirov.localdomain
2018-09-24 23:29:08 +02:00
|
|
|
z->c--;
|
2021-02-19 07:57:42 +01:00
|
|
|
{ int ret = slice_del(z);
|
2007-08-21 03:11:32 +02:00
|
|
|
if (ret < 0) return ret;
|
|
|
|
}
|
|
|
|
lab5:
|
|
|
|
z->c = z->l - m7;
|
|
|
|
}
|
Sync our Snowball stemmer dictionaries with current upstream.
We haven't touched these since text search functionality landed in core
in 2007 :-(. While the upstream project isn't a beehive of activity,
they do make additions and bug fixes from time to time. Update our
copies of these files.
Also update our documentation about how to keep things in sync, since
they're not making distribution tarballs these days. Fortunately,
their source code turns out to be a breeze to build.
Notable changes:
* The non-UTF8 version of the hungarian stemmer now works in LATIN2
not LATIN1.
* New stemmers have appeared for arabic, indonesian, irish, lithuanian,
nepali, and tamil. These all work in UTF8, and the indonesian and
irish ones also work in LATIN1.
(There are some new stemmers that I did not incorporate, mainly because
their names don't match the underlying languages, suggesting that they're
not to be considered mainstream.)
Worth noting: the upstream Nepali dictionary was contributed by
Arthur Zakirov.
initdb forced because the contents of snowball_create.sql have
changed.
Still TODO: see about updating the stopword lists.
Arthur Zakirov, minor mods and doc work by me
Discussion: https://postgr.es/m/20180626122025.GA12647@zakirov.localdomain
Discussion: https://postgr.es/m/20180219140849.GA9050@zakirov.localdomain
2018-09-24 23:29:08 +02:00
|
|
|
z->lb = mlimit1;
|
2007-08-21 03:11:32 +02:00
|
|
|
}
|
2021-02-19 07:57:42 +01:00
|
|
|
if (in_grouping_b(z, g_V1, 97, 246, 1) < 0) return 0;
|
|
|
|
z->ket = z->c;
|
|
|
|
if (in_grouping_b(z, g_C, 98, 122, 0)) return 0;
|
|
|
|
z->bra = z->c;
|
|
|
|
z->S[0] = slice_to(z, z->S[0]);
|
|
|
|
if (z->S[0] == 0) return -1;
|
|
|
|
if (!(eq_v_b(z, z->S[0]))) return 0;
|
|
|
|
{ int ret = slice_del(z);
|
2007-08-21 03:11:32 +02:00
|
|
|
if (ret < 0) return ret;
|
|
|
|
}
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
2021-02-19 07:57:42 +01:00
|
|
|
extern int finnish_ISO_8859_1_stem(struct SN_env * z) {
|
|
|
|
{ int c1 = z->c;
|
|
|
|
{ int ret = r_mark_regions(z);
|
2007-08-21 03:11:32 +02:00
|
|
|
if (ret < 0) return ret;
|
|
|
|
}
|
|
|
|
z->c = c1;
|
|
|
|
}
|
2021-02-19 07:57:42 +01:00
|
|
|
z->I[2] = 0;
|
|
|
|
z->lb = z->c; z->c = z->l;
|
2007-08-21 03:11:32 +02:00
|
|
|
|
2021-02-19 07:57:42 +01:00
|
|
|
{ int m2 = z->l - z->c; (void)m2;
|
|
|
|
{ int ret = r_particle_etc(z);
|
2007-08-21 03:11:32 +02:00
|
|
|
if (ret < 0) return ret;
|
|
|
|
}
|
|
|
|
z->c = z->l - m2;
|
|
|
|
}
|
2021-02-19 07:57:42 +01:00
|
|
|
{ int m3 = z->l - z->c; (void)m3;
|
|
|
|
{ int ret = r_possessive(z);
|
2007-08-21 03:11:32 +02:00
|
|
|
if (ret < 0) return ret;
|
|
|
|
}
|
|
|
|
z->c = z->l - m3;
|
|
|
|
}
|
2021-02-19 07:57:42 +01:00
|
|
|
{ int m4 = z->l - z->c; (void)m4;
|
|
|
|
{ int ret = r_case_ending(z);
|
2007-08-21 03:11:32 +02:00
|
|
|
if (ret < 0) return ret;
|
|
|
|
}
|
|
|
|
z->c = z->l - m4;
|
|
|
|
}
|
2021-02-19 07:57:42 +01:00
|
|
|
{ int m5 = z->l - z->c; (void)m5;
|
|
|
|
{ int ret = r_other_endings(z);
|
2007-08-21 03:11:32 +02:00
|
|
|
if (ret < 0) return ret;
|
|
|
|
}
|
|
|
|
z->c = z->l - m5;
|
|
|
|
}
|
2021-02-19 07:57:42 +01:00
|
|
|
|
|
|
|
if (!(z->I[2])) goto lab1;
|
|
|
|
{ int m6 = z->l - z->c; (void)m6;
|
|
|
|
{ int ret = r_i_plural(z);
|
Sync our Snowball stemmer dictionaries with current upstream.
We haven't touched these since text search functionality landed in core
in 2007 :-(. While the upstream project isn't a beehive of activity,
they do make additions and bug fixes from time to time. Update our
copies of these files.
Also update our documentation about how to keep things in sync, since
they're not making distribution tarballs these days. Fortunately,
their source code turns out to be a breeze to build.
Notable changes:
* The non-UTF8 version of the hungarian stemmer now works in LATIN2
not LATIN1.
* New stemmers have appeared for arabic, indonesian, irish, lithuanian,
nepali, and tamil. These all work in UTF8, and the indonesian and
irish ones also work in LATIN1.
(There are some new stemmers that I did not incorporate, mainly because
their names don't match the underlying languages, suggesting that they're
not to be considered mainstream.)
Worth noting: the upstream Nepali dictionary was contributed by
Arthur Zakirov.
initdb forced because the contents of snowball_create.sql have
changed.
Still TODO: see about updating the stopword lists.
Arthur Zakirov, minor mods and doc work by me
Discussion: https://postgr.es/m/20180626122025.GA12647@zakirov.localdomain
Discussion: https://postgr.es/m/20180219140849.GA9050@zakirov.localdomain
2018-09-24 23:29:08 +02:00
|
|
|
if (ret < 0) return ret;
|
2007-08-21 03:11:32 +02:00
|
|
|
}
|
|
|
|
z->c = z->l - m6;
|
Sync our Snowball stemmer dictionaries with current upstream.
We haven't touched these since text search functionality landed in core
in 2007 :-(. While the upstream project isn't a beehive of activity,
they do make additions and bug fixes from time to time. Update our
copies of these files.
Also update our documentation about how to keep things in sync, since
they're not making distribution tarballs these days. Fortunately,
their source code turns out to be a breeze to build.
Notable changes:
* The non-UTF8 version of the hungarian stemmer now works in LATIN2
not LATIN1.
* New stemmers have appeared for arabic, indonesian, irish, lithuanian,
nepali, and tamil. These all work in UTF8, and the indonesian and
irish ones also work in LATIN1.
(There are some new stemmers that I did not incorporate, mainly because
their names don't match the underlying languages, suggesting that they're
not to be considered mainstream.)
Worth noting: the upstream Nepali dictionary was contributed by
Arthur Zakirov.
initdb forced because the contents of snowball_create.sql have
changed.
Still TODO: see about updating the stopword lists.
Arthur Zakirov, minor mods and doc work by me
Discussion: https://postgr.es/m/20180626122025.GA12647@zakirov.localdomain
Discussion: https://postgr.es/m/20180219140849.GA9050@zakirov.localdomain
2018-09-24 23:29:08 +02:00
|
|
|
}
|
2020-06-08 07:58:51 +02:00
|
|
|
goto lab0;
|
|
|
|
lab1:
|
2021-02-19 07:57:42 +01:00
|
|
|
{ int m7 = z->l - z->c; (void)m7;
|
|
|
|
{ int ret = r_t_plural(z);
|
Sync our Snowball stemmer dictionaries with current upstream.
We haven't touched these since text search functionality landed in core
in 2007 :-(. While the upstream project isn't a beehive of activity,
they do make additions and bug fixes from time to time. Update our
copies of these files.
Also update our documentation about how to keep things in sync, since
they're not making distribution tarballs these days. Fortunately,
their source code turns out to be a breeze to build.
Notable changes:
* The non-UTF8 version of the hungarian stemmer now works in LATIN2
not LATIN1.
* New stemmers have appeared for arabic, indonesian, irish, lithuanian,
nepali, and tamil. These all work in UTF8, and the indonesian and
irish ones also work in LATIN1.
(There are some new stemmers that I did not incorporate, mainly because
their names don't match the underlying languages, suggesting that they're
not to be considered mainstream.)
Worth noting: the upstream Nepali dictionary was contributed by
Arthur Zakirov.
initdb forced because the contents of snowball_create.sql have
changed.
Still TODO: see about updating the stopword lists.
Arthur Zakirov, minor mods and doc work by me
Discussion: https://postgr.es/m/20180626122025.GA12647@zakirov.localdomain
Discussion: https://postgr.es/m/20180219140849.GA9050@zakirov.localdomain
2018-09-24 23:29:08 +02:00
|
|
|
if (ret < 0) return ret;
|
2007-08-21 03:11:32 +02:00
|
|
|
}
|
Sync our Snowball stemmer dictionaries with current upstream.
We haven't touched these since text search functionality landed in core
in 2007 :-(. While the upstream project isn't a beehive of activity,
they do make additions and bug fixes from time to time. Update our
copies of these files.
Also update our documentation about how to keep things in sync, since
they're not making distribution tarballs these days. Fortunately,
their source code turns out to be a breeze to build.
Notable changes:
* The non-UTF8 version of the hungarian stemmer now works in LATIN2
not LATIN1.
* New stemmers have appeared for arabic, indonesian, irish, lithuanian,
nepali, and tamil. These all work in UTF8, and the indonesian and
irish ones also work in LATIN1.
(There are some new stemmers that I did not incorporate, mainly because
their names don't match the underlying languages, suggesting that they're
not to be considered mainstream.)
Worth noting: the upstream Nepali dictionary was contributed by
Arthur Zakirov.
initdb forced because the contents of snowball_create.sql have
changed.
Still TODO: see about updating the stopword lists.
Arthur Zakirov, minor mods and doc work by me
Discussion: https://postgr.es/m/20180626122025.GA12647@zakirov.localdomain
Discussion: https://postgr.es/m/20180219140849.GA9050@zakirov.localdomain
2018-09-24 23:29:08 +02:00
|
|
|
z->c = z->l - m7;
|
2007-08-21 03:11:32 +02:00
|
|
|
}
|
2020-06-08 07:58:51 +02:00
|
|
|
lab0:
|
2021-02-19 07:57:42 +01:00
|
|
|
{ int m8 = z->l - z->c; (void)m8;
|
|
|
|
{ int ret = r_tidy(z);
|
2007-08-21 03:11:32 +02:00
|
|
|
if (ret < 0) return ret;
|
|
|
|
}
|
Sync our Snowball stemmer dictionaries with current upstream.
We haven't touched these since text search functionality landed in core
in 2007 :-(. While the upstream project isn't a beehive of activity,
they do make additions and bug fixes from time to time. Update our
copies of these files.
Also update our documentation about how to keep things in sync, since
they're not making distribution tarballs these days. Fortunately,
their source code turns out to be a breeze to build.
Notable changes:
* The non-UTF8 version of the hungarian stemmer now works in LATIN2
not LATIN1.
* New stemmers have appeared for arabic, indonesian, irish, lithuanian,
nepali, and tamil. These all work in UTF8, and the indonesian and
irish ones also work in LATIN1.
(There are some new stemmers that I did not incorporate, mainly because
their names don't match the underlying languages, suggesting that they're
not to be considered mainstream.)
Worth noting: the upstream Nepali dictionary was contributed by
Arthur Zakirov.
initdb forced because the contents of snowball_create.sql have
changed.
Still TODO: see about updating the stopword lists.
Arthur Zakirov, minor mods and doc work by me
Discussion: https://postgr.es/m/20180626122025.GA12647@zakirov.localdomain
Discussion: https://postgr.es/m/20180219140849.GA9050@zakirov.localdomain
2018-09-24 23:29:08 +02:00
|
|
|
z->c = z->l - m8;
|
2007-08-21 03:11:32 +02:00
|
|
|
}
|
|
|
|
z->c = z->lb;
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
2021-02-19 07:57:42 +01:00
|
|
|
/*
 * Create a stemmer environment for the Finnish (ISO-8859-1) stemmer.
 *
 * Forwards to the Snowball runtime's SN_create_env() and returns its
 * result unchanged.
 * NOTE(review): the arguments 1 and 3 presumably size the environment's
 * per-stemmer variable storage -- confirm against the runtime declared
 * in header.h.
 */
extern struct SN_env * finnish_ISO_8859_1_create_env(void)
{
    return SN_create_env(1, 3);
}
|
2007-08-21 03:11:32 +02:00
|
|
|
|
|
|
|
/*
 * Dispose of a stemmer environment for the Finnish (ISO-8859-1) stemmer.
 *
 * Forwards z to the Snowball runtime's SN_close_env().
 * NOTE(review): the second argument (1) equals the first argument this
 * file passes to SN_create_env(); presumably the two must stay in sync --
 * confirm against the runtime declared in header.h.
 */
extern void finnish_ISO_8859_1_close_env(struct SN_env * z)
{
    SN_close_env(z, 1);
}
|
|
|
|
|