Avoid character classification in regex escape parsing.

For regex escape sequences, just test directly for the relevant ASCII characters rather than using locale-sensitive character classification. This fixes an assertion failure when a locale considers a non-ASCII character, such as "൧", to be a digit.

Reported-by: Richard Guo
Discussion: https://postgr.es/m/CAMbWs49Q6UoKGeT8pBkMtJGJd+16CBFZaaWUk9Du+2ERE5g_YA@mail.gmail.com
Backpatch-through: 11
2023-04-21 08:19:41 -07:00 · 2023-04-21 08:19:41 -07:00 · dde926b0f6
parent 6d60b718ce
commit dde926b0f6
1 changed file with 10 additions and 3 deletions
--- a/src/backend/regex/regc_lex.c
+++ b/src/backend/regex/regc_lex.c
@@ -616,7 +616,11 @@ lexescape(struct vars *v)

 	assert(!ATEOS());
 	c = *v->now++;
-	if (!iscalnum(c))
+
+	/* if it's not alphanumeric ASCII, treat it as a plain character */
+	if (!('a' <= c && c <= 'z') &&
+		!('A' <= c && c <= 'Z') &&
+		!('0' <= c && c <= '9'))
 		RETV(PLAIN, c);

 	NOTE(REG_UNONPOSIX);
@@ -758,8 +762,11 @@ lexescape(struct vars *v)
 			RETV(PLAIN, c);
 			break;
 		default:
-			assert(iscalpha(c));
-			FAILW(REG_EESCAPE); /* unknown alphabetic escape */
+			/*
+			 * Throw an error for unrecognized ASCII alpha escape sequences,
+			 * which reserves them for future use if needed.
+			 */
+			FAILW(REG_EESCAPE);
 			break;
 	}
 	assert(NOTREACHED);