const-ify puny_decode (and add puny.c)

This commit is contained in:
Omar Polo 2021-01-27 11:21:23 +00:00
parent 42650adec0
commit 7957cbd9aa
3 changed files with 255 additions and 8 deletions

2
gmid.h
View File

@ -247,6 +247,6 @@ int parse_iri(char*, struct iri*, const char**);
int trim_req_iri(char*);
/* puny.c */
int puny_decode(char*, char*, size_t);
int puny_decode(const char*, char*, size_t);
#endif

253
puny.c Normal file
View File

@ -0,0 +1,253 @@
/*
* Copyright (c) 2021 Omar Polo <op@omarpolo.com>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#include <stddef.h>
#include <stdint.h>
#include <string.h>
#include "gmid.h"
/*
 * Bootstring parameters used by the Punycode decoder in this file.
 * The values match the ones RFC 3492 (section 5) assigns to Punycode.
 */
/* digit radix; also the step by which k grows in adapt() and decode() */
#define BASE 36
/* lower clamp for the per-digit threshold t computed in decode() */
#define TMIN 1
/* upper clamp for the per-digit threshold t computed in decode() */
#define TMAX 26
/* added to delta in the denominator of adapt()'s final expression */
#define SKEW 38
/* divisor applied to delta by adapt() on the first adaptation only */
#define DAMP 700
/* initial value of bias in decode() */
#define IBIAS 72
/* initial value of n (the running code point) in decode() */
#define IN 128
/*
 * Bias adaptation step of the Punycode decoder.
 *
 * delta     - difference between the current and the previous value of
 *             the decoder state i
 * numpoints - number of code points decoded so far, including the one
 *             being processed (must be non-zero, it is used as divisor)
 * firsttime - non-zero for the very first adaptation, which scales
 *             delta by DAMP instead of halving it
 *
 * Returns the new bias.
 */
static int
adapt(int delta, int numpoints, int firsttime)
{
	int k;

	delta /= firsttime ? DAMP : 2;
	delta += delta / numpoints;

	/* shrink delta until it falls within the threshold, counting steps */
	for (k = 0; delta > ((BASE - TMIN) * TMAX) / 2; k += BASE)
		delta /= (BASE - TMIN);

	return k + (((BASE - TMIN + 1) * delta) / (delta + SKEW));
}
/*
 * Copy the "basic" (ASCII) prefix of a Punycode label into out.
 *
 * The prefix is everything before the LAST '-' in s; when s has no '-'
 * the whole string is treated as the prefix.  The copy is always
 * NUL-terminated.
 *
 * s   - NUL-terminated label, without the leading "xn--"
 * out - destination buffer
 * len - size of out in bytes
 *
 * Returns a pointer into s at the position where copying stopped (the
 * last '-' or the terminating NUL), or NULL if the prefix plus its NUL
 * terminator does not fit in len bytes or if a byte greater than 'z'
 * (which includes every non-ASCII byte) is found.  On a NULL return out
 * may have been partially written.
 */
static const char *
copy_until_delimiter(const char *s, char *out, size_t len)
{
	const char *end, *t;

	end = s + strlen(s);
	if ((size_t)(end - s) > len)
		return NULL;

	/* the delimiter is the last '-', not the first one */
	if ((t = strrchr(s, '-')) == NULL)
		t = end;

	/* leave room for the terminator */
	if ((size_t)(t - s) >= len)
		return NULL;

	for (; s < t; ++s, ++out) {
		/*
		 * Compare as unsigned: where plain char is signed, bytes
		 * >= 0x80 are negative and would slip past a plain
		 * `*s > 'z'' test.
		 */
		if ((unsigned char)*s > 'z')
			return NULL;
		*out = *s;
	}
	*out = '\0';

	return s;
}
/*
 * Map a Punycode digit character to its numeric value:
 * 'a'..'z' and 'A'..'Z' map to 0..25, '0'..'9' map to 26..35.
 *
 * Any other character is not a digit; for those an out-of-range marker
 * (all bits set, hence greater than every valid digit value) is
 * returned so that callers can reject it with a `>= 36' test.
 * Returning the character itself, as was done before, made control
 * characters, ' ', '!', '"' and '#' look like valid digits.
 */
static unsigned int
digit_value(char c)
{
	if ('A' <= c && c <= 'Z')
		return c - 'A';

	if ('a' <= c && c <= 'z')
		return c - 'a';

	if ('0' <= c && c <= '9')
		return 26 + c - '0';

	return ~0U;
}
/*
 * Insert codepoint, encoded as UTF-8, into the NUL-terminated string
 * out in front of its i-th character, shifting the rest of the string
 * (terminator included) to the right.
 *
 * out       - NUL-terminated buffer being built
 * len       - total size of out in bytes
 * codepoint - code point to insert; must be non-ASCII (> 0x7F), at most
 *             0x10FFFF and not a UTF-16 surrogate (0xD800..0xDFFF),
 *             which UTF-8 is not allowed to encode
 * i         - index handed to utf8_nth() to locate the insertion point
 *             (presumably counted in code points -- confirm against
 *             utf8_nth's definition)
 *
 * Returns 1 on success, 0 if the code point is not acceptable, the
 * position cannot be located or the result would not fit in len bytes.
 */
static int
insert(char *out, size_t len, int codepoint, size_t i)
{
	int l;
	size_t tail;
	char *t;

	if (codepoint <= 0x7F)
		return 0;
	else if (codepoint <= 0x7FF)
		l = 2;
	else if (codepoint <= 0xFFFF) {
		if (0xD800 <= codepoint && codepoint <= 0xDFFF)
			return 0;
		l = 3;
	} else if (codepoint <= 0x10FFFF)
		l = 4;
	else
		return 0;

	if ((t = utf8_nth(out, i)) == NULL)
		return 0;

	/*
	 * Everything before t, the new sequence, the shifted tail and
	 * the terminator must all fit; checking only t + l would let
	 * the memmove below run past the end of the buffer.
	 */
	tail = strlen(t);
	if ((size_t)(t - out) + (size_t)l + tail >= len)
		return 0;

	/* + 1: carry the NUL along instead of relying on a zeroed buffer */
	memmove(t + l, t, tail + 1);

	switch (l) {
	case 2:
		t[1] = (char)(( codepoint        & 0x3F) + 0x80);
		t[0] = (char)(((codepoint >>  6) & 0x1F) + 0xC0);
		break;
	case 3:
		t[2] = (char)(( codepoint        & 0x3F) + 0x80);
		t[1] = (char)(((codepoint >>  6) & 0x3F) + 0x80);
		t[0] = (char)(((codepoint >> 12) & 0x0F) + 0xE0);
		break;
	case 4:
		t[3] = (char)(( codepoint        & 0x3F) + 0x80);
		t[2] = (char)(((codepoint >>  6) & 0x3F) + 0x80);
		t[1] = (char)(((codepoint >> 12) & 0x3F) + 0x80);
		t[0] = (char)(((codepoint >> 18) & 0x07) + 0xF0);
		break;
	}

	return 1;
}
/*
 * Decode a single hostname label into out.
 *
 * Labels that do not start with "xn--" are copied verbatim.  Labels
 * that do are Punycode-decoded: the basic prefix (if any) is copied
 * with copy_until_delimiter() and every decoded code point is then
 * placed with insert().
 *
 * str - NUL-terminated label
 * out - destination buffer; it is expected to hold an empty string on
 *       entry (puny_decode() zero-fills it before the first call)
 * len - size of out in bytes; must be greater than 4
 *
 * Returns 1 on success, 0 on failure: NULL or too small arguments,
 * output that does not fit, a character that is not a Punycode digit,
 * truncated input, arithmetic overflow or a code point rejected by
 * insert().
 */
static int
decode(const char *str, char *out, size_t len)
{
	/*
	 * Upper bound for the decoder state.  Keeping i, w and n at or
	 * below INT32_MAX makes the wrap-around checks below possible
	 * and keeps the values passed to the int parameters of adapt()
	 * and insert() non-negative.
	 */
	const unsigned int maxint = INT32_MAX;
	uint32_t n;
	unsigned int i, oldi, bias, w, k, digit, t;
	unsigned int numpoints;
	size_t slen;
	const char *s;

	if (str == NULL || len <= 4)
		return 0;

	if (strncmp(str, "xn--", 4) != 0) {
		/*
		 * Plain label.  Fail rather than truncate: strncpy would
		 * leave out unterminated when str does not fit.
		 */
		slen = strlen(str);
		if (slen >= len)
			return 0;
		memcpy(out, str, slen + 1);
		return 1;
	}

	/* skip the xn-- */
	str += 4;

	if (strchr(str, '-') != NULL) {
		if ((s = copy_until_delimiter(str, out, len)) == NULL)
			return 0;
		if (*s == '-')
			s++;
	} else
		s = str;

	/* the prefix is ASCII only, so bytes and code points coincide */
	numpoints = strlen(out);

	n = IN;
	i = 0;
	bias = IBIAS;

	while (*s != '\0') {
		oldi = i;
		w = 1;

		/* read one variable-length integer into i */
		for (k = BASE; ; k += BASE) {
			/* input ended in the middle of an integer */
			if (*s == '\0')
				return 0;

			digit = digit_value(*s);
			s++;
			if (digit >= BASE)
				return 0;

			/* fail on overflow */
			if (digit > (maxint - i) / w)
				return 0;
			i += digit * w;

			if (k <= bias)
				t = TMIN;
			else if (k >= bias + TMAX)
				t = TMAX;
			else
				t = k - bias;

			if (digit < t)
				break;

			/* fail on overflow */
			if (w > maxint / (BASE - t))
				return 0;
			w *= (BASE - t);
		}

		bias = adapt(i - oldi, numpoints+1, oldi == 0);

		/* i wraps around numpoints+1; every wrap bumps n */
		if (i / (numpoints+1) > maxint - n)
			return 0;	/* fail on overflow */
		n += i / (numpoints+1);
		i = i % (numpoints+1);

		if (!insert(out, len, n, i))
			return 0;
		numpoints++;
		++i;
	}

	return 1;
}
/*
 * Return a pointer to the end of the first component of hostname:
 * either the first '.' or, when there is none, the terminating NUL.
 */
const char *
end_of_component(const char *hostname)
{
	return hostname + strcspn(hostname, ".");
}
/*
 * Decode every '.'-separated component of hostname into out.
 *
 * out is zero-filled first; each component is then copied into a local
 * buffer, handed to decode() and, unless it was the last one, followed
 * by a ".".
 *
 * hostname - NUL-terminated host name
 * out      - destination buffer
 * len      - size of out in bytes
 *
 * Returns 1 on success, 0 if a component does not fit in the local
 * buffer, decode() fails or out is too small.
 */
int
puny_decode(const char *hostname, char *out, size_t len)
{
	char label[DOMAIN_NAME_LEN];
	const char *cur, *stop;
	size_t n, used;

	memset(out, 0, len);

	cur = hostname;
	while (1) {
		stop = end_of_component(cur);
		n = stop - cur;
		if (n >= sizeof(label))
			return 0;

		/* work on a NUL-terminated private copy of the component */
		memcpy(label, cur, n);
		label[n] = '\0';

		if (!decode(label, out, len))
			return 0;

		if (*stop == '\0')
			break;

		if (strlcat(out, ".", len) >= len)
			return 0;

		/* continue right after what has been written so far */
		used = strlen(out);
		if (used >= len)
			return 0;
		out += used;
		len -= used;

		cur = stop + 1;
	}

	return 1;
}

View File

@ -48,16 +48,12 @@ main(int argc, char **argv)
{
struct suite *i;
int failed;
char *hostname;
char buf[64]; /* name len */
failed = 0;
for (i = t; i->src != NULL; ++i) {
if ((hostname = strdup(i->src)) == NULL)
return 0;
memset(buf, 0, sizeof(buf));
if (!puny_decode(hostname, buf, sizeof(buf))) {
if (!puny_decode(i->src, buf, sizeof(buf))) {
printf("decode: failure with %s\n", i->src);
failed = 1;
continue;
@ -70,8 +66,6 @@ main(int argc, char **argv)
continue;
} else
printf("OK: %s => %s\n", i->src, buf);
free(hostname);
}
return failed;