restic/vendor/golang.org/x/text/encoding/charmap/charmap_test.go
Alexander Neumann 2b39f9f4b2 Update dependencies
Among others, this updates minio-go, so that the new "eu-west-3" zone
for AWS is supported.
2018-01-23 19:40:42 +01:00

259 lines
5.9 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package charmap
import (
"testing"
"golang.org/x/text/encoding"
"golang.org/x/text/encoding/internal"
"golang.org/x/text/encoding/internal/enctest"
"golang.org/x/text/transform"
)
// dec is a TestNonRepertoire setup helper for the decoding direction. It
// yields the label "Decode", a fresh decoder for e, and a nil error, which
// the test uses as the error it expects the transform to return.
func dec(e encoding.Encoding) (dir string, t transform.Transformer, err error) {
	dir = "Decode"
	t = e.NewDecoder()
	return dir, t, nil
}
// encASCIISuperset is a TestNonRepertoire setup helper for the encoding
// direction. It yields the label "Encode", a fresh encoder for e, and
// internal.ErrASCIIReplacement as the error the test expects.
func encASCIISuperset(e encoding.Encoding) (dir string, t transform.Transformer, err error) {
	dir = "Encode"
	t = e.NewEncoder()
	return dir, t, internal.ErrASCIIReplacement
}
// encEBCDIC is a TestNonRepertoire setup helper for the encoding direction of
// the EBCDIC code pages. It yields the label "Encode", a fresh encoder for e,
// and internal.RepertoireError(0x3f) as the error the test expects.
// NOTE(review): 0x3f is presumably the EBCDIC substitute byte; confirm
// against the internal package.
func encEBCDIC(e encoding.Encoding) (dir string, t transform.Transformer, err error) {
	dir = "Encode"
	t = e.NewEncoder()
	return dir, t, internal.RepertoireError(0x3f)
}
// TestNonRepertoire feeds each transformer input that it cannot fully
// represent and checks two things: the error returned by transform.String
// equals the one supplied by the case's init helper, and the output produced
// up to that point equals want.
func TestNonRepertoire(t *testing.T) {
	type nonRepertoireCase struct {
		// init builds the transformer under test and reports the direction
		// label and the error the transform is expected to return.
		init func(e encoding.Encoding) (string, transform.Transformer, error)
		e    encoding.Encoding
		src  string
		want string
	}

	cases := []nonRepertoireCase{
		{dec, Windows1252, "\x81", "\ufffd"},

		{encEBCDIC, CodePage037, "갂", ""},

		{encEBCDIC, CodePage1047, "갂", ""},
		{encEBCDIC, CodePage1047, "a¤갂", "\x81\x9F"},

		{encEBCDIC, CodePage1140, "갂", ""},
		{encEBCDIC, CodePage1140, "a€갂", "\x81\x9F"},

		{encASCIISuperset, Windows1252, "갂", ""},
		{encASCIISuperset, Windows1252, "a갂", "a"},
		{encASCIISuperset, Windows1252, "\u00E9갂", "\xE9"},
	}

	for i := range cases {
		c := &cases[i]
		direction, tr, expectedErr := c.init(c.e)

		// transform.String already returns a string, so the output can be
		// compared directly.
		out, _, gotErr := transform.String(tr, c.src)
		if gotErr != expectedErr {
			t.Errorf("%s %v(%q): got %v; want %v", direction, c.e, c.src, gotErr, expectedErr)
		}
		if out != c.want {
			t.Errorf("%s %v(%q):\ngot %q\nwant %q", direction, c.e, c.src, out, c.want)
		}
	}
}
// TestBasics runs enctest.TestEncoding for every charmap listed below,
// pairing a short byte sequence in that encoding (encoded) with the UTF-8 text
// it corresponds to (utf8). The two trailing string arguments of
// enctest.TestEncoding are passed empty.
//
// Every encoding in this table is a single-byte charmap, so each byte of
// encoded corresponds to exactly one rune of utf8. Keep the two strings the
// same length in those units when adding or editing entries.
func TestBasics(t *testing.T) {
	testCases := []struct {
		e       encoding.Encoding
		encoded string
		utf8    string
	}{{
		e:       CodePage037,
		encoded: "\xc8\x51\xba\x93\xcf",
		utf8:    "Hé[lõ",
	}, {
		e:       CodePage437,
		encoded: "H\x82ll\x93 \x9d\xa7\xf4\x9c\xbe",
		utf8:    "Héllô ¥º⌠£╛",
	}, {
		e:       CodePage866,
		encoded: "H\xf3\xd3o \x98\xfd\x9f\xdd\xa1",
		utf8:    "Hє╙o Ш¤Я▌б",
	}, {
		e:       CodePage1047,
		encoded: "\xc8\x54\x93\x93\x9f",
		utf8:    "Hèll¤",
	}, {
		e:       CodePage1140,
		encoded: "\xc8\x9f\x93\x93\xcf",
		utf8:    "H€llõ",
	}, {
		e:       ISO8859_2,
		encoded: "Hel\xe5\xf5",
		utf8:    "Helĺő",
	}, {
		e:       ISO8859_3,
		encoded: "He\xbd\xd4",
		utf8:    "He½Ô",
	}, {
		e:       ISO8859_4,
		encoded: "Hel\xb6\xf8",
		utf8:    "Helļø",
	}, {
		e:       ISO8859_5,
		encoded: "H\xd7\xc6o",
		utf8:    "HзЦo",
	}, {
		e:       ISO8859_6,
		encoded: "Hel\xc2\xc9",
		utf8:    "Helآة",
	}, {
		e:       ISO8859_7,
		encoded: "H\xeel\xebo",
		utf8:    "Hξlλo",
	}, {
		e:       ISO8859_8,
		encoded: "Hel\xf5\xed",
		utf8:    "Helץם",
	}, {
		e:       ISO8859_9,
		encoded: "\xdeayet",
		utf8:    "Şayet",
	}, {
		e:       ISO8859_10,
		encoded: "H\xea\xbfo",
		utf8:    "Hęŋo",
	}, {
		e:       ISO8859_13,
		encoded: "H\xe6l\xf9o",
		utf8:    "Hęlło",
	}, {
		e:       ISO8859_14,
		encoded: "He\xfe\xd0o",
		utf8:    "HeŷŴo",
	}, {
		e:       ISO8859_15,
		encoded: "H\xa4ll\xd8",
		utf8:    "H€llØ",
	}, {
		e:       ISO8859_16,
		encoded: "H\xe6ll\xbd",
		utf8:    "Hællœ",
	}, {
		e:       KOI8R,
		encoded: "He\x93\xad\x9c",
		utf8:    "He⌠╜°",
	}, {
		e:       KOI8U,
		encoded: "He\x93\xad\x9c",
		utf8:    "He⌠ґ°",
	}, {
		e:       Macintosh,
		encoded: "He\xdf\xd7",
		// 0xDF is U+FB02 LATIN SMALL LIGATURE FL, a single rune. It is
		// written as an escape so that tools which normalise text cannot
		// silently expand it to the two ASCII letters "fl".
		utf8: "He\ufb02◊",
	}, {
		e:       MacintoshCyrillic,
		encoded: "He\xbe\x94",
		utf8:    "HeЊФ",
	}, {
		e:       Windows874,
		encoded: "He\xb7\xf0",
		// 0xB7 is U+0E17 and 0xF0 is U+0E50 THAI DIGIT ZERO. The digit is
		// written as an escape so it cannot be dropped unnoticed.
		utf8: "Heท\u0e50",
	}, {
		e:       Windows1250,
		encoded: "He\xe5\xe5o",
		utf8:    "Heĺĺo",
	}, {
		e:       Windows1251,
		encoded: "H\xball\xfe",
		utf8:    "Hєllю",
	}, {
		e:       Windows1252,
		encoded: "H\xe9ll\xf4 \xa5\xbA\xae\xa3\xd0",
		utf8:    "Héllô ¥º®£Ð",
	}, {
		e:       Windows1253,
		encoded: "H\xe5ll\xd6",
		utf8:    "HεllΦ",
	}, {
		e:       Windows1254,
		encoded: "\xd0ello",
		utf8:    "Ğello",
	}, {
		e:       Windows1255,
		encoded: "He\xd4o",
		utf8:    "Heװo",
	}, {
		e:       Windows1256,
		encoded: "H\xdbllo",
		utf8:    "Hغllo",
	}, {
		e:       Windows1257,
		encoded: "He\xeflo",
		utf8:    "Heļlo",
	}, {
		e:       Windows1258,
		encoded: "Hell\xf5",
		utf8:    "Hellơ",
	}, {
		e:       XUserDefined,
		encoded: "\x00\x40\x7f\x80\xab\xff",
		utf8:    "\u0000\u0040\u007f\uf780\uf7ab\uf7ff",
	}}

	for _, tc := range testCases {
		enctest.TestEncoding(t, tc.e, tc.encoded, tc.utf8, "", "")
	}
}
// windows1255TestCases is the table shared by TestDecodeByte and
// TestEncodeRune. Each entry pairs a Windows1255 byte with a rune.
//
// TestDecodeByte only uses entries with ok == true and expects
// DecodeByte(b) == r. TestEncodeRune skips entries whose rune is '\ufffd' and
// expects EncodeRune(r) == (b, ok) for the rest.
var windows1255TestCases = []struct {
	b  byte // byte in the Windows1255 encoding
	ok bool // whether r is expected to be encodable
	r  rune // rune paired with b
}{
	{b: '\x00', ok: true, r: '\u0000'},
	{b: '\x1a', ok: true, r: '\u001a'},
	{b: '\x61', ok: true, r: '\u0061'},
	{b: '\x7f', ok: true, r: '\u007f'},

	{b: '\x80', ok: true, r: '\u20ac'},
	{b: '\x95', ok: true, r: '\u2022'},
	{b: '\xa0', ok: true, r: '\u00a0'},
	{b: '\xc0', ok: true, r: '\u05b0'},

	// Two different bytes in this table are paired with '\ufffd'.
	{b: '\xfc', ok: true, r: '\ufffd'},
	{b: '\xfd', ok: true, r: '\u200e'},
	{b: '\xfe', ok: true, r: '\u200f'},
	{b: '\xff', ok: true, r: '\ufffd'},

	// Runes that are not expected to encode: the expected byte is
	// encoding.ASCIISub and ok is false.
	{b: encoding.ASCIISub, ok: false, r: '\u0400'},
	{b: encoding.ASCIISub, ok: false, r: '\u2603'},
	{b: encoding.ASCIISub, ok: false, r: '\U0001f4a9'},
}
// TestDecodeByte checks Windows1255.DecodeByte against every entry of
// windows1255TestCases that is marked ok. Entries with ok == false are
// skipped.
func TestDecodeByte(t *testing.T) {
	for _, c := range windows1255TestCases {
		if c.ok {
			if decoded := Windows1255.DecodeByte(c.b); decoded != c.r {
				t.Errorf("DecodeByte(%#02x): got %#08x, want %#08x", c.b, decoded, c.r)
			}
		}
	}
}
// TestEncodeRune checks Windows1255.EncodeRune against windows1255TestCases,
// comparing both the returned byte and the ok flag with the table entry.
func TestEncodeRune(t *testing.T) {
	const replacement = '\ufffd'

	for _, c := range windows1255TestCases {
		// Several bytes in the table are paired with the replacement rune,
		// so it has no single expected byte and is not checked here.
		if c.r != replacement {
			b, ok := Windows1255.EncodeRune(c.r)
			if b == c.b && ok == c.ok {
				continue
			}
			t.Errorf("EncodeRune(%#08x): got (%#02x, %t), want (%#02x, %t)", c.r, b, ok, c.b, c.ok)
		}
	}
}
// TestFiles runs enctest.TestFile with the Windows1252 encoding.
func TestFiles(t *testing.T) {
	enctest.TestFile(t, Windows1252)
}
// BenchmarkEncoding runs enctest.Benchmark with the Windows1252 encoding.
func BenchmarkEncoding(b *testing.B) {
	enctest.Benchmark(b, Windows1252)
}