mirror of
https://github.com/restic/restic.git
synced 2024-09-16 07:50:58 +02:00
210 lines
4.9 KiB
Go
210 lines
4.9 KiB
Go
|
// Copyright 2012 The Go Authors. All rights reserved.
|
||
|
// Use of this source code is governed by a BSD-style
|
||
|
// license that can be found in the LICENSE file.
|
||
|
|
||
|
// +build icu
|
||
|
|
||
|
package main
|
||
|
|
||
|
/*
|
||
|
#cgo LDFLAGS: -licui18n -licuuc
|
||
|
#include <stdlib.h>
|
||
|
#include <unicode/ucol.h>
|
||
|
#include <unicode/uiter.h>
|
||
|
#include <unicode/utypes.h>
|
||
|
*/
|
||
|
import "C"
|
||
|
import (
|
||
|
"fmt"
|
||
|
"log"
|
||
|
"unicode/utf16"
|
||
|
"unicode/utf8"
|
||
|
"unsafe"
|
||
|
)
|
||
|
|
||
|
func init() {
|
||
|
AddFactory(CollatorFactory{"icu", newUTF16,
|
||
|
"Main ICU collator, using native strings."})
|
||
|
AddFactory(CollatorFactory{"icu8", newUTF8iter,
|
||
|
"ICU collator using ICU iterators to process UTF8."})
|
||
|
AddFactory(CollatorFactory{"icu16", newUTF8conv,
|
||
|
"ICU collation by first converting UTF8 to UTF16."})
|
||
|
}
|
||
|
|
||
|
func icuCharP(s []byte) *C.char {
|
||
|
return (*C.char)(unsafe.Pointer(&s[0]))
|
||
|
}
|
||
|
|
||
|
func icuUInt8P(s []byte) *C.uint8_t {
|
||
|
return (*C.uint8_t)(unsafe.Pointer(&s[0]))
|
||
|
}
|
||
|
|
||
|
func icuUCharP(s []uint16) *C.UChar {
|
||
|
return (*C.UChar)(unsafe.Pointer(&s[0]))
|
||
|
}
|
||
|
func icuULen(s []uint16) C.int32_t {
|
||
|
return C.int32_t(len(s))
|
||
|
}
|
||
|
func icuSLen(s []byte) C.int32_t {
|
||
|
return C.int32_t(len(s))
|
||
|
}
|
||
|
|
||
|
// icuCollator implements a Collator based on ICU.
//
// It holds the state shared by the ICU-backed collators in this file, which
// embed it.
type icuCollator struct {
	// loc is the locale name as a C string; it is allocated with C.CString
	// in init and released with C.free in Close.
	loc *C.char
	// col is the ICU collator handle opened by ucol_open in init and
	// released by ucol_close in Close.
	col *C.UCollator
	// keyBuf is the buffer sort keys are written into. buf hands its unused
	// tail to ICU and extendBuf claims the written part, so returned keys
	// are sub-slices of this buffer.
	keyBuf []byte
}
|
||
|
|
||
|
// growBufSize is the capacity in bytes (10 MiB) of each key buffer allocated
// for an icuCollator. A single sort key cannot exceed it.
const growBufSize = 10 << 20
|
||
|
|
||
|
func (c *icuCollator) init(locale string) error {
|
||
|
err := C.UErrorCode(0)
|
||
|
c.loc = C.CString(locale)
|
||
|
c.col = C.ucol_open(c.loc, &err)
|
||
|
if err > 0 {
|
||
|
return fmt.Errorf("failed opening collator for %q", locale)
|
||
|
} else if err < 0 {
|
||
|
loc := C.ucol_getLocaleByType(c.col, 0, &err)
|
||
|
fmt, ok := map[int]string{
|
||
|
-127: "warning: using default collator: %s",
|
||
|
-128: "warning: using fallback collator: %s",
|
||
|
}[int(err)]
|
||
|
if ok {
|
||
|
log.Printf(fmt, C.GoString(loc))
|
||
|
}
|
||
|
}
|
||
|
c.keyBuf = make([]byte, 0, growBufSize)
|
||
|
return nil
|
||
|
}
|
||
|
|
||
|
func (c *icuCollator) buf() (*C.uint8_t, C.int32_t) {
|
||
|
if len(c.keyBuf) == cap(c.keyBuf) {
|
||
|
c.keyBuf = make([]byte, 0, growBufSize)
|
||
|
}
|
||
|
b := c.keyBuf[len(c.keyBuf):cap(c.keyBuf)]
|
||
|
return icuUInt8P(b), icuSLen(b)
|
||
|
}
|
||
|
|
||
|
func (c *icuCollator) extendBuf(n C.int32_t) []byte {
|
||
|
end := len(c.keyBuf) + int(n)
|
||
|
if end > cap(c.keyBuf) {
|
||
|
if len(c.keyBuf) == 0 {
|
||
|
log.Fatalf("icuCollator: max string size exceeded: %v > %v", n, growBufSize)
|
||
|
}
|
||
|
c.keyBuf = make([]byte, 0, growBufSize)
|
||
|
return nil
|
||
|
}
|
||
|
b := c.keyBuf[len(c.keyBuf):end]
|
||
|
c.keyBuf = c.keyBuf[:end]
|
||
|
return b
|
||
|
}
|
||
|
|
||
|
func (c *icuCollator) Close() error {
|
||
|
C.ucol_close(c.col)
|
||
|
C.free(unsafe.Pointer(c.loc))
|
||
|
return nil
|
||
|
}
|
||
|
|
||
|
// icuUTF16 implements the Collator interface.
// It passes the UTF-16 form of each input directly to the ICU collator.
type icuUTF16 struct {
	icuCollator
}
|
||
|
|
||
|
func newUTF16(locale string) (Collator, error) {
|
||
|
c := &icuUTF16{}
|
||
|
return c, c.init(locale)
|
||
|
}
|
||
|
|
||
|
func (c *icuUTF16) Compare(a, b Input) int {
|
||
|
return int(C.ucol_strcoll(c.col, icuUCharP(a.UTF16), icuULen(a.UTF16), icuUCharP(b.UTF16), icuULen(b.UTF16)))
|
||
|
}
|
||
|
|
||
|
func (c *icuUTF16) Key(s Input) []byte {
|
||
|
bp, bn := c.buf()
|
||
|
n := C.ucol_getSortKey(c.col, icuUCharP(s.UTF16), icuULen(s.UTF16), bp, bn)
|
||
|
if b := c.extendBuf(n); b != nil {
|
||
|
return b
|
||
|
}
|
||
|
return c.Key(s)
|
||
|
}
|
||
|
|
||
|
// icuUTF8iter implements the Collator interface.
// This implementation wraps the UTF8 string in an iterator
// which is passed to the collator.
type icuUTF8iter struct {
	icuCollator
	// a and b are the ICU character iterators re-targeted at the input
	// strings on every Compare call; Key uses only a.
	a, b C.UCharIterator
}
|
||
|
|
||
|
func newUTF8iter(locale string) (Collator, error) {
|
||
|
c := &icuUTF8iter{}
|
||
|
return c, c.init(locale)
|
||
|
}
|
||
|
|
||
|
func (c *icuUTF8iter) Compare(a, b Input) int {
|
||
|
err := C.UErrorCode(0)
|
||
|
C.uiter_setUTF8(&c.a, icuCharP(a.UTF8), icuSLen(a.UTF8))
|
||
|
C.uiter_setUTF8(&c.b, icuCharP(b.UTF8), icuSLen(b.UTF8))
|
||
|
return int(C.ucol_strcollIter(c.col, &c.a, &c.b, &err))
|
||
|
}
|
||
|
|
||
|
func (c *icuUTF8iter) Key(s Input) []byte {
|
||
|
err := C.UErrorCode(0)
|
||
|
state := [2]C.uint32_t{}
|
||
|
C.uiter_setUTF8(&c.a, icuCharP(s.UTF8), icuSLen(s.UTF8))
|
||
|
bp, bn := c.buf()
|
||
|
n := C.ucol_nextSortKeyPart(c.col, &c.a, &(state[0]), bp, bn, &err)
|
||
|
if n >= bn {
|
||
|
// Force failure.
|
||
|
if c.extendBuf(n+1) != nil {
|
||
|
log.Fatal("expected extension to fail")
|
||
|
}
|
||
|
return c.Key(s)
|
||
|
}
|
||
|
return c.extendBuf(n)
|
||
|
}
|
||
|
|
||
|
// icuUTF8conv implements the Collator interface.
// This implementation first converts the given UTF8 string
// to UTF16 and then calls the main ICU collation function.
type icuUTF8conv struct {
	icuCollator
}
|
||
|
|
||
|
func newUTF8conv(locale string) (Collator, error) {
|
||
|
c := &icuUTF8conv{}
|
||
|
return c, c.init(locale)
|
||
|
}
|
||
|
|
||
|
func (c *icuUTF8conv) Compare(sa, sb Input) int {
|
||
|
a := encodeUTF16(sa.UTF8)
|
||
|
b := encodeUTF16(sb.UTF8)
|
||
|
return int(C.ucol_strcoll(c.col, icuUCharP(a), icuULen(a), icuUCharP(b), icuULen(b)))
|
||
|
}
|
||
|
|
||
|
func (c *icuUTF8conv) Key(s Input) []byte {
|
||
|
a := encodeUTF16(s.UTF8)
|
||
|
bp, bn := c.buf()
|
||
|
n := C.ucol_getSortKey(c.col, icuUCharP(a), icuULen(a), bp, bn)
|
||
|
if b := c.extendBuf(n); b != nil {
|
||
|
return b
|
||
|
}
|
||
|
return c.Key(s)
|
||
|
}
|
||
|
|
||
|
// encodeUTF16 converts the UTF-8 encoded bytes in b to UTF-16 code units.
//
// Runes outside the Basic Multilingual Plane become a surrogate pair; all
// other runes become a single code unit. Each invalid UTF-8 byte decodes to
// U+FFFD and yields one 0xFFFD unit. The result is never nil, even for empty
// input.
func encodeUTF16(b []byte) []uint16 {
	// Every decoded sequence consumes at least as many bytes as the code
	// units it produces (1-3 bytes -> 1 unit, 4 bytes -> 2 units), so len(b)
	// is an upper bound and a single allocation suffices.
	a := make([]uint16, 0, len(b))
	for len(b) > 0 {
		r, sz := utf8.DecodeRune(b)
		b = b[sz:]
		// EncodeRune returns U+FFFD for both halves when r does not need a
		// surrogate pair; in that case r itself fits in one code unit.
		if r1, r2 := utf16.EncodeRune(r); r1 != utf8.RuneError {
			a = append(a, uint16(r1), uint16(r2))
		} else {
			a = append(a, uint16(r))
		}
	}
	return a
}
|