mirror of
https://github.com/lloeki/fix_latin.git
synced 2025-12-06 10:04:40 +01:00
iso-8859-15 support
This commit is contained in:
parent
1afd423e30
commit
44c907a4bd
1 changed file with 33 additions and 4 deletions
33
fix_latin.go
33
fix_latin.go
|
|
@ -11,13 +11,14 @@ type Encoding int
|
||||||
|
|
||||||
const (
|
const (
|
||||||
ISO_8859_1 Encoding = iota
|
ISO_8859_1 Encoding = iota
|
||||||
// TODO: ISO_8859_15
|
ISO_8859_15
|
||||||
CP1252
|
CP1252
|
||||||
)
|
)
|
||||||
|
|
||||||
type Fixer struct {
|
type Fixer struct {
|
||||||
allowControl bool
|
allowControl bool
|
||||||
handleCP1252 bool
|
handleCP1252 bool
|
||||||
|
handleISO_8859_15 bool
|
||||||
}
|
}
|
||||||
|
|
||||||
func AllowControl(f *Fixer) error {
|
func AllowControl(f *Fixer) error {
|
||||||
|
|
@ -27,11 +28,28 @@ func AllowControl(f *Fixer) error {
|
||||||
|
|
||||||
func Assume(e Encoding) func(*Fixer) error {
|
func Assume(e Encoding) func(*Fixer) error {
|
||||||
return func(f *Fixer) error {
|
return func(f *Fixer) error {
|
||||||
f.handleCP1252 = e == CP1252
|
switch e {
|
||||||
|
case CP1252:
|
||||||
|
f.handleCP1252 = true
|
||||||
|
case ISO_8859_15:
|
||||||
|
f.handleISO_8859_15 = true
|
||||||
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// remainder is ISO-8859-1
|
||||||
|
var iso_8859_15 = map[byte][]byte{
|
||||||
|
0xA4: {0xE2, 0x82, 0xAC}, // EURO SIGN
|
||||||
|
0xA6: {0xC5, 0xA0}, // LATIN CAPITAL LETTER S WITH CARON
|
||||||
|
0xA8: {0xC5, 0xA1}, // LATIN SMALL LETTER S WITH CARON
|
||||||
|
0xB4: {0xC5, 0xBD}, // LATIN CAPITAL LETTER Z WITH CARON
|
||||||
|
0xB8: {0xC5, 0xBE}, // LATIN SMALL LETTER Z WITH CARON
|
||||||
|
0xBC: {0xC5, 0x92}, // LATIN CAPITAL LIGATURE OE
|
||||||
|
0xBD: {0xC5, 0x93}, // LATIN SMALL LIGATURE OE
|
||||||
|
0xBE: {0xC5, 0xB8}, // LATIN CAPITAL LETTER Y WITH DIAERESIS
|
||||||
|
}
|
||||||
|
|
||||||
// remainder is ISO-8859-1
|
// remainder is ISO-8859-1
|
||||||
// does not define 0x81, 0x8D, 0x8F, 0x90, 0x9D
|
// does not define 0x81, 0x8D, 0x8F, 0x90, 0x9D
|
||||||
var cp1252 = map[byte][]byte{
|
var cp1252 = map[byte][]byte{
|
||||||
|
|
@ -147,6 +165,17 @@ func Fix(r io.Reader, w io.Writer, options ...func(*Fixer) error) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ISO-8859-15
|
||||||
|
if f.handleISO_8859_15 {
|
||||||
|
if bytes, ok := iso_8859_15[input[0]]; ok {
|
||||||
|
for _, b := range bytes {
|
||||||
|
output = append(output, b)
|
||||||
|
}
|
||||||
|
input = input[1:]
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// ISO-8859-1 high-order control chars
|
// ISO-8859-1 high-order control chars
|
||||||
if !f.allowControl && input[0] >= 0x80 && input[0] <= 0x9F {
|
if !f.allowControl && input[0] >= 0x80 && input[0] <= 0x9F {
|
||||||
panic("control char")
|
panic("control char")
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue