From 7c49eceebaa5202067b3484abaa1e6278377fe2a Mon Sep 17 00:00:00 2001 From: Loic Nageleisen Date: Tue, 30 Jun 2015 11:46:57 +0200 Subject: [PATCH] factor out CP1252 map --- fix_latin.go | 62 +++++++++++++++++++++++++++------------------------- 1 file changed, 32 insertions(+), 30 deletions(-) diff --git a/fix_latin.go b/fix_latin.go index 3187604..70a9b37 100644 --- a/fix_latin.go +++ b/fix_latin.go @@ -32,6 +32,38 @@ func Assume(e Encoding) func(*Fixer) error { } } +// remainder is ISO-8859-1 +// does not define 0x81, 0x8D, 0x8F, 0x90, 0x9D +var cp1252 = map[byte][]byte{ + 0x80: {0xE2, 0x82, 0xAC}, // EURO SIGN + 0x82: {0xE2, 0x80, 0x9A}, // SINGLE LOW-9 QUOTATION MARK + 0x83: {0xC6, 0x92}, // LATIN SMALL LETTER F WITH HOOK + 0x84: {0xE2, 0x80, 0x9E}, // DOUBLE LOW-9 QUOTATION MARK + 0x85: {0xE2, 0x80, 0xA6}, // HORIZONTAL ELLIPSIS + 0x86: {0xE2, 0x80, 0xA0}, // DAGGER + 0x87: {0xE2, 0x80, 0xA1}, // DOUBLE DAGGER + 0x88: {0xCB, 0x86}, // MODIFIER LETTER CIRCUMFLEX ACCENT + 0x89: {0xE2, 0x80, 0xB0}, // PER MILLE SIGN + 0x8A: {0xC5, 0xA0}, // LATIN CAPITAL LETTER S WITH CARON + 0x8B: {0xE2, 0x80, 0xB9}, // SINGLE LEFT-POINTING ANGLE QUOTATION MARK + 0x8C: {0xC5, 0x92}, // LATIN CAPITAL LIGATURE OE + 0x8E: {0xC5, 0xBD}, // LATIN CAPITAL LETTER Z WITH CARON + 0x91: {0xE2, 0x80, 0x98}, // LEFT SINGLE QUOTATION MARK + 0x92: {0xE2, 0x80, 0x99}, // RIGHT SINGLE QUOTATION MARK + 0x93: {0xE2, 0x80, 0x9C}, // LEFT DOUBLE QUOTATION MARK + 0x94: {0xE2, 0x80, 0x9D}, // RIGHT DOUBLE QUOTATION MARK + 0x95: {0xE2, 0x80, 0xA2}, // BULLET + 0x96: {0xE2, 0x80, 0x93}, // EN DASH + 0x97: {0xE2, 0x80, 0x94}, // EM DASH + 0x98: {0xCB, 0x9C}, // SMALL TILDE + 0x99: {0xE2, 0x84, 0xA2}, // TRADE MARK SIGN + 0x9A: {0xC5, 0xA1}, // LATIN SMALL LETTER S WITH CARON + 0x9B: {0xE2, 0x80, 0xBA}, // SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + 0x9C: {0xC5, 0x93}, // LATIN SMALL LIGATURE OE + 0x9E: {0xC5, 0xBE}, // LATIN SMALL LETTER Z WITH CARON + 0x9F: {0xC5, 0xB8}, // LATIN CAPITAL LETTER Y WITH 
DIAERESIS +} + func Fix(r io.Reader, w io.Writer, options ...func(*Fixer) error) { f := &Fixer{} @@ -108,36 +140,6 @@ func Fix(r io.Reader, w io.Writer, options ...func(*Fixer) error) { // CP1252 if handle_cp1252 { - // does not define 0x81, 0x8D, 0x8F, 0x90, 09D - cp1252 := map[byte][]byte{ - 0x80: {0xE2, 0x82, 0xAC}, // EURO SIGN - 0x82: {0xE2, 0x80, 0x9A}, // SINGLE LOW-9 QUOTATION MARK - 0x83: {0xC6, 0x92}, // LATIN SMALL LETTER F WITH HOOK - 0x84: {0xE2, 0x80, 0x9E}, // DOUBLE LOW-9 QUOTATION MARK - 0x85: {0xE2, 0x80, 0xA6}, // HORIZONTAL ELLIPSIS - 0x86: {0xE2, 0x80, 0xA0}, // DAGGER - 0x87: {0xE2, 0x80, 0xA1}, // DOUBLE DAGGER - 0x88: {0xCB, 0x86}, // MODIFIER LETTER CIRCUMFLEX ACCENT - 0x89: {0xE2, 0x80, 0xB0}, // PER MILLE SIGN - 0x8A: {0xC5, 0xA0}, // LATIN CAPITAL LETTER S WITH CARON - 0x8B: {0xE2, 0x80, 0xB9}, // SINGLE LEFT-POINTING ANGLE QUOTATION MARK - 0x8C: {0xC5, 0x92}, // LATIN CAPITAL LIGATURE OE - 0x8E: {0xC5, 0xBD}, // LATIN CAPITAL LETTER Z WITH CARON - 0x91: {0xE2, 0x80, 0x98}, // LEFT SINGLE QUOTATION MARK - 0x92: {0xE2, 0x80, 0x99}, // RIGHT SINGLE QUOTATION MARK - 0x93: {0xE2, 0x80, 0x9C}, // LEFT DOUBLE QUOTATION MARK - 0x94: {0xE2, 0x80, 0x9D}, // RIGHT DOUBLE QUOTATION MARK - 0x95: {0xE2, 0x80, 0xA2}, // BULLET - 0x96: {0xE2, 0x80, 0x93}, // EN DASH - 0x97: {0xE2, 0x80, 0x94}, // EM DASH - 0x98: {0xCB, 0x9C}, // SMALL TILDE - 0x99: {0xE2, 0x84, 0xA2}, // TRADE MARK SIGN - 0x9A: {0xC5, 0xA1}, // LATIN SMALL LETTER S WITH CARON - 0x9B: {0xE2, 0x80, 0xBA}, // SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - 0x9C: {0xC5, 0x93}, // LATIN SMALL LIGATURE OE - 0x9E: {0xC5, 0xBE}, // LATIN SMALL LETTER Z WITH CARON - 0x9F: {0xC5, 0xB8}, // LATIN CAPITAL LETTER Y WITH DIAERESIS - } if bytes, ok := cp1252[input[0]]; ok { for _, b := range bytes { output = append(output, b)