001 /* 002 * Copyright (c) 2006 Henri Sivonen 003 * 004 * Permission is hereby granted, free of charge, to any person obtaining a 005 * copy of this software and associated documentation files (the "Software"), 006 * to deal in the Software without restriction, including without limitation 007 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 008 * and/or sell copies of the Software, and to permit persons to whom the 009 * Software is furnished to do so, subject to the following conditions: 010 * 011 * The above copyright notice and this permission notice shall be included in 012 * all copies or substantial portions of the Software. 013 * 014 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 015 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 016 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 017 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 018 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 019 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 020 * DEALINGS IN THE SOFTWARE. 021 */ 022 023 package fi.iki.hsivonen.io; 024 025 import java.io.IOException; 026 import java.io.OutputStream; 027 import java.io.Writer; 028 import java.util.Arrays; 029 030 public class NcrEscapingWindows1252OutputStreamWriter extends Writer { 031 032 private static final int SURROGATE_OFFSET = 0x10000 - (0xD800 << 10) - 0xDC00; 033 034 private static int[] CODE_POINTS = { 035 0x0152, 036 0x0153, 037 0x0160, 038 0x0161, 039 0x0178, 040 0x017D, 041 0x017E, 042 0x0192, 043 0x02C6, 044 0x02DC, 045 0x2013, 046 0x2014, 047 0x2018, 048 0x2019, 049 0x201A, 050 0x201C, 051 0x201D, 052 0x201E, 053 0x2020, 054 0x2021, 055 0x2022, 056 0x2026, 057 0x2030, 058 0x2039, 059 0x203A, 060 0x20AC, 061 0x2122 062 }; 063 064 private static int[] BYTES = { 065 0x8C, 066 0x9C, 067 0x8A, 068 0x9A, 069 0x9F, 070 0x8E, 071 0x9E, 072 0x83, 073 0x88, 074 0x98, 075 0x96, 076 0x97, 077 0x91, 078 0x92, 079 0x82, 080 0x93, 081 0x94, 082 0x84, 083 0x86, 084 0x87, 085 0x95, 086 0x85, 087 0x89, 088 0x8B, 089 0x9B, 090 0x80, 091 0x99 092 }; 093 094 private OutputStream out; 095 096 private int prev; 097 098 public NcrEscapingWindows1252OutputStreamWriter(OutputStream out) { 099 super(); 100 this.out = out; 101 this.prev = 0; 102 } 103 104 public void write(char[] buf, int offset, int count) throws IOException { 105 int end = offset + count; 106 for (int i = offset; i < end; i++) { 107 this.write(buf[i]); 108 } 109 } 110 111 public void flush() throws IOException { 112 out.flush(); 113 } 114 115 public void close() throws IOException { 116 out.close(); 117 } 118 119 /** 120 * @see java.io.Writer#write(int) 121 */ 122 public void write(int c) throws IOException { 123 int i = -1; 124 c &= 0xFFFF; // per API contract 125 if (c < 0x80) { 126 out.write(c); 127 prev = 0; 128 return; 129 } else if (c < 0xA0) { 130 prev = 0; 131 // silent loss 132 return; 133 } else if (c < 0x100) { 134 out.write(c); 135 prev = 0; 136 return; 137 } else if ((i = Arrays.binarySearch(CODE_POINTS, c)) >= 0) { 138 out.write(BYTES[i]); 139 prev = 0; 140 return; 141 } if ((c & 0xFC00) == 0xDC00) { 142 // Got a low surrogate. See if prev was high surrogate 143 if (prev != 0) { 144 int intVal = (prev << 10) + c + SURROGATE_OFFSET; 145 prev = 0; 146 this.writeNcr(intVal); 147 return; 148 } else { 149 prev = 0; 150 // silent loss 151 return; 152 } 153 } else if ((c & 0xFC00) == 0xD800) { 154 // silent loss if prev already was surrogate 155 prev = c; 156 return; 157 } else { 158 this.writeNcr(c); 159 prev = 0; 160 return; 161 } 162 } 163 164 private void writeNcr(int c) throws IOException { 165 out.write('&'); 166 out.write('#'); 167 168 String str = Integer.toString(c); 169 for (int i = 0; i < str.length(); i++) { 170 out.write(str.charAt(i)); 171 } 172 173 out.write(';'); 174 } 175 176 }