001 /*
002 * Copyright (c) 2006 Henri Sivonen
003 *
004 * Permission is hereby granted, free of charge, to any person obtaining a
005 * copy of this software and associated documentation files (the "Software"),
006 * to deal in the Software without restriction, including without limitation
007 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
008 * and/or sell copies of the Software, and to permit persons to whom the
009 * Software is furnished to do so, subject to the following conditions:
010 *
011 * The above copyright notice and this permission notice shall be included in
012 * all copies or substantial portions of the Software.
013 *
014 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
015 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
016 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
017 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
018 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
019 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
020 * DEALINGS IN THE SOFTWARE.
021 */
022
023 package fi.iki.hsivonen.io;
024
025 import java.io.IOException;
026 import java.io.OutputStream;
027 import java.io.Writer;
028 import java.util.Arrays;
029
/**
 * A {@link Writer} that encodes characters onto an {@link OutputStream} as
 * single windows-1252 bytes where a mapping is known, and as decimal numeric
 * character references ({@code &#NNNN;}) otherwise.
 *
 * <p>Per-character behavior (see {@link #write(int)}):
 * <ul>
 * <li>U+0000..U+007F and U+00A0..U+00FF are written as the byte of the same
 * value.</li>
 * <li>U+0080..U+009F are silently dropped.</li>
 * <li>Code points listed in {@code CODE_POINTS} are written as the matching
 * entry of {@code BYTES} (the 0x80..0x9F byte range).</li>
 * <li>A high surrogate followed immediately by a low surrogate is combined
 * into one code point and written as a single numeric character
 * reference. Unpaired surrogates are silently dropped.</li>
 * <li>Everything else is written as a numeric character reference.</li>
 * </ul>
 *
 * <p>The writer keeps one pending high surrogate between calls, so instances
 * carry state. No synchronization is performed by this class itself.
 */
public class NcrEscapingWindows1252OutputStreamWriter extends Writer {

    /**
     * Constant term of the UTF-16 decoding formula
     * {@code (high << 10) + low + SURROGATE_OFFSET}, which equals
     * {@code 0x10000 + ((high - 0xD800) << 10) + (low - 0xDC00)}.
     */
    private static final int SURROGATE_OFFSET = 0x10000 - (0xD800 << 10) - 0xDC00;

    /**
     * Code points above U+00FF that have a single-byte representation,
     * in ascending order so that {@link Arrays#binarySearch(int[], int)} can
     * be used. Index-aligned with {@link #BYTES}.
     */
    private static final int[] CODE_POINTS = {
        0x0152,
        0x0153,
        0x0160,
        0x0161,
        0x0178,
        0x017D,
        0x017E,
        0x0192,
        0x02C6,
        0x02DC,
        0x2013,
        0x2014,
        0x2018,
        0x2019,
        0x201A,
        0x201C,
        0x201D,
        0x201E,
        0x2020,
        0x2021,
        0x2022,
        0x2026,
        0x2030,
        0x2039,
        0x203A,
        0x20AC,
        0x2122
    };

    /**
     * Output byte for the code point at the same index of
     * {@link #CODE_POINTS}.
     */
    private static final int[] BYTES = {
        0x8C,
        0x9C,
        0x8A,
        0x9A,
        0x9F,
        0x8E,
        0x9E,
        0x83,
        0x88,
        0x98,
        0x96,
        0x97,
        0x91,
        0x92,
        0x82,
        0x93,
        0x94,
        0x84,
        0x86,
        0x87,
        0x95,
        0x85,
        0x89,
        0x8B,
        0x9B,
        0x80,
        0x99
    };

    /** Destination of the encoded bytes. */
    private final OutputStream out;

    /**
     * The high surrogate seen by the immediately preceding call to
     * {@link #write(int)}, or 0 when there is none pending.
     */
    private int prev;

    /**
     * Creates a writer that encodes onto the given stream.
     *
     * @param out the stream that receives the encoded bytes
     */
    public NcrEscapingWindows1252OutputStreamWriter(OutputStream out) {
        super();
        this.out = out;
        this.prev = 0;
    }

    /**
     * Encodes {@code count} chars of {@code buf}, starting at {@code offset},
     * one at a time through {@link #write(int)}.
     *
     * @param buf the chars to encode
     * @param offset index of the first char to encode
     * @param count number of chars to encode
     * @throws IndexOutOfBoundsException if {@code offset} or {@code count} is
     *         negative or the range extends past the end of {@code buf};
     *         nothing is written in that case
     * @throws IOException if the underlying stream fails
     */
    @Override
    public void write(char[] buf, int offset, int count) throws IOException {
        // Validate up front so that a bad range never produces partial
        // output. Written as a subtraction to stay safe from int overflow.
        if (offset < 0 || count < 0 || count > buf.length - offset) {
            throw new IndexOutOfBoundsException("offset=" + offset
                    + ", count=" + count + ", buf.length=" + buf.length);
        }
        int end = offset + count;
        for (int i = offset; i < end; i++) {
            this.write(buf[i]);
        }
    }

    /**
     * Flushes the underlying stream. A pending high surrogate, if any, is
     * left pending.
     *
     * @throws IOException if the underlying stream fails
     */
    @Override
    public void flush() throws IOException {
        out.flush();
    }

    /**
     * Closes the underlying stream.
     *
     * @throws IOException if the underlying stream fails
     */
    @Override
    public void close() throws IOException {
        out.close();
    }

    /**
     * Encodes a single UTF-16 code unit.
     *
     * @param c the char to encode; only the low 16 bits are used
     * @throws IOException if the underlying stream fails
     * @see java.io.Writer#write(int)
     */
    @Override
    public void write(int c) throws IOException {
        c &= 0xFFFF; // per API contract

        if ((c & 0xFC00) == 0xD800) {
            // High surrogate: remember it and wait for the low half. If a
            // high surrogate was already pending, it is silently lost.
            prev = c;
            return;
        }

        // Any other code unit consumes (or discards) the pending surrogate.
        int pendingHigh = prev;
        prev = 0;

        if (c < 0x80) {
            out.write(c);
        } else if (c < 0xA0) {
            // U+0080..U+009F: silent loss
        } else if (c < 0x100) {
            out.write(c);
        } else if ((c & 0xFC00) == 0xDC00) {
            // Low surrogate: only meaningful right after a high surrogate.
            if (pendingHigh != 0) {
                this.writeNcr((pendingHigh << 10) + c + SURROGATE_OFFSET);
            }
            // else: unpaired low surrogate, silent loss
        } else {
            int index = Arrays.binarySearch(CODE_POINTS, c);
            if (index >= 0) {
                out.write(BYTES[index]);
            } else {
                this.writeNcr(c);
            }
        }
    }

    /**
     * Writes {@code c} as a decimal numeric character reference, i.e. the
     * bytes of {@code "&#"}, the decimal digits of {@code c}, and
     * {@code ";"}.
     *
     * @param c the code point to reference
     * @throws IOException if the underlying stream fails
     */
    private void writeNcr(int c) throws IOException {
        out.write('&');
        out.write('#');

        String digits = Integer.toString(c);
        for (int i = 0; i < digits.length(); i++) {
            out.write(digits.charAt(i));
        }

        out.write(';');
    }

}