001    /*
002     * Copyright (c) 2006 Henri Sivonen
003     *
004     * Permission is hereby granted, free of charge, to any person obtaining a 
005     * copy of this software and associated documentation files (the "Software"), 
006     * to deal in the Software without restriction, including without limitation 
007     * the rights to use, copy, modify, merge, publish, distribute, sublicense, 
008     * and/or sell copies of the Software, and to permit persons to whom the 
009     * Software is furnished to do so, subject to the following conditions:
010     *
011     * The above copyright notice and this permission notice shall be included in 
012     * all copies or substantial portions of the Software.
013     *
014     * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 
015     * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 
016     * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 
017     * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 
018     * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
019     * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
020     * DEALINGS IN THE SOFTWARE.
021     */
022    
023    package fi.iki.hsivonen.io;
024    
025    import java.io.IOException;
026    import java.io.OutputStream;
027    import java.io.Writer;
028    import java.util.Arrays;
029    
/**
 * A {@link Writer} that encodes characters as Windows-1252 bytes on an
 * underlying {@link OutputStream} and falls back to decimal numeric
 * character references (<code>&amp;#NNNN;</code>) for characters that have
 * no Windows-1252 byte.
 *
 * <p>Per UTF-16 code unit the mapping is:
 * <ul>
 * <li>U+0000..U+007F and U+00A0..U+00FF: written as one byte of the same
 * value.</li>
 * <li>U+0080..U+009F: silently dropped.</li>
 * <li>The characters listed in <code>CODE_POINTS</code>: written as the
 * byte at the same index in <code>BYTES</code>.</li>
 * <li>A high surrogate immediately followed by a low surrogate: written as
 * one numeric character reference for the combined code point.</li>
 * <li>Unpaired surrogates: silently dropped.</li>
 * <li>Anything else: written as a numeric character reference.</li>
 * </ul>
 */
public class NcrEscapingWindows1252OutputStreamWriter extends Writer {

    /** Constant folded into the surrogate-pair to code-point computation. */
    private static final int SURROGATE_OFFSET = 0x10000 - (0xD800 << 10) - 0xDC00;

    /**
     * Code points above U+00FF that have a Windows-1252 byte. Must stay in
     * ascending order because it is searched with
     * {@link Arrays#binarySearch(int[], int)}.
     */
    private static final int[] CODE_POINTS = {
        0x0152,
        0x0153,
        0x0160,
        0x0161,
        0x0178,
        0x017D,
        0x017E,
        0x0192,
        0x02C6,
        0x02DC,
        0x2013,
        0x2014,
        0x2018,
        0x2019,
        0x201A,
        0x201C,
        0x201D,
        0x201E,
        0x2020,
        0x2021,
        0x2022,
        0x2026,
        0x2030,
        0x2039,
        0x203A,
        0x20AC,
        0x2122
    };

    /**
     * Windows-1252 byte values; entry <code>i</code> is the encoding of
     * <code>CODE_POINTS[i]</code>.
     */
    private static final int[] BYTES = {
        0x8C,
        0x9C,
        0x8A,
        0x9A,
        0x9F,
        0x8E,
        0x9E,
        0x83,
        0x88,
        0x98,
        0x96,
        0x97,
        0x91,
        0x92,
        0x82,
        0x93,
        0x94,
        0x84,
        0x86,
        0x87,
        0x95,
        0x85,
        0x89,
        0x8B,
        0x9B,
        0x80,
        0x99
    };

    /** Destination for the encoded bytes. */
    private final OutputStream out;

    /**
     * The high surrogate seen in the immediately preceding call to
     * {@link #write(int)}, or 0 when the previous code unit was not a high
     * surrogate.
     */
    private int pendingHighSurrogate;

    /**
     * Creates a writer that sends its encoded output to the given stream.
     *
     * @param out the stream that receives the encoded bytes
     */
    public NcrEscapingWindows1252OutputStreamWriter(OutputStream out) {
        super();
        this.out = out;
        this.pendingHighSurrogate = 0;
    }

    /**
     * Encodes <code>count</code> characters of <code>buf</code> starting at
     * <code>offset</code>.
     *
     * <p>The range is validated before anything is written, so an invalid
     * range never results in partial output.
     *
     * @param buf the characters to write
     * @param offset index of the first character to write
     * @param count number of characters to write
     * @throws IndexOutOfBoundsException if <code>offset</code> or
     *         <code>count</code> is negative or the range extends past the
     *         end of <code>buf</code>
     * @throws IOException if the underlying stream fails
     * @see java.io.Writer#write(char[], int, int)
     */
    public void write(char[] buf, int offset, int count) throws IOException {
        // Written as a subtraction so that offset + count cannot overflow.
        if (offset < 0 || count < 0 || count > buf.length - offset) {
            throw new IndexOutOfBoundsException("offset=" + offset
                    + ", count=" + count + ", length=" + buf.length);
        }
        int end = offset + count;
        for (int i = offset; i < end; i++) {
            this.write(buf[i]);
        }
    }

    /**
     * Flushes the underlying stream.
     *
     * @throws IOException if the underlying stream fails
     */
    public void flush() throws IOException {
        out.flush();
    }

    /**
     * Closes the underlying stream. A high surrogate that is still waiting
     * for its low surrogate produces no output.
     *
     * @throws IOException if the underlying stream fails
     */
    public void close() throws IOException {
        out.close();
    }

    /**
     * Encodes a single UTF-16 code unit; only the low 16 bits of
     * <code>c</code> are used.
     *
     * @param c the code unit to write
     * @throws IOException if the underlying stream fails
     * @see java.io.Writer#write(int)
     */
    public void write(int c) throws IOException {
        c &= 0xFFFF; // per API contract

        // Only a high surrogate leaves state behind for the next call; every
        // other code unit clears it. Take the old value first so that the
        // state is already updated if the stream throws.
        int high = pendingHighSurrogate;
        pendingHighSurrogate = 0;

        if (c < 0x80) {
            out.write(c);
        } else if (c < 0xA0) {
            // C1 range has no representation here: silent loss
        } else if (c < 0x100) {
            out.write(c);
        } else if ((c & 0xFC00) == 0xDC00) {
            // Low surrogate: meaningful only right after a high surrogate.
            if (high != 0) {
                this.writeNcr((high << 10) + c + SURROGATE_OFFSET);
            }
            // otherwise unpaired: silent loss
        } else if ((c & 0xFC00) == 0xD800) {
            // High surrogate: remember it. If the previous unit was also a
            // high surrogate, that one is silently lost.
            pendingHighSurrogate = c;
        } else {
            int index = Arrays.binarySearch(CODE_POINTS, c);
            if (index >= 0) {
                out.write(BYTES[index]);
            } else {
                this.writeNcr(c);
            }
        }
    }

    /**
     * Writes <code>c</code> as a decimal numeric character reference, i.e.
     * <code>&amp;#</code>, the decimal digits of <code>c</code>, then
     * <code>;</code>.
     *
     * @param c the code point to escape
     * @throws IOException if the underlying stream fails
     */
    private void writeNcr(int c) throws IOException {
        out.write('&');
        out.write('#');

        String digits = Integer.toString(c);
        for (int i = 0; i < digits.length(); i++) {
            out.write(digits.charAt(i));
        }

        out.write(';');
    }

}