001 /*
002 * Copyright (c) 2005 Marko Karppinen & Co. LLC
003 *
004 * Permission is hereby granted, free of charge, to any person obtaining a
005 * copy of this software and associated documentation files (the "Software"),
006 * to deal in the Software without restriction, including without limitation
007 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
008 * and/or sell copies of the Software, and to permit persons to whom the
009 * Software is furnished to do so, subject to the following conditions:
010 *
011 * The above copyright notice and this permission notice shall be included in
012 * all copies or substantial portions of the Software.
013 *
014 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
015 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
016 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
017 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
018 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
019 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
020 * DEALINGS IN THE SOFTWARE.
021 */
022
023 package fi.karppinen.xml;
024
025 import gnu.xml.pipeline.EventConsumer;
026 import gnu.xml.pipeline.EventFilter;
027
028 import org.xml.sax.SAXException;
029
030 /**
031 * This filter replaces astral characters with U+FFFD REPLACEMENT CHARACTER.
032 * This filter is useful when preparing data for a recipient that does not
033 * handle astral characters gracefully.
034 *
035 * @version $Id: AstralContentFilter.java,v 1.1 2006/10/30 19:57:09 hsivonen Exp $
036 * @author hsivonen
037 */
038 public class AstralContentFilter extends EventFilter {
039
040 private static final char[] REPLACEMENT_CHARACTER = {'\uFFFD'};
041
042 /**
043 * Constructs a new <code>AstralContentFilter</code>.
044 */
045 public AstralContentFilter() {
046 super();
047 setContentHandler(this);
048 }
049
050 /**
051 * Constructs a new <code>AstralContentFilter</code> chaining it to an
052 * <code>EventConsumer</code>.
053 * @param consumer the next <code>EventConsumer</code> in the chain
054 */
055 public AstralContentFilter(EventConsumer consumer) {
056 super(consumer);
057 setContentHandler(this);
058 }
059
060 /**
061 * @see org.xml.sax.ContentHandler#characters(char[], int, int)
062 */
063 public void characters(char[] ch, int start, int length)
064 throws SAXException {
065 int s = start;
066 int i = start;
067 int end = start + length;
068 while(i < end) {
069 char c = ch[i];
070 if('\uD800' <= c && c <= '\uDFFF') {
071 // Found a surrogate. Flush.
072 if(s < i) {
073 super.characters(ch, s, i - s);
074 }
075 // Emit replacement char once per pair
076 if('\uD800' <= c && c <= '\uDBFF') {
077 super.characters(REPLACEMENT_CHARACTER, 0, 1);
078 }
079 s = i + 1;
080 }
081 i++;
082 }
083 // Flush.
084 if(s < i) {
085 super.characters(ch, s, i - s);
086 }
087 }
088 }