001 /* 002 * Copyright (c) 2005 Marko Karppinen & Co. LLC 003 * 004 * Permission is hereby granted, free of charge, to any person obtaining a 005 * copy of this software and associated documentation files (the "Software"), 006 * to deal in the Software without restriction, including without limitation 007 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 008 * and/or sell copies of the Software, and to permit persons to whom the 009 * Software is furnished to do so, subject to the following conditions: 010 * 011 * The above copyright notice and this permission notice shall be included in 012 * all copies or substantial portions of the Software. 013 * 014 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 015 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 016 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 017 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 018 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 019 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 020 * DEALINGS IN THE SOFTWARE. 021 */ 022 023 package fi.karppinen.xml; 024 025 import gnu.xml.pipeline.EventConsumer; 026 import gnu.xml.pipeline.EventFilter; 027 028 import org.xml.sax.SAXException; 029 030 /** 031 * This filter replaces astral characters with U+FFFD REPLACEMENT CHARACTER. 032 * This filter is useful when preparing data for a recipient that does not 033 * handle astral characters gracefully. 034 * 035 * @version $Id: AstralContentFilter.java,v 1.1 2006/10/30 19:57:09 hsivonen Exp $ 036 * @author hsivonen 037 */ 038 public class AstralContentFilter extends EventFilter { 039 040 private static final char[] REPLACEMENT_CHARACTER = {'\uFFFD'}; 041 042 /** 043 * Constructs a new <code>AstralContentFilter</code>. 044 */ 045 public AstralContentFilter() { 046 super(); 047 setContentHandler(this); 048 } 049 050 /** 051 * Constructs a new <code>AstralContentFilter</code> chaining it to an 052 * <code>EventConsumer</code>. 053 * @param consumer the next <code>EventConsumer</code> in the chain 054 */ 055 public AstralContentFilter(EventConsumer consumer) { 056 super(consumer); 057 setContentHandler(this); 058 } 059 060 /** 061 * @see org.xml.sax.ContentHandler#characters(char[], int, int) 062 */ 063 public void characters(char[] ch, int start, int length) 064 throws SAXException { 065 int s = start; 066 int i = start; 067 int end = start + length; 068 while(i < end) { 069 char c = ch[i]; 070 if('\uD800' <= c && c <= '\uDFFF') { 071 // Found a surrogate. Flush. 072 if(s < i) { 073 super.characters(ch, s, i - s); 074 } 075 // Emit replacement char once per pair 076 if('\uD800' <= c && c <= '\uDBFF') { 077 super.characters(REPLACEMENT_CHARACTER, 0, 1); 078 } 079 s = i + 1; 080 } 081 i++; 082 } 083 // Flush. 084 if(s < i) { 085 super.characters(ch, s, i - s); 086 } 087 } 088 }