/*
 * Copyright (c) 2006 Henri Sivonen
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

package fi.iki.hsivonen.xml.checker;

import java.util.Arrays;
import java.util.LinkedList;

import org.xml.sax.Attributes;
import org.xml.sax.SAXException;

import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.text.UnicodeSet;

/**
 * Checks whether elements that require significant inline content have it.
 *
 * <p>Only elements in the XHTML namespace are tracked. For each open XHTML
 * element a {@link StackNode} records whether significant inline content
 * (a significant character or one of the {@link #SIGNIFICANT_ELEMENTS}) has
 * been seen inside it. When an element listed in
 * {@link #REQUIRE_SIGNIFICANT_CONTENT} is closed without such content, an
 * error is reported through <code>err</code>.
 *
 * @version $Id: SignificantInlineChecker.java,v 1.6 2006/12/01 12:34:31 hsivonen Exp $
 * @author hsivonen
 */
public final class SignificantInlineChecker extends Checker {

    /**
     * The XHTML namespace URI; only elements in this namespace are checked.
     */
    private static final String XHTML_NS = "http://www.w3.org/1999/xhtml";

    /**
     * A thread-safe set of insignificant characters: space, line and
     * paragraph separators plus control and format characters.
     */
    // NOTE(review): the cast and the deprecation suppression are kept exactly
    // as they were; presumably they are needed by the ICU4J version this file
    // was written against -- confirm before removing.
    @SuppressWarnings("deprecation")
    private static final UnicodeSet INSIGNIFICANT_CHARACTERS = (UnicodeSet) new UnicodeSet(
            "[[:Zs:][:Zl:][:Zp:][:Cc:][:Cf:]]").freeze();

    /**
     * A lexicographically sorted array of names of XHTML elements that count as
     * significant inline content. Must stay sorted: it is binary-searched.
     */
    private static final String[] SIGNIFICANT_ELEMENTS = { "button", "canvas",
            "embed", "iframe", "img", "input", "object", "output", "select",
            "textarea" };

    /**
     * A lexicographically sorted array of names of XHTML elements that require
     * significant inline content. Must stay sorted: it is binary-searched.
     */
    private static final String[] REQUIRE_SIGNIFICANT_CONTENT = { "a",
            "caption", "h1", "h2", "h3", "h4", "h5", "h6", "p" };

    /**
     * The stack for keeping track of which elements have already had
     * significant inline content. <em>Grows from the head of the list!</em>
     */
    private final LinkedList<StackNode> stack = new LinkedList<StackNode>();

    /**
     * Indicates whether checking for significant inline content is necessary.
     */
    private boolean needToCheck = false;

    /**
     * A holder for the previous UTF-16 code unit, so that a low surrogate can
     * be combined with the high surrogate that preceded it (possibly in an
     * earlier <code>characters</code> call).
     */
    private char prev = '\u0000';

    /**
     * Returns <code>true</code> if the argument names an XHTML element that
     * counts as significant inline content.
     *
     * @param localName name of an HTML element
     * @return <code>true</code> if the argument names an XHTML element that
     * counts as significant inline content
     */
    private static boolean isSignificantElement(String localName) {
        return Arrays.binarySearch(SIGNIFICANT_ELEMENTS, localName) >= 0;
    }

    /**
     * Returns <code>true</code> if the argument names an XHTML element that
     * requires significant inline content.
     *
     * @param localName name of an HTML element
     * @return <code>true</code> if the argument names an XHTML element that
     * requires significant inline content
     */
    private static boolean requiresSignificantContent(String localName) {
        return Arrays.binarySearch(REQUIRE_SIGNIFICANT_CONTENT, localName) >= 0;
    }

    /**
     * Returns <code>true</code> if the argument is a significant character,
     * i.e. it is not a member of {@link #INSIGNIFICANT_CHARACTERS}.
     *
     * @param c a Unicode code point
     * @return <code>true</code> if the argument is a significant character
     */
    private static boolean isSignificantCharacter(int c) {
        return !INSIGNIFICANT_CHARACTERS.contains(c);
    }

    /**
     * Constructor.
     */
    public SignificantInlineChecker() {
        super();
    }

    /**
     * Scans the given character data for the first significant character and,
     * if one is found, marks all currently open elements as having significant
     * inline content. Does nothing when no check is pending.
     *
     * @param ch the buffer holding the characters
     * @param start index of the first code unit to examine
     * @param length number of code units to examine
     * @throws SAXException if a low surrogate is not preceded by a high
     * surrogate
     * @see fi.iki.hsivonen.xml.checker.Checker#characters(char[], int, int)
     */
    @Override
    public void characters(char[] ch, int start, int length)
            throws SAXException {
        if (!needToCheck) {
            return;
        }
        final int end = start + length;
        for (int i = start; i < end; i++) {
            final char c = ch[i];
            if (UCharacter.isLowSurrogate(c)) {
                // A low surrogate is only meaningful together with the high
                // surrogate remembered in prev.
                if (!UCharacter.isHighSurrogate(prev)) {
                    throw new SAXException("Malformed UTF-16!");
                }
                if (isSignificantCharacter(UCharacter.getCodePoint(prev, c))) {
                    foundSignificantCharacter();
                    return;
                }
            } else if (!UCharacter.isHighSurrogate(c)) {
                // Plain BMP code unit: it is a complete code point by itself.
                if (isSignificantCharacter(c)) {
                    foundSignificantCharacter();
                    return;
                }
            }
            // A high surrogate is not examined here; it is only remembered so
            // the next code unit can complete the pair. An unpaired high
            // surrogate is not reported: it is simply overwritten below.
            prev = c;
        }
    }

    /**
     * Clears the surrogate state and marks the open elements as having
     * significant inline content.
     */
    private void foundSignificantCharacter() {
        prev = '\u0000';
        markSignificant();
    }

    /**
     * Pops the stack entry of the closing XHTML element and reports an error
     * if the element requires significant inline content but did not have any.
     *
     * @see fi.iki.hsivonen.xml.checker.Checker#endElement(java.lang.String,
     * java.lang.String, java.lang.String)
     */
    @Override
    public void endElement(String uri, String localName, String qName)
            throws SAXException {
        if (!XHTML_NS.equals(uri)) {
            return;
        }
        final StackNode node = stack.removeFirst();
        if (!node.hasSignificantInline
                && requiresSignificantContent(localName)) {
            err("Element \u201C"
                    + localName
                    + "\u201D from namespace \u201Chttp://www.w3.org/1999/xhtml\u201D requires significant inline content but did not have any.");
        }
    }

    /**
     * Pushes a stack entry for the opening XHTML element. If a check is
     * pending and the element itself counts as significant inline content,
     * the already open elements are marked first (the new element's own entry
     * is pushed afterwards and therefore stays unmarked).
     *
     * @see fi.iki.hsivonen.xml.checker.Checker#startElement(java.lang.String,
     * java.lang.String, java.lang.String, org.xml.sax.Attributes)
     */
    @Override
    public void startElement(String uri, String localName, String qName,
            Attributes atts) throws SAXException {
        if (!XHTML_NS.equals(uri)) {
            return;
        }
        if (needToCheck && isSignificantElement(localName)) {
            markSignificant();
        }
        stack.addFirst(new StackNode());
        if (!needToCheck) {
            // Start checking again only if the new element itself demands
            // significant inline content.
            needToCheck = requiresSignificantContent(localName);
        }
    }

    /**
     * Marks the currently open elements as having significant inline content.
     */
    private void markSignificant() {
        needToCheck = false;
        for (StackNode node : stack) {
            if (node.hasSignificantInline) {
                // Entries are only ever marked from the head up to the first
                // already-marked one, so everything beyond it is marked too.
                break;
            }
            node.hasSignificantInline = true;
        }
    }

    /**
     * Returns the checker to its initial state: empty stack, no pending check
     * and no remembered code unit.
     *
     * @see fi.iki.hsivonen.xml.checker.Checker#reset()
     */
    @Override
    public void reset() {
        stack.clear();
        needToCheck = false;
        prev = '\u0000';
    }

    /**
     * Nested class for wrapping a mutable boolean in an object. Declared
     * static because it never touches the enclosing checker instance.
     */
    private static final class StackNode {
        boolean hasSignificantInline = false;
    }

}