001 /*
002 * Copyright (c) 2006 Henri Sivonen
003 *
004 * Permission is hereby granted, free of charge, to any person obtaining a
005 * copy of this software and associated documentation files (the "Software"),
006 * to deal in the Software without restriction, including without limitation
007 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
008 * and/or sell copies of the Software, and to permit persons to whom the
009 * Software is furnished to do so, subject to the following conditions:
010 *
011 * The above copyright notice and this permission notice shall be included in
012 * all copies or substantial portions of the Software.
013 *
014 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
015 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
016 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
017 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
018 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
019 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
020 * DEALINGS IN THE SOFTWARE.
021 */
022
023 package fi.iki.hsivonen.xml.checker;
024
025 import java.util.Arrays;
026 import java.util.LinkedList;
027
028 import org.xml.sax.Attributes;
029 import org.xml.sax.SAXException;
030
031 import com.ibm.icu.lang.UCharacter;
032 import com.ibm.icu.text.UnicodeSet;
033
034 /**
035 * Checks whether elements that require significant inline content have it.
036 *
037 * @version $Id: SignificantInlineChecker.java,v 1.6 2006/12/01 12:34:31 hsivonen Exp $
038 * @author hsivonen
039 */
040 public final class SignificantInlineChecker extends Checker {
041
042 /**
043 * A thread-safe set of insignificant chacarcters.
044 */
045 @SuppressWarnings("deprecation")
046 private static final UnicodeSet INSIGNIFICANT_CHARACTERS = (UnicodeSet) new UnicodeSet(
047 "[[:Zs:][:Zl:][:Zp:][:Cc:][:Cf:]]").freeze();
048
049 /**
050 * A lexicographically sorted array of names of XHTML elements that count as
051 * significant inline content.
052 */
053 private static final String[] SIGNIFICANT_ELEMENTS = { "button", "canvas",
054 "embed", "iframe", "img", "input", "object", "output", "select",
055 "textarea" };
056
057 /**
058 * A lexicographically sorted array of names of XHTML elements that require
059 * significant inline content.
060 */
061 private static final String[] REQUIRE_SIGNIFICANT_CONTENT = { "a",
062 "caption", "h1", "h2", "h3", "h4", "h5", "h6", "p" };
063
064 /**
065 * The stack for keeping track of which elements have already had
066 * significant inline content. <em>Grows from the head of the list!</em>
067 */
068 private LinkedList<StackNode> stack = new LinkedList<StackNode>();
069
070 /**
071 * Indicates whether checking for significant inline content is necessary.
072 */
073 private boolean needToCheck = false;
074
075 /**
076 * A holder for the previous UTF-16 code unit for dealing with
077 * high surrogates.
078 */
079 private char prev = '\u0000';
080
081 /**
082 * Returns <code>true</code> if the argument names an XHTML element that
083 * counts as significant inline content.
084 * @param localName name of an HTML element
085 * @return <code>true</code> if the argument names an XHTML element that
086 * counts as significant inline content
087 */
088 private static boolean isSignificantElement(String localName) {
089 return Arrays.binarySearch(SIGNIFICANT_ELEMENTS, localName) > -1;
090 }
091
092 /**
093 * Returns <code>true</code> if the argument names an XHTML element that
094 * requires significant inline content.
095 * @param localName name of an HTML element
096 * @return <code>true</code> if the argument names an XHTML element that
097 * requires significant inline content
098 */
099 private static boolean requiresSignificantContent(String localName) {
100 return Arrays.binarySearch(REQUIRE_SIGNIFICANT_CONTENT, localName) > -1;
101 }
102
103 /**
104 * Returns <code>true</code> if teh argument is a significant character.
105 * @param c a Unicode code point
106 * @return <code>true</code> if teh argument is a significant character
107 */
108 private static boolean isSignificantCharacter(int c) {
109 return !INSIGNIFICANT_CHARACTERS.contains(c);
110 }
111
112 /**
113 * Constructor.
114 */
115 public SignificantInlineChecker() {
116 super();
117 }
118
119 /**
120 * @see fi.iki.hsivonen.xml.checker.Checker#characters(char[], int, int)
121 */
122 public void characters(char[] ch, int start, int length)
123 throws SAXException {
124 if (!needToCheck) {
125 return;
126 }
127 for (int i = start; i < (start + length); i++) {
128 char c = ch[i];
129 if (!UCharacter.isHighSurrogate(c)) {
130 if (UCharacter.isLowSurrogate(c)) {
131 if (!UCharacter.isHighSurrogate(prev)) {
132 throw new SAXException("Malformed UTF-16!");
133 }
134 if (isSignificantCharacter(UCharacter.getCodePoint(prev, c))) {
135 prev = '\u0000';
136 markSignificant();
137 return;
138 }
139 } else {
140 if (isSignificantCharacter(c)) {
141 prev = '\u0000';
142 markSignificant();
143 return;
144 }
145 }
146 }
147 prev = c;
148 }
149 }
150
151 /**
152 * @see fi.iki.hsivonen.xml.checker.Checker#endElement(java.lang.String,
153 * java.lang.String, java.lang.String)
154 */
155 public void endElement(String uri, String localName, String qName)
156 throws SAXException {
157 if ("http://www.w3.org/1999/xhtml".equals(uri)) {
158 StackNode node = stack.removeFirst();
159 if (!node.hasSignificantInline
160 && requiresSignificantContent(localName)) {
161 err("Element \u201C"
162 + localName
163 + "\u201D from namespace \u201Chttp://www.w3.org/1999/xhtml\u201D requires significant inline content but did not have any.");
164 }
165 }
166 }
167
168 /**
169 * @see fi.iki.hsivonen.xml.checker.Checker#startElement(java.lang.String,
170 * java.lang.String, java.lang.String, org.xml.sax.Attributes)
171 */
172 public void startElement(String uri, String localName, String qName,
173 Attributes atts) throws SAXException {
174 if ("http://www.w3.org/1999/xhtml".equals(uri)) {
175 if (needToCheck && isSignificantElement(localName)) {
176 markSignificant();
177 }
178 stack.addFirst(new StackNode());
179 if (!needToCheck) {
180 needToCheck = requiresSignificantContent(localName);
181 }
182 }
183 }
184
185 /**
186 * Marks the currently open elements as having significant inline content.
187 */
188 private void markSignificant() {
189 needToCheck = false;
190 for (StackNode node : stack) {
191 if (node.hasSignificantInline) {
192 break;
193 } else {
194 node.hasSignificantInline = true;
195 }
196 }
197 }
198
199 /**
200 * @see fi.iki.hsivonen.xml.checker.Checker#reset()
201 */
202 public void reset() {
203 stack.clear();
204 needToCheck = false;
205 prev = '\u0000';
206 }
207
208 /**
209 * Inner class for wrapping a mutable boolean in an object.
210 */
211 class StackNode {
212 boolean hasSignificantInline = false;
213 }
214
215 }