001    /*
002     * Copyright (c) 2006 Henri Sivonen
003     *
004     * Permission is hereby granted, free of charge, to any person obtaining a 
005     * copy of this software and associated documentation files (the "Software"), 
006     * to deal in the Software without restriction, including without limitation 
007     * the rights to use, copy, modify, merge, publish, distribute, sublicense, 
008     * and/or sell copies of the Software, and to permit persons to whom the 
009     * Software is furnished to do so, subject to the following conditions:
010     *
011     * The above copyright notice and this permission notice shall be included in 
012     * all copies or substantial portions of the Software.
013     *
014     * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 
015     * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 
016     * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 
017     * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 
018     * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
019     * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
020     * DEALINGS IN THE SOFTWARE.
021     */
022    
023    package fi.iki.hsivonen.xml.checker;
024    
025    import java.util.Arrays;
026    import java.util.LinkedList;
027    
028    import org.xml.sax.Attributes;
029    import org.xml.sax.SAXException;
030    
031    import com.ibm.icu.lang.UCharacter;
032    import com.ibm.icu.text.UnicodeSet;
033    
034    /**
035     * Checks whether elements that require significant inline content have it.
036     * 
037     * @version $Id: SignificantInlineChecker.java,v 1.6 2006/12/01 12:34:31 hsivonen Exp $
038     * @author hsivonen
039     */
040    public final class SignificantInlineChecker extends Checker {
041    
042        /**
043         * A thread-safe set of insignificant chacarcters.
044         */
045        @SuppressWarnings("deprecation")
046        private static final UnicodeSet INSIGNIFICANT_CHARACTERS = (UnicodeSet) new UnicodeSet(
047                "[[:Zs:][:Zl:][:Zp:][:Cc:][:Cf:]]").freeze();
048    
049        /**
050         * A lexicographically sorted array of names of XHTML elements that count as 
051         * significant inline content.
052         */
053        private static final String[] SIGNIFICANT_ELEMENTS = { "button", "canvas",
054                "embed", "iframe", "img", "input", "object", "output", "select",
055                "textarea" };
056    
057        /**
058         * A lexicographically sorted array of names of XHTML elements that require 
059         * significant inline content.
060         */
061        private static final String[] REQUIRE_SIGNIFICANT_CONTENT = { "a",
062                "caption", "h1", "h2", "h3", "h4", "h5", "h6", "p" };
063    
064        /**
065         * The stack for keeping track of which elements have already had 
066         * significant inline content. <em>Grows from the head of the list!</em>
067         */
068        private LinkedList<StackNode> stack = new LinkedList<StackNode>();
069    
070        /**
071         * Indicates whether checking for significant inline content is necessary.
072         */
073        private boolean needToCheck = false;
074    
075        /**
076         * A holder for the previous UTF-16 code unit for dealing with 
077         * high surrogates.
078         */
079        private char prev = '\u0000';
080    
081        /**
082         * Returns <code>true</code> if the argument names an XHTML element that 
083         * counts as significant inline content.
084         * @param localName name of an HTML element
085         * @return <code>true</code> if the argument names an XHTML element that 
086         * counts as significant inline content
087         */
088        private static boolean isSignificantElement(String localName) {
089            return Arrays.binarySearch(SIGNIFICANT_ELEMENTS, localName) > -1;
090        }
091    
092        /**
093         * Returns <code>true</code> if the argument names an XHTML element that 
094         * requires significant inline content.
095         * @param localName name of an HTML element
096         * @return <code>true</code> if the argument names an XHTML element that 
097         * requires significant inline content
098         */
099        private static boolean requiresSignificantContent(String localName) {
100            return Arrays.binarySearch(REQUIRE_SIGNIFICANT_CONTENT, localName) > -1;
101        }
102    
103        /**
104         * Returns <code>true</code> if teh argument is a significant character.
105         * @param c a Unicode code point
106         * @return <code>true</code> if teh argument is a significant character
107         */
108        private static boolean isSignificantCharacter(int c) {
109            return !INSIGNIFICANT_CHARACTERS.contains(c);
110        }
111    
112        /**
113         * Constructor.
114         */
115        public SignificantInlineChecker() {
116            super();
117        }
118    
119        /**
120         * @see fi.iki.hsivonen.xml.checker.Checker#characters(char[], int, int)
121         */
122        public void characters(char[] ch, int start, int length)
123                throws SAXException {
124            if (!needToCheck) {
125                return;
126            }
127            for (int i = start; i < (start + length); i++) {
128                char c = ch[i];
129                if (!UCharacter.isHighSurrogate(c)) {
130                    if (UCharacter.isLowSurrogate(c)) {
131                        if (!UCharacter.isHighSurrogate(prev)) {
132                            throw new SAXException("Malformed UTF-16!");
133                        }
134                        if (isSignificantCharacter(UCharacter.getCodePoint(prev, c))) {
135                            prev = '\u0000';
136                            markSignificant();
137                            return;
138                        }
139                    } else {
140                        if (isSignificantCharacter(c)) {
141                            prev = '\u0000';
142                            markSignificant();
143                            return;
144                        }
145                    }
146                }
147                prev = c;
148            }
149        }
150    
151        /**
152         * @see fi.iki.hsivonen.xml.checker.Checker#endElement(java.lang.String,
153         *      java.lang.String, java.lang.String)
154         */
155        public void endElement(String uri, String localName, String qName)
156                throws SAXException {
157            if ("http://www.w3.org/1999/xhtml".equals(uri)) {
158                StackNode node = stack.removeFirst();
159                if (!node.hasSignificantInline
160                        && requiresSignificantContent(localName)) {
161                    err("Element \u201C"
162                            + localName
163                            + "\u201D from namespace \u201Chttp://www.w3.org/1999/xhtml\u201D requires significant inline content but did not have any.");
164                }
165            }
166        }
167    
168        /**
169         * @see fi.iki.hsivonen.xml.checker.Checker#startElement(java.lang.String,
170         *      java.lang.String, java.lang.String, org.xml.sax.Attributes)
171         */
172        public void startElement(String uri, String localName, String qName,
173                Attributes atts) throws SAXException {
174            if ("http://www.w3.org/1999/xhtml".equals(uri)) {
175                if (needToCheck && isSignificantElement(localName)) {
176                    markSignificant();
177                }
178                stack.addFirst(new StackNode());
179                if (!needToCheck) {
180                    needToCheck = requiresSignificantContent(localName);
181                }
182            }
183        }
184    
185        /**
186         * Marks the currently open elements as having significant inline content.
187         */
188        private void markSignificant() {
189            needToCheck = false;
190            for (StackNode node : stack) {
191                if (node.hasSignificantInline) {
192                    break;
193                } else {
194                    node.hasSignificantInline = true;
195                }
196            }
197        }
198    
199        /**
200         * @see fi.iki.hsivonen.xml.checker.Checker#reset()
201         */
202        public void reset() {
203            stack.clear();
204            needToCheck = false;
205            prev = '\u0000';
206        }
207    
208        /**
209         * Inner class for wrapping a mutable boolean in an object.
210         */
211        class StackNode {
212            boolean hasSignificantInline = false;
213        }
214    
215    }