001 /*
002 * Copyright (c) 2005 Henri Sivonen
003 *
004 * Permission is hereby granted, free of charge, to any person obtaining a
005 * copy of this software and associated documentation files (the "Software"),
006 * to deal in the Software without restriction, including without limitation
007 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
008 * and/or sell copies of the Software, and to permit persons to whom the
009 * Software is furnished to do so, subject to the following conditions:
010 *
011 * The above copyright notice and this permission notice shall be included in
012 * all copies or substantial portions of the Software.
013 *
014 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
015 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
016 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
017 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
018 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
019 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
020 * DEALINGS IN THE SOFTWARE.
021 */
022
023 package fi.iki.hsivonen.htmlparser;
024
025 import java.util.Arrays;
026
027 import org.xml.sax.Attributes;
028 import org.xml.sax.SAXException;
029
030 import fi.iki.hsivonen.xml.ContentHandlerFilter;
031
032 /**
033 * @version $Id: EmptyElementFilter.java,v 1.7 2006/12/01 12:34:31 hsivonen Exp $
034 * @author hsivonen
035 */
036 public final class EmptyElementFilter extends ContentHandlerFilter {
037 private static final String XHTML_NS = "http://www.w3.org/1999/xhtml";
038
039 /**
040 * HTML 4.01 Strict elements which don't have an end tag
041 */
042 private static final String[] EMPTY_ELEMENTS = { "area", "base",
043 "basefont", "br", "col", "command", "frame", "hr", "img", "input",
044 "isindex", "link", "meta", "param" };
045
046 // should we include things like <spacer> and <image>?
047
048 // 01:22 < Hixie> well, my list right now is base, link, meta, hr, br, img,
049 // embed, param, area, col, input
050 //01:22 < Hixie> plus command and event-source
051 //
052 private static final boolean isEmpty(String name) {
053 return (Arrays.binarySearch(EMPTY_ELEMENTS, name) >= 0);
054 }
055
056 /**
057 * @see org.xml.sax.ContentHandler#endElement(java.lang.String,
058 * java.lang.String, java.lang.String)
059 */
060 public void endElement(String uri, String local, String qName)
061 throws SAXException {
062 if (XHTML_NS.equals(uri)) {
063 if (isEmpty(local)) {
064 fatal("End tag \u201C"
065 + local
066 + "\u201D seen even though the element is an empty element.");
067 }
068 }
069 super.endElement(uri, local, qName);
070 }
071
072 /**
073 * @see org.xml.sax.ContentHandler#startElement(java.lang.String,
074 * java.lang.String, java.lang.String, org.xml.sax.Attributes)
075 */
076 public void startElement(String uri, String local, String qName,
077 Attributes attrs) throws SAXException {
078 // FIXME just dropping base for now
079 boolean drop = "base".equals(local);
080 if (!drop) {
081 super.startElement(uri, local, qName, attrs);
082 }
083 if (XHTML_NS.equals(uri)) {
084 if (!drop && isEmpty(local)) {
085 super.endElement(uri, local, qName);
086 }
087 }
088 }
089 }