001 /* 002 * Copyright (c) 2005 Henri Sivonen 003 * 004 * Permission is hereby granted, free of charge, to any person obtaining a 005 * copy of this software and associated documentation files (the "Software"), 006 * to deal in the Software without restriction, including without limitation 007 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 008 * and/or sell copies of the Software, and to permit persons to whom the 009 * Software is furnished to do so, subject to the following conditions: 010 * 011 * The above copyright notice and this permission notice shall be included in 012 * all copies or substantial portions of the Software. 013 * 014 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 015 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 016 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 017 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 018 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 019 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 020 * DEALINGS IN THE SOFTWARE. 021 */ 022 023 package fi.iki.hsivonen.htmlparser; 024 025 import java.util.Arrays; 026 027 import org.xml.sax.Attributes; 028 import org.xml.sax.SAXException; 029 030 import fi.iki.hsivonen.xml.ContentHandlerFilter; 031 032 /** 033 * @version $Id: EmptyElementFilter.java,v 1.7 2006/12/01 12:34:31 hsivonen Exp $ 034 * @author hsivonen 035 */ 036 public final class EmptyElementFilter extends ContentHandlerFilter { 037 private static final String XHTML_NS = "http://www.w3.org/1999/xhtml"; 038 039 /** 040 * HTML 4.01 Strict elements which don't have an end tag 041 */ 042 private static final String[] EMPTY_ELEMENTS = { "area", "base", 043 "basefont", "br", "col", "command", "frame", "hr", "img", "input", 044 "isindex", "link", "meta", "param" }; 045 046 // should we include things like <spacer> and <image>? 047 048 // 01:22 < Hixie> well, my list right now is base, link, meta, hr, br, img, 049 // embed, param, area, col, input 050 //01:22 < Hixie> plus command and event-source 051 // 052 private static final boolean isEmpty(String name) { 053 return (Arrays.binarySearch(EMPTY_ELEMENTS, name) >= 0); 054 } 055 056 /** 057 * @see org.xml.sax.ContentHandler#endElement(java.lang.String, 058 * java.lang.String, java.lang.String) 059 */ 060 public void endElement(String uri, String local, String qName) 061 throws SAXException { 062 if (XHTML_NS.equals(uri)) { 063 if (isEmpty(local)) { 064 fatal("End tag \u201C" 065 + local 066 + "\u201D seen even though the element is an empty element."); 067 } 068 } 069 super.endElement(uri, local, qName); 070 } 071 072 /** 073 * @see org.xml.sax.ContentHandler#startElement(java.lang.String, 074 * java.lang.String, java.lang.String, org.xml.sax.Attributes) 075 */ 076 public void startElement(String uri, String local, String qName, 077 Attributes attrs) throws SAXException { 078 // FIXME just dropping base for now 079 boolean drop = "base".equals(local); 080 if (!drop) { 081 super.startElement(uri, local, qName, attrs); 082 } 083 if (XHTML_NS.equals(uri)) { 084 if (!drop && isEmpty(local)) { 085 super.endElement(uri, local, qName); 086 } 087 } 088 } 089 }