001 /* 002 * Copyright (c) 2003-2004 Henri Sivonen, Yrjö Kari-Koskinen 003 * 004 * Permission is hereby granted, free of charge, to any person obtaining a 005 * copy of this software and associated documentation files (the "Software"), 006 * to deal in the Software without restriction, including without limitation 007 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 008 * and/or sell copies of the Software, and to permit persons to whom the 009 * Software is furnished to do so, subject to the following conditions: 010 * 011 * The above copyright notice and this permission notice shall be included in 012 * all copies or substantial portions of the Software. 013 * 014 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 015 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 016 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 017 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 018 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 019 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 020 * DEALINGS IN THE SOFTWARE. 021 */ 022 package fi.iki.hsivonen.xml; 023 024 import javax.xml.parsers.SAXParserFactory; 025 026 import org.ccil.cowan.tagsoup.Parser; 027 import org.xml.sax.XMLReader; 028 029 import fi.iki.hsivonen.schemas.dtd.DTDCatalog; 030 031 /** 032 * 033 * @author hsivonen 034 * @author ykk@kallio.tky.hut.fi 035 */ 036 public class SAXUtils { 037 038 /** 039 * Instantiates a new <code>XMLReader</code> which is configured to be 040 * non-validating. The entity resolver is set to the local DTD catalog. The 041 * entity resolver doesn't connect to the network. 042 * 043 * @return a non-validating <code>XMLReader</code> 044 */ 045 public static final XMLReader newNonvalidatingXMLReader() { 046 SAXParserFactory factory = SAXParserFactory.newInstance(); 047 factory.setNamespaceAware(true); 048 factory.setValidating(false); 049 try { 050 XMLReader reader = factory.newSAXParser().getXMLReader(); 051 reader.setEntityResolver(DTDCatalog.getInstance()); 052 reader.setErrorHandler(new SilentDraconianErrorHandler()); 053 return reader; 054 } catch (Exception e) { 055 throw new RuntimeException(e); 056 } 057 } 058 059 /** 060 * Instantiates a new <code>XMLReader</code> which is configured to be 061 * validating. The entity resolver is set to the local DTD catalog. The 062 * entity resolver doesn't connect to the network. 063 * 064 * @return a validating <code>XMLReader</code> 065 */ 066 public static final XMLReader newValidatingXMLReader() { 067 SAXParserFactory factory = SAXParserFactory.newInstance(); 068 factory.setNamespaceAware(true); 069 factory.setValidating(true); 070 try { 071 XMLReader reader = factory.newSAXParser().getXMLReader(); 072 reader.setEntityResolver(DTDCatalog.getInstance()); 073 reader.setErrorHandler(new SilentDraconianErrorHandler()); 074 return reader; 075 } catch (Exception e) { 076 throw new RuntimeException(e); 077 } 078 } 079 080 /** 081 * Instantiates a new <code>XMLReader</code> which can parse HTML but 082 * appears to parse XHTML. <em>Note:</em> If you are parsing from a byte 083 * stream, you <em>must</em> set the character encoding of the 084 * <code>InputSource</code> explicitly. Use <code>MetadataExtractor</code> 085 * if in doubt. 086 * 087 * @return an <code>XMLReader</code> for parsing tag soup 088 */ 089 public static final XMLReader newTagSoupXMLReader() { 090 return new Parser(); 091 } 092 093 }