001 /*
002 * Copyright (c) 2003-2004 Henri Sivonen, Yrjö Kari-Koskinen
003 *
004 * Permission is hereby granted, free of charge, to any person obtaining a
005 * copy of this software and associated documentation files (the "Software"),
006 * to deal in the Software without restriction, including without limitation
007 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
008 * and/or sell copies of the Software, and to permit persons to whom the
009 * Software is furnished to do so, subject to the following conditions:
010 *
011 * The above copyright notice and this permission notice shall be included in
012 * all copies or substantial portions of the Software.
013 *
014 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
015 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
016 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
017 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
018 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
019 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
020 * DEALINGS IN THE SOFTWARE.
021 */
022 package fi.iki.hsivonen.xml;
023
024 import javax.xml.parsers.SAXParserFactory;
025
026 import org.ccil.cowan.tagsoup.Parser;
027 import org.xml.sax.XMLReader;
028
029 import fi.iki.hsivonen.schemas.dtd.DTDCatalog;
030
031 /**
032 *
033 * @author hsivonen
034 * @author ykk@kallio.tky.hut.fi
035 */
036 public class SAXUtils {
037
038 /**
039 * Instantiates a new <code>XMLReader</code> which is configured to be
040 * non-validating. The entity resolver is set to the local DTD catalog. The
041 * entity resolver doesn't connect to the network.
042 *
043 * @return a non-validating <code>XMLReader</code>
044 */
045 public static final XMLReader newNonvalidatingXMLReader() {
046 SAXParserFactory factory = SAXParserFactory.newInstance();
047 factory.setNamespaceAware(true);
048 factory.setValidating(false);
049 try {
050 XMLReader reader = factory.newSAXParser().getXMLReader();
051 reader.setEntityResolver(DTDCatalog.getInstance());
052 reader.setErrorHandler(new SilentDraconianErrorHandler());
053 return reader;
054 } catch (Exception e) {
055 throw new RuntimeException(e);
056 }
057 }
058
059 /**
060 * Instantiates a new <code>XMLReader</code> which is configured to be
061 * validating. The entity resolver is set to the local DTD catalog. The
062 * entity resolver doesn't connect to the network.
063 *
064 * @return a validating <code>XMLReader</code>
065 */
066 public static final XMLReader newValidatingXMLReader() {
067 SAXParserFactory factory = SAXParserFactory.newInstance();
068 factory.setNamespaceAware(true);
069 factory.setValidating(true);
070 try {
071 XMLReader reader = factory.newSAXParser().getXMLReader();
072 reader.setEntityResolver(DTDCatalog.getInstance());
073 reader.setErrorHandler(new SilentDraconianErrorHandler());
074 return reader;
075 } catch (Exception e) {
076 throw new RuntimeException(e);
077 }
078 }
079
080 /**
081 * Instantiates a new <code>XMLReader</code> which can parse HTML but
082 * appears to parse XHTML. <em>Note:</em> If you are parsing from a byte
083 * stream, you <em>must</em> set the character encoding of the
084 * <code>InputSource</code> explicitly. Use <code>MetadataExtractor</code>
085 * if in doubt.
086 *
087 * @return an <code>XMLReader</code> for parsing tag soup
088 */
089 public static final XMLReader newTagSoupXMLReader() {
090 return new Parser();
091 }
092
093 }