001    /*
002     * Copyright (c) 2003-2004 Henri Sivonen, Yrjö Kari-Koskinen
003     *
004     * Permission is hereby granted, free of charge, to any person obtaining a
005     * copy of this software and associated documentation files (the "Software"),
006     * to deal in the Software without restriction, including without limitation
007     * the rights to use, copy, modify, merge, publish, distribute, sublicense,
008     * and/or sell copies of the Software, and to permit persons to whom the
009     * Software is furnished to do so, subject to the following conditions:
010     *
011     * The above copyright notice and this permission notice shall be included in
012     * all copies or substantial portions of the Software.
013     *
014     * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
015     * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
016     * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
017     * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
018     * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
019     * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
020     * DEALINGS IN THE SOFTWARE.
021     */
022    package fi.iki.hsivonen.xml;
023    
024    import javax.xml.parsers.SAXParserFactory;
025    
026    import org.ccil.cowan.tagsoup.Parser;
027    import org.xml.sax.XMLReader;
028    
029    import fi.iki.hsivonen.schemas.dtd.DTDCatalog;
030    
031    /**
032     * 
033     * @author hsivonen
034     * @author ykk@kallio.tky.hut.fi
035     */
036    public class SAXUtils {
037    
038        /**
039         * Instantiates a new <code>XMLReader</code> which is configured to be
040         * non-validating. The entity resolver is set to the local DTD catalog. The
041         * entity resolver doesn't connect to the network.
042         * 
043         * @return a non-validating <code>XMLReader</code>
044         */
045        public static final XMLReader newNonvalidatingXMLReader() {
046            SAXParserFactory factory = SAXParserFactory.newInstance();
047            factory.setNamespaceAware(true);
048            factory.setValidating(false);
049            try {
050                XMLReader reader = factory.newSAXParser().getXMLReader();
051                reader.setEntityResolver(DTDCatalog.getInstance());
052                reader.setErrorHandler(new SilentDraconianErrorHandler());
053                return reader;
054            } catch (Exception e) {
055                throw new RuntimeException(e);
056            }
057        }
058    
059        /**
060         * Instantiates a new <code>XMLReader</code> which is configured to be
061         * validating. The entity resolver is set to the local DTD catalog. The
062         * entity resolver doesn't connect to the network.
063         * 
064         * @return a validating <code>XMLReader</code>
065         */
066        public static final XMLReader newValidatingXMLReader() {
067            SAXParserFactory factory = SAXParserFactory.newInstance();
068            factory.setNamespaceAware(true);
069            factory.setValidating(true);
070            try {
071                XMLReader reader = factory.newSAXParser().getXMLReader();
072                reader.setEntityResolver(DTDCatalog.getInstance());
073                reader.setErrorHandler(new SilentDraconianErrorHandler());
074                return reader;
075            } catch (Exception e) {
076                throw new RuntimeException(e);
077            }
078        }
079    
080        /**
081         * Instantiates a new <code>XMLReader</code> which can parse HTML but
082         * appears to parse XHTML. <em>Note:</em> If you are parsing from a byte
083         * stream, you <em>must</em> set the character encoding of the
084         * <code>InputSource</code> explicitly. Use <code>MetadataExtractor</code>
085         * if in doubt.
086         * 
087         * @return an <code>XMLReader</code> for parsing tag soup
088         */
089        public static final XMLReader newTagSoupXMLReader() {
090            return new Parser();
091        }
092    
093    }