001    /*
002     * DomConsumer.java
003     * Copyright (C) 1999,2000,2001 The Free Software Foundation
004     * 
005     * This file is part of GNU JAXP, a library.
006     *
007     * GNU JAXP is free software; you can redistribute it and/or modify
008     * it under the terms of the GNU General Public License as published by
009     * the Free Software Foundation; either version 2 of the License, or
010     * (at your option) any later version.
011     * 
012     * GNU JAXP is distributed in the hope that it will be useful,
013     * but WITHOUT ANY WARRANTY; without even the implied warranty of
014     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
015     * GNU General Public License for more details.
016     * 
017     * You should have received a copy of the GNU General Public License
018     * along with this program; if not, write to the Free Software
019     * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
020     *
021     * Linking this library statically or dynamically with other modules is
022     * making a combined work based on this library.  Thus, the terms and
023     * conditions of the GNU General Public License cover the whole
024     * combination.
025     *
026     * As a special exception, the copyright holders of this library give you
027     * permission to link this library with independent modules to produce an
028     * executable, regardless of the license terms of these independent
029     * modules, and to copy and distribute the resulting executable under
030     * terms of your choice, provided that you also meet, for each linked
031     * independent module, the terms and conditions of the license of that
032     * module.  An independent module is a module which is not derived from
033     * or based on this library.  If you modify this library, you may extend
034     * this exception to your version of the library, but you are not
035     * obliged to do so.  If you do not wish to do so, delete this
036     * exception statement from your version. 
037     */
038    
039    // moved to fi.karppinen.* -- 2005-09-24 hsivonen
040    package fi.karppinen.gnu.xml.pipeline;
041    
042    // organized imports -- 2005-09-24 hsivonen
043    
044    import gnu.xml.pipeline.EventConsumer;
045    import gnu.xml.pipeline.EventFilter;
046    import gnu.xml.pipeline.NSFilter;
047    import gnu.xml.pipeline.TeeConsumer;
048    import gnu.xml.util.DomParser;
049    
050    import org.w3c.dom.Attr;
051    import org.w3c.dom.CDATASection;
052    import org.w3c.dom.CharacterData;
053    import org.w3c.dom.DOMImplementation;
054    import org.w3c.dom.Document;
055    import org.w3c.dom.Element;
056    import org.w3c.dom.EntityReference;
057    import org.w3c.dom.Node;
058    import org.w3c.dom.ProcessingInstruction;
059    import org.w3c.dom.Text;
060    import org.xml.sax.Attributes;
061    import org.xml.sax.ContentHandler;
062    import org.xml.sax.DTDHandler;
063    import org.xml.sax.ErrorHandler;
064    import org.xml.sax.Locator;
065    import org.xml.sax.SAXException;
066    import org.xml.sax.SAXNotRecognizedException;
067    import org.xml.sax.SAXParseException;
068    import org.xml.sax.ext.DeclHandler;
069    import org.xml.sax.ext.LexicalHandler;
070    import org.xml.sax.helpers.AttributesImpl;
071    
072    
073    /**
074     * This consumer builds a DOM Document from its input, acting either as a
075     * pipeline terminus or as an intermediate buffer.  When a document's worth
076     * of events has been delivered to this consumer, that document is read with
077     * a {@link DomParser} and sent to the next consumer.  It is also available
078     * as a read-once property.
079     *
080     * <p>The DOM tree is constructed as faithfully as possible.  There are some
081     * complications since a DOM should expose behaviors that can't be implemented
082     * without API backdoors into that DOM, and because some SAX parsers don't
083     * report all the information that DOM permits to be exposed.  The general
084     * problem areas involve information from the Document Type Declaration (DTD).
085     * DOM only represents a limited subset, but has some behaviors that depend
086     * on much deeper knowledge of a document's DTD.  You shouldn't have much to
087     * worry about unless you change handling of "noise" nodes from its default
088     * setting (which ignores them all); note if you use JAXP to populate your
089     * DOM trees, it wants to save "noise" nodes by default.  (Such nodes include
090     * ignorable whitespace, comments, entity references and CDATA boundaries.)
091     * Otherwise, your
092     * main worry will be if you use a SAX parser that doesn't flag ignorable
093     * whitespace unless it's validating (few don't).
094     *
095     * <p> The SAX2 events used as input must contain XML Names for elements
096     * and attributes, with original prefixes.  In SAX2,
097     * this is optional unless the "namespace-prefixes" parser feature is set.
098     * Moreover, many application components won't provide completely correct
099     * structures anyway.  <em>Before you convert a DOM to an output document,
100     * you should plan to postprocess it to create or repair such namespace
101     * information.</em> The {@link NSFilter} pipeline stage does such work.
102     *
103     * <p> <em>Note:  changes late in DOM L2 process made it impractical to
104     * attempt to create the DocumentType node in any implementation-neutral way,
105     * much less to populate it (L1 didn't support even creating such nodes).
106     * To create and populate such a node, subclass the inner
107     * {@link DomConsumer.Handler} class and teach it about the backdoors into
108     * whatever DOM implementation you want.  It's possible that some revised
109     * DOM API (L3?) will make this problem solvable again. </em>
110     *
111     * @see DomParser
112     *
113     * @author David Brownell
114     * @author Henri Sivonen
115     */
116    public class DomConsumer implements EventConsumer
117    {
118        private Class       domImpl;
119    
120        private boolean     hidingCDATA = true;
121        private boolean     hidingComments = true;
122        private boolean     hidingWhitespace = true;
123        private boolean     hidingReferences = true;
124    
125        private Handler     handler;
126        private ErrorHandler    errHandler;
127    
128        private EventConsumer   next;
129    
130        // FIXME:  this can't be a generic pipeline stage just now,
131        // since its input became a Class not a String (to be turned
132        // into a class, using the right class loader)
133    
134    
135        /**
136         * Configures this pipeline terminus to use the specified implementation
137         * of DOM when constructing its result value.
138         *
139         * @param impl class implementing {@link org.w3c.dom.Document Document}
140         *  which publicly exposes a default constructor
141         *
142         * @exception SAXException when there is a problem creating an
143         *  empty DOM document using the specified implementation
144         */
145        public DomConsumer (Class impl)
146        throws SAXException
147        {
148        domImpl = impl;
149        handler = new Handler (this);
150        }
151    
152        /**
153         * This is the hook through which a subclass provides a handler
154         * which knows how to access DOM extensions, specific to some
155         * implementation, to record additional data in a DOM.
156         * Treat this as part of construction; don't call it except
157         * before (or between) parses.
158         */
159        protected void setHandler (Handler h)
160        {
161        handler = h;
162        }
163    
164    
165        private Document emptyDocument ()
166        throws SAXException
167        {
168        try {
169            return (Document) domImpl.newInstance ();
170        } catch (IllegalAccessException e) {
171            throw new SAXException ("can't access constructor: "
172                + e.getMessage ());
173        } catch (InstantiationException e) {
174            throw new SAXException ("can't instantiate Document: "
175                + e.getMessage ());
176        }
177        }
178    
179    
180        /**
181         * Configures this consumer as a buffer/filter, using the specified
182         * DOM implementation when constructing its result value.
183         *
184         * <p> This event consumer acts as a buffer and filter, in that it
185         * builds a DOM tree and then writes it out when <em>endDocument</em>
186         * is invoked.  Because of the limitations of DOM, much information
187         * will as a rule not be seen in that replay.  To get a full fidelity
188         * copy of the input event stream, use a {@link TeeConsumer}.
189         *
190         * @param impl class implementing {@link org.w3c.dom.Document Document}
191         *  which publicly exposes a default constructor
         * @param n receives a "replayed" sequence of parse events when
         *  the <em>endDocument</em> method is invoked.
194         *
195         * @exception SAXException when there is a problem creating an
196         *  empty DOM document using the specified DOM implementation
197         */
198        public DomConsumer (Class impl, EventConsumer n)
199        throws SAXException
200        {
201        this (impl);
202        next = n;
203        }
204    
205    
206        /**
207         * Returns the document constructed from the preceding
208         * sequence of events.  This method should not be
209         * used again until another sequence of events has been
210         * given to this EventConsumer.  
211         */
212        final public Document getDocument ()
213        {
214        return handler.clearDocument ();
215        }
216    
217        public void setErrorHandler (ErrorHandler handler)
218        {
219        errHandler = handler;
220        }
221    
222    
223        /**
224         * Returns true if the consumer is hiding entity references nodes
225         * (the default), and false if EntityReference nodes should
226         * instead be created.  Such EntityReference nodes will normally be
227         * empty, unless an implementation arranges to populate them and then
228         * turn them back into readonly objects.
229         *
230         * @see #setHidingReferences
231         */
232        final public boolean    isHidingReferences ()
233        { return hidingReferences; }
234    
235        /**
236         * Controls whether the consumer will hide entity expansions,
237         * or will instead mark them with entity reference nodes.
238         *
239         * @see #isHidingReferences
240         * @param flag False if entity reference nodes will appear
241         */
242        final public void       setHidingReferences (boolean flag)
243        { hidingReferences = flag; }
244        
245    
246        /**
247         * Returns true if the consumer is hiding comments (the default),
248         * and false if they should be placed into the output document.
249         *
250         * @see #setHidingComments
251         */
252        public final boolean isHidingComments ()
253        { return hidingComments; }
254    
255        /**
256         * Controls whether the consumer is hiding comments.
257         *
258         * @see #isHidingComments
259         */
260        public final void setHidingComments (boolean flag)
261        { hidingComments = flag; }
262    
263    
264        /**
265         * Returns true if the consumer is hiding ignorable whitespace
266         * (the default), and false if such whitespace should be placed
267         * into the output document as children of element nodes.
268         *
269         * @see #setHidingWhitespace
270         */
271        public final boolean isHidingWhitespace ()
272        { return hidingWhitespace; }
273    
274        /**
         * Controls whether the consumer hides ignorable whitespace.
         *
         * @see #isHidingWhitespace
278         */
279        public final void setHidingWhitespace (boolean flag)
280        { hidingWhitespace = flag; }
281    
282    
283        /**
         * Returns true if the consumer is hiding CDATA boundaries (the
         * default), or false if CDATA sections are saved as separate nodes.
286         *
287         * @see #setHidingCDATA
288         */
289        final public boolean    isHidingCDATA ()
290        { return hidingCDATA; }
291    
292        /**
         * Controls whether the consumer will hide CDATA boundaries.
         *
         * @see #isHidingCDATA
         * @param flag False to treat CDATA text differently from other
         *  text nodes
298         */
299        final public void       setHidingCDATA (boolean flag)
300        { hidingCDATA = flag; }
301        
302    
303    
304        /** Returns the document handler being used. */
305        final public ContentHandler getContentHandler ()
306        { return handler; }
307    
308        /** Returns the DTD handler being used. */
309        final public DTDHandler getDTDHandler ()
310        { return handler; }
311    
312        /**
         * Returns the handler registered for the SAX2 lexical-handler and
         * declaration-handler properties; the same object serves both.
         * (DOM construction can't really use declaration handlers.)
315         */
316        final public Object getProperty (String id)
317        throws SAXNotRecognizedException
318        {
319        if ("http://xml.org/sax/properties/lexical-handler".equals (id))
320            return handler;
321        if ("http://xml.org/sax/properties/declaration-handler".equals (id))
322            return handler;
323        throw new SAXNotRecognizedException (id);
324        }
325    
326        EventConsumer getNext () { return next; }
327    
328        ErrorHandler getErrorHandler () { return errHandler; }
329    
330        /**
331         * Class used to intercept various parsing events and use them to
332         * populate a DOM document.  Subclasses would typically know and use
333         * backdoors into specific DOM implementations, used to implement 
334         * DTD-related functionality.
335         *
336         * <p> Note that if this ever throws a DOMException (runtime exception)
337         * that will indicate a bug in the DOM (e.g. doesn't support something
338         * per specification) or the parser (e.g. emitted an illegal name, or
339         * accepted illegal input data). </p>
340         */
341        public static class Handler
342        implements ContentHandler, LexicalHandler,
343            DTDHandler, DeclHandler
344        {
345        protected DomConsumer       consumer;
346    
347        private DOMImplementation   impl;
348        private Document        document;
349        private boolean     isL2;
350    
351        private Locator     locator;
352        private Node        top;
353        private boolean     inCDATA;
354        private boolean     mergeCDATA;
355        private boolean     inDTD;
356        private String      currentEntity;
357    
358        private boolean     recreatedAttrs;
359        private AttributesImpl  attributes = new AttributesImpl ();
360    
361        /**
362         * Subclasses may use SAX2 events to provide additional
363         * behaviors in the resulting DOM.
364         */
365        protected Handler (DomConsumer consumer)
366        throws SAXException
367        {
368            this.consumer = consumer;
369            document = consumer.emptyDocument ();
370            impl = document.getImplementation ();
371            isL2 = impl.hasFeature ("XML", "2.0");
372        }
373    
374        private void fatal (String message, Exception x)
375        throws SAXException
376        {
377            SAXParseException   e;
378            ErrorHandler    errHandler = consumer.getErrorHandler ();
379    
380            if (locator == null)
381            e = new SAXParseException (message, null, null, -1, -1, x);
382            else
383            e = new SAXParseException (message, locator, x);
384            if (errHandler != null)
385            errHandler.fatalError (e);
386            throw e;
387        }
388    
389        /**
390         * Returns and forgets the document produced.  If the handler is
391         * reused, a new document may be created.
392         */
393        Document clearDocument ()
394        {
395            Document retval = document;
396            document = null;
397            locator = null;
398            return retval;
399        }
400    
401        /**
402         * Returns the document under construction.
403         */
404        protected Document getDocument ()
405            { return document; }
406        
407        /**
408         * Returns the current node being populated.  This is usually
409         * an Element or Document, but it might be an EntityReference
410         * node if some implementation-specific code knows how to put
411         * those into the result tree and later mark them as readonly.
412         */
413        protected Node getTop ()
414            { return top; }
415    
416    
417        // SAX1
418        public void setDocumentLocator (Locator locator)
419        {
420            this.locator = locator;
421        }
422    
423        // SAX1
424        public void startDocument ()
425        throws SAXException
426        {
427            if (document == null)
428            try {
429                if (isL2) {
430                // couple to original implementation
431                document = impl.createDocument (null, "foo", null);
432                document.removeChild (document.getFirstChild ());
433                } else {
434                document = consumer.emptyDocument ();
435                }
436            } catch (Exception e) {
437                fatal ("DOM create document", e);
438            }
439            top = document;
440        }
441    
442    //  removed XML decl handling -- 2005-09-24 hsivonen
443    
444    
445        // SAX1
446        public void endDocument ()
447        throws SAXException
448        {
449            try {
450            if (consumer.getNext () != null && document != null) {
451                DomParser   parser = new DomParser (document);
452    
453                EventFilter.bind (parser, consumer.getNext ());
454                parser.parse ("ignored");
455            }
456            } finally {
457            top = null;
458            }
459        }
460    
461        // SAX1
462        public void processingInstruction (String target, String data)
463        throws SAXException
464        {
465            // we can't create populated entity ref nodes using
466            // only public DOM APIs (they've got to be readonly)
467            if (currentEntity != null)
468            return;
469    
470            ProcessingInstruction   pi;
471    
472            if (isL2
473                // && consumer.isUsingNamespaces ()
474                && target.indexOf (':') != -1)
475            namespaceError (
476                "PI target name is namespace nonconformant: "
477                + target);
478            if (inDTD)
479            return;
480            pi = document.createProcessingInstruction (target, data);
481            top.appendChild (pi);
482        }
483    
484        /**
         * Subclasses may override this method to provide a more efficient
         * way to construct text nodes.
         * Typically, copying the text into a single character array will
         * be more efficient than doing that as well as allocating the other
         * data needed for a String, including an internal StringBuilder.
490         * Those additional memory and CPU costs can be incurred later,
491         * if ever needed.
492         * Unfortunately the standard DOM factory APIs encourage those costs
493         * to be incurred early.
494         */
495        protected Text createText (
496            boolean isCDATA,
497            char    ch [],
498            int     start,
499            int     length
500        ) {
501            String  value = new String (ch, start, length);
502    
503            if (isCDATA)
504            return document.createCDATASection (value);
505            else
506            return document.createTextNode (value);
507        }
508    
509        // SAX1
510        public void characters (char ch [], int start, int length)
511        throws SAXException
512        {
513            // we can't create populated entity ref nodes using
514            // only public DOM APIs (they've got to be readonly
515            // at creation time)
516            if (currentEntity != null)
517            return;
518    
519            Node    lastChild = top.getLastChild ();
520    
521            // merge consecutive text or CDATA nodes if appropriate.
522            if (lastChild instanceof Text) {
523            if (consumer.isHidingCDATA ()
524                // consecutive Text content ... always merge
525                || (!inCDATA
526                    && !(lastChild instanceof CDATASection))
527                // consecutive CDATASection content ... don't
528                // merge between sections, only within them
529                || (inCDATA && mergeCDATA
530                    && lastChild instanceof CDATASection)
531                    ) {
532                CharacterData   last = (CharacterData) lastChild;
533                String      value = new String (ch, start, length);
534                
535                last.appendData (value);
536                return;
537            }
538            }
539            if (inCDATA && !consumer.isHidingCDATA ()) {
540            top.appendChild (createText (true, ch, start, length));
541            mergeCDATA = true;
542            } else
543            top.appendChild (createText (false, ch, start, length));
544        }
545    
546        // SAX2
547        public void skippedEntity (String name)
548        throws SAXException
549        {
550            // this callback is useless except to report errors, since
551            // we can't know if the ref was in content, within an
552            // attribute, within a declaration ... only one of those
553            // cases supports more intelligent action than a panic.
554            fatal ("skipped entity: " + name, null);
555        }
556    
557        // SAX2
558        public void startPrefixMapping (String prefix, String uri)
559        throws SAXException
560        {
561            // reconstruct "xmlns" attributes deleted by all
562            // SAX2 parsers without "namespace-prefixes" = true
563            if ("".equals (prefix))
564            attributes.addAttribute ("", "", "xmlns",
565                "CDATA", uri);
566            else
567            attributes.addAttribute ("", "", "xmlns:" + prefix,
568                "CDATA", uri);
569            recreatedAttrs = true;
570        }
571    
572        // SAX2
573        public void endPrefixMapping (String prefix)
574        throws SAXException
575            { }
576    
577        // SAX2
578        public void startElement (
579            String uri,
580            String localName,
581            String qName,
582            Attributes atts
583        ) throws SAXException
584        {
585            // we can't create populated entity ref nodes using
586            // only public DOM APIs (they've got to be readonly)
587            if (currentEntity != null)
588            return;
589    
590            // parser discarded basic information; DOM tree isn't writable
591            // without massaging to assign prefixes to all nodes.
592            // the "NSFilter" class does that massaging.
593            if (qName.length () == 0)
594            qName = localName;
595    
596    
597            Element element;
598            int     length = atts.getLength ();
599    
600            if (!isL2) {
601            element = document.createElement (qName);
602    
603            // first the explicit attributes ...
604            length = atts.getLength ();
605            for (int i = 0; i < length; i++)
606                element.setAttribute (atts.getQName (i),
607                            atts.getValue (i));
608            // ... then any recreated ones (DOM deletes duplicates)
609            if (recreatedAttrs) {
610                recreatedAttrs = false;
611                length = attributes.getLength ();
612                for (int i = 0; i < length; i++)
613                element.setAttribute (attributes.getQName (i),
614                            attributes.getValue (i));
615                attributes.clear ();
616            }
617    
618            top.appendChild (element);
619            top = element;
620            return;
621            }
622    
623            // For an L2 DOM when namespace use is enabled, use
624            // createElementNS/createAttributeNS except when
625            // (a) it's an element in the default namespace, or
626            // (b) it's an attribute with no prefix
627            String  namespace;
628            
629            if (localName.length () != 0)
630            namespace = (uri.length () == 0) ? null : uri;
631            else
632            namespace = getNamespace (getPrefix (qName), atts);
633    
634            // Always use createElementNS -- 2005-09-24 hsivonen
635            element = document.createElementNS (namespace, qName);
636    
637            populateAttributes (element, atts);
638            if (recreatedAttrs) {
639            recreatedAttrs = false;
640            // ... DOM deletes any duplicates
641            populateAttributes (element, attributes);
642            attributes.clear ();
643            }
644    
645            top.appendChild (element);
646            top = element;
647        }
648    
649        final static String xmlnsURI = "http://www.w3.org/2000/xmlns/";
650    
651        private void populateAttributes (Element element, Attributes attrs)
652        throws SAXParseException
653        {
654            int     length = attrs.getLength ();
655    
656            for (int i = 0; i < length; i++) {
657            String  type = attrs.getType (i);
658            String  value = attrs.getValue (i);
659            String  name = attrs.getQName (i);
660            String  local = attrs.getLocalName (i);
661            String  uri = attrs.getURI (i);
662    
663            // parser discarded basic information, DOM tree isn't writable
664            if (name.length () == 0)
665                name = local;
666    
667            // all attribute types other than these three may not
668            // contain scoped names... enumerated attributes get
669            // reported as NMTOKEN, except for NOTATION values
670            if (!("CDATA".equals (type)
671                || "NMTOKEN".equals (type)
672                || "NMTOKENS".equals (type))) {
673                if (value.indexOf (':') != -1) {
674                namespaceError (
675                    "namespace nonconformant attribute value: "
676                        + "<" + element.getNodeName ()
677                        + " " + name + "='" + value + "' ...>");
678                }
679            }
680    
681            // xmlns="" is legal (undoes default NS)
682            // xmlns:foo="" is illegal
683            String prefix = getPrefix (name);
684            String namespace;
685    
686            if ("xmlns".equals (prefix)) {
687                if ("".equals (value))
688                namespaceError ("illegal null namespace decl, " + name);
689                namespace = xmlnsURI;
690            } else if ("xmlns".equals (name))
691                namespace = xmlnsURI;
692    
693            else if (prefix == null)
694                namespace = null;
695            else if (!"".equals(uri) && uri.length () != 0)
696                namespace = uri;
697            else
698                namespace = getNamespace (prefix, attrs);
699    
700            if (namespace == null)
701                element.setAttribute (name, value);
702            else
703                element.setAttributeNS (namespace, name, value);
704            }
705        }
706    
707        private String getPrefix (String name)
708        {
709            int     temp;
710    
711            if ((temp = name.indexOf (':')) > 0)
712            return name.substring (0, temp);
713            return null;
714        }
715    
716        // used with SAX1-level parser output 
717        private String getNamespace (String prefix, Attributes attrs)
718        throws SAXParseException
719        {
720            String namespace;
721            String decl;
722    
723            // defaulting 
724            if (prefix == null) {
725            decl = "xmlns";
726            namespace = attrs.getValue (decl);
727            if ("".equals (namespace))
728                return null;
729            else if (namespace != null)
730                return namespace;
731    
732            // "xmlns" is like a keyword
733            // ... according to the Namespace REC, but DOM L2 CR2+
734            // and Infoset violate that by assigning a namespace.
735            // that conflict is resolved elsewhere.
736            } else if ("xmlns".equals (prefix))
737            return null;
738    
739            // "xml" prefix is fixed
740            else if ("xml".equals (prefix))
741            return "http://www.w3.org/XML/1998/namespace";
742    
743            // otherwise, expect a declaration
744            else {
745            decl = "xmlns:" + prefix;
746            namespace = attrs.getValue (decl);
747            }
748            
749            // if we found a local declaration, great
750            if (namespace != null)
751            return namespace;
752    
753    
754            // ELSE ... search up the tree we've been building
755            for (Node n = top;
756                n != null && n.getNodeType () != Node.DOCUMENT_NODE;
757                n = n.getParentNode ()) {
758            if (n.getNodeType () == Node.ENTITY_REFERENCE_NODE)
759                continue;
760            Element e = (Element) n;
761            Attr attr = e.getAttributeNode (decl);
762            if (attr != null)
763                return attr.getNodeValue ();
764            }
765            // see above re "xmlns" as keyword
766            if ("xmlns".equals (decl))
767            return null;
768    
769            namespaceError ("Undeclared namespace prefix: " + prefix);
770            return null;
771        }
772    
773        // SAX2
774        public void endElement (String uri, String localName, String qName)
775        throws SAXException
776        {
777            // we can't create populated entity ref nodes using
778            // only public DOM APIs (they've got to be readonly)
779            if (currentEntity != null)
780            return;
781    
782            top = top.getParentNode ();
783        }
784    
785        // SAX1 (mandatory reporting if validating)
786        public void ignorableWhitespace (char ch [], int start, int length)
787        throws SAXException
788        {
789            if (consumer.isHidingWhitespace ())
790            return;
791            characters (ch, start, length);
792        }
793    
794        // SAX2 lexical event
795        public void startCDATA ()
796        throws SAXException
797        {
798            inCDATA = true;
799            // true except for the first fragment of a cdata section
800            mergeCDATA = false;
801        }
802        
803        // SAX2 lexical event
804        public void endCDATA ()
805        throws SAXException
806        {
807            inCDATA = false;
808        }
809        
810        // SAX2 lexical event
811        //
812        // this SAX2 callback merges two unrelated things:
813        //  - Declaration of the root element type ... belongs with
814        //    the other DTD declaration methods, NOT HERE.
815        //  - IDs for the optional external subset ... belongs here
816        //    with other lexical information.
817        //
818        // ...and it doesn't include the internal DTD subset, desired
819        // both to support DOM L2 and to enable "pass through" processing
820        //
821        public void startDTD (String name, String publicId, String SystemId)
822        throws SAXException
823        {
824            // need to filter out comments and PIs within the DTD
825            inDTD = true;
826        }
827        
828        // SAX2 lexical event
829        public void endDTD ()
830        throws SAXException
831        {
832            inDTD = false;
833        }
834        
835        // SAX2 lexical event
836        public void comment (char ch [], int start, int length)
837        throws SAXException
838        {
839            Node    comment;
840    
841            // we can't create populated entity ref nodes using
842            // only public DOM APIs (they've got to be readonly)
843            if (consumer.isHidingComments ()
844                || inDTD
845                || currentEntity != null)
846            return;
847            comment = document.createComment (new String (ch, start, length));
848            top.appendChild (comment);
849        }
850    
851        /**
852         * May be overridden by subclasses to return true, indicating
853         * that entity reference nodes can be populated and then made
854         * read-only.
855         */
856        public boolean canPopulateEntityRefs ()
857            { return false; }
858    
859        // SAX2 lexical event
860        public void startEntity (String name)
861        throws SAXException
862        {
863            // are we ignoring what would be contents of an
864            // entity ref, since we can't populate it?
865            if (currentEntity != null)
866            return;
867    
868            // Are we hiding all entity boundaries?
869            if (consumer.isHidingReferences ())
870            return;
871    
872            // SAX2 shows parameter entities; DOM hides them
873            if (name.charAt (0) == '%' || "[dtd]".equals (name))
874            return;
875    
876            // Since we can't create a populated entity ref node in any
877            // standard way, we create an unpopulated one.
878            EntityReference ref = document.createEntityReference (name);
879            top.appendChild (ref);
880            top = ref;
881    
882            // ... allowing subclasses to populate them
883            if (!canPopulateEntityRefs ())
884            currentEntity = name;
885        }
886    
887        // SAX2 lexical event
888        public void endEntity (String name)
889        throws SAXException
890        {
891            if (name.charAt (0) == '%' || "[dtd]".equals (name))
892            return;
893            if (name.equals (currentEntity))
894            currentEntity = null;
895            if (!consumer.isHidingReferences ())
896            top = top.getParentNode ();
897        }
898    
899    
900        // SAX1 DTD event
901        public void notationDecl (
902            String name,
903            String publicId, String SystemId
904        ) throws SAXException
905        {
906            /* IGNORE -- no public DOM API lets us store these
907             * into the doctype node
908             */
909        }
910    
911        // SAX1 DTD event
912        public void unparsedEntityDecl (
913            String name,
914            String publicId, String SystemId,
915            String notationName
916        ) throws SAXException
917        {
918            /* IGNORE -- no public DOM API lets us store these
919             * into the doctype node
920             */
921        }
922    
923        // SAX2 declaration event
924        public void elementDecl (String name, String model)
925        throws SAXException
926        {
927            /* IGNORE -- no content model support in DOM L2 */
928        }
929    
930        // SAX2 declaration event
931        public void attributeDecl (
932            String eName,
933            String aName,
934            String type,
935            String mode,
936            String value
937        ) throws SAXException
938        {
939            /* IGNORE -- no attribute model support in DOM L2 */
940        }
941    
942        // SAX2 declaration event
943        public void internalEntityDecl (String name, String value)
944        throws SAXException
945        {
946            /* IGNORE -- no public DOM API lets us store these
947             * into the doctype node
948             */
949        }
950    
951        // SAX2 declaration event
952        public void externalEntityDecl (
953            String name,
954            String publicId,
955            String SystemId
956        ) throws SAXException
957        {
958            /* IGNORE -- no public DOM API lets us store these
959             * into the doctype node
960             */
961        }
962    
963        //
964        // These really should offer the option of nonfatal handling,
965        // like other validity errors, though that would cause major
966        // chaos in the DOM data structures.  DOM is already spec'd
967        // to treat many of these as fatal, so this is consistent.
968        //
969        private void namespaceError (String description)
970        throws SAXParseException
971        {
972            SAXParseException err;
973            
974            err = new SAXParseException (description, locator);
975            throw err;
976        }
977        }
978    }