001 /* 002 * DomConsumer.java 003 * Copyright (C) 1999,2000,2001 The Free Software Foundation 004 * 005 * This file is part of GNU JAXP, a library. 006 * 007 * GNU JAXP is free software; you can redistribute it and/or modify 008 * it under the terms of the GNU General Public License as published by 009 * the Free Software Foundation; either version 2 of the License, or 010 * (at your option) any later version. 011 * 012 * GNU JAXP is distributed in the hope that it will be useful, 013 * but WITHOUT ANY WARRANTY; without even the implied warranty of 014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 015 * GNU General Public License for more details. 016 * 017 * You should have received a copy of the GNU General Public License 018 * along with this program; if not, write to the Free Software 019 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 020 * 021 * Linking this library statically or dynamically with other modules is 022 * making a combined work based on this library. Thus, the terms and 023 * conditions of the GNU General Public License cover the whole 024 * combination. 025 * 026 * As a special exception, the copyright holders of this library give you 027 * permission to link this library with independent modules to produce an 028 * executable, regardless of the license terms of these independent 029 * modules, and to copy and distribute the resulting executable under 030 * terms of your choice, provided that you also meet, for each linked 031 * independent module, the terms and conditions of the license of that 032 * module. An independent module is a module which is not derived from 033 * or based on this library. If you modify this library, you may extend 034 * this exception to your version of the library, but you are not 035 * obliged to do so. If you do not wish to do so, delete this 036 * exception statement from your version. 037 */ 038 039 package fi.iki.hsivonen.gnu.xml.pipeline; 040 041 import gnu.xml.dom.DomDocument; 042 import gnu.xml.pipeline.EventConsumer; 043 import gnu.xml.pipeline.EventFilter; 044 import gnu.xml.pipeline.NSFilter; 045 import gnu.xml.pipeline.TeeConsumer; 046 import gnu.xml.util.DomParser; 047 048 import org.w3c.dom.Attr; 049 import org.w3c.dom.CDATASection; 050 import org.w3c.dom.CharacterData; 051 import org.w3c.dom.DOMImplementation; 052 import org.w3c.dom.Document; 053 import org.w3c.dom.Element; 054 import org.w3c.dom.EntityReference; 055 import org.w3c.dom.Node; 056 import org.w3c.dom.ProcessingInstruction; 057 import org.w3c.dom.Text; 058 import org.xml.sax.Attributes; 059 import org.xml.sax.ContentHandler; 060 import org.xml.sax.DTDHandler; 061 import org.xml.sax.ErrorHandler; 062 import org.xml.sax.Locator; 063 import org.xml.sax.SAXException; 064 import org.xml.sax.SAXNotRecognizedException; 065 import org.xml.sax.SAXParseException; 066 import org.xml.sax.ext.DeclHandler; 067 import org.xml.sax.ext.LexicalHandler; 068 import org.xml.sax.helpers.AttributesImpl; 069 070 071 /** 072 * This consumer builds a DOM Document from its input, acting either as a 073 * pipeline terminus or as an intermediate buffer. When a document's worth 074 * of events has been delivered to this consumer, that document is read with 075 * a {@link DomParser} and sent to the next consumer. It is also available 076 * as a read-once property. 077 * 078 * <p>The DOM tree is constructed as faithfully as possible. There are some 079 * complications since a DOM should expose behaviors that can't be implemented 080 * without API backdoors into that DOM, and because some SAX parsers don't 081 * report all the information that DOM permits to be exposed. The general 082 * problem areas involve information from the Document Type Declaration (DTD). 083 * DOM only represents a limited subset, but has some behaviors that depend 084 * on much deeper knowledge of a document's DTD. You shouldn't have much to 085 * worry about unless you change handling of "noise" nodes from its default 086 * setting (which ignores them all); note if you use JAXP to populate your 087 * DOM trees, it wants to save "noise" nodes by default. (Such nodes include 088 * ignorable whitespace, comments, entity references and CDATA boundaries.) 089 * Otherwise, your 090 * main worry will be if you use a SAX parser that doesn't flag ignorable 091 * whitespace unless it's validating (few don't). 092 * 093 * <p> The SAX2 events used as input must contain XML Names for elements 094 * and attributes, with original prefixes. In SAX2, 095 * this is optional unless the "namespace-prefixes" parser feature is set. 096 * Moreover, many application components won't provide completely correct 097 * structures anyway. <em>Before you convert a DOM to an output document, 098 * you should plan to postprocess it to create or repair such namespace 099 * information.</em> The {@link NSFilter} pipeline stage does such work. 100 * 101 * <p> <em>Note: changes late in DOM L2 process made it impractical to 102 * attempt to create the DocumentType node in any implementation-neutral way, 103 * much less to populate it (L1 didn't support even creating such nodes). 104 * To create and populate such a node, subclass the inner 105 * {@link DomConsumer.Handler} class and teach it about the backdoors into 106 * whatever DOM implementation you want. It's possible that some revised 107 * DOM API (L3?) will make this problem solvable again. </em> 108 * 109 * @see DomParser 110 * 111 * @author David Brownell 112 */ 113 public class DomConsumer implements EventConsumer 114 { 115 private Class<DomDocument> domImpl; 116 117 private boolean hidingCDATA = true; 118 private boolean hidingComments = true; 119 private boolean hidingWhitespace = true; 120 private boolean hidingReferences = true; 121 122 private Handler handler; 123 private ErrorHandler errHandler; 124 125 private EventConsumer next; 126 127 // FIXME: this can't be a generic pipeline stage just now, 128 // since its input became a Class not a String (to be turned 129 // into a class, using the right class loader) 130 131 132 /** 133 * Configures this pipeline terminus to use the specified implementation 134 * of DOM when constructing its result value. 135 * 136 * @param impl class implementing {@link org.w3c.dom.Document Document} 137 * which publicly exposes a default constructor 138 * 139 * @exception SAXException when there is a problem creating an 140 * empty DOM document using the specified implementation 141 */ 142 public DomConsumer (Class<DomDocument> impl) 143 throws SAXException 144 { 145 domImpl = impl; 146 handler = new Handler (this); 147 } 148 149 /** 150 * This is the hook through which a subclass provides a handler 151 * which knows how to access DOM extensions, specific to some 152 * implementation, to record additional data in a DOM. 153 * Treat this as part of construction; don't call it except 154 * before (or between) parses. 155 */ 156 protected void setHandler (Handler h) 157 { 158 handler = h; 159 } 160 161 162 private Document emptyDocument () 163 throws SAXException 164 { 165 try { 166 return domImpl.newInstance (); 167 } catch (IllegalAccessException e) { 168 throw new SAXException ("can't access constructor: " 169 + e.getMessage ()); 170 } catch (InstantiationException e) { 171 throw new SAXException ("can't instantiate Document: " 172 + e.getMessage ()); 173 } 174 } 175 176 177 /** 178 * Configures this consumer as a buffer/filter, using the specified 179 * DOM implementation when constructing its result value. 180 * 181 * <p> This event consumer acts as a buffer and filter, in that it 182 * builds a DOM tree and then writes it out when <em>endDocument</em> 183 * is invoked. Because of the limitations of DOM, much information 184 * will as a rule not be seen in that replay. To get a full fidelity 185 * copy of the input event stream, use a {@link TeeConsumer}. 186 * 187 * @param impl class implementing {@link org.w3c.dom.Document Document} 188 * which publicly exposes a default constructor 189 * @param next receives a "replayed" sequence of parse events when 190 * the <em>endDocument</em> method is invoked. 191 * 192 * @exception SAXException when there is a problem creating an 193 * empty DOM document using the specified DOM implementation 194 */ 195 public DomConsumer (Class impl, EventConsumer n) 196 throws SAXException 197 { 198 this (impl); 199 next = n; 200 } 201 202 203 /** 204 * Returns the document constructed from the preceding 205 * sequence of events. This method should not be 206 * used again until another sequence of events has been 207 * given to this EventConsumer. 208 */ 209 final public Document getDocument () 210 { 211 return handler.clearDocument (); 212 } 213 214 public void setErrorHandler (ErrorHandler handler) 215 { 216 errHandler = handler; 217 } 218 219 220 /** 221 * Returns true if the consumer is hiding entity references nodes 222 * (the default), and false if EntityReference nodes should 223 * instead be created. Such EntityReference nodes will normally be 224 * empty, unless an implementation arranges to populate them and then 225 * turn them back into readonly objects. 226 * 227 * @see #setHidingReferences 228 */ 229 final public boolean isHidingReferences () 230 { return hidingReferences; } 231 232 /** 233 * Controls whether the consumer will hide entity expansions, 234 * or will instead mark them with entity reference nodes. 235 * 236 * @see #isHidingReferences 237 * @param flag False if entity reference nodes will appear 238 */ 239 final public void setHidingReferences (boolean flag) 240 { hidingReferences = flag; } 241 242 243 /** 244 * Returns true if the consumer is hiding comments (the default), 245 * and false if they should be placed into the output document. 246 * 247 * @see #setHidingComments 248 */ 249 public final boolean isHidingComments () 250 { return hidingComments; } 251 252 /** 253 * Controls whether the consumer is hiding comments. 254 * 255 * @see #isHidingComments 256 */ 257 public final void setHidingComments (boolean flag) 258 { hidingComments = flag; } 259 260 261 /** 262 * Returns true if the consumer is hiding ignorable whitespace 263 * (the default), and false if such whitespace should be placed 264 * into the output document as children of element nodes. 265 * 266 * @see #setHidingWhitespace 267 */ 268 public final boolean isHidingWhitespace () 269 { return hidingWhitespace; } 270 271 /** 272 * Controls whether the consumer hides ignorable whitespace 273 * 274 * @see #isHidingComments 275 */ 276 public final void setHidingWhitespace (boolean flag) 277 { hidingWhitespace = flag; } 278 279 280 /** 281 * Returns true if the consumer is saving CDATA boundaries, or 282 * false (the default) otherwise. 283 * 284 * @see #setHidingCDATA 285 */ 286 final public boolean isHidingCDATA () 287 { return hidingCDATA; } 288 289 /** 290 * Controls whether the consumer will save CDATA boundaries. 291 * 292 * @see #isHidingCDATA 293 * @param flag True to treat CDATA text differently from other 294 * text nodes 295 */ 296 final public void setHidingCDATA (boolean flag) 297 { hidingCDATA = flag; } 298 299 300 301 /** Returns the document handler being used. */ 302 final public ContentHandler getContentHandler () 303 { return handler; } 304 305 /** Returns the DTD handler being used. */ 306 final public DTDHandler getDTDHandler () 307 { return handler; } 308 309 /** 310 * Returns the lexical handler being used. 311 * (DOM construction can't really use declaration handlers.) 312 */ 313 final public Object getProperty (String id) 314 throws SAXNotRecognizedException 315 { 316 if ("http://xml.org/sax/properties/lexical-handler".equals (id)) 317 return handler; 318 if ("http://xml.org/sax/properties/declaration-handler".equals (id)) 319 return handler; 320 throw new SAXNotRecognizedException (id); 321 } 322 323 EventConsumer getNext () { return next; } 324 325 ErrorHandler getErrorHandler () { return errHandler; } 326 327 /** 328 * Class used to intercept various parsing events and use them to 329 * populate a DOM document. Subclasses would typically know and use 330 * backdoors into specific DOM implementations, used to implement 331 * DTD-related functionality. 332 * 333 * <p> Note that if this ever throws a DOMException (runtime exception) 334 * that will indicate a bug in the DOM (e.g. doesn't support something 335 * per specification) or the parser (e.g. emitted an illegal name, or 336 * accepted illegal input data). </p> 337 */ 338 public static class Handler 339 implements ContentHandler, LexicalHandler, 340 DTDHandler, DeclHandler 341 { 342 protected DomConsumer consumer; 343 344 private DOMImplementation impl; 345 private Document document; 346 private boolean isL2; 347 348 private Locator locator; 349 private Node top; 350 private boolean inCDATA; 351 private boolean mergeCDATA; 352 private boolean inDTD; 353 private String currentEntity; 354 355 private boolean recreatedAttrs; 356 private AttributesImpl attributes = new AttributesImpl (); 357 358 /** 359 * Subclasses may use SAX2 events to provide additional 360 * behaviors in the resulting DOM. 361 */ 362 protected Handler (DomConsumer consumer) 363 throws SAXException 364 { 365 this.consumer = consumer; 366 document = consumer.emptyDocument (); 367 impl = document.getImplementation (); 368 isL2 = impl.hasFeature ("XML", "2.0"); 369 } 370 371 private void fatal (String message, Exception x) 372 throws SAXException 373 { 374 SAXParseException e; 375 ErrorHandler errHandler = consumer.getErrorHandler (); 376 377 if (locator == null) 378 e = new SAXParseException (message, null, null, -1, -1, x); 379 else 380 e = new SAXParseException (message, locator, x); 381 if (errHandler != null) 382 errHandler.fatalError (e); 383 throw e; 384 } 385 386 /** 387 * Returns and forgets the document produced. If the handler is 388 * reused, a new document may be created. 389 */ 390 Document clearDocument () 391 { 392 Document retval = document; 393 document = null; 394 locator = null; 395 return retval; 396 } 397 398 /** 399 * Returns the document under construction. 400 */ 401 protected Document getDocument () 402 { return document; } 403 404 /** 405 * Returns the current node being populated. This is usually 406 * an Element or Document, but it might be an EntityReference 407 * node if some implementation-specific code knows how to put 408 * those into the result tree and later mark them as readonly. 409 */ 410 protected Node getTop () 411 { return top; } 412 413 414 // SAX1 415 public void setDocumentLocator (Locator locator) 416 { 417 this.locator = locator; 418 } 419 420 // SAX1 421 public void startDocument () 422 throws SAXException 423 { 424 if (document == null) 425 try { 426 if (isL2) { 427 // couple to original implementation 428 document = impl.createDocument (null, "foo", null); 429 document.removeChild (document.getFirstChild ()); 430 } else { 431 document = consumer.emptyDocument (); 432 } 433 } catch (Exception e) { 434 fatal ("DOM create document", e); 435 } 436 top = document; 437 } 438 439 // ContentHandler2 440 public void xmlDecl(String version, 441 String encoding, 442 boolean standalone, 443 String inputEncoding) 444 throws SAXException 445 { 446 if (document != null) 447 { 448 //document.setXmlVersion(version); 449 //document.setXmlStandalone(standalone); 450 } 451 } 452 453 // SAX1 454 public void endDocument () 455 throws SAXException 456 { 457 try { 458 if (consumer.getNext () != null && document != null) { 459 DomParser parser = new DomParser (document); 460 461 EventFilter.bind (parser, consumer.getNext ()); 462 parser.parse ("ignored"); 463 } 464 } finally { 465 top = null; 466 } 467 } 468 469 // SAX1 470 public void processingInstruction (String target, String data) 471 throws SAXException 472 { 473 // we can't create populated entity ref nodes using 474 // only public DOM APIs (they've got to be readonly) 475 if (currentEntity != null) 476 return; 477 478 ProcessingInstruction pi; 479 480 if (isL2 481 // && consumer.isUsingNamespaces () 482 && target.indexOf (':') != -1) 483 namespaceError ( 484 "PI target name is namespace nonconformant: " 485 + target); 486 if (inDTD) 487 return; 488 pi = document.createProcessingInstruction (target, data); 489 top.appendChild (pi); 490 } 491 492 /** 493 * Subclasses may overrride this method to provide a more efficient 494 * way to construct text nodes. 495 * Typically, copying the text into a single character array will 496 * be more efficient than doing that as well as allocating other 497 * needed for a String, including an internal StringBuilder. 498 * Those additional memory and CPU costs can be incurred later, 499 * if ever needed. 500 * Unfortunately the standard DOM factory APIs encourage those costs 501 * to be incurred early. 502 */ 503 protected Text createText ( 504 boolean isCDATA, 505 char ch [], 506 int start, 507 int length 508 ) { 509 String value = new String (ch, start, length); 510 511 if (isCDATA) 512 return document.createCDATASection (value); 513 else 514 return document.createTextNode (value); 515 } 516 517 // SAX1 518 public void characters (char ch [], int start, int length) 519 throws SAXException 520 { 521 // we can't create populated entity ref nodes using 522 // only public DOM APIs (they've got to be readonly 523 // at creation time) 524 if (currentEntity != null) 525 return; 526 527 Node lastChild = top.getLastChild (); 528 529 // merge consecutive text or CDATA nodes if appropriate. 530 if (lastChild instanceof Text) { 531 if (consumer.isHidingCDATA () 532 // consecutive Text content ... always merge 533 || (!inCDATA 534 && !(lastChild instanceof CDATASection)) 535 // consecutive CDATASection content ... don't 536 // merge between sections, only within them 537 || (inCDATA && mergeCDATA 538 && lastChild instanceof CDATASection) 539 ) { 540 CharacterData last = (CharacterData) lastChild; 541 String value = new String (ch, start, length); 542 543 last.appendData (value); 544 return; 545 } 546 } 547 if (inCDATA && !consumer.isHidingCDATA ()) { 548 top.appendChild (createText (true, ch, start, length)); 549 mergeCDATA = true; 550 } else 551 top.appendChild (createText (false, ch, start, length)); 552 } 553 554 // SAX2 555 public void skippedEntity (String name) 556 throws SAXException 557 { 558 // this callback is useless except to report errors, since 559 // we can't know if the ref was in content, within an 560 // attribute, within a declaration ... only one of those 561 // cases supports more intelligent action than a panic. 562 fatal ("skipped entity: " + name, null); 563 } 564 565 // SAX2 566 public void startPrefixMapping (String prefix, String uri) 567 throws SAXException 568 { 569 // reconstruct "xmlns" attributes deleted by all 570 // SAX2 parsers without "namespace-prefixes" = true 571 if ("".equals (prefix)) 572 attributes.addAttribute ("", "", "xmlns", 573 "CDATA", uri); 574 else 575 attributes.addAttribute ("", "", "xmlns:" + prefix, 576 "CDATA", uri); 577 recreatedAttrs = true; 578 } 579 580 // SAX2 581 public void endPrefixMapping (String prefix) 582 throws SAXException 583 { } 584 585 // SAX2 586 public void startElement ( 587 String uri, 588 String localName, 589 String qName, 590 Attributes atts 591 ) throws SAXException 592 { 593 // we can't create populated entity ref nodes using 594 // only public DOM APIs (they've got to be readonly) 595 if (currentEntity != null) 596 return; 597 598 // parser discarded basic information; DOM tree isn't writable 599 // without massaging to assign prefixes to all nodes. 600 // the "NSFilter" class does that massaging. 601 if (qName.length () == 0) 602 qName = localName; 603 604 605 Element element; 606 int length = atts.getLength (); 607 608 if (!isL2) { 609 element = document.createElement (qName); 610 611 // first the explicit attributes ... 612 length = atts.getLength (); 613 for (int i = 0; i < length; i++) 614 element.setAttribute (atts.getQName (i), 615 atts.getValue (i)); 616 // ... then any recreated ones (DOM deletes duplicates) 617 if (recreatedAttrs) { 618 recreatedAttrs = false; 619 length = attributes.getLength (); 620 for (int i = 0; i < length; i++) 621 element.setAttribute (attributes.getQName (i), 622 attributes.getValue (i)); 623 attributes.clear (); 624 } 625 626 top.appendChild (element); 627 top = element; 628 return; 629 } 630 631 // For an L2 DOM when namespace use is enabled, use 632 // createElementNS/createAttributeNS except when 633 // (a) it's an element in the default namespace, or 634 // (b) it's an attribute with no prefix 635 String namespace; 636 637 if (localName.length () != 0) 638 namespace = (uri.length () == 0) ? null : uri; 639 else 640 namespace = getNamespace (getPrefix (qName), atts); 641 642 if (namespace == null) 643 element = document.createElement (qName); 644 else 645 element = document.createElementNS (namespace, qName); 646 647 populateAttributes (element, atts); 648 if (recreatedAttrs) { 649 recreatedAttrs = false; 650 // ... DOM deletes any duplicates 651 populateAttributes (element, attributes); 652 attributes.clear (); 653 } 654 655 top.appendChild (element); 656 top = element; 657 } 658 659 final static String xmlnsURI = "http://www.w3.org/2000/xmlns/"; 660 661 private void populateAttributes (Element element, Attributes attrs) 662 throws SAXParseException 663 { 664 int length = attrs.getLength (); 665 666 for (int i = 0; i < length; i++) { 667 String type = attrs.getType (i); 668 String value = attrs.getValue (i); 669 String name = attrs.getQName (i); 670 String local = attrs.getLocalName (i); 671 String uri = attrs.getURI (i); 672 673 // parser discarded basic information, DOM tree isn't writable 674 if (name.length () == 0) 675 name = local; 676 677 // all attribute types other than these three may not 678 // contain scoped names... enumerated attributes get 679 // reported as NMTOKEN, except for NOTATION values 680 if (!("CDATA".equals (type) 681 || "NMTOKEN".equals (type) 682 || "NMTOKENS".equals (type))) { 683 if (value.indexOf (':') != -1) { 684 namespaceError ( 685 "namespace nonconformant attribute value: " 686 + "<" + element.getNodeName () 687 + " " + name + "='" + value + "' ...>"); 688 } 689 } 690 691 // xmlns="" is legal (undoes default NS) 692 // xmlns:foo="" is illegal 693 String prefix = getPrefix (name); 694 String namespace; 695 696 if ("xmlns".equals (prefix)) { 697 if ("".equals (value)) 698 namespaceError ("illegal null namespace decl, " + name); 699 namespace = xmlnsURI; 700 } else if ("xmlns".equals (name)) 701 namespace = xmlnsURI; 702 703 else if (prefix == null) 704 namespace = null; 705 else if (!"".equals(uri) && uri.length () != 0) 706 namespace = uri; 707 else 708 namespace = getNamespace (prefix, attrs); 709 710 if (namespace == null) 711 element.setAttribute (name, value); 712 else 713 element.setAttributeNS (namespace, name, value); 714 } 715 } 716 717 private String getPrefix (String name) 718 { 719 int temp; 720 721 if ((temp = name.indexOf (':')) > 0) 722 return name.substring (0, temp); 723 return null; 724 } 725 726 // used with SAX1-level parser output 727 private String getNamespace (String prefix, Attributes attrs) 728 throws SAXParseException 729 { 730 String namespace; 731 String decl; 732 733 // defaulting 734 if (prefix == null) { 735 decl = "xmlns"; 736 namespace = attrs.getValue (decl); 737 if ("".equals (namespace)) 738 return null; 739 else if (namespace != null) 740 return namespace; 741 742 // "xmlns" is like a keyword 743 // ... according to the Namespace REC, but DOM L2 CR2+ 744 // and Infoset violate that by assigning a namespace. 745 // that conflict is resolved elsewhere. 746 } else if ("xmlns".equals (prefix)) 747 return null; 748 749 // "xml" prefix is fixed 750 else if ("xml".equals (prefix)) 751 return "http://www.w3.org/XML/1998/namespace"; 752 753 // otherwise, expect a declaration 754 else { 755 decl = "xmlns:" + prefix; 756 namespace = attrs.getValue (decl); 757 } 758 759 // if we found a local declaration, great 760 if (namespace != null) 761 return namespace; 762 763 764 // ELSE ... search up the tree we've been building 765 for (Node n = top; 766 n != null && n.getNodeType () != Node.DOCUMENT_NODE; 767 n = n.getParentNode ()) { 768 if (n.getNodeType () == Node.ENTITY_REFERENCE_NODE) 769 continue; 770 Element e = (Element) n; 771 Attr attr = e.getAttributeNode (decl); 772 if (attr != null) 773 return attr.getNodeValue (); 774 } 775 // see above re "xmlns" as keyword 776 if ("xmlns".equals (decl)) 777 return null; 778 779 namespaceError ("Undeclared namespace prefix: " + prefix); 780 return null; 781 } 782 783 // SAX2 784 public void endElement (String uri, String localName, String qName) 785 throws SAXException 786 { 787 // we can't create populated entity ref nodes using 788 // only public DOM APIs (they've got to be readonly) 789 if (currentEntity != null) 790 return; 791 792 top = top.getParentNode (); 793 } 794 795 // SAX1 (mandatory reporting if validating) 796 public void ignorableWhitespace (char ch [], int start, int length) 797 throws SAXException 798 { 799 if (consumer.isHidingWhitespace ()) 800 return; 801 characters (ch, start, length); 802 } 803 804 // SAX2 lexical event 805 public void startCDATA () 806 throws SAXException 807 { 808 inCDATA = true; 809 // true except for the first fragment of a cdata section 810 mergeCDATA = false; 811 } 812 813 // SAX2 lexical event 814 public void endCDATA () 815 throws SAXException 816 { 817 inCDATA = false; 818 } 819 820 // SAX2 lexical event 821 // 822 // this SAX2 callback merges two unrelated things: 823 // - Declaration of the root element type ... belongs with 824 // the other DTD declaration methods, NOT HERE. 825 // - IDs for the optional external subset ... belongs here 826 // with other lexical information. 827 // 828 // ...and it doesn't include the internal DTD subset, desired 829 // both to support DOM L2 and to enable "pass through" processing 830 // 831 public void startDTD (String name, String publicId, String SystemId) 832 throws SAXException 833 { 834 // need to filter out comments and PIs within the DTD 835 inDTD = true; 836 } 837 838 // SAX2 lexical event 839 public void endDTD () 840 throws SAXException 841 { 842 inDTD = false; 843 } 844 845 // SAX2 lexical event 846 public void comment (char ch [], int start, int length) 847 throws SAXException 848 { 849 Node comment; 850 851 // we can't create populated entity ref nodes using 852 // only public DOM APIs (they've got to be readonly) 853 if (consumer.isHidingComments () 854 || inDTD 855 || currentEntity != null) 856 return; 857 comment = document.createComment (new String (ch, start, length)); 858 top.appendChild (comment); 859 } 860 861 /** 862 * May be overridden by subclasses to return true, indicating 863 * that entity reference nodes can be populated and then made 864 * read-only. 865 */ 866 public boolean canPopulateEntityRefs () 867 { return false; } 868 869 // SAX2 lexical event 870 public void startEntity (String name) 871 throws SAXException 872 { 873 // are we ignoring what would be contents of an 874 // entity ref, since we can't populate it? 875 if (currentEntity != null) 876 return; 877 878 // Are we hiding all entity boundaries? 879 if (consumer.isHidingReferences ()) 880 return; 881 882 // SAX2 shows parameter entities; DOM hides them 883 if (name.charAt (0) == '%' || "[dtd]".equals (name)) 884 return; 885 886 // Since we can't create a populated entity ref node in any 887 // standard way, we create an unpopulated one. 888 EntityReference ref = document.createEntityReference (name); 889 top.appendChild (ref); 890 top = ref; 891 892 // ... allowing subclasses to populate them 893 if (!canPopulateEntityRefs ()) 894 currentEntity = name; 895 } 896 897 // SAX2 lexical event 898 public void endEntity (String name) 899 throws SAXException 900 { 901 if (name.charAt (0) == '%' || "[dtd]".equals (name)) 902 return; 903 if (name.equals (currentEntity)) 904 currentEntity = null; 905 if (!consumer.isHidingReferences ()) 906 top = top.getParentNode (); 907 } 908 909 910 // SAX1 DTD event 911 public void notationDecl ( 912 String name, 913 String publicId, String SystemId 914 ) throws SAXException 915 { 916 /* IGNORE -- no public DOM API lets us store these 917 * into the doctype node 918 */ 919 } 920 921 // SAX1 DTD event 922 public void unparsedEntityDecl ( 923 String name, 924 String publicId, String SystemId, 925 String notationName 926 ) throws SAXException 927 { 928 /* IGNORE -- no public DOM API lets us store these 929 * into the doctype node 930 */ 931 } 932 933 // SAX2 declaration event 934 public void elementDecl (String name, String model) 935 throws SAXException 936 { 937 /* IGNORE -- no content model support in DOM L2 */ 938 } 939 940 // SAX2 declaration event 941 public void attributeDecl ( 942 String eName, 943 String aName, 944 String type, 945 String mode, 946 String value 947 ) throws SAXException 948 { 949 /* IGNORE -- no attribute model support in DOM L2 */ 950 } 951 952 // SAX2 declaration event 953 public void internalEntityDecl (String name, String value) 954 throws SAXException 955 { 956 /* IGNORE -- no public DOM API lets us store these 957 * into the doctype node 958 */ 959 } 960 961 // SAX2 declaration event 962 public void externalEntityDecl ( 963 String name, 964 String publicId, 965 String SystemId 966 ) throws SAXException 967 { 968 /* IGNORE -- no public DOM API lets us store these 969 * into the doctype node 970 */ 971 } 972 973 // 974 // These really should offer the option of nonfatal handling, 975 // like other validity errors, though that would cause major 976 // chaos in the DOM data structures. DOM is already spec'd 977 // to treat many of these as fatal, so this is consistent. 978 // 979 private void namespaceError (String description) 980 throws SAXParseException 981 { 982 SAXParseException err; 983 984 err = new SAXParseException (description, locator); 985 throw err; 986 } 987 } 988 }