fi.iki.hsivonen.gnu.xml.aelfred2
Class XmlParser

java.lang.Object
  extended by fi.iki.hsivonen.gnu.xml.aelfred2.XmlParser

final class XmlParser
extends Object

Parse XML documents and return parse events through call-backs. Use the SAXDriver class as your entry point, as all internal parser interfaces are subject to change.

Author:
Written by David Megginson <dmeggins@microstar.com> (version 1.2a with bugfixes), Updated by David Brownell <dbrownell@users.sourceforge.net>
See Also:
SAXDriver

Nested Class Summary
(package private) static class XmlParser.AttributeDecl
           
(package private) static class XmlParser.ElementDecl
           
(package private) static class XmlParser.EntityInfo
           
(package private) static class XmlParser.ExternalIdentifiers
           
(package private) static class XmlParser.Input
           
 
Field Summary
private  boolean alreadyWarnedAboutPrivateUseCharacters
           
static int ATTRIBUTE_DEFAULT_FIXED
          Constant: the attribute was declared #FIXED.
static int ATTRIBUTE_DEFAULT_IMPLIED
          Constant: the attribute was declared #IMPLIED.
static int ATTRIBUTE_DEFAULT_REQUIRED
          Constant: the attribute was declared #REQUIRED.
static int ATTRIBUTE_DEFAULT_SPECIFIED
          Constant: the attribute has a literal default value specified.
static int ATTRIBUTE_DEFAULT_UNDECLARED
          Constant: the attribute is not declared.
private  String characterEncoding
           
private  int column
           
static int CONTENT_ANY
          Constant: the element has a content model of ANY.
static int CONTENT_ELEMENTS
          Constant: the element has element content.
static int CONTENT_EMPTY
          Constant: the element has declared content of EMPTY.
static int CONTENT_MIXED
          Constant: the element has mixed content.
static int CONTENT_UNDECLARED
          Constant: an element has not been declared.
private static int CONTEXT_LITERAL
           
private static int CONTEXT_NORMAL
           
private  int currentByteCount
           
private  String currentElement
           
private  int currentElementContent
           
private static int DATA_BUFFER_INITIAL
           
private  char[] dataBuffer
           
private  int dataBufferPos
           
private  boolean docIsStandalone
           
private  boolean doReport
           
private  HashMap<String,XmlParser.ElementDecl> elementInfo
           
(package private) static char[] endDelimCDATA
           
(package private) static char[] endDelimComment
           
(package private) static char[] endDelimPI
           
static int ENTITY_INTERNAL
          Constant: the entity is internal.
static int ENTITY_NDATA
          Constant: the entity is external, non-parsable data.
static int ENTITY_TEXT
          Constant: the entity is external XML data.
static int ENTITY_UNDECLARED
          Constant: the entity has not been declared.
private  HashMap<String,XmlParser.EntityInfo> entityInfo
           
private  LinkedList<String> entityStack
           
private  boolean expandPE
           
private  SAXDriver handler
           
private  boolean inCDATA
           
private  boolean inLiteral
           
private static int INPUT_INTERNAL
           
private static int INPUT_NONE
           
private static int INPUT_READER
           
private  LinkedList<XmlParser.Input> inputStack
           
private  InputStream is
           
private  boolean isDirtyCurrentElement
           
private  int line
           
private static int LIT_ATTRIBUTE
           
private static int LIT_DISABLE_CREF
           
private static int LIT_DISABLE_EREF
           
private static int LIT_DISABLE_PE
           
private static int LIT_ENTITY_REF
           
private static int LIT_NORMALIZE
           
private static int LIT_PUBID
           
private static int NAME_BUFFER_INITIAL
           
private  char[] nameBuffer
           
private  int nameBufferPos
           
private  NormalizationChecker normalizationChecker
           
private  HashMap<String,String> notationInfo
           
private  boolean peIsError
           
private  char prev
           
private  byte[] rawReadBuffer
           
private static int READ_BUFFER_MAX
           
private  char[] readBuffer
           
private  int readBufferLength
           
private  int readBufferOverflow
           
private  int readBufferPos
           
private  Reader reader
           
private  boolean sawCR
           
private  InputSource scratch
           
private  boolean skippedPE
           
private  int sourceType
           
(package private) static char[] startDelimComment
           
(package private) static char[] startDelimPI
           
private static int SURROGATE_OFFSET
           
private static int SYMBOL_TABLE_LENGTH
           
private  Object[][] symbolTable
           
private  int tagAttributePos
           
private  String[] tagAttributes
           
(package private) static boolean uriWarnings
           
private static boolean USE_CHEATS
           
private static int XML_10
           
private static int XML_11
           
private  int xmlVersion
           
 
Constructor Summary
XmlParser()
          Construct a new parser with no associated handler.
 
Method Summary
private  void checkEncodingLiteral(String encodingName)
           
private  void checkEncodingMatch(String used, String detected)
           
private  void checkLegalVersion(String version)
           
private  void dataBufferAppend(char c)
          Add a character to the data buffer.
private  void dataBufferAppend(char[] ch, int start, int length)
          Append (part of) a character array to the data buffer.
private  void dataBufferAppend(String s)
          Add a string to the data buffer.
private  void dataBufferFlush()
          Flush the contents of the data buffer to the handler, as appropriate, and reset the buffer for new input.
private  void dataBufferNormalize()
          Normalise space characters in the data buffer.
private  String dataBufferToString()
          Convert the data buffer to a string.
 Iterator<String> declaredAttributes(String elname)
          Get the declared attributes for an element type.
private  Iterator<String> declaredAttributes(XmlParser.ElementDecl element)
          Get the declared attributes for an element type.
private  void detectEncoding()
          Attempt to detect the encoding of an entity.
(package private)  void doParse(String systemId, String publicId, Reader reader, InputStream stream, String encoding)
          Parse an XML document from the character stream, byte stream, or URI that you provide (in that order of preference).
private  void draconianInputStreamReader(String encoding, InputStream stream, boolean requireAsciiSuperset)
           
private  void draconianInputStreamReader(String encoding, InputStream stream, boolean requireAsciiSuperset, String actualName)
           
private  void err(String message)
          Report non-fatal errors.
private  Object extendArray(Object array, int currentSize, int requiredSize)
          Ensure the capacity of an array, allocating a new one if necessary.
private  void fatal(String message)
          Report typical case fatal errors.
private  void fatal(String message, char textFound, String textExpected)
          Report a serious error.
private  void fatal(String message, String textFound, String textExpected)
          Report an error.
private  void filterCR(boolean moreData)
          Filter carriage returns in the read buffer.
private  XmlParser.AttributeDecl getAttribute(String elName, String name)
          Retrieve the attribute declaration for the given element name and name.
 String getAttributeDefaultValue(String name, String aname)
          Retrieve the default value of a declared attribute.
 int getAttributeDefaultValueType(String name, String aname)
          Retrieve the default value mode of a declared attribute.
 String getAttributeEnumeration(String name, String aname)
          Retrieve the allowed values for an enumerated attribute type.
 String getAttributeType(String name, String aname)
          Retrieve the declared type of an attribute.
 int getColumnNumber()
          Return the current column number.
private  int getContentType(XmlParser.ElementDecl element, int defaultType)
           
private  HashMap<String,XmlParser.AttributeDecl> getElementAttributes(String name)
          Look up the attribute hash table for an element.
 int getElementContentType(String name)
          Look up the content type of an element.
 XmlParser.ExternalIdentifiers getEntityIds(String ename)
          Return an external entity's identifiers.
 int getEntityType(String ename)
          Find the type of an entity.
 String getEntityValue(String ename)
          Return an internal entity's replacement text.
 int getLineNumber()
          Return the current line number.
private  void initializeVariables()
          Re-initialize the variables for each parse.
 String intern(char[] ch, int start, int length)
          Create an interned string from a character array.
private  boolean isAstralPrivateUse(int c)
           
private static boolean isExtender(char c)
           
private  boolean isNonCharacter(int c)
           
private  boolean isPrivateUse(char c)
           
private  boolean isPrivateUse(int c)
           
(package private)  boolean isStandalone()
           
private  boolean isWhitespace(char c)
          Test if a character is whitespace.
private  void parseAttDef(String elementName)
          Parse a single attribute definition.
private  void parseAttlistDecl()
          Parse an attribute list declaration.
private  void parseAttribute(String name)
          Parse an attribute assignment.
private  void parseCDSect()
          Parse a CDATA section.
private  void parseCharData()
          Parse character data.
private  void parseCharRef()
           
private  void parseCharRef(boolean doFlush)
          Read and interpret a character reference.
private  void parseComment()
          Skip a comment.
private  void parseConditionalSect(char[] saved)
          Parse a conditional section.
private  void parseContent()
          Parse the content of an element.
private  void parseContentspec(String name)
          Content specification.
private  void parseCp()
          Parse a content particle.
private  void parseDefault(String elementName, String name, String type, String enumer)
          Parse the default value for an attribute.
private  void parseDoctypedecl()
          Parse a document type declaration.
private  void parseDocument()
          Parse an XML document.
private  void parseElement(boolean maybeGetSubset)
          Parse an element, with its tags.
private  void parseElementDecl()
          Parse an element type declaration.
private  void parseElements(char[] saved)
          Parse an element-content model.
private  void parseEntityDecl()
          Parse an entity declaration.
private  void parseEntityRef(boolean externalAllowed)
          Parse and expand an entity reference.
private  void parseEnumeration(boolean isNames)
          Parse an enumeration.
private  void parseEq()
          Parse an equals sign surrounded by optional whitespace.
private  void parseETag()
          Parse an end tag.
private  void parseMarkupdecl()
          Parse a markup declaration in the internal or external DTD subset.
private  void parseMisc()
          Parse miscellaneous markup outside the document element and DOCTYPE declaration.
private  void parseMixed(char[] saved)
          Parse mixed content.
private  void parseNotationDecl()
          Parse a notation declaration.
private  void parseNotationType()
          Parse a notation type for an attribute.
private  void parsePEReference()
          Parse and expand a parameter entity reference.
private  void parsePI()
          Parse a processing instruction and do a call-back.
private  boolean parseProlog()
          Parse the prolog of an XML document.
private  String parseTextDecl(String encoding)
          Parse a text declaration.
private  void parseUntil(char[] delim)
           
private  String parseXMLDecl(String encoding)
          Parse the XML declaration.
private  void popInput()
          Restore a previous input source.
private  void prefetchASCIIEncodingDecl()
          Prefetch US-ASCII XML/text decl from input stream into read buffer.
private  void pushCharArray(String ename, char[] ch, int start, int length)
          Push a new internal input source.
private  void pushInput(String ename)
          Save the current input source onto the stack.
private  void pushString(String ename, String s)
          This method pushes a string back onto input.
private  void pushURL(boolean isPE, String ename, XmlParser.ExternalIdentifiers ids, Reader aReader, InputStream aStream, String aEncoding, boolean doResolve)
          Push, or skip, a new external input source.
private  String readAttType()
          Parse the attribute type.
private  char readCh()
          Read a single character from the readBuffer.
private  void readDataChunk()
          Read a chunk of data from an external input source.
private  XmlParser.ExternalIdentifiers readExternalIds(boolean inNotation, boolean isSubset)
          Try reading external identifiers.
private  String readLiteral(int flags)
          Read a literal.
private  String readNmtoken(boolean isName)
          Read a name or (when parsing an enumeration) name token.
private  void require(char delim)
          Require a character to appear, or throw an exception.
private  void require(String delim)
          Require a string to appear, or throw an exception.
private  void requireWhitespace()
          Require whitespace characters.
private  void setAttribute(String elName, String name, String type, String enumeration, String value, int valueType)
          Register an attribute declaration for later retrieval.
private  void setElement(String name, int contentType, String contentModel, HashMap<String,XmlParser.AttributeDecl> attributes)
          Register an element.
private  void setExternalEntity(String eName, int eClass, XmlParser.ExternalIdentifiers ids, String nName)
          Register an external entity declaration for later retrieval.
(package private)  void setHandler(SAXDriver handler)
          Set the handler that will receive parsing events.
private  void setInternalEntity(String eName, String value)
          Register an entity declaration for later retrieval.
private  void setNotation(String nname, XmlParser.ExternalIdentifiers ids)
          Report a notation declaration, checking for duplicates.
private  void skipWhitespace()
          Skip whitespace characters.
private static boolean tryEncoding(byte[] sig, byte b1, byte b2)
          Check for a two-byte signature.
private static boolean tryEncoding(byte[] sig, byte b1, byte b2, byte b3, byte b4)
          Check for a four-byte signature.
private  String tryEncodingDecl(String encoding)
          Check for an encoding declaration.
private  boolean tryRead(char delim)
          Return true if we can read the expected character.
private  boolean tryRead(char[] ch)
           
private  boolean tryRead(String delim)
          Return true if we can read the expected string.
private  void tryReadCharRef()
          Try to read a character reference without consuming data from buffer.
private  boolean tryWhitespace()
          Return true if we can read some whitespace.
private  void unread(char c)
          Push a single character back onto the current input stream.
private  void unread(char[] ch, int length)
          Push a char array back onto the current input stream.
private  void warnAboutLackOfEncodingDecl(String encoding)
           
private  void warnAboutPrivateUseChar()
           
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Field Detail

USE_CHEATS

private static final boolean USE_CHEATS
See Also:
Constant Field Values

SURROGATE_OFFSET

private static final int SURROGATE_OFFSET
See Also:
Constant Field Values

CONTENT_UNDECLARED

public static final int CONTENT_UNDECLARED
Constant: an element has not been declared.

See Also:
getElementContentType(java.lang.String), Constant Field Values

CONTENT_ANY

public static final int CONTENT_ANY
Constant: the element has a content model of ANY.

See Also:
getElementContentType(java.lang.String), Constant Field Values

CONTENT_EMPTY

public static final int CONTENT_EMPTY
Constant: the element has declared content of EMPTY.

See Also:
getElementContentType(java.lang.String), Constant Field Values

CONTENT_MIXED

public static final int CONTENT_MIXED
Constant: the element has mixed content.

See Also:
getElementContentType(java.lang.String), Constant Field Values

CONTENT_ELEMENTS

public static final int CONTENT_ELEMENTS
Constant: the element has element content.

See Also:
getElementContentType(java.lang.String), Constant Field Values

ENTITY_UNDECLARED

public static final int ENTITY_UNDECLARED
Constant: the entity has not been declared.

See Also:
getEntityType(java.lang.String), Constant Field Values

ENTITY_INTERNAL

public static final int ENTITY_INTERNAL
Constant: the entity is internal.

See Also:
getEntityType(java.lang.String), Constant Field Values

ENTITY_NDATA

public static final int ENTITY_NDATA
Constant: the entity is external, non-parsable data.

See Also:
getEntityType(java.lang.String), Constant Field Values

ENTITY_TEXT

public static final int ENTITY_TEXT
Constant: the entity is external XML data.

See Also:
getEntityType(java.lang.String), Constant Field Values

ATTRIBUTE_DEFAULT_UNDECLARED

public static final int ATTRIBUTE_DEFAULT_UNDECLARED
Constant: the attribute is not declared.

See Also:
getAttributeDefaultValueType(java.lang.String, java.lang.String), Constant Field Values

ATTRIBUTE_DEFAULT_SPECIFIED

public static final int ATTRIBUTE_DEFAULT_SPECIFIED
Constant: the attribute has a literal default value specified.

See Also:
getAttributeDefaultValueType(java.lang.String, java.lang.String), getAttributeDefaultValue(java.lang.String, java.lang.String), Constant Field Values

ATTRIBUTE_DEFAULT_IMPLIED

public static final int ATTRIBUTE_DEFAULT_IMPLIED
Constant: the attribute was declared #IMPLIED.

See Also:
getAttributeDefaultValueType(java.lang.String, java.lang.String), Constant Field Values

ATTRIBUTE_DEFAULT_REQUIRED

public static final int ATTRIBUTE_DEFAULT_REQUIRED
Constant: the attribute was declared #REQUIRED.

See Also:
getAttributeDefaultValueType(java.lang.String, java.lang.String), Constant Field Values

ATTRIBUTE_DEFAULT_FIXED

public static final int ATTRIBUTE_DEFAULT_FIXED
Constant: the attribute was declared #FIXED.

See Also:
getAttributeDefaultValueType(java.lang.String, java.lang.String), getAttributeDefaultValue(java.lang.String, java.lang.String), Constant Field Values

INPUT_NONE

private static final int INPUT_NONE
See Also:
Constant Field Values

INPUT_INTERNAL

private static final int INPUT_INTERNAL
See Also:
Constant Field Values

INPUT_READER

private static final int INPUT_READER
See Also:
Constant Field Values

LIT_ENTITY_REF

private static final int LIT_ENTITY_REF
See Also:
Constant Field Values

LIT_NORMALIZE

private static final int LIT_NORMALIZE
See Also:
Constant Field Values

LIT_ATTRIBUTE

private static final int LIT_ATTRIBUTE
See Also:
Constant Field Values

LIT_DISABLE_PE

private static final int LIT_DISABLE_PE
See Also:
Constant Field Values

LIT_DISABLE_CREF

private static final int LIT_DISABLE_CREF
See Also:
Constant Field Values

LIT_DISABLE_EREF

private static final int LIT_DISABLE_EREF
See Also:
Constant Field Values

LIT_PUBID

private static final int LIT_PUBID
See Also:
Constant Field Values

CONTEXT_NORMAL

private static final int CONTEXT_NORMAL
See Also:
Constant Field Values

CONTEXT_LITERAL

private static final int CONTEXT_LITERAL
See Also:
Constant Field Values

uriWarnings

static boolean uriWarnings

handler

private SAXDriver handler

reader

private Reader reader

is

private InputStream is

line

private int line

column

private int column

sourceType

private int sourceType

inputStack

private LinkedList<XmlParser.Input> inputStack

characterEncoding

private String characterEncoding

currentByteCount

private int currentByteCount

scratch

private InputSource scratch

readBuffer

private char[] readBuffer

readBufferPos

private int readBufferPos

readBufferLength

private int readBufferLength

readBufferOverflow

private int readBufferOverflow

READ_BUFFER_MAX

private static final int READ_BUFFER_MAX
See Also:
Constant Field Values

rawReadBuffer

private byte[] rawReadBuffer

DATA_BUFFER_INITIAL

private static int DATA_BUFFER_INITIAL

dataBuffer

private char[] dataBuffer

dataBufferPos

private int dataBufferPos

NAME_BUFFER_INITIAL

private static int NAME_BUFFER_INITIAL

nameBuffer

private char[] nameBuffer

nameBufferPos

private int nameBufferPos

docIsStandalone

private boolean docIsStandalone

elementInfo

private HashMap<String,XmlParser.ElementDecl> elementInfo

entityInfo

private HashMap<String,XmlParser.EntityInfo> entityInfo

notationInfo

private HashMap<String,String> notationInfo

skippedPE

private boolean skippedPE

currentElement

private String currentElement

currentElementContent

private int currentElementContent

entityStack

private LinkedList<String> entityStack

inLiteral

private boolean inLiteral

expandPE

private boolean expandPE

peIsError

private boolean peIsError

doReport

private boolean doReport

SYMBOL_TABLE_LENGTH

private static final int SYMBOL_TABLE_LENGTH
See Also:
Constant Field Values

symbolTable

private Object[][] symbolTable

tagAttributes

private String[] tagAttributes

tagAttributePos

private int tagAttributePos

sawCR

private boolean sawCR

inCDATA

private boolean inCDATA

XML_10

private static final int XML_10
See Also:
Constant Field Values

XML_11

private static final int XML_11
See Also:
Constant Field Values

xmlVersion

private int xmlVersion

normalizationChecker

private NormalizationChecker normalizationChecker

startDelimComment

static final char[] startDelimComment

endDelimComment

static final char[] endDelimComment

startDelimPI

static final char[] startDelimPI

endDelimPI

static final char[] endDelimPI

endDelimCDATA

static final char[] endDelimCDATA

isDirtyCurrentElement

private boolean isDirtyCurrentElement

alreadyWarnedAboutPrivateUseCharacters

private boolean alreadyWarnedAboutPrivateUseCharacters

prev

private char prev
Constructor Detail

XmlParser

XmlParser()
Construct a new parser with no associated handler.

See Also:
setHandler(fi.iki.hsivonen.gnu.xml.aelfred2.SAXDriver), #parse
Method Detail

setHandler

void setHandler(SAXDriver handler)
Set the handler that will receive parsing events.

Parameters:
handler - The handler to receive callback events.
See Also:
#parse

doParse

void doParse(String systemId,
             String publicId,
             Reader reader,
             InputStream stream,
             String encoding)
       throws Exception
Parse an XML document from the character stream, byte stream, or URI that you provide (in that order of preference). Any URI that you supply will become the base URI for resolving relative URIs, and may be used to acquire a reader or byte stream.

Only one thread at a time may use this parser; since it is private to this package, post-parse cleanup is done by the caller, which MUST NOT REUSE the parser (just null it).

Parameters:
systemId - Absolute URI of the document; should never be null, but may be so iff a reader or a stream is provided.
publicId - The public identifier of the document, or null.
reader - A character stream; must be null if stream isn't.
stream - A byte input stream; must be null if reader isn't.
encoding - The suggested encoding, or null if unknown.
Throws:
Exception - Basically SAXException or IOException

fatal

private void fatal(String message,
                   String textFound,
                   String textExpected)
            throws SAXException
Report an error.

Parameters:
message - The error message.
textFound - The text that caused the error (or null).
Throws:
SAXException
See Also:
SAXDriver#error, line

fatal

private void fatal(String message,
                   char textFound,
                   String textExpected)
            throws SAXException
Report a serious error.

Parameters:
message - The error message.
textFound - The character that caused the error.
Throws:
SAXException

fatal

private void fatal(String message)
            throws SAXException
Report typical case fatal errors.

Throws:
SAXException

err

private void err(String message)
          throws SAXException
Report non-fatal errors.

Throws:
SAXException

parseDocument

private void parseDocument()
                    throws Exception
Parse an XML document.
 [1] document ::= prolog element Misc*
 

This is the top-level parsing function for a single XML document. As a minimum, a well-formed document must have a document element, and a valid document must have a prolog (one with doctype) as well.

Throws:
Exception

parseComment

private void parseComment()
                   throws Exception
Skip a comment.
 [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* "-->"
 

(The <!-- has already been read.)

Throws:
Exception

parsePI

private void parsePI()
              throws SAXException,
                     IOException
Parse a processing instruction and do a call-back.
 [16] PI ::= '<?' PITarget
    (S (Char* - (Char* '?>' Char*)))?
    '?>'
 [17] PITarget ::= Name - ( ('X'|'x') ('M'|'m') ('L'|'l') )
 

(The <? has already been read.)

Throws:
SAXException
IOException

parseCDSect

private void parseCDSect()
                  throws Exception
Parse a CDATA section.
 [18] CDSect ::= CDStart CData CDEnd
 [19] CDStart ::= '<![CDATA['
 [20] CData ::= (Char* - (Char* ']]>' Char*))
 [21] CDEnd ::= ']]>'
 

(The '<![CDATA[' has already been read.)

Throws:
Exception

parseProlog

private boolean parseProlog()
                     throws Exception
Parse the prolog of an XML document.
 [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
 

We do not look for the XML declaration here, because it was handled by pushURL ().

Returns:
true if a DTD was read.
Throws:
Exception
See Also:
pushURL

checkLegalVersion

private void checkLegalVersion(String version)
                        throws SAXException
Throws:
SAXException

parseXMLDecl

private String parseXMLDecl(String encoding)
                     throws SAXException,
                            IOException
Parse the XML declaration.
 [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
 [24] VersionInfo ::= S 'version' Eq
    ("'" VersionNum "'" | '"' VersionNum '"' )
 [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
 [32] SDDecl ::= S 'standalone' Eq
    ( "'" ('yes' | 'no') "'" | '"' ('yes' | 'no') '"' )
 [80] EncodingDecl ::= S 'encoding' Eq
    ( '"' EncName '"' | "'" EncName "'" )
 [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
 

(The <?xml and whitespace have already been read.)

Returns:
the encoding in the declaration, uppercased; or null
Throws:
SAXException
IOException
See Also:
parseTextDecl(java.lang.String), #setupDecoding

checkEncodingLiteral

private void checkEncodingLiteral(String encodingName)
                           throws SAXException
Throws:
SAXException

parseTextDecl

private String parseTextDecl(String encoding)
                      throws SAXException,
                             IOException
Parse a text declaration.
 [79] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
 [80] EncodingDecl ::= S 'encoding' Eq
    ( '"' EncName '"' | "'" EncName "'" )
 [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
 

(The <?xml and whitespace have already been read.)

Returns:
the encoding in the declaration, uppercased; or null
Throws:
SAXException
IOException
See Also:
parseXMLDecl(java.lang.String), #setupDecoding

checkEncodingMatch

private void checkEncodingMatch(String used,
                                String detected)
                         throws SAXException
Throws:
SAXException

draconianInputStreamReader

private void draconianInputStreamReader(String encoding,
                                        InputStream stream,
                                        boolean requireAsciiSuperset)
                                 throws SAXException,
                                        IOException
Throws:
SAXException
IOException

draconianInputStreamReader

private void draconianInputStreamReader(String encoding,
                                        InputStream stream,
                                        boolean requireAsciiSuperset,
                                        String actualName)
                                 throws SAXException,
                                        IOException
Throws:
SAXException
IOException

parseMisc

private void parseMisc()
                throws Exception
Parse miscellaneous markup outside the document element and DOCTYPE declaration.
 [27] Misc ::= Comment | PI | S
 

Throws:
Exception

parseDoctypedecl

private void parseDoctypedecl()
                       throws Exception
Parse a document type declaration.
 [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
    ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
 

(The <!DOCTYPE has already been read.)

Throws:
Exception

parseMarkupdecl

private void parseMarkupdecl()
                      throws Exception
Parse a markup declaration in the internal or external DTD subset.
 [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl
    | NotationDecl | PI | Comment
 [30] extSubsetDecl ::= (markupdecl | conditionalSect
    | PEReference | S) *
 

Reading toplevel PE references is handled as a lexical issue by the caller, as is whitespace.

Throws:
Exception

parseElement

private void parseElement(boolean maybeGetSubset)
                   throws Exception
Parse an element, with its tags.
 [39] element ::= EmptyElementTag | STag content ETag
 [40] STag ::= '<' Name (S Attribute)* S? '>'
 [44] EmptyElementTag ::= '<' Name (S Attribute)* S? '/>'
 

(The '<' has already been read.)

NOTE: this method actually chains onto parseContent (), if necessary, and parseContent () will take care of calling parseETag ().

Throws:
Exception

parseAttribute

private void parseAttribute(String name)
                     throws Exception
Parse an attribute assignment.
 [41] Attribute ::= Name Eq AttValue
 

Parameters:
name - The name of the attribute's element.
Throws:
Exception
See Also:
SAXDriver.attribute(java.lang.String, java.lang.String, boolean)

parseEq

private void parseEq()
              throws SAXException,
                     IOException
Parse an equals sign surrounded by optional whitespace.
 [25] Eq ::= S? '=' S?
 

Throws:
SAXException
IOException

parseETag

private void parseETag()
                throws Exception
Parse an end tag.
 [42] ETag ::= '</' Name S? '>'
 

NOTE: parseContent () chains to here, we already read the "</".

Throws:
Exception

parseContent

private void parseContent()
                   throws Exception
Parse the content of an element.
 [43] content ::= (element | CharData | Reference
    | CDSect | PI | Comment)*
 [67] Reference ::= EntityRef | CharRef
 

NOTE: consumes ETag.

Throws:
Exception

parseElementDecl

private void parseElementDecl()
                       throws Exception
Parse an element type declaration.
 [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
 

NOTE: the '<!ELEMENT' has already been read.

Throws:
Exception

parseContentspec

private void parseContentspec(String name)
                       throws Exception
Content specification.
 [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | elements
 

Throws:
Exception

parseElements

private void parseElements(char[] saved)
                    throws Exception
Parse an element-content model.
 [47] elements ::= (choice | seq) ('?' | '*' | '+')?
 [49] choice ::= '(' S? cp (S? '|' S? cp)+ S? ')'
 [50] seq ::= '(' S? cp (S? ',' S? cp)* S? ')'
 

NOTE: the opening '(' and S have already been read.

Parameters:
saved - Buffer for entity that should have the terminal ')'
Throws:
Exception

parseCp

private void parseCp()
              throws Exception
Parse a content particle.
 [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
 

Throws:
Exception

parseMixed

private void parseMixed(char[] saved)
                 throws Exception
Parse mixed content.
 [51] Mixed ::= '(' S? ( '#PCDATA' (S? '|' S? Name)*) S? ')*'
        | '(' S? ('#PCDATA') S? ')'
 

Parameters:
saved - Buffer for entity that should have the terminal ')'
Throws:
Exception

parseAttlistDecl

private void parseAttlistDecl()
                       throws Exception
Parse an attribute list declaration.
 [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
 

NOTE: the '<!ATTLIST' has already been read.

Throws:
Exception

parseAttDef

private void parseAttDef(String elementName)
                  throws Exception
Parse a single attribute definition.
 [53] AttDef ::= S Name S AttType S DefaultDecl
 

Throws:
Exception

readAttType

private String readAttType()
                    throws Exception
Parse the attribute type.
 [54] AttType ::= StringType | TokenizedType | EnumeratedType
 [55] StringType ::= 'CDATA'
 [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY'
    | 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
 [57] EnumeratedType ::= NotationType | Enumeration
 

Throws:
Exception

parseEnumeration

private void parseEnumeration(boolean isNames)
                       throws Exception
Parse an enumeration.
 [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
 

NOTE: the '(' has already been read.

Throws:
Exception

parseNotationType

private void parseNotationType()
                        throws Exception
Parse a notation type for an attribute.
 [58] NotationType ::= 'NOTATION' S '(' S? Name
    (S? '|' S? Name)* S? ')'
 

NOTE: the 'NOTATION' has already been read

Throws:
Exception

parseDefault

private void parseDefault(String elementName,
                          String name,
                          String type,
                          String enumer)
                   throws Exception
Parse the default value for an attribute.
 [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED'
    | (('#FIXED' S)? AttValue)
 

Throws:
Exception

parseConditionalSect

private void parseConditionalSect(char[] saved)
                           throws Exception
Parse a conditional section.
 [61] conditionalSect ::= includeSect | ignoreSect
 [62] includeSect ::= '<![' S? 'INCLUDE' S? '['
    extSubsetDecl ']]>'
 [63] ignoreSect ::= '<![' S? 'IGNORE' S? '['
    ignoreSectContents* ']]>'
 [64] ignoreSectContents ::= Ignore
    ('<![' ignoreSectContents* ']]>' Ignore )*
 [65] Ignore ::= Char* - (Char* ( '<![' | ']]>') Char* )
 

NOTE: the '<![' has already been read.

Throws:
Exception

parseCharRef

private void parseCharRef()
                   throws SAXException,
                          IOException
Throws:
SAXException
IOException

tryReadCharRef

private void tryReadCharRef()
                     throws SAXException,
                            IOException
Try to read a character reference without consuming data from buffer.
 [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
 

NOTE: the '&#' has already been read.

Throws:
SAXException
IOException

parseCharRef

private void parseCharRef(boolean doFlush)
                   throws SAXException,
                          IOException
Read and interpret a character reference.
 [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
 

NOTE: the '&#' has already been read.

Throws:
SAXException
IOException

parseEntityRef

private void parseEntityRef(boolean externalAllowed)
                     throws SAXException,
                            IOException
Parse and expand an entity reference.
 [68] EntityRef ::= '&' Name ';'
 

NOTE: the '&' has already been read.

Parameters:
externalAllowed - External entities are allowed here.
Throws:
SAXException
IOException

parsePEReference

private void parsePEReference()
                       throws SAXException,
                              IOException
Parse and expand a parameter entity reference.
 [69] PEReference ::= '%' Name ';'
 

NOTE: the '%' has already been read.

Throws:
SAXException
IOException

parseEntityDecl

private void parseEntityDecl()
                      throws Exception
Parse an entity declaration.
 [70] EntityDecl ::= GEDecl | PEDecl
 [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
 [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
 [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
 [74] PEDef ::= EntityValue | ExternalID
 [75] ExternalID ::= 'SYSTEM' S SystemLiteral
       | 'PUBLIC' S PubidLiteral S SystemLiteral
 [76] NDataDecl ::= S 'NDATA' S Name
 

NOTE: the '<!ENTITY' has already been read.

Throws:
Exception

parseNotationDecl

private void parseNotationDecl()
                        throws Exception
Parse a notation declaration.
 [82] NotationDecl ::= '<!NOTATION' S Name S
    (ExternalID | PublicID) S? '>'
 [83] PublicID ::= 'PUBLIC' S PubidLiteral
 

NOTE: the '<!NOTATION' has already been read.

Throws:
Exception

parseCharData

private void parseCharData()
                    throws Exception
Parse character data.
 [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
 

Throws:
Exception

requireWhitespace

private void requireWhitespace()
                        throws SAXException,
                               IOException
Require whitespace characters.

Throws:
SAXException
IOException

skipWhitespace

private void skipWhitespace()
                     throws SAXException,
                            IOException
Skip whitespace characters.
 [3] S ::= (#x20 | #x9 | #xd | #xa)+
 

Throws:
SAXException
IOException

readNmtoken

private String readNmtoken(boolean isName)
                    throws SAXException,
                           IOException
Read a name or (when parsing an enumeration) name token.
 [5] Name ::= (Letter | '_' | ':') (NameChar)*
 [7] Nmtoken ::= (NameChar)+
 

Throws:
SAXException
IOException

isExtender

private static boolean isExtender(char c)

readLiteral

private String readLiteral(int flags)
                    throws SAXException,
                           IOException
Read a literal. With matching single or double quotes as delimiters (and not embedded!) this is used to parse:
  [9] EntityValue ::= ... ([^%&] | PEReference | Reference)* ...
  [10] AttValue ::= ... ([^<&] | Reference)* ...
  [11] SystemLiteral ::= ... (URLchar - "'")* ...
  [12] PubidLiteral ::= ... (PubidChar - "'")* ...
 
as well as the quoted strings in XML and text declarations (for version, encoding, and standalone) which have their own constraints.

Throws:
SAXException
IOException

readExternalIds

private XmlParser.ExternalIdentifiers readExternalIds(boolean inNotation,
                                                      boolean isSubset)
                                               throws Exception
Try reading external identifiers. A system identifier is not required for notations.

Parameters:
inNotation - Are we parsing a notation decl?
isSubset - Parsing external subset decl (may be omitted)?
Returns:
A three-member String array containing the identifiers, or nulls. Order: public, system, baseURI.
Throws:
Exception

isWhitespace

private final boolean isWhitespace(char c)
Test if a character is whitespace.
 [3] S ::= (#x20 | #x9 | #xd | #xa)+
 

Parameters:
c - The character to test.
Returns:
true if the character is whitespace.

dataBufferAppend

private void dataBufferAppend(char c)
Add a character to the data buffer.


dataBufferAppend

private void dataBufferAppend(String s)
Add a string to the data buffer.


dataBufferAppend

private void dataBufferAppend(char[] ch,
                              int start,
                              int length)
Append (part of) a character array to the data buffer.


dataBufferNormalize

private void dataBufferNormalize()
Normalise space characters in the data buffer.


dataBufferToString

private String dataBufferToString()
Convert the data buffer to a string.


dataBufferFlush

private void dataBufferFlush()
                      throws SAXException
Flush the contents of the data buffer to the handler, as appropriate, and reset the buffer for new input.

Throws:
SAXException

require

private void require(String delim)
              throws SAXException,
                     IOException
Require a string to appear, or throw an exception.

Precondition: Entity expansion is not required.

Precondition: data buffer has no characters that will get sent to the application.

Throws:
SAXException
IOException

require

private void require(char delim)
              throws SAXException,
                     IOException
Require a character to appear, or throw an exception.

Throws:
SAXException
IOException

intern

public String intern(char[] ch,
                     int start,
                     int length)
Create an interned string from a character array. Ælfred uses this method to create an interned version of all names and name tokens, so that it can test equality with == instead of String.equals ().

This is much more efficient than constructing a non-interned string first, and then interning it.

Parameters:
ch - an array of characters for building the string.
start - the starting position in the array.
length - the number of characters to place in the string.
Returns:
an interned string.
See Also:
(String), String.intern()

extendArray

private Object extendArray(Object array,
                           int currentSize,
                           int requiredSize)
Ensure the capacity of an array, allocating a new one if necessary. Usually extends only for name hash collisions.


isStandalone

boolean isStandalone()

getContentType

private int getContentType(XmlParser.ElementDecl element,
                           int defaultType)

getElementContentType

public int getElementContentType(String name)
Look up the content type of an element.

Parameters:
name - The element type name.
Returns:
An integer constant representing the content type.
See Also:
CONTENT_UNDECLARED, CONTENT_ANY, CONTENT_EMPTY, CONTENT_MIXED, CONTENT_ELEMENTS

setElement

private void setElement(String name,
                        int contentType,
                        String contentModel,
                        HashMap<String,XmlParser.AttributeDecl> attributes)
                 throws SAXException
Register an element. Array format: [0] element type name [1] content model (mixed, elements only) [2] attribute hash table

Throws:
SAXException

getElementAttributes

private HashMap<String,XmlParser.AttributeDecl> getElementAttributes(String name)
Look up the attribute hash table for an element. The hash table is the second item in the element array.


declaredAttributes

private Iterator<String> declaredAttributes(XmlParser.ElementDecl element)
Get the declared attributes for an element type.

Parameters:
element - The declaration of the element type.
Returns:
An iterator over all the attributes declared for a specific element type. The results will be valid only after the DTD (if any) has been parsed.
See Also:
getAttributeType(java.lang.String, java.lang.String), getAttributeEnumeration(java.lang.String, java.lang.String), getAttributeDefaultValueType(java.lang.String, java.lang.String), getAttributeDefaultValue(java.lang.String, java.lang.String), #getAttributeExpandedValue

declaredAttributes

public Iterator<String> declaredAttributes(String elname)
Get the declared attributes for an element type.

Parameters:
elname - The name of the element type.
Returns:
An iterator over all the attributes declared for a specific element type. The results will be valid only after the DTD (if any) has been parsed.
See Also:
getAttributeType(java.lang.String, java.lang.String), getAttributeEnumeration(java.lang.String, java.lang.String), getAttributeDefaultValueType(java.lang.String, java.lang.String), getAttributeDefaultValue(java.lang.String, java.lang.String), #getAttributeExpandedValue

getAttributeType

public String getAttributeType(String name,
                               String aname)
Retrieve the declared type of an attribute.

Parameters:
name - The name of the associated element.
aname - The name of the attribute.
Returns:
An interned string denoting the type, or null indicating an undeclared attribute.

getAttributeEnumeration

public String getAttributeEnumeration(String name,
                                      String aname)
Retrieve the allowed values for an enumerated attribute type.

Parameters:
name - The name of the associated element.
aname - The name of the attribute.
Returns:
A string containing the token list.

getAttributeDefaultValue

public String getAttributeDefaultValue(String name,
                                       String aname)
Retrieve the default value of a declared attribute.

Parameters:
name - The name of the associated element.
aname - The name of the attribute.
Returns:
The default value, or null if the attribute was #IMPLIED or simply undeclared and unspecified.
See Also:
#getAttributeExpandedValue

getAttributeDefaultValueType

public int getAttributeDefaultValueType(String name,
                                        String aname)
Retrieve the default value mode of a declared attribute.

See Also:
ATTRIBUTE_DEFAULT_SPECIFIED, ATTRIBUTE_DEFAULT_IMPLIED, ATTRIBUTE_DEFAULT_REQUIRED, ATTRIBUTE_DEFAULT_FIXED

setAttribute

private void setAttribute(String elName,
                          String name,
                          String type,
                          String enumeration,
                          String value,
                          int valueType)
                   throws Exception
Register an attribute declaration for later retrieval. Format: - String type - String default value - int value type - enumeration - processed default value

Throws:
Exception

getAttribute

private XmlParser.AttributeDecl getAttribute(String elName,
                                             String name)
Retrieve the attribute declaration for the given element name and name.


getEntityType

public int getEntityType(String ename)
Find the type of an entity.

See Also:
ENTITY_UNDECLARED, ENTITY_INTERNAL, ENTITY_NDATA, ENTITY_TEXT

getEntityIds

public XmlParser.ExternalIdentifiers getEntityIds(String ename)
Return an external entity's identifiers.

Parameters:
ename - The name of the external entity.
Returns:
The entity's public identifier, system identifier, and base URI. Null if the entity was not declared as an external entity.
See Also:
getEntityType(java.lang.String)

getEntityValue

public String getEntityValue(String ename)
Return an internal entity's replacement text.

Parameters:
ename - The name of the internal entity.
Returns:
The entity's replacement text, or null if the entity was not declared as an internal entity.
See Also:
getEntityType(java.lang.String)

setInternalEntity

private void setInternalEntity(String eName,
                               String value)
                        throws SAXException
Register an entity declaration for later retrieval.

Throws:
SAXException

setExternalEntity

private void setExternalEntity(String eName,
                               int eClass,
                               XmlParser.ExternalIdentifiers ids,
                               String nName)
Register an external entity declaration for later retrieval.


setNotation

private void setNotation(String nname,
                         XmlParser.ExternalIdentifiers ids)
                  throws SAXException
Report a notation declaration, checking for duplicates.

Throws:
SAXException

getLineNumber

public int getLineNumber()
Return the current line number.


getColumnNumber

public int getColumnNumber()
Return the current column number.


readCh

private char readCh()
             throws SAXException,
                    IOException
Read a single character from the readBuffer.

The readDataChunk () method maintains the buffer.

If we hit the end of an entity, try to pop the stack and keep going.

(This approach doesn't really enforce XML's rules about entity boundaries, but this is not currently a validating parser).

This routine also attempts to keep track of the current position in external entities, but it's not entirely accurate.

Returns:
The next available input character.
Throws:
SAXException
IOException
See Also:
(char), readDataChunk(), readBuffer, line

unread

private void unread(char c)
             throws SAXException
Push a single character back onto the current input stream.

This method usually pushes the character back onto the readBuffer.

I don't think that this would ever be called with readBufferPos = 0, because the methods always read a character before unreading it, but just in case, I've added a boundary condition.

Parameters:
c - The character to push back.
Throws:
SAXException
See Also:
readCh(), (char[]), readBuffer

unread

private void unread(char[] ch,
                    int length)
             throws SAXException
Push a char array back onto the current input stream.

NOTE: you must never push back characters that you haven't actually read: use pushString () instead.

Throws:
SAXException
See Also:
readCh(), (char), readBuffer, pushString(java.lang.String, java.lang.String)

pushURL

private void pushURL(boolean isPE,
                     String ename,
                     XmlParser.ExternalIdentifiers ids,
                     Reader aReader,
                     InputStream aStream,
                     String aEncoding,
                     boolean doResolve)
              throws SAXException,
                     IOException
Push, or skip, a new external input source. The source will be some kind of parsed entity, such as a PE (including the external DTD subset) or content for the body.

Parameters:
url - The java.net.URL object for the entity.
Throws:
SAXException
IOException
See Also:
SAXDriver.resolveEntity(boolean, java.lang.String, org.xml.sax.InputSource, java.lang.String), pushString(java.lang.String, java.lang.String), sourceType, pushInput(java.lang.String), detectEncoding(), sourceType, readBuffer

tryEncodingDecl

private String tryEncodingDecl(String encoding)
                        throws SAXException,
                               IOException
Check for an encoding declaration. This is the second part of the XML encoding autodetection algorithm, relying on detectEncoding to get to the point that this part can read any encoding declaration in the document (using only US-ASCII characters).

Because this part starts to fill parser buffers with this data, it's tricky to setup a reader so that Java's built-in decoders can be used for the character encodings that aren't built in to this parser (such as EUC-JP, KOI8-R, Big5, etc).

Returns:
any encoding in the declaration, uppercased; or null
Throws:
SAXException
IOException
See Also:
detectEncoding

warnAboutLackOfEncodingDecl

private void warnAboutLackOfEncodingDecl(String encoding)
                                  throws SAXException
Parameters:
characterEncoding -
Throws:
SAXException

detectEncoding

private void detectEncoding()
                     throws SAXException,
                            IOException
Attempt to detect the encoding of an entity.

The trick here (as suggested in the XML standard) is that any entity not in UTF-8, or in UCS-2 with a byte-order mark, must begin with an XML declaration or an encoding declaration; we simply have to look for "<?xml" in various encodings.

This method has no way to distinguish among 8-bit encodings. Instead, it sets up for UTF-8, then (possibly) revises its assumption later in setupDecoding (). Any ASCII-derived 8-bit encoding should work, but most will be rejected later by setupDecoding ().

Throws:
SAXException
IOException
See Also:
(byte[], byte, byte, byte, byte), (byte[], byte, byte), #setupDecoding

tryEncoding

private static boolean tryEncoding(byte[] sig,
                                   byte b1,
                                   byte b2,
                                   byte b3,
                                   byte b4)
Check for a four-byte signature.

Utility routine for detectEncoding ().

Always looks for some part of "<?xml" in a specific encoding.

Parameters:
sig - The first four bytes read.
b1 - The first byte of the signature
b2 - The second byte of the signature
b3 - The third byte of the signature
b4 - The fourth byte of the signature
See Also:
detectEncoding()

tryEncoding

private static boolean tryEncoding(byte[] sig,
                                   byte b1,
                                   byte b2)
Check for a two-byte signature.

Looks for a UCS-2 byte-order mark.

Utility routine for detectEncoding ().

Parameters:
sig - The first four bytes read.
b1 - The first byte of the signature
b2 - The second byte of the signature
See Also:
detectEncoding()

pushString

private void pushString(String ename,
                        String s)
                 throws SAXException
This method pushes a string back onto input.

It is useful either as the expansion of an internal entity, or for backtracking during the parse.

Call pushCharArray () to do the actual work.

Parameters:
s - The string to push back onto input.
Throws:
SAXException
See Also:
pushCharArray(java.lang.String, char[], int, int)

pushCharArray

private void pushCharArray(String ename,
                           char[] ch,
                           int start,
                           int length)
                    throws SAXException
Push a new internal input source.

This method is useful for expanding an internal entity, or for unreading a string of characters. It creates a new readBuffer containing the characters in the array, instead of characters converted from an input byte stream.

Parameters:
ch - The char array to push.
Throws:
SAXException
See Also:
pushString(java.lang.String, java.lang.String), pushURL(boolean, java.lang.String, fi.iki.hsivonen.gnu.xml.aelfred2.XmlParser.ExternalIdentifiers, java.io.Reader, java.io.InputStream, java.lang.String, boolean), readBuffer, sourceType, pushInput(java.lang.String)

pushInput

private void pushInput(String ename)
                throws SAXException
Save the current input source onto the stack.

This method saves all of the global variables associated with the current input source, so that they can be restored when a new input source has finished. It also tests for entity recursion.

The method saves the following global variables onto a stack using a fixed-length array:

  1. sourceType
  2. externalEntity
  3. readBuffer
  4. readBufferPos
  5. readBufferLength
  6. line
  7. characterEncoding

Parameters:
ename - The name of the entity (if any) causing the new input.
Throws:
SAXException
See Also:
popInput(), sourceType, #externalEntity, readBuffer, readBufferPos, readBufferLength, line, characterEncoding

popInput

private void popInput()
               throws SAXException,
                      IOException
Restore a previous input source.

This method restores all of the global variables associated with the current input source.

Throws:
EOFException - If there are no more entries on the input stack.
SAXException
IOException
See Also:
pushInput(java.lang.String), sourceType, readBuffer, readBufferPos, readBufferLength, line, characterEncoding

tryRead

private boolean tryRead(char delim)
                 throws SAXException,
                        IOException
Return true if we can read the expected character.

Note that the character will be removed from the input stream on success, but will be put back on failure. Do not attempt to read the character again if the method succeeds.

Parameters:
delim - The character that should appear next. For a case-insensitive match, you must supply this in upper-case.
Returns:
true if the character was successfully read, or false if it was not.
Throws:
SAXException
IOException
See Also:
(String)

tryRead

private boolean tryRead(String delim)
                 throws SAXException,
                        IOException
Return true if we can read the expected string.

This is simply a convenience method.

Note that the string will be removed from the input stream on success, but will be put back on failure. Do not attempt to read the string again if the method succeeds.

This method will push back a character rather than an array whenever possible (probably the majority of cases).

Parameters:
delim - The string that should appear next.
Returns:
true if the string was successfully read, or false if it was not.
Throws:
SAXException
IOException
See Also:
(char)

tryRead

private boolean tryRead(char[] ch)
                 throws SAXException,
                        IOException
Throws:
SAXException
IOException

tryWhitespace

private boolean tryWhitespace()
                       throws SAXException,
                              IOException
Return true if we can read some whitespace.

This is simply a convenience method.

This method will push back a character rather than an array whenever possible (probably the majority of cases).

Returns:
true if whitespace was found.
Throws:
SAXException
IOException

parseUntil

private void parseUntil(char[] delim)
                 throws SAXException,
                        IOException
Throws:
SAXException
IOException

prefetchASCIIEncodingDecl

private void prefetchASCIIEncodingDecl()
                                throws SAXException,
                                       IOException
Prefetch US-ASCII XML/text decl from input stream into read buffer. Doesn't buffer more than absolutely needed, so that when an encoding decl says we need to create an InputStreamReader, we can discard our buffer and reset(). Caller knows the first chars of the decl exist in the input stream.

Throws:
SAXException
IOException

readDataChunk

private void readDataChunk()
                    throws SAXException,
                           IOException
Read a chunk of data from an external input source.

This is simply a front-end that fills the rawReadBuffer with bytes, then calls the appropriate encoding handler.

Throws:
SAXException
IOException
See Also:
characterEncoding, rawReadBuffer, readBuffer, filterCR(boolean), #copyUtf8ReadBuffer, #copyIso8859_1ReadBuffer, #copyUcs_2ReadBuffer, #copyUcs_4ReadBuffer

filterCR

private void filterCR(boolean moreData)
Filter carriage returns in the read buffer. CRLF becomes LF; CR becomes LF.

Parameters:
moreData - true iff more data might come from the same source
See Also:
readDataChunk(), readBuffer, readBufferOverflow

warnAboutPrivateUseChar

private void warnAboutPrivateUseChar()
                              throws SAXException
Throws:
SAXException

isPrivateUse

private boolean isPrivateUse(char c)

isPrivateUse

private boolean isPrivateUse(int c)

isAstralPrivateUse

private boolean isAstralPrivateUse(int c)

isNonCharacter

private boolean isNonCharacter(int c)

initializeVariables

private void initializeVariables()
Re-initialize the variables for each parse.