|
||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |
java.lang.Object fi.iki.hsivonen.htmlparser.HtmlParser
public final class HtmlParser
WARNING: This parser is incomplete. It does not yet perform tag inference, and it does not yet perform case folding for attribute values such as method="POST".
Field Summary | |
---|---|
private boolean |
alreadyWarnedAboutPrivateUseCharacters
|
private static char[] |
APOS
|
private char[] |
astralChar
|
private char[] |
attrBuf
|
private int |
attrBufLen
|
private AttributesImpl |
attrs
|
private char[] |
bmpChar
|
private char[] |
buf
|
private int |
bufLen
|
private static int |
CASE_MASK
|
private int |
cdataState
|
private CharacterEncodingDeclarationFilter |
cedf
|
private ContentHandler |
ch
|
private int |
col
|
private int |
cstart
|
private DoctypeHandler |
doctypeHandler
|
private int |
doctypeMode
|
private boolean |
doctypeSeen
|
private DTDHandler |
dtdHandler
|
private EmptyElementFilter |
eef
|
private ErrorHandler |
eh
|
private XhtmlSaxEmitter |
emitter
|
private String |
encoding
|
private EntityResolver |
entityResolver
|
private boolean |
foldedAttributeValue
|
private boolean |
html5
|
private static int |
LEAD_OFFSET
|
private int |
line
|
private static char[] |
LT
|
private boolean |
nonWhiteSpaceAllowed
|
private NormalizationChecker |
normalizationChecker
|
private static char[] |
OCTYPE
|
private static int |
PCDATA
|
private ContentHandlerFilter |
pipelineLast
|
private int |
pos
|
private char |
prev
|
private String |
publicId
|
private Reader |
reader
|
private static int |
SCRIPT
|
private char[] |
strBuf
|
private int |
strBufLen
|
private InputStream |
stream
|
private static int |
STYLE
|
private static int |
SURROGATE_OFFSET
|
private String |
systemId
|
private TagInferenceFilter |
tif
|
private static char[] |
TML
|
private static char[] |
UBLIC
|
private boolean |
wasLt
|
Constructor Summary | |
---|---|
HtmlParser()
|
Method Summary | |
---|---|
private void |
appendAttrBuf(char c)
|
private void |
appendAttrBuf(char[] cs)
|
private void |
appendAttrBufAsciiLowerCase(char c)
|
private void |
appendAttrBufAsciiLowerCase(char[] cs)
|
private void |
appendStrBuf(char c)
|
private void |
appendStrBufAsciiLowerCase(char c)
|
private String |
attrBufToString()
|
private void |
cannotDetermineEncoding()
|
private void |
cdataStateEnd(String gi)
|
private void |
checkPublicAndSystemIds(String publicId,
String systemId)
|
private void |
clearAttrBuf()
|
private void |
clearStrBuf()
|
private char |
consumeAttribute(char c)
|
private boolean |
consumeCaseInsensitiveAsciiLetterString(char[] str)
|
private char[] |
consumeCharRef()
|
private char |
consumeComment()
|
private void |
consumeDoctype()
|
private void |
consumeEndTag()
|
private char[] |
consumeEntityRef(char c)
|
private void |
consumeMarkup()
|
private void |
consumeMarkupDecl()
|
private char[] |
consumeNCR()
|
private void |
consumePI()
|
private void |
consumeQuotedAttributeValue(char delim)
|
private void |
consumeStartTag(char c)
|
private void |
doctypeNotOk()
|
private Reader |
draconianInputStreamReader(String encoding,
InputStream stream,
boolean requireAsciiSuperset)
|
private void |
err(String message)
|
private void |
fatal(String message)
|
private void |
fatalIfAttributeExists(String name)
|
private void |
flushChars()
|
int |
getColumnNumber()
|
ContentHandler |
getContentHandler()
|
DoctypeHandler |
getDoctypeHandler()
Returns the doctypeHandler. |
int |
getDoctypeMode()
Returns the doctypeMode. |
DTDHandler |
getDTDHandler()
|
EntityResolver |
getEntityResolver()
|
ErrorHandler |
getErrorHandler()
|
boolean |
getFeature(String key)
|
int |
getLineNumber()
|
Object |
getProperty(String key)
|
String |
getPublicId()
|
String |
getSystemId()
|
private boolean |
isAstralPrivateUse(int c)
|
private boolean |
isForbidden(char c)
|
private boolean |
isNameChar(char c)
|
private boolean |
isNameStart(char c)
|
private boolean |
isNonCharacter(int c)
|
private boolean |
isPrivateUse(char c)
|
private boolean |
isUnquotedAttributeChar(char c)
|
private boolean |
isWhiteSpace(char c)
|
private void |
maybeBeginCdata(String gi)
|
private char |
next()
|
private char |
nextAfterZeroOrMoreWhiteSpace()
|
private char |
nextMayEnd()
|
private void |
parse()
|
void |
parse(InputSource is)
|
void |
parse(String url)
|
void |
refireStart()
|
private void |
sawHtml5Doctype()
|
void |
setContentHandler(ContentHandler ch)
|
void |
setDoctypeHandler(DoctypeHandler doctypeHandler)
Sets the doctypeHandler. |
void |
setDoctypeMode(int doctypeMode)
Sets the doctypeMode. |
void |
setDTDHandler(DTDHandler handler)
|
(package private) void |
setEncoding(String enc)
|
void |
setEntityResolver(EntityResolver entityResolver)
|
void |
setErrorHandler(ErrorHandler eh)
|
void |
setFeature(String key,
boolean value)
|
(package private) void |
setNonWhiteSpaceAllowed(boolean allow)
|
void |
setProperty(String key,
Object value)
|
private String |
strBufToString()
|
private void |
streamSetup(InputSource is)
|
private String |
unescapedStringUntil(char delim)
|
private void |
warn(String message)
|
private void |
warnAboutPrivateUseChar()
|
Methods inherited from class java.lang.Object |
---|
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
Field Detail |
---|
private static final int CASE_MASK
private static final int LEAD_OFFSET
private static final int SURROGATE_OFFSET
private static final char[] LT
private static final char[] APOS
private static final char[] OCTYPE
private static final char[] TML
private static final char[] UBLIC
private static final int PCDATA
private static final int SCRIPT
private static final int STYLE
private String publicId
private String systemId
private boolean nonWhiteSpaceAllowed
private int cdataState
private ErrorHandler eh
private ContentHandler ch
private DoctypeHandler doctypeHandler
private XhtmlSaxEmitter emitter
private Reader reader
private int pos
private int cstart
private char[] buf
private int bufLen
private int line
private int col
private boolean doctypeSeen
private int doctypeMode
private boolean html5
private char prev
private boolean wasLt
private char[] strBuf
private int strBufLen
private char[] attrBuf
private int attrBufLen
private AttributesImpl attrs
private char[] bmpChar
private char[] astralChar
private DTDHandler dtdHandler
private EmptyElementFilter eef
private TagInferenceFilter tif
private CharacterEncodingDeclarationFilter cedf
private ContentHandlerFilter pipelineLast
private EntityResolver entityResolver
private String encoding
private InputStream stream
private boolean foldedAttributeValue
private boolean alreadyWarnedAboutPrivateUseCharacters
private NormalizationChecker normalizationChecker
Constructor Detail |
---|
public HtmlParser()
Method Detail |
---|
private void clearStrBuf()
private void appendStrBufAsciiLowerCase(char c) throws SAXException, IOException
SAXException
IOException
private void appendStrBuf(char c) throws SAXException, IOException
SAXException
IOException
private String strBufToString()
private void clearAttrBuf()
private void appendAttrBuf(char c) throws SAXException, IOException
SAXException
IOException
private void appendAttrBufAsciiLowerCase(char c) throws SAXException, IOException
SAXException
IOException
private void appendAttrBuf(char[] cs) throws SAXException, IOException
cs
-
SAXException
IOException
private void appendAttrBufAsciiLowerCase(char[] cs) throws SAXException, IOException
cs
-
SAXException
IOException
private String attrBufToString()
private void parse() throws SAXException, IOException
SAXException
IOException
private void doctypeNotOk() throws SAXException, IOException
SAXException
IOException
private boolean isWhiteSpace(char c)
c
-
private char[] consumeCharRef() throws SAXException, IOException
SAXException
IOException
private char[] consumeEntityRef(char c) throws SAXException, IOException
c
-
SAXException
IOException
private char[] consumeNCR() throws SAXException, IOException
SAXException
IOException
private void consumeMarkup() throws SAXException, IOException
SAXException
IOException
private char next() throws SAXException, IOException
SAXException
IOException
private void consumeStartTag(char c) throws SAXException, IOException
c
-
SAXException
IOException
private void maybeBeginCdata(String gi)
gi
-
private char consumeAttribute(char c) throws SAXException, IOException
c
-
SAXException
IOException
private boolean isUnquotedAttributeChar(char c)
c
-
private void consumeQuotedAttributeValue(char delim) throws SAXException, IOException
delim
-
SAXException
IOException
private void fatalIfAttributeExists(String name) throws SAXException, IOException
name
-
SAXException
IOException
private boolean isNameStart(char c)
c
-
private boolean isNameChar(char c)
c
-
private void consumeEndTag() throws SAXException, IOException
SAXException
IOException
private void cdataStateEnd(String gi) throws SAXException, IOException
gi
-
SAXException
IOException
private char nextAfterZeroOrMoreWhiteSpace() throws SAXException, IOException
SAXException
IOException
private void consumePI() throws SAXException, IOException
SAXException
IOException
private void consumeMarkupDecl() throws SAXException, IOException
SAXException
IOException
private void consumeDoctype() throws SAXException, IOException
SAXException
IOException
private void sawHtml5Doctype() throws SAXException
SAXException
private void checkPublicAndSystemIds(String publicId, String systemId) throws SAXException, IOException
publicId
-
systemId
-
SAXException
IOException
private String unescapedStringUntil(char delim) throws SAXException, IOException
delim
-
SAXException
IOException
private boolean consumeCaseInsensitiveAsciiLetterString(char[] str) throws SAXException, IOException
SAXException
IOException
private char consumeComment() throws SAXException, IOException
SAXException
IOException
private char nextMayEnd() throws SAXException, IOException
SAXException
IOException
private void warnAboutPrivateUseChar() throws SAXException
SAXException
private boolean isPrivateUse(char c)
private boolean isAstralPrivateUse(int c)
private boolean isNonCharacter(int c)
c
-
private boolean isForbidden(char c)
c
-
private void flushChars() throws SAXException, IOException
SAXException
IOException
private void fatal(String message) throws SAXException
SAXException
SAXParseException
private void err(String message) throws SAXException
message
-
SAXException
private void warn(String message) throws SAXException
message
-
SAXException
public String getPublicId()
getPublicId
in interface Locator
Locator.getPublicId()
public String getSystemId()
getSystemId
in interface Locator
Locator.getSystemId()
public int getLineNumber()
getLineNumber
in interface Locator
Locator.getLineNumber()
public int getColumnNumber()
getColumnNumber
in interface Locator
Locator.getColumnNumber()
public boolean getFeature(String key) throws SAXNotRecognizedException, SAXNotSupportedException
getFeature
in interface XMLReader
SAXNotRecognizedException
SAXNotSupportedException
XMLReader.getFeature(java.lang.String)
public void setFeature(String key, boolean value) throws SAXNotRecognizedException, SAXNotSupportedException
setFeature
in interface XMLReader
SAXNotRecognizedException
SAXNotSupportedException
XMLReader.setFeature(java.lang.String, boolean)
public Object getProperty(String key) throws SAXNotRecognizedException, SAXNotSupportedException
getProperty
in interface XMLReader
SAXNotRecognizedException
SAXNotSupportedException
XMLReader.getProperty(java.lang.String)
public void setProperty(String key, Object value) throws SAXNotRecognizedException, SAXNotSupportedException
setProperty
in interface XMLReader
SAXNotRecognizedException
SAXNotSupportedException
XMLReader.setProperty(java.lang.String,
java.lang.Object)
public void setEntityResolver(EntityResolver entityResolver)
setEntityResolver
in interface XMLReader
XMLReader.setEntityResolver(org.xml.sax.EntityResolver)
public EntityResolver getEntityResolver()
getEntityResolver
in interface XMLReader
XMLReader.getEntityResolver()
public void setDTDHandler(DTDHandler handler)
setDTDHandler
in interface XMLReader
XMLReader.setDTDHandler(org.xml.sax.DTDHandler)
public DTDHandler getDTDHandler()
getDTDHandler
in interface XMLReader
XMLReader.getDTDHandler()
public void setContentHandler(ContentHandler ch)
setContentHandler
in interface XMLReader
XMLReader.setContentHandler(org.xml.sax.ContentHandler)
public ContentHandler getContentHandler()
getContentHandler
in interface XMLReader
XMLReader.getContentHandler()
public void setErrorHandler(ErrorHandler eh)
setErrorHandler
in interface XMLReader
XMLReader.setErrorHandler(org.xml.sax.ErrorHandler)
public ErrorHandler getErrorHandler()
getErrorHandler
in interface XMLReader
XMLReader.getErrorHandler()
public void parse(InputSource is) throws IOException, SAXException
parse
in interface XMLReader
IOException
SAXException
XMLReader.parse(org.xml.sax.InputSource)
private void streamSetup(InputSource is) throws SAXException, IOException
is
-
swallowBom
-
IOException
SAXException
private void cannotDetermineEncoding() throws IOException
IOException
private Reader draconianInputStreamReader(String encoding, InputStream stream, boolean requireAsciiSuperset) throws SAXException
SAXException
public void parse(String url) throws IOException, SAXException
parse
in interface XMLReader
IOException
SAXException
XMLReader.parse(java.lang.String)
void setEncoding(String enc) throws SAXException
enc
-
SAXException
void setNonWhiteSpaceAllowed(boolean allow)
public int getDoctypeMode()
public void setDoctypeMode(int doctypeMode)
doctypeMode
- the doctypeMode to set
public DoctypeHandler getDoctypeHandler()
public void setDoctypeHandler(DoctypeHandler doctypeHandler)
doctypeHandler
- the doctypeHandler to set
public void refireStart() throws SAXException
SAXException
|
||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |