|
||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |
java.lang.Object fi.iki.hsivonen.util.MetadataExtractor
public class MetadataExtractor
Field Summary | |
---|---|
private static char |
ANY
|
private static Pattern |
charsetPat
|
private String |
contentType
|
private static boolean |
DEBUG
Whether debug information is printed. |
private static char |
DIGIT
|
private Document |
doc
|
private String |
encoding
|
private static String[] |
extensions
|
private static String[] |
extensionTypes
|
private File |
file
|
private String |
fileName
|
private boolean |
guessedContentType
|
private boolean |
hasCSSCharset
|
private static char[][] |
magicNumbers
|
private static String[] |
magicTypes
|
private static int |
MAX_MAGIC_LENGTH
|
private static String[] |
mimeTypeNames
|
private static String[] |
mimeTypes
|
private String |
title
|
Constructor Summary | |
---|---|
MetadataExtractor(File f)
|
|
MetadataExtractor(File f,
String fileName)
Creates a new instance of MetadataExtractor. |
Method Summary | |
---|---|
private void |
checkEncoding()
|
private boolean |
couldBeUTF8()
|
private void |
determineTypeFromExtension()
|
private void |
determineTypeFromMagic()
|
private void |
determineTypeFromXML()
|
private void |
determineTypeFromZip()
|
private void |
extractMetaFromHTML()
|
private void |
extractMetaFromPDF()
|
String |
getContentType()
Getter for property contentType. |
String |
getContentTypeWithParams()
|
Document |
getDOM()
|
String |
getEncoding()
Getter for property encoding. |
boolean |
getGuessedContentType()
Getter for property guessedContentType. |
static String[] |
getMimeTypeNames()
|
static String[] |
getMimeTypes()
|
String |
getTitle()
|
private void |
guessEncoding()
|
static void |
main(String[] args)
|
private String |
normalizeWhiteSpace(String text)
|
private void |
setTitle(String title)
|
Methods inherited from class java.lang.Object |
---|
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
Field Detail |
---|
private static boolean DEBUG
private static Pattern charsetPat
private File file
private String contentType
private String title
private Document doc
private String encoding
private String fileName
private boolean hasCSSCharset
private boolean guessedContentType
private static final char ANY
private static final char DIGIT
private static final int MAX_MAGIC_LENGTH
private static char[][] magicNumbers
private static String[] magicTypes
private static String[] extensions
private static String[] extensionTypes
private static String[] mimeTypes
private static String[] mimeTypeNames
Constructor Detail |
---|
public MetadataExtractor(File f) throws IOException
Throws: IOException
public MetadataExtractor(File f, String fileName) throws IOException
Throws: IOException
Method Detail |
---|
public static void main(String[] args) throws Exception
Throws: Exception
private void checkEncoding() throws IOException
Throws: IOException
private void determineTypeFromMagic() throws IOException
Throws: IOException
private void guessEncoding() throws IOException
Throws: IOException
private boolean couldBeUTF8() throws IOException
Throws: IOException
private void determineTypeFromXML() throws IOException
Throws: IOException
private void determineTypeFromZip() throws IOException
Throws: IOException
private void determineTypeFromExtension()
private void extractMetaFromHTML() throws IOException
Throws: IOException
private void extractMetaFromPDF() throws IOException
Throws: IOException
private String normalizeWhiteSpace(String text)
private void setTitle(String title)
public String getContentType()
public String getEncoding()
public String getContentTypeWithParams()
public boolean getGuessedContentType()
public String getTitle()
public Document getDOM()
public static String[] getMimeTypes()
public static String[] getMimeTypeNames()
|
||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |