001    package fi.iki.hsivonen.io;
002    
003    import java.io.ByteArrayInputStream;
004    import java.io.IOException;
005    import java.io.InputStreamReader;
006    import java.io.Reader;
007    import java.nio.charset.Charset;
008    import java.nio.charset.CharsetDecoder;
009    import java.nio.charset.CodingErrorAction;
010    import java.util.Arrays;
011    import java.util.Iterator;
012    import java.util.Map;
013    import java.util.SortedMap;
014    import java.util.SortedSet;
015    import java.util.TreeSet;
016    
/**
 * Classifies character encodings by two independent criteria:
 * <ul>
 * <li>whether the encoding, as implemented by a {@link Charset} available in
 * the running JVM, decodes a fixed probe of ASCII bytes to the identical
 * Basic Latin code points (see {@link #isAsciiSuperset(String)} and
 * {@link #isNotAsciiSuperset(String)}); and</li>
 * <li>whether the encoding name appears on a fixed list of names regarded as
 * not obscure (see {@link #isObscure(String)}).</li>
 * </ul>
 * All lookups are exact, case-sensitive matches against canonical charset
 * names; aliases are not resolved.
 */
public class EncodingInfo {

    /**
     * Names of the encodings that are regarded as not obscure. The array is
     * searched with {@link Arrays#binarySearch(Object[], Object)} and must
     * therefore be in natural {@link String} order; the static initializer
     * sorts it defensively so that a future out-of-order addition cannot
     * silently break {@link #isObscure(String)}.
     */
    private static final String[] NOT_OBSCURE = {"Big5",
        "Big5-HKSCS",
        "EUC-JP",
        "EUC-KR",
        "GB18030",
        "GBK",
        "ISO-2022-JP",
        "ISO-2022-KR",
        "ISO-8859-1",
        "ISO-8859-13",
        "ISO-8859-15",
        "ISO-8859-2",
        "ISO-8859-3",
        "ISO-8859-4",
        "ISO-8859-5",
        "ISO-8859-6",
        "ISO-8859-7",
        "ISO-8859-8",
        "ISO-8859-9",
        "KOI8-R",
        "Shift_JIS",
        "TIS-620",
        "US-ASCII",
        "UTF-16",
        "UTF-16BE",
        "UTF-16LE",
        "UTF-8",
        "windows-1250",
        "windows-1251",
        "windows-1252",
        "windows-1253",
        "windows-1254",
        "windows-1255",
        "windows-1256",
        "windows-1257",
        "windows-1258"};

    /**
     * Sorted canonical names of the available charsets that passed the ASCII
     * probe.
     */
    private static final String[] asciiSuperset;

    /**
     * Sorted canonical names of the available charsets that failed the ASCII
     * probe.
     */
    private static final String[] notAsciiSuperset;

    static {
        // binarySearch gives undefined results on unsorted input.
        Arrays.sort(NOT_OBSCURE);

        // Probe bytes: 0x20..0x7F followed by LF, CR and TAB.
        byte[] testBuf = new byte[0x63];
        for (int i = 0; i < 0x60; i++) {
            testBuf[i] = (byte) (i + 0x20);
        }
        testBuf[0x60] = (byte) '\n';
        testBuf[0x61] = (byte) '\r';
        testBuf[0x62] = (byte) '\t';

        // TreeSet keeps the names in natural order, which is what the
        // binary searches in the public methods require.
        SortedSet<String> asciiSupersetSet = new TreeSet<String>();
        SortedSet<String> notAsciiSupersetSet = new TreeSet<String>();

        for (Charset cs : Charset.availableCharsets().values()) {
            String name = cs.name().intern();
            if (asciiMapsToBasicLatin(testBuf, cs)) {
                asciiSupersetSet.add(name);
            } else {
                notAsciiSupersetSet.add(name);
            }
        }

        asciiSuperset = asciiSupersetSet.toArray(new String[0]);
        notAsciiSuperset = notAsciiSupersetSet.toArray(new String[0]);
    }

    /**
     * Tells whether the named encoding is available in this JVM and decoded
     * the ASCII probe bytes to the same code points.
     *
     * @param preferredIanaName the encoding name to look up; compared
     *        exactly (case-sensitively) against canonical charset names
     * @return <code>true</code> if the name belongs to an available charset
     *         that passed the probe, <code>false</code> otherwise (including
     *         for names of charsets that are not available)
     * @throws NullPointerException if <code>preferredIanaName</code> is
     *         <code>null</code> and at least one charset passed the probe
     */
    public static boolean isAsciiSuperset(String preferredIanaName) {
        return Arrays.binarySearch(asciiSuperset, preferredIanaName) >= 0;
    }

    /**
     * Tells whether the named encoding is available in this JVM and did
     * <em>not</em> decode the ASCII probe bytes to the same code points.
     * Note that this is not the negation of
     * {@link #isAsciiSuperset(String)}: both methods return
     * <code>false</code> for a name that matches no available charset.
     *
     * @param preferredIanaName the encoding name to look up; compared
     *        exactly (case-sensitively) against canonical charset names
     * @return <code>true</code> if the name belongs to an available charset
     *         that failed the probe, <code>false</code> otherwise
     * @throws NullPointerException if <code>preferredIanaName</code> is
     *         <code>null</code> and at least one charset failed the probe
     */
    public static boolean isNotAsciiSuperset(String preferredIanaName) {
        return Arrays.binarySearch(notAsciiSuperset, preferredIanaName) >= 0;
    }

    /**
     * Tells whether the named encoding is absent from the fixed list of
     * encodings regarded as not obscure. The answer does not depend on which
     * charsets the JVM provides.
     *
     * @param preferredIanaName the encoding name to look up; compared
     *        exactly (case-sensitively) against the list
     * @return <code>true</code> if the name is not on the list,
     *         <code>false</code> if it is
     * @throws NullPointerException if <code>preferredIanaName</code> is
     *         <code>null</code>
     */
    public static boolean isObscure(String preferredIanaName) {
        return Arrays.binarySearch(NOT_OBSCURE, preferredIanaName) < 0;
    }

    /**
     * Decodes <code>testBuf</code> with a strict decoder for <code>cs</code>
     * and checks that every byte comes back as the code unit with the same
     * numeric value.
     *
     * @param testBuf the probe bytes; expected to hold ASCII values only
     * @param cs the charset to probe
     * @return <code>true</code> if every byte decoded to the code unit of
     *         equal value; <code>false</code> on the first mismatch, on
     *         premature end of input, or if decoding failed in any way
     */
    private static boolean asciiMapsToBasicLatin(byte[] testBuf, Charset cs) {
        Reader r = null;
        try {
            // Decoder creation is inside the guarded region so that a single
            // misbehaving charset implementation cannot abort class
            // initialization.
            CharsetDecoder dec = cs.newDecoder();
            // REPORT makes bad input surface as an exception instead of
            // being silently replaced.
            dec.onMalformedInput(CodingErrorAction.REPORT);
            dec.onUnmappableCharacter(CodingErrorAction.REPORT);
            r = new InputStreamReader(new ByteArrayInputStream(testBuf), dec);
            for (int i = 0; i < testBuf.length; i++) {
                // read() yields -1 at end of stream, which never matches.
                if (r.read() != (testBuf[i] & 0xFF)) {
                    return false;
                }
            }
            return true;
        } catch (IOException e) {
            // Includes CharacterCodingException raised by the REPORT action.
            return false;
        } catch (RuntimeException e) {
            // A decoder that throws on this input is treated as a failed
            // probe rather than as a fatal error.
            return false;
        } finally {
            if (r != null) {
                try {
                    r.close();
                } catch (IOException ignored) {
                    // In-memory stream; nothing useful to do on failure.
                }
            }
        }
    }

    /**
     * Prints the canonical names of the charsets that passed the ASCII
     * probe, followed by those that failed it, to standard output.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        System.out.println("ASCII maps to Basic Latin:");
        for (String name : asciiSuperset) {
            System.out.println(name);
        }
        System.out.println();
        System.out.println("ASCII does not map to Basic Latin:");
        for (String name : notAsciiSuperset) {
            System.out.println(name);
        }
    }
}