001 package org.whattf.datatype.data; 002 003 import java.io.BufferedReader; 004 import java.io.IOException; 005 import java.util.Arrays; 006 import java.util.Iterator; 007 import java.util.SortedSet; 008 import java.util.TreeSet; 009 010 public class LanguageData { 011 012 private static final String PREFIX = "prefix: "; 013 014 private static final String SUPPRESS_SCRIPT = "suppress-script: "; 015 016 private static final String SUBTAG = "subtag: "; 017 018 private static final String TYPE = "type: "; 019 020 private BufferedReader in; 021 022 private SortedSet languageSet = new TreeSet(); 023 024 private SortedSet scriptSet = new TreeSet(); 025 026 private SortedSet regionSet = new TreeSet(); 027 028 private SortedSet variantSet = new TreeSet(); 029 030 private String[] languages = null; 031 032 private String[] scripts = null; 033 034 private String[] regions = null; 035 036 private String[] variants = null; 037 038 private int[] suppressedScriptByLanguage = null; 039 040 private String[][] prefixesByVariant = null; 041 042 public LanguageData() throws IOException { 043 super(); 044 consumeRegistry(); 045 prepareArrays(); 046 } 047 048 private void consumeRegistry() throws IOException { 049 while(consumeRecord()); 050 } 051 052 private void prepareArrays() throws IOException { 053 int i = 0; 054 scripts = new String[scriptSet.size()]; 055 for (Iterator iter = scriptSet.iterator(); iter.hasNext();) { 056 String str = (String) iter.next(); 057 scripts[i] = str.intern(); 058 i++; 059 } 060 061 i = 0; 062 languages = new String[languageSet.size()]; 063 suppressedScriptByLanguage = new int[languageSet.size()]; 064 for (Iterator iter = languageSet.iterator(); iter.hasNext();) { 065 StringPair pair = (StringPair) iter.next(); 066 languages[i] = pair.getMain().intern(); 067 String suppressed = (String)pair.getOther(); 068 if (suppressed == null) { 069 suppressedScriptByLanguage[i] = -1; 070 } else { 071 int index = Arrays.binarySearch(scripts, suppressed); 072 if (index < 0) { 073 throw new IOException("Malformed registry: reference to non-existent script."); 074 } 075 suppressedScriptByLanguage[i] = index; 076 } 077 i++; 078 } 079 080 i = 0; 081 regions = new String[regionSet.size()]; 082 for (Iterator iter = regionSet.iterator(); iter.hasNext();) { 083 String str = (String) iter.next(); 084 regions[i] = str.intern(); 085 i++; 086 } 087 088 i = 0; 089 variants = new String[variantSet.size()]; 090 prefixesByVariant = new String[variantSet.size()][]; 091 for (Iterator iter = variantSet.iterator(); iter.hasNext();) { 092 StringPair pair = (StringPair) iter.next(); 093 variants[i] = pair.getMain().intern(); 094 SortedSet other = (SortedSet) pair.getOther(); 095 String[] prefixArr = new String[other.size()]; 096 int j = 0; 097 for (Iterator iterator = other.iterator(); iterator.hasNext();) { 098 String str = (String) iterator.next(); 099 prefixArr[j] = str.intern(); 100 j++; 101 } 102 prefixesByVariant[i] = prefixArr; 103 i++; 104 } 105 } 106 107 private boolean consumeRecord() throws IOException { 108 boolean hasMore = true; 109 String type = null; 110 String subtag = null; 111 String suppressScript = null; 112 SortedSet prefixes = new TreeSet(); 113 String line = null; 114 for (;;) { 115 line = in.readLine(); 116 if (line == null) { 117 hasMore = false; 118 break; 119 } 120 line = line.toLowerCase(); 121 if ("%%".equals(line)) { 122 break; 123 } else if (line.startsWith(TYPE)) { 124 type = line.substring(TYPE.length()).trim(); 125 } else if (line.startsWith(SUBTAG)) { 126 subtag = line.substring(SUBTAG.length()).trim(); 127 } else if (line.startsWith(SUPPRESS_SCRIPT)) { 128 suppressScript = line.substring(SUPPRESS_SCRIPT.length()).trim(); 129 } else if (line.startsWith(PREFIX)) { 130 prefixes.add(line.substring(PREFIX.length()).trim()); 131 } 132 } 133 if (subtag == null) { 134 return hasMore; 135 } 136 if ("language".equals(type)) { 137 languageSet.add(new StringPair(subtag, suppressScript)); 138 } else if ("region".equals(type)) { 139 regionSet.add(subtag); 140 } else if ("script".equals(type)) { 141 scriptSet.add(subtag); 142 } else if ("variant".equals(type)) { 143 variantSet.add(new StringPair(subtag, prefixes)); 144 } 145 return hasMore; 146 } 147 148 private class StringPair implements Comparable{ 149 150 private String main; 151 152 private Object other; 153 154 /** 155 * Returns the main. 156 * 157 * @return the main 158 */ 159 public String getMain() { 160 return main; 161 } 162 163 /** 164 * Returns the other. 165 * 166 * @return the other 167 */ 168 public Object getOther() { 169 return other; 170 } 171 172 /** 173 * @param main 174 * @param other 175 */ 176 public StringPair(String main, Object other) { 177 this.main = main; 178 this.other = other; 179 } 180 181 /** 182 * @see java.lang.Object#equals(java.lang.Object) 183 */ 184 public boolean equals(Object arg0) { 185 return main.equals(((StringPair)arg0).main); 186 } 187 188 /** 189 * @see java.lang.Object#hashCode() 190 */ 191 public int hashCode() { 192 return main.hashCode(); 193 } 194 195 /** 196 * @see java.lang.Comparable#compareTo(java.lang.Object) 197 */ 198 public int compareTo(Object arg0) { 199 return main.compareTo(((StringPair)arg0).main); 200 } 201 } 202 203 /** 204 * Returns the languages. 205 * 206 * @return the languages 207 */ 208 public String[] getLanguages() { 209 return languages; 210 } 211 212 /** 213 * Returns the prefixesByVariant. 214 * 215 * @return the prefixesByVariant 216 */ 217 public String[][] getPrefixesByVariant() { 218 return prefixesByVariant; 219 } 220 221 /** 222 * Returns the regions. 223 * 224 * @return the regions 225 */ 226 public String[] getRegions() { 227 return regions; 228 } 229 230 /** 231 * Returns the scripts. 232 * 233 * @return the scripts 234 */ 235 public String[] getScripts() { 236 return scripts; 237 } 238 239 /** 240 * Returns the suppressedScriptByLanguage. 241 * 242 * @return the suppressedScriptByLanguage 243 */ 244 public int[] getSuppressedScriptByLanguage() { 245 return suppressedScriptByLanguage; 246 } 247 248 /** 249 * Returns the variants. 250 * 251 * @return the variants 252 */ 253 public String[] getVariants() { 254 return variants; 255 } 256 }