package types;

import java.io.IOException;
import java.io.ObjectOutputStream;
import java.io.Serializable;
import java.util.ArrayList;
import gnu.trove.TObjectIntHashMap;

/**
 * Bidirectional mapping between descriptive features (e.g. words in text) and
 * dense integer indices suitable for addressing an array.
 *
 * <p>Indices are assigned in insertion order starting at 0. While the alphabet
 * is allowed to grow, looking up an unseen feature registers it; once growth is
 * stopped, unseen features map to {@code -1}.
 *
 * @author kuzman
 */
public class Alphabet implements Serializable {

    private static final long serialVersionUID = 1L;

    /** Feature -> index. Not written to the stream; rebuilt in {@code readObject}. */
    TObjectIntHashMap feat2index;

    /** Index -> feature; position {@code i} holds the feature with index {@code i}. */
    ArrayList<Object> index2feat;

    /** Whether looking up an unseen feature adds it to the alphabet. */
    boolean canGrow;

    /** Creates an empty alphabet that is allowed to grow. */
    public Alphabet() {
        feat2index = new TObjectIntHashMap();
        index2feat = new ArrayList<Object>();
        canGrow = true;
    }

    /**
     * Returns the index associated with a feature. This is the same for all
     * features a, b where a.equals(b).
     *
     * <p>If the feature is unknown and the alphabet may grow, the feature is
     * registered under the next free index and that index is returned.
     *
     * @param feature the feature to look up
     * @return the index corresponding to the feature, or {@code -1} if the
     *         feature is unknown and growth has been stopped
     */
    public int lookupObject(Object feature) {
        if (feat2index.contains(feature)) {
            return feat2index.get(feature);
        }
        if (!canGrow) {
            return -1;
        }
        // The next free index is the current size; return it directly instead
        // of reading it back out of the map.
        int index = index2feat.size();
        feat2index.put(feature, index);
        index2feat.add(feature);
        return index;
    }

    /**
     * Returns the feature stored at the given index.
     *
     * @param ind index of the feature
     * @return the feature object registered under {@code ind}
     * @throws IndexOutOfBoundsException if {@code ind} is not in
     *         {@code [0, size())}
     */
    public Object lookupIndex(int ind) {
        return index2feat.get(ind);
    }

    /**
     * At test time, we need to stop the growth of the alphabet so we do not
     * increase the size of the feature vector in case the user tries to use
     * features not encountered at training time.
     */
    public void stopGrowth() {
        canGrow = false;
    }

    /** Re-enables growth, so unseen features are registered on lookup again. */
    public void startGrowth() {
        canGrow = true;
    }

    /**
     * @return the number of features currently registered
     */
    public int size() {
        return index2feat.size();
    }

    /**
     * Custom serialized form: the serial version id, the growth flag, and the
     * index-ordered feature list. The feature-to-index map is not written.
     */
    private void writeObject(ObjectOutputStream out) throws IOException {
        out.writeLong(serialVersionUID);
        out.writeBoolean(canGrow);
        out.writeObject(index2feat);
    }

    /**
     * Reads the form produced by {@code writeObject} and rebuilds the
     * feature-to-index map from the feature list.
     *
     * @throws IOException if the version id does not match, the feature list
     *         is missing, or the list contains a duplicate feature
     */
    @SuppressWarnings("unchecked")
    private void readObject(java.io.ObjectInputStream in) throws IOException,
            ClassNotFoundException {
        long inid = in.readLong();
        if (inid != serialVersionUID) {
            throw new IOException("Serial version mismatch: expected "
                    + serialVersionUID + " got " + inid);
        }
        canGrow = in.readBoolean();
        index2feat = (ArrayList<Object>) in.readObject();
        if (index2feat == null) {
            // Fail with a descriptive I/O error rather than an NPE in the loop below.
            throw new IOException("missing feature list in file");
        }
        feat2index = new TObjectIntHashMap();
        for (int i = 0; i < index2feat.size(); i++) {
            Object feature = index2feat.get(i);
            // Two equal features would make the index assignment ambiguous.
            if (feat2index.contains(feature)) {
                throw new IOException("duplicate feature in file. feature: "
                        + feature);
            }
            feat2index.put(feature, i);
        }
    }

    /**
     * Returns the string representation of the feature associated with an
     * index. This is the same for all features a, b where a.equals(b),
     * provided their {@code toString()} agrees.
     *
     * @param key index of the feature
     * @return the String representation of the feature
     * @throws IndexOutOfBoundsException if {@code key} is not in
     *         {@code [0, size())}
     * @throws RuntimeException if no usable feature is stored at {@code key}
     */
    public String lookupInt(int key) {
        Object feature = index2feat.get(key);
        // Check the feature itself before calling toString(): previously a
        // null entry raised a bare NullPointerException and the guard below
        // could never report it.
        if (feature == null) {
            throw new RuntimeException("Not one and the same alphabet");
        }
        String f = feature.toString();
        if (f == null) {
            throw new RuntimeException("Not one and the same alphabet");
        }
        return f;
    }

}