1 package types;
2
3 import java.io.IOException;
4 import java.io.ObjectOutputStream;
5 import java.io.Serializable;
6 import java.util.ArrayList;
7 import gnu.trove.TObjectIntHashMap;
8
9 /***
10 * This class is used to map from some descriptive features (e.g. words in text)
11 * to an index in an array.
12 *
13 * @author kuzman
14 *
15 */
16
17 public class Alphabet implements Serializable {
18
19 private static final long serialVersionUID = 1L;
20 TObjectIntHashMap feat2index;
21 ArrayList<Object> index2feat;
22 boolean canGrow;
23
24 public Alphabet() {
25 feat2index = new TObjectIntHashMap();
26 index2feat = new ArrayList<Object>();
27 canGrow = true;
28 }
29
30 /***
31 * returns the index associated with a feature. This should be the same for
32 * all features a,b where a.equals(b).
33 *
34 * @param feature
35 * @return the index corresponding to the feature
36 */
37 public int lookupObject(Object feature) {
38 if (feat2index.contains(feature))
39 return feat2index.get(feature);
40 else if (canGrow) {
41 feat2index.put(feature, index2feat.size());
42 index2feat.add(feature);
43 return feat2index.get(feature);
44 }
45 return -1;
46 }
47
48 public Object lookupIndex(int ind) {
49 return index2feat.get(ind);
50 }
51
52 /***
53 * at test time, we need to stop the growth of the alphabet so we do not
54 * increase the size of the feature vector in case the user tries to use
55 * features not encountered at training time.
56 */
57 public void stopGrowth() {
58 canGrow = false;
59 }
60
61 public void startGrowth() {
62 canGrow = true;
63 }
64
65 public int size() {
66 return index2feat.size();
67 }
68
69 private void writeObject(ObjectOutputStream out) throws IOException {
70 out.writeLong(serialVersionUID);
71 out.writeBoolean(canGrow);
72 out.writeObject(index2feat);
73 }
74
75 @SuppressWarnings("unchecked")
76 private void readObject(java.io.ObjectInputStream in) throws IOException,
77 ClassNotFoundException {
78 long inid = in.readLong();
79 if (inid != serialVersionUID)
80 throw new IOException("Serial version mismatch: expected "
81 + serialVersionUID + " got " + inid);
82 canGrow = in.readBoolean();
83 index2feat = (ArrayList<Object>) in.readObject();
84 feat2index = new TObjectIntHashMap();
85 for (int i = 0; i < index2feat.size(); i++) {
86 if (feat2index.contains(index2feat.get(i)))
87 throw new IOException("duplicate feature in file. feature: "
88 + index2feat.get(i));
89 feat2index.put(index2feat.get(i), i);
90 }
91 }
92
93 /***
94 * returns the string representation of feature associated with a index.
95 * This should be the same for all features a,b where a.equals(b).
96 *
97 * @param index
98 * of feature
99 * @return the String representation of a feature
100 */
101 public String lookupInt(int key) {
102 String f = index2feat.get(key).toString();
103 if (f == null) {
104 throw new RuntimeException("Not one and the same alphabet");
105 }
106 return f;
107 }
108
109 }