View Javadoc

1   package types;
2   
3   import java.io.IOException;
4   import java.io.ObjectOutputStream;
5   import java.io.Serializable;
6   import java.util.ArrayList;
7   import gnu.trove.TObjectIntHashMap;
8   
9   /***
10   * This class is used to map from some descriptive features (e.g. words in text)
11   * to an index in an array.
12   * 
13   * @author kuzman
14   * 
15   */
16  
17  public class Alphabet implements Serializable {
18  
19  	private static final long serialVersionUID = 1L;
20  	TObjectIntHashMap feat2index;
21  	ArrayList<Object> index2feat;
22  	boolean canGrow;
23  
24  	public Alphabet() {
25  		feat2index = new TObjectIntHashMap();
26  		index2feat = new ArrayList<Object>();
27  		canGrow = true;
28  	}
29  
30  	/***
31  	 * returns the index associated with a feature. This should be the same for
32  	 * all features a,b where a.equals(b).
33  	 * 
34  	 * @param feature
35  	 * @return the index corresponding to the feature
36  	 */
37  	public int lookupObject(Object feature) {
38  		if (feat2index.contains(feature))
39  			return feat2index.get(feature);
40  		else if (canGrow) {
41  			feat2index.put(feature, index2feat.size());
42  			index2feat.add(feature);
43  			return feat2index.get(feature);
44  		}
45  		return -1;
46  	}
47  
48  	public Object lookupIndex(int ind) {
49  		return index2feat.get(ind);
50  	}
51  
52  	/***
53  	 * at test time, we need to stop the growth of the alphabet so we do not
54  	 * increase the size of the feature vector in case the user tries to use
55  	 * features not encountered at training time.
56  	 */
57  	public void stopGrowth() {
58  		canGrow = false;
59  	}
60  
61  	public void startGrowth() {
62  		canGrow = true;
63  	}
64  
65  	public int size() {
66  		return index2feat.size();
67  	}
68  
69  	private void writeObject(ObjectOutputStream out) throws IOException {
70  		out.writeLong(serialVersionUID);
71  		out.writeBoolean(canGrow);
72  		out.writeObject(index2feat);
73  	}
74  
75  	@SuppressWarnings("unchecked")
76  	private void readObject(java.io.ObjectInputStream in) throws IOException,
77  			ClassNotFoundException {
78  		long inid = in.readLong();
79  		if (inid != serialVersionUID)
80  			throw new IOException("Serial version mismatch: expected "
81  					+ serialVersionUID + " got " + inid);
82  		canGrow = in.readBoolean();
83  		index2feat = (ArrayList<Object>) in.readObject();
84  		feat2index = new TObjectIntHashMap();
85  		for (int i = 0; i < index2feat.size(); i++) {
86  			if (feat2index.contains(index2feat.get(i)))
87  				throw new IOException("duplicate feature in file. feature: "
88  						+ index2feat.get(i));
89  			feat2index.put(index2feat.get(i), i);
90  		}
91  	}
92  
93  	/***
94  	 * returns the string representation of feature associated with a index.
95  	 * This should be the same for all features a,b where a.equals(b).
96  	 * 
97  	 * @param index
98  	 *            of feature
99  	 * @return the String representation of a feature
100 	 */
101 	public String lookupInt(int key) {
102 		String f = index2feat.get(key).toString();
103 		if (f == null) {
104 			throw new RuntimeException("Not one and the same alphabet");
105 		}
106 		return f;
107 	}
108 
109 }