source: sample/hadoop-0.16/tw/org/nchc/tuple/Schema.java

Last change on this file was 21, checked in by waue, 16 years ago

hadoop 0.16

File size: 14.7 KB
Line 
/*
 * Cloud9: A MapReduce Library for Hadoop
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you
 * may not use this file except in compliance with the License. You may
 * obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 * implied. See the License for the specific language governing
 * permissions and limitations under the License.
 */
16
17package tw.org.nchc.tuple;
18
19import java.util.HashMap;
20import java.util.Map;
21
22import org.apache.hadoop.io.Writable;
23
24/**
25 * <p>
26 * Description of a Tuple's structure. The Schema class keeps track of column
27 * names, data types, and default values. The following code fragment
28 * illustrates the use of this class:
29 * </p>
30 *
31 * <pre>
32 * public static final Schema MYSCHEMA = new Schema();
33 * static {
34 *  MYSCHEMA.addField(&quot;token&quot;, String.class, &quot;&quot;);
35 *  MYSCHEMA.addField(&quot;int&quot;, Integer.class, new Integer(1));
36 * }
37 * </pre>
38 *
39 * <p>
40 * The following field types are allowed:
41 * </p>
42 *
43 * <ul>
44 * <li>Basic Java primitives: Boolean, Integer, Long, Float, Double, String</li>
45 * <li>Classes that implement Writable</li>
46 * </ul>
47 *
48 * <p>
49 * Schema instances can be locked to prevent further changes. Any attempt to
50 * alter a locked Schema will result in a runtime exception being thrown. If a
51 * Schema is not locked, callers are free to add new fields and edit default
52 * values.
53 * </p>
54 *
55 * <p>
56 * New Tuple instances can be created directly from Schema objects through the
57 * use of the {@link #instantiate()} method. A call to that method implicitly
58 * locks the Schema.
59 * </p>
60 *
61 * <p>
62 * <b>Acknowledgments:</b> much of this code was adapted from the <a
63 * href="http://prefuse.org/">Prefuse Visualization Toolkit</a>.
64 * </p>
65 *
66 */
67public class Schema implements Cloneable {
68
69  private String[] mFieldNames;
70  private Class<?>[] mFieldTypes;
71  private Object[] mDefaultValues;
72  private Map<String, Integer> mFieldLookup;
73  private int mFieldCount;
74  private boolean mLocked;
75
76  // ------------------------------------------------------------------------
77  // Constructors
78
79  /**
80   * Creates a new empty Schema.
81   */
82  public Schema() {
83    this(10);
84  }
85
86  /**
87   * Creates a new empty Schema with a starting capacity for a given number of
88   * fields.
89   *
90   * @param n
91   *            the number of columns in this schema
92   */
93  public Schema(int n) {
94    mFieldNames = new String[n];
95    mFieldTypes = new Class<?>[n];
96    mDefaultValues = new Object[n];
97    mFieldCount = 0;
98    mLocked = false;
99  }
100
101  /**
102   * Create a new Schema consisting of the given field names and types.
103   *
104   * @param names
105   *            the field names
106   * @param types
107   *            the field types (as Class instances)
108   */
109  public Schema(String[] names, Class<?>[] types) {
110    this(names.length);
111
112    // check the schema validity
113    if (names.length != types.length) {
114      throw new IllegalArgumentException(
115          "Input arrays should be the same length");
116    }
117    for (int i = 0; i < names.length; ++i) {
118      addField(names[i], types[i], null);
119    }
120  }
121
122  /**
123   * Create a new Schema consisting of the given field names, types, and
124   * default field values.
125   *
126   * @param names
127   *            the field names
128   * @param types
129   *            the field types (as Class instances)
130   * @param defaults
131   *            the default values for each field
132   */
133  public Schema(String[] names, Class<?>[] types, Object[] defaults) {
134    this(names.length);
135
136    // check the schema validity
137    if (names.length != types.length || types.length != defaults.length) {
138      throw new IllegalArgumentException(
139          "Input arrays should be the same length");
140    }
141    for (int i = 0; i < names.length; ++i) {
142      addField(names[i], types[i], defaults[i]);
143    }
144  }
145
146  /**
147   * Creates a copy of this Schema. Cloned copies of a locked Schema will not
148   * inherit the locked status.
149   *
150   * @see java.lang.Object#clone()
151   */
152  public Object clone() {
153    Schema s = new Schema(mFieldCount);
154    for (int i = 0; i < mFieldCount; ++i) {
155      s.addField(mFieldNames[i], mFieldTypes[i], mDefaultValues[i]);
156    }
157    return s;
158  }
159
160  /**
161   * Lazily construct the lookup table for this schema. Used to accelerate
162   * name-based lookups of schema information.
163   */
164  protected void initLookup() {
165    mFieldLookup = new HashMap<String, Integer>();
166    for (int i = 0; i < mFieldNames.length; ++i) {
167      mFieldLookup.put(mFieldNames[i], new Integer(i));
168    }
169  }
170
171  // ------------------------------------------------------------------------
172  // Accessors / Mutators
173
174  /**
175   * Locks the Schema, preventing any additional changes. Locked Schemas
176   * cannot be unlocked! Cloned copies of a locked schema will not inherit
177   * this locked status.
178   *
179   * @return a reference to this schema
180   */
181  public Schema lockSchema() {
182    mLocked = true;
183    return this;
184  }
185
186  /**
187   * Checks if this schema is locked. Locked Schemas can not be edited.
188   *
189   * @return true if this Schema is locked, false otherwise
190   */
191  public boolean isLocked() {
192    return mLocked;
193  }
194
195  /**
196   * Adds a field to this Schema.
197   *
198   * @param name
199   *            the field name
200   * @param type
201   *            the field type (as a Class instance)
202   * @throws IllegalArgumentException
203   *             if either name or type are null or the name already exists in
204   *             this schema.
205   */
206  public void addField(String name, Class<?> type) {
207    addField(name, type, null);
208  }
209
210  /**
211   * Adds a field to this schema.
212   *
213   * @param name
214   *            the field name
215   * @param type
216   *            the field type (as a Class instance)
217   * @throws IllegalArgumentException
218   *             if either name or type are null or the name already exists in
219   *             this schema.
220   */
221  public void addField(String name, Class<?> type, Object defaultValue) {
222    if (!(type == Integer.class || type == Boolean.class
223        || type == Long.class || type == Float.class
224        || type == Double.class || type == String.class || (!type
225        .isInterface() && Writable.class.isAssignableFrom(type)))) {
226      throw new SchemaException("Illegal field type: "
227          + type.getCanonicalName());
228    }
229
230    // check lock status
231    if (mLocked) {
232      throw new IllegalStateException(
233          "Can not add column to a locked Schema.");
234    }
235    // check for validity
236    if (name == null) {
237      throw new IllegalArgumentException(
238          "Null column names are not allowed.");
239    }
240    if (type == null) {
241      throw new IllegalArgumentException(
242          "Null column types are not allowed.");
243    }
244    for (int i = 0; i < mFieldCount; ++i) {
245      if (mFieldNames[i].equals(name)) {
246        throw new IllegalArgumentException(
247            "Duplicate column names are not allowed: "
248                + mFieldNames[i]);
249      }
250    }
251
252    // resize if necessary
253    if (mFieldNames.length == mFieldCount) {
254      int capacity = (3 * mFieldNames.length) / 2 + 1;
255      String[] names = new String[capacity];
256      Class<?>[] types = new Class[capacity];
257      Object[] dflts = new Object[capacity];
258      System.arraycopy(mFieldNames, 0, names, 0, mFieldCount);
259      System.arraycopy(mFieldTypes, 0, types, 0, mFieldCount);
260      System.arraycopy(mDefaultValues, 0, dflts, 0, mFieldCount);
261      mFieldNames = names;
262      mFieldTypes = types;
263      mDefaultValues = dflts;
264    }
265
266    mFieldNames[mFieldCount] = name;
267    mFieldTypes[mFieldCount] = type;
268    mDefaultValues[mFieldCount] = defaultValue;
269
270    if (mFieldLookup != null)
271      mFieldLookup.put(name, new Integer(mFieldCount));
272
273    mFieldCount++;
274  }
275
276  /**
277   * Returns the number of fields in this Schema.
278   *
279   * @return the number of fields in this Schema
280   */
281  public int getFieldCount() {
282    return mFieldCount;
283  }
284
285  /**
286   * Returns the name of the field at the given position.
287   *
288   * @param index
289   *            the field index
290   * @return the field name
291   */
292  public String getFieldName(int index) {
293    return mFieldNames[index];
294  }
295
296  /**
297   * Returns the position of a field given its name.
298   *
299   * @param field
300   *            the field name
301   * @return the field position index
302   */
303  public int getFieldIndex(String field) {
304    if (mFieldLookup == null)
305      initLookup();
306
307    Integer idx = (Integer) mFieldLookup.get(field);
308    return (idx == null ? -1 : idx.intValue());
309  }
310
311  /**
312   * Returns the type of the field at the given position.
313   *
314   * @param index
315   *            the column index
316   * @return the column type
317   */
318  public Class<?> getFieldType(int index) {
319    return mFieldTypes[index];
320  }
321
322  /**
323   * Returns the type of the field given its name.
324   *
325   * @param field
326   *            the field name
327   * @return the field type
328   */
329  public Class<?> getFieldType(String field) {
330    int idx = getFieldIndex(field);
331    return (idx < 0 ? null : mFieldTypes[idx]);
332  }
333
334  /**
335   * Returns the default value of the field at the given position.
336   *
337   * @param index
338   *            the field index
339   * @return the field's default value
340   */
341  public Object getDefault(int index) {
342    return mDefaultValues[index];
343  }
344
345  /**
346   * Returns the default value of the field with the given name.
347   *
348   * @param field
349   *            the field name
350   * @return the field's default value
351   */
352  public Object getDefault(String field) {
353    int idx = getFieldIndex(field);
354    return (idx < 0 ? null : mDefaultValues[idx]);
355  }
356
357  /**
358   * Sets the default value for the given field.
359   *
360   * @param index
361   *            the index position of the field to set the default for
362   * @param val
363   *            the new default value
364   */
365  public void setDefault(int index, Object val) {
366    // check lock status
367    if (mLocked) {
368      throw new IllegalStateException(
369          "Can not update default values of a locked Schema.");
370    }
371    mDefaultValues[index] = val;
372  }
373
374  /**
375   * Sets the default value for the given field.
376   *
377   * @param field
378   *            the name of field to set the default for
379   * @param val
380   *            the new default value
381   */
382  public void setDefault(String field, Object val) {
383    // check lock status
384    if (mLocked) {
385      throw new IllegalStateException(
386          "Can not update default values of a locked Schema.");
387    }
388    int idx = getFieldIndex(field);
389    mDefaultValues[idx] = val;
390  }
391
392  /**
393   * Sets the default value for the given field as an <code>int</code>.
394   *
395   * @param field
396   *            the name of field to set the default for
397   * @param val
398   *            the new default value
399   */
400  public void setDefault(String field, int val) {
401    setDefault(field, new Integer(val));
402  }
403
404  /**
405   * Set the default value for the given field as a <code>long</code>.
406   *
407   * @param field
408   *            the name of field to set the default for
409   * @param val
410   *            the new default value
411   */
412  public void setDefault(String field, long val) {
413    setDefault(field, new Long(val));
414  }
415
416  /**
417   * Set the default value for the given field as a <code>float</code>.
418   *
419   * @param field
420   *            the name of field to set the default for
421   * @param val
422   *            the new default value
423   */
424  public void setDefault(String field, float val) {
425    setDefault(field, new Float(val));
426  }
427
428  /**
429   * Set the default value for the given field as a <code>double</code>.
430   *
431   * @param field
432   *            the name of field to set the default for
433   * @param val
434   *            the new default value
435   */
436  public void setDefault(String field, double val) {
437    setDefault(field, new Double(val));
438  }
439
440  /**
441   * Set the default value for the given field as a <code>boolean</code>.
442   *
443   * @param field
444   *            the name of field to set the default for
445   * @param val
446   *            the new default value
447   */
448  public void setDefault(String field, boolean val) {
449    setDefault(field, val ? Boolean.TRUE : Boolean.FALSE);
450  }
451
452  // ------------------------------------------------------------------------
453  // Comparison and Hashing
454
455  /**
456   * Compares this Schema with another one for equality.
457   */
458  public boolean equals(Object o) {
459    if (!(o instanceof Schema))
460      return false;
461
462    Schema s = (Schema) o;
463    if (mFieldCount != s.getFieldCount())
464      return false;
465
466    for (int i = 0; i < mFieldCount; ++i) {
467      if (!(mFieldNames[i].equals(s.getFieldName(i))
468          && mFieldTypes[i].equals(s.getFieldType(i)) && mDefaultValues[i]
469          .equals(s.getDefault(i)))) {
470        return false;
471      }
472    }
473    return true;
474  }
475
476  /**
477   * Computes a hashcode for this schema.
478   */
479  public int hashCode() {
480    int hashcode = 0;
481    for (int i = 0; i < mFieldCount; ++i) {
482      int idx = i + 1;
483      int code = idx * mFieldNames[i].hashCode();
484      code ^= idx * mFieldTypes[i].hashCode();
485      if (mDefaultValues[i] != null)
486        code ^= mDefaultValues[i].hashCode();
487      hashcode ^= code;
488    }
489    return hashcode;
490  }
491
492  /**
493   * Returns a descriptive String for this schema.
494   */
495  public String toString() {
496    StringBuffer sbuf = new StringBuffer();
497    sbuf.append("Schema[");
498    for (int i = 0; i < mFieldCount; ++i) {
499      if (i > 0)
500        sbuf.append(' ');
501      sbuf.append('(').append(mFieldNames[i]).append(", ");
502      sbuf.append(mFieldTypes[i].getName()).append(", ");
503      sbuf.append(mDefaultValues[i]).append(')');
504    }
505    sbuf.append(']');
506    return sbuf.toString();
507  }
508
509  // ------------------------------------------------------------------------
510  // Tuple Operations
511
512  /**
513   * Instantiate a new Tuple instance with this Schema. Fields of the newly
514   * instantiated Tuple are set to default value.
515   *
516   * @return a new Tuple with this Schema
517   */
518  public Tuple instantiate() {
519    lockSchema();
520
521    Object[] objects = new Object[mFieldCount];
522    System.arraycopy(mDefaultValues, 0, objects, 0, mFieldCount);
523
524    String[] symbols = new String[mFieldCount];
525
526    String[] fields = new String[mFieldCount];
527    System.arraycopy(mFieldNames, 0, fields, 0, mFieldCount);
528
529    Class<?>[] types = new Class<?>[mFieldCount];
530    System.arraycopy(mFieldTypes, 0, types, 0, mFieldCount);
531
532    return new Tuple(objects, symbols, fields, types);
533  }
534
535  /**
536   * Instantiate a new Tuple instance with this Schema.
537   *
538   * @param objects
539   *            values of each field
540   * @return a new Tuple with this Schema
541   */
542  public Tuple instantiate(Object... objects) {
543    lockSchema();
544
545    String[] symbols = new String[mFieldCount];
546
547    String[] fields = new String[mFieldCount];
548    System.arraycopy(mFieldNames, 0, fields, 0, mFieldCount);
549
550    Class<?>[] types = new Class[mFieldCount];
551    System.arraycopy(mFieldTypes, 0, types, 0, mFieldCount);
552
553    return new Tuple(objects, symbols, fields, types);
554  }
555
556} // end of class Schema
Note: See TracBrowser for help on using the repository browser.