/*
 * Cloud9: A MapReduce Library for Hadoop
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you
 * may not use this file except in compliance with the License. You may
 * obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 * implied. See the License for the specific language governing
 * permissions and limitations under the License.
 */
16 | |
---|
17 | package tw.org.nchc.tuple; |
---|
18 | |
---|
19 | import java.util.HashMap; |
---|
20 | import java.util.Map; |
---|
21 | |
---|
22 | import org.apache.hadoop.io.Writable; |
---|
23 | |
---|
24 | /** |
---|
25 | * <p> |
---|
26 | * Description of a Tuple's structure. The Schema class keeps track of column |
---|
27 | * names, data types, and default values. The following code fragment |
---|
28 | * illustrates the use of this class: |
---|
29 | * </p> |
---|
30 | * |
---|
31 | * <pre> |
---|
32 | * public static final Schema MYSCHEMA = new Schema(); |
---|
33 | * static { |
---|
34 | * MYSCHEMA.addField("token", String.class, ""); |
---|
35 | * MYSCHEMA.addField("int", Integer.class, new Integer(1)); |
---|
36 | * } |
---|
37 | * </pre> |
---|
38 | * |
---|
39 | * <p> |
---|
40 | * The following field types are allowed: |
---|
41 | * </p> |
---|
42 | * |
---|
43 | * <ul> |
---|
44 | * <li>Basic Java primitives: Boolean, Integer, Long, Float, Double, String</li> |
---|
45 | * <li>Classes that implement Writable</li> |
---|
46 | * </ul> |
---|
47 | * |
---|
48 | * <p> |
---|
49 | * Schema instances can be locked to prevent further changes. Any attempt to |
---|
50 | * alter a locked Schema will result in a runtime exception being thrown. If a |
---|
51 | * Schema is not locked, callers are free to add new fields and edit default |
---|
52 | * values. |
---|
53 | * </p> |
---|
54 | * |
---|
55 | * <p> |
---|
56 | * New Tuple instances can be created directly from Schema objects through the |
---|
57 | * use of the {@link #instantiate()} method. A call to that method implicitly |
---|
58 | * locks the Schema. |
---|
59 | * </p> |
---|
60 | * |
---|
61 | * <p> |
---|
62 | * <b>Acknowledgments:</b> much of this code was adapted from the <a |
---|
63 | * href="http://prefuse.org/">Prefuse Visualization Toolkit</a>. |
---|
64 | * </p> |
---|
65 | * |
---|
66 | */ |
---|
67 | public class Schema implements Cloneable { |
---|
68 | |
---|
69 | private String[] mFieldNames; |
---|
70 | private Class<?>[] mFieldTypes; |
---|
71 | private Object[] mDefaultValues; |
---|
72 | private Map<String, Integer> mFieldLookup; |
---|
73 | private int mFieldCount; |
---|
74 | private boolean mLocked; |
---|
75 | |
---|
76 | // ------------------------------------------------------------------------ |
---|
77 | // Constructors |
---|
78 | |
---|
79 | /** |
---|
80 | * Creates a new empty Schema. |
---|
81 | */ |
---|
82 | public Schema() { |
---|
83 | this(10); |
---|
84 | } |
---|
85 | |
---|
86 | /** |
---|
87 | * Creates a new empty Schema with a starting capacity for a given number of |
---|
88 | * fields. |
---|
89 | * |
---|
90 | * @param n |
---|
91 | * the number of columns in this schema |
---|
92 | */ |
---|
93 | public Schema(int n) { |
---|
94 | mFieldNames = new String[n]; |
---|
95 | mFieldTypes = new Class<?>[n]; |
---|
96 | mDefaultValues = new Object[n]; |
---|
97 | mFieldCount = 0; |
---|
98 | mLocked = false; |
---|
99 | } |
---|
100 | |
---|
101 | /** |
---|
102 | * Create a new Schema consisting of the given field names and types. |
---|
103 | * |
---|
104 | * @param names |
---|
105 | * the field names |
---|
106 | * @param types |
---|
107 | * the field types (as Class instances) |
---|
108 | */ |
---|
109 | public Schema(String[] names, Class<?>[] types) { |
---|
110 | this(names.length); |
---|
111 | |
---|
112 | // check the schema validity |
---|
113 | if (names.length != types.length) { |
---|
114 | throw new IllegalArgumentException( |
---|
115 | "Input arrays should be the same length"); |
---|
116 | } |
---|
117 | for (int i = 0; i < names.length; ++i) { |
---|
118 | addField(names[i], types[i], null); |
---|
119 | } |
---|
120 | } |
---|
121 | |
---|
122 | /** |
---|
123 | * Create a new Schema consisting of the given field names, types, and |
---|
124 | * default field values. |
---|
125 | * |
---|
126 | * @param names |
---|
127 | * the field names |
---|
128 | * @param types |
---|
129 | * the field types (as Class instances) |
---|
130 | * @param defaults |
---|
131 | * the default values for each field |
---|
132 | */ |
---|
133 | public Schema(String[] names, Class<?>[] types, Object[] defaults) { |
---|
134 | this(names.length); |
---|
135 | |
---|
136 | // check the schema validity |
---|
137 | if (names.length != types.length || types.length != defaults.length) { |
---|
138 | throw new IllegalArgumentException( |
---|
139 | "Input arrays should be the same length"); |
---|
140 | } |
---|
141 | for (int i = 0; i < names.length; ++i) { |
---|
142 | addField(names[i], types[i], defaults[i]); |
---|
143 | } |
---|
144 | } |
---|
145 | |
---|
146 | /** |
---|
147 | * Creates a copy of this Schema. Cloned copies of a locked Schema will not |
---|
148 | * inherit the locked status. |
---|
149 | * |
---|
150 | * @see java.lang.Object#clone() |
---|
151 | */ |
---|
152 | public Object clone() { |
---|
153 | Schema s = new Schema(mFieldCount); |
---|
154 | for (int i = 0; i < mFieldCount; ++i) { |
---|
155 | s.addField(mFieldNames[i], mFieldTypes[i], mDefaultValues[i]); |
---|
156 | } |
---|
157 | return s; |
---|
158 | } |
---|
159 | |
---|
160 | /** |
---|
161 | * Lazily construct the lookup table for this schema. Used to accelerate |
---|
162 | * name-based lookups of schema information. |
---|
163 | */ |
---|
164 | protected void initLookup() { |
---|
165 | mFieldLookup = new HashMap<String, Integer>(); |
---|
166 | for (int i = 0; i < mFieldNames.length; ++i) { |
---|
167 | mFieldLookup.put(mFieldNames[i], new Integer(i)); |
---|
168 | } |
---|
169 | } |
---|
170 | |
---|
171 | // ------------------------------------------------------------------------ |
---|
172 | // Accessors / Mutators |
---|
173 | |
---|
174 | /** |
---|
175 | * Locks the Schema, preventing any additional changes. Locked Schemas |
---|
176 | * cannot be unlocked! Cloned copies of a locked schema will not inherit |
---|
177 | * this locked status. |
---|
178 | * |
---|
179 | * @return a reference to this schema |
---|
180 | */ |
---|
181 | public Schema lockSchema() { |
---|
182 | mLocked = true; |
---|
183 | return this; |
---|
184 | } |
---|
185 | |
---|
186 | /** |
---|
187 | * Checks if this schema is locked. Locked Schemas can not be edited. |
---|
188 | * |
---|
189 | * @return true if this Schema is locked, false otherwise |
---|
190 | */ |
---|
191 | public boolean isLocked() { |
---|
192 | return mLocked; |
---|
193 | } |
---|
194 | |
---|
195 | /** |
---|
196 | * Adds a field to this Schema. |
---|
197 | * |
---|
198 | * @param name |
---|
199 | * the field name |
---|
200 | * @param type |
---|
201 | * the field type (as a Class instance) |
---|
202 | * @throws IllegalArgumentException |
---|
203 | * if either name or type are null or the name already exists in |
---|
204 | * this schema. |
---|
205 | */ |
---|
206 | public void addField(String name, Class<?> type) { |
---|
207 | addField(name, type, null); |
---|
208 | } |
---|
209 | |
---|
210 | /** |
---|
211 | * Adds a field to this schema. |
---|
212 | * |
---|
213 | * @param name |
---|
214 | * the field name |
---|
215 | * @param type |
---|
216 | * the field type (as a Class instance) |
---|
217 | * @throws IllegalArgumentException |
---|
218 | * if either name or type are null or the name already exists in |
---|
219 | * this schema. |
---|
220 | */ |
---|
221 | public void addField(String name, Class<?> type, Object defaultValue) { |
---|
222 | if (!(type == Integer.class || type == Boolean.class |
---|
223 | || type == Long.class || type == Float.class |
---|
224 | || type == Double.class || type == String.class || (!type |
---|
225 | .isInterface() && Writable.class.isAssignableFrom(type)))) { |
---|
226 | throw new SchemaException("Illegal field type: " |
---|
227 | + type.getCanonicalName()); |
---|
228 | } |
---|
229 | |
---|
230 | // check lock status |
---|
231 | if (mLocked) { |
---|
232 | throw new IllegalStateException( |
---|
233 | "Can not add column to a locked Schema."); |
---|
234 | } |
---|
235 | // check for validity |
---|
236 | if (name == null) { |
---|
237 | throw new IllegalArgumentException( |
---|
238 | "Null column names are not allowed."); |
---|
239 | } |
---|
240 | if (type == null) { |
---|
241 | throw new IllegalArgumentException( |
---|
242 | "Null column types are not allowed."); |
---|
243 | } |
---|
244 | for (int i = 0; i < mFieldCount; ++i) { |
---|
245 | if (mFieldNames[i].equals(name)) { |
---|
246 | throw new IllegalArgumentException( |
---|
247 | "Duplicate column names are not allowed: " |
---|
248 | + mFieldNames[i]); |
---|
249 | } |
---|
250 | } |
---|
251 | |
---|
252 | // resize if necessary |
---|
253 | if (mFieldNames.length == mFieldCount) { |
---|
254 | int capacity = (3 * mFieldNames.length) / 2 + 1; |
---|
255 | String[] names = new String[capacity]; |
---|
256 | Class<?>[] types = new Class[capacity]; |
---|
257 | Object[] dflts = new Object[capacity]; |
---|
258 | System.arraycopy(mFieldNames, 0, names, 0, mFieldCount); |
---|
259 | System.arraycopy(mFieldTypes, 0, types, 0, mFieldCount); |
---|
260 | System.arraycopy(mDefaultValues, 0, dflts, 0, mFieldCount); |
---|
261 | mFieldNames = names; |
---|
262 | mFieldTypes = types; |
---|
263 | mDefaultValues = dflts; |
---|
264 | } |
---|
265 | |
---|
266 | mFieldNames[mFieldCount] = name; |
---|
267 | mFieldTypes[mFieldCount] = type; |
---|
268 | mDefaultValues[mFieldCount] = defaultValue; |
---|
269 | |
---|
270 | if (mFieldLookup != null) |
---|
271 | mFieldLookup.put(name, new Integer(mFieldCount)); |
---|
272 | |
---|
273 | mFieldCount++; |
---|
274 | } |
---|
275 | |
---|
276 | /** |
---|
277 | * Returns the number of fields in this Schema. |
---|
278 | * |
---|
279 | * @return the number of fields in this Schema |
---|
280 | */ |
---|
281 | public int getFieldCount() { |
---|
282 | return mFieldCount; |
---|
283 | } |
---|
284 | |
---|
285 | /** |
---|
286 | * Returns the name of the field at the given position. |
---|
287 | * |
---|
288 | * @param index |
---|
289 | * the field index |
---|
290 | * @return the field name |
---|
291 | */ |
---|
292 | public String getFieldName(int index) { |
---|
293 | return mFieldNames[index]; |
---|
294 | } |
---|
295 | |
---|
296 | /** |
---|
297 | * Returns the position of a field given its name. |
---|
298 | * |
---|
299 | * @param field |
---|
300 | * the field name |
---|
301 | * @return the field position index |
---|
302 | */ |
---|
303 | public int getFieldIndex(String field) { |
---|
304 | if (mFieldLookup == null) |
---|
305 | initLookup(); |
---|
306 | |
---|
307 | Integer idx = (Integer) mFieldLookup.get(field); |
---|
308 | return (idx == null ? -1 : idx.intValue()); |
---|
309 | } |
---|
310 | |
---|
311 | /** |
---|
312 | * Returns the type of the field at the given position. |
---|
313 | * |
---|
314 | * @param index |
---|
315 | * the column index |
---|
316 | * @return the column type |
---|
317 | */ |
---|
318 | public Class<?> getFieldType(int index) { |
---|
319 | return mFieldTypes[index]; |
---|
320 | } |
---|
321 | |
---|
322 | /** |
---|
323 | * Returns the type of the field given its name. |
---|
324 | * |
---|
325 | * @param field |
---|
326 | * the field name |
---|
327 | * @return the field type |
---|
328 | */ |
---|
329 | public Class<?> getFieldType(String field) { |
---|
330 | int idx = getFieldIndex(field); |
---|
331 | return (idx < 0 ? null : mFieldTypes[idx]); |
---|
332 | } |
---|
333 | |
---|
334 | /** |
---|
335 | * Returns the default value of the field at the given position. |
---|
336 | * |
---|
337 | * @param index |
---|
338 | * the field index |
---|
339 | * @return the field's default value |
---|
340 | */ |
---|
341 | public Object getDefault(int index) { |
---|
342 | return mDefaultValues[index]; |
---|
343 | } |
---|
344 | |
---|
345 | /** |
---|
346 | * Returns the default value of the field with the given name. |
---|
347 | * |
---|
348 | * @param field |
---|
349 | * the field name |
---|
350 | * @return the field's default value |
---|
351 | */ |
---|
352 | public Object getDefault(String field) { |
---|
353 | int idx = getFieldIndex(field); |
---|
354 | return (idx < 0 ? null : mDefaultValues[idx]); |
---|
355 | } |
---|
356 | |
---|
357 | /** |
---|
358 | * Sets the default value for the given field. |
---|
359 | * |
---|
360 | * @param index |
---|
361 | * the index position of the field to set the default for |
---|
362 | * @param val |
---|
363 | * the new default value |
---|
364 | */ |
---|
365 | public void setDefault(int index, Object val) { |
---|
366 | // check lock status |
---|
367 | if (mLocked) { |
---|
368 | throw new IllegalStateException( |
---|
369 | "Can not update default values of a locked Schema."); |
---|
370 | } |
---|
371 | mDefaultValues[index] = val; |
---|
372 | } |
---|
373 | |
---|
374 | /** |
---|
375 | * Sets the default value for the given field. |
---|
376 | * |
---|
377 | * @param field |
---|
378 | * the name of field to set the default for |
---|
379 | * @param val |
---|
380 | * the new default value |
---|
381 | */ |
---|
382 | public void setDefault(String field, Object val) { |
---|
383 | // check lock status |
---|
384 | if (mLocked) { |
---|
385 | throw new IllegalStateException( |
---|
386 | "Can not update default values of a locked Schema."); |
---|
387 | } |
---|
388 | int idx = getFieldIndex(field); |
---|
389 | mDefaultValues[idx] = val; |
---|
390 | } |
---|
391 | |
---|
392 | /** |
---|
393 | * Sets the default value for the given field as an <code>int</code>. |
---|
394 | * |
---|
395 | * @param field |
---|
396 | * the name of field to set the default for |
---|
397 | * @param val |
---|
398 | * the new default value |
---|
399 | */ |
---|
400 | public void setDefault(String field, int val) { |
---|
401 | setDefault(field, new Integer(val)); |
---|
402 | } |
---|
403 | |
---|
404 | /** |
---|
405 | * Set the default value for the given field as a <code>long</code>. |
---|
406 | * |
---|
407 | * @param field |
---|
408 | * the name of field to set the default for |
---|
409 | * @param val |
---|
410 | * the new default value |
---|
411 | */ |
---|
412 | public void setDefault(String field, long val) { |
---|
413 | setDefault(field, new Long(val)); |
---|
414 | } |
---|
415 | |
---|
416 | /** |
---|
417 | * Set the default value for the given field as a <code>float</code>. |
---|
418 | * |
---|
419 | * @param field |
---|
420 | * the name of field to set the default for |
---|
421 | * @param val |
---|
422 | * the new default value |
---|
423 | */ |
---|
424 | public void setDefault(String field, float val) { |
---|
425 | setDefault(field, new Float(val)); |
---|
426 | } |
---|
427 | |
---|
428 | /** |
---|
429 | * Set the default value for the given field as a <code>double</code>. |
---|
430 | * |
---|
431 | * @param field |
---|
432 | * the name of field to set the default for |
---|
433 | * @param val |
---|
434 | * the new default value |
---|
435 | */ |
---|
436 | public void setDefault(String field, double val) { |
---|
437 | setDefault(field, new Double(val)); |
---|
438 | } |
---|
439 | |
---|
440 | /** |
---|
441 | * Set the default value for the given field as a <code>boolean</code>. |
---|
442 | * |
---|
443 | * @param field |
---|
444 | * the name of field to set the default for |
---|
445 | * @param val |
---|
446 | * the new default value |
---|
447 | */ |
---|
448 | public void setDefault(String field, boolean val) { |
---|
449 | setDefault(field, val ? Boolean.TRUE : Boolean.FALSE); |
---|
450 | } |
---|
451 | |
---|
452 | // ------------------------------------------------------------------------ |
---|
453 | // Comparison and Hashing |
---|
454 | |
---|
455 | /** |
---|
456 | * Compares this Schema with another one for equality. |
---|
457 | */ |
---|
458 | public boolean equals(Object o) { |
---|
459 | if (!(o instanceof Schema)) |
---|
460 | return false; |
---|
461 | |
---|
462 | Schema s = (Schema) o; |
---|
463 | if (mFieldCount != s.getFieldCount()) |
---|
464 | return false; |
---|
465 | |
---|
466 | for (int i = 0; i < mFieldCount; ++i) { |
---|
467 | if (!(mFieldNames[i].equals(s.getFieldName(i)) |
---|
468 | && mFieldTypes[i].equals(s.getFieldType(i)) && mDefaultValues[i] |
---|
469 | .equals(s.getDefault(i)))) { |
---|
470 | return false; |
---|
471 | } |
---|
472 | } |
---|
473 | return true; |
---|
474 | } |
---|
475 | |
---|
476 | /** |
---|
477 | * Computes a hashcode for this schema. |
---|
478 | */ |
---|
479 | public int hashCode() { |
---|
480 | int hashcode = 0; |
---|
481 | for (int i = 0; i < mFieldCount; ++i) { |
---|
482 | int idx = i + 1; |
---|
483 | int code = idx * mFieldNames[i].hashCode(); |
---|
484 | code ^= idx * mFieldTypes[i].hashCode(); |
---|
485 | if (mDefaultValues[i] != null) |
---|
486 | code ^= mDefaultValues[i].hashCode(); |
---|
487 | hashcode ^= code; |
---|
488 | } |
---|
489 | return hashcode; |
---|
490 | } |
---|
491 | |
---|
492 | /** |
---|
493 | * Returns a descriptive String for this schema. |
---|
494 | */ |
---|
495 | public String toString() { |
---|
496 | StringBuffer sbuf = new StringBuffer(); |
---|
497 | sbuf.append("Schema["); |
---|
498 | for (int i = 0; i < mFieldCount; ++i) { |
---|
499 | if (i > 0) |
---|
500 | sbuf.append(' '); |
---|
501 | sbuf.append('(').append(mFieldNames[i]).append(", "); |
---|
502 | sbuf.append(mFieldTypes[i].getName()).append(", "); |
---|
503 | sbuf.append(mDefaultValues[i]).append(')'); |
---|
504 | } |
---|
505 | sbuf.append(']'); |
---|
506 | return sbuf.toString(); |
---|
507 | } |
---|
508 | |
---|
509 | // ------------------------------------------------------------------------ |
---|
510 | // Tuple Operations |
---|
511 | |
---|
512 | /** |
---|
513 | * Instantiate a new Tuple instance with this Schema. Fields of the newly |
---|
514 | * instantiated Tuple are set to default value. |
---|
515 | * |
---|
516 | * @return a new Tuple with this Schema |
---|
517 | */ |
---|
518 | public Tuple instantiate() { |
---|
519 | lockSchema(); |
---|
520 | |
---|
521 | Object[] objects = new Object[mFieldCount]; |
---|
522 | System.arraycopy(mDefaultValues, 0, objects, 0, mFieldCount); |
---|
523 | |
---|
524 | String[] symbols = new String[mFieldCount]; |
---|
525 | |
---|
526 | String[] fields = new String[mFieldCount]; |
---|
527 | System.arraycopy(mFieldNames, 0, fields, 0, mFieldCount); |
---|
528 | |
---|
529 | Class<?>[] types = new Class<?>[mFieldCount]; |
---|
530 | System.arraycopy(mFieldTypes, 0, types, 0, mFieldCount); |
---|
531 | |
---|
532 | return new Tuple(objects, symbols, fields, types); |
---|
533 | } |
---|
534 | |
---|
535 | /** |
---|
536 | * Instantiate a new Tuple instance with this Schema. |
---|
537 | * |
---|
538 | * @param objects |
---|
539 | * values of each field |
---|
540 | * @return a new Tuple with this Schema |
---|
541 | */ |
---|
542 | public Tuple instantiate(Object... objects) { |
---|
543 | lockSchema(); |
---|
544 | |
---|
545 | String[] symbols = new String[mFieldCount]; |
---|
546 | |
---|
547 | String[] fields = new String[mFieldCount]; |
---|
548 | System.arraycopy(mFieldNames, 0, fields, 0, mFieldCount); |
---|
549 | |
---|
550 | Class<?>[] types = new Class[mFieldCount]; |
---|
551 | System.arraycopy(mFieldTypes, 0, types, 0, mFieldCount); |
---|
552 | |
---|
553 | return new Tuple(objects, symbols, fields, types); |
---|
554 | } |
---|
555 | |
---|
556 | } // end of class Schema |
---|