001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018
019package org.apache.hadoop.io;
020
021import java.io.IOException;
022import java.io.DataInput;
023import java.io.DataOutput;
024import java.io.InputStream;
025import java.util.Arrays;
026import java.security.*;
027
028import org.apache.hadoop.classification.InterfaceAudience;
029import org.apache.hadoop.classification.InterfaceStability;
030
031/** A Writable for MD5 hash values.
032 */
033@InterfaceAudience.Public
034@InterfaceStability.Stable
035public class MD5Hash implements WritableComparable<MD5Hash> {
036  public static final int MD5_LEN = 16;
037
038  private static final ThreadLocal<MessageDigest> DIGESTER_FACTORY =
039      new ThreadLocal<MessageDigest>() {
040    @Override
041    protected MessageDigest initialValue() {
042      try {
043        return MessageDigest.getInstance("MD5");
044      } catch (NoSuchAlgorithmException e) {
045        throw new RuntimeException(e);
046      }
047    }
048  };
049
050  private byte[] digest;
051
052  /** Constructs an MD5Hash. */
053  public MD5Hash() {
054    this.digest = new byte[MD5_LEN];
055  }
056
057  /** Constructs an MD5Hash from a hex string. */
058  public MD5Hash(String hex) {
059    setDigest(hex);
060  }
061  
062  /** Constructs an MD5Hash with a specified value. */
063  public MD5Hash(byte[] digest) {
064    if (digest.length != MD5_LEN)
065      throw new IllegalArgumentException("Wrong length: " + digest.length);
066    this.digest = digest;
067  }
068  
069  // javadoc from Writable
070  @Override
071  public void readFields(DataInput in) throws IOException {
072    in.readFully(digest);
073  }
074
075  /** Constructs, reads and returns an instance. */
076  public static MD5Hash read(DataInput in) throws IOException {
077    MD5Hash result = new MD5Hash();
078    result.readFields(in);
079    return result;
080  }
081
082  // javadoc from Writable
083  @Override
084  public void write(DataOutput out) throws IOException {
085    out.write(digest);
086  }
087
088  /** Copy the contents of another instance into this instance. */
089  public void set(MD5Hash that) {
090    System.arraycopy(that.digest, 0, this.digest, 0, MD5_LEN);
091  }
092
093  /** Returns the digest bytes. */
094  public byte[] getDigest() { return digest; }
095
096  /** Construct a hash value for a byte array. */
097  public static MD5Hash digest(byte[] data) {
098    return digest(data, 0, data.length);
099  }
100
101  /**
102   * Create a thread local MD5 digester
103   */
104  public static MessageDigest getDigester() {
105    MessageDigest digester = DIGESTER_FACTORY.get();
106    digester.reset();
107    return digester;
108  }
109
110  /** Construct a hash value for the content from the InputStream. */
111  public static MD5Hash digest(InputStream in) throws IOException {
112    final byte[] buffer = new byte[4*1024]; 
113
114    final MessageDigest digester = getDigester();
115    for(int n; (n = in.read(buffer)) != -1; ) {
116      digester.update(buffer, 0, n);
117    }
118
119    return new MD5Hash(digester.digest());
120  }
121
122  /** Construct a hash value for a byte array. */
123  public static MD5Hash digest(byte[] data, int start, int len) {
124    byte[] digest;
125    MessageDigest digester = getDigester();
126    digester.update(data, start, len);
127    digest = digester.digest();
128    return new MD5Hash(digest);
129  }
130
131  /** Construct a hash value for an array of byte array. */
132  public static MD5Hash digest(byte[][] dataArr, int start, int len) {
133    byte[] digest;
134    MessageDigest digester = getDigester();
135    for (byte[] data : dataArr) {
136      digester.update(data, start, len);
137    }
138    digest = digester.digest();
139    return new MD5Hash(digest);
140  }
141
142  /** Construct a hash value for a String. */
143  public static MD5Hash digest(String string) {
144    return digest(UTF8.getBytes(string));
145  }
146
147  /** Construct a hash value for a String. */
148  public static MD5Hash digest(UTF8 utf8) {
149    return digest(utf8.getBytes(), 0, utf8.getLength());
150  }
151
152  /** Construct a half-sized version of this MD5.  Fits in a long **/
153  public long halfDigest() {
154    long value = 0;
155    for (int i = 0; i < 8; i++)
156      value |= ((digest[i] & 0xffL) << (8*(7-i)));
157    return value;
158  }
159
160  /**
161   * Return a 32-bit digest of the MD5.
162   * @return the first 4 bytes of the md5
163   */
164  public int quarterDigest() {
165    int value = 0;
166    for (int i = 0; i < 4; i++)
167      value |= ((digest[i] & 0xff) << (8*(3-i)));
168    return value;    
169  }
170
171  /** Returns true iff <code>o</code> is an MD5Hash whose digest contains the
172   * same values.  */
173  @Override
174  public boolean equals(Object o) {
175    if (!(o instanceof MD5Hash))
176      return false;
177    MD5Hash other = (MD5Hash)o;
178    return Arrays.equals(this.digest, other.digest);
179  }
180
181  /** Returns a hash code value for this object.
182   * Only uses the first 4 bytes, since md5s are evenly distributed.
183   */
184  @Override
185  public int hashCode() {
186    return quarterDigest();
187  }
188
189
190  /** Compares this object with the specified object for order.*/
191  @Override
192  public int compareTo(MD5Hash that) {
193    return WritableComparator.compareBytes(this.digest, 0, MD5_LEN,
194                                           that.digest, 0, MD5_LEN);
195  }
196
197  /** A WritableComparator optimized for MD5Hash keys. */
198  public static class Comparator extends WritableComparator {
199    public Comparator() {
200      super(MD5Hash.class);
201    }
202
203    @Override
204    public int compare(byte[] b1, int s1, int l1,
205                       byte[] b2, int s2, int l2) {
206      return compareBytes(b1, s1, MD5_LEN, b2, s2, MD5_LEN);
207    }
208  }
209
210  static {                                        // register this comparator
211    WritableComparator.define(MD5Hash.class, new Comparator());
212  }
213
214  private static final char[] HEX_DIGITS =
215  {'0','1','2','3','4','5','6','7','8','9','a','b','c','d','e','f'};
216
217  /** Returns a string representation of this object. */
218  @Override
219  public String toString() {
220    StringBuilder buf = new StringBuilder(MD5_LEN*2);
221    for (int i = 0; i < MD5_LEN; i++) {
222      int b = digest[i];
223      buf.append(HEX_DIGITS[(b >> 4) & 0xf]);
224      buf.append(HEX_DIGITS[b & 0xf]);
225    }
226    return buf.toString();
227  }
228
229  /** Sets the digest value from a hex string. */
230  public void setDigest(String hex) {
231    if (hex.length() != MD5_LEN*2)
232      throw new IllegalArgumentException("Wrong length: " + hex.length());
233    byte[] digest = new byte[MD5_LEN];
234    for (int i = 0; i < MD5_LEN; i++) {
235      int j = i << 1;
236      digest[i] = (byte)(charToNibble(hex.charAt(j)) << 4 |
237                         charToNibble(hex.charAt(j+1)));
238    }
239    this.digest = digest;
240  }
241
242  private static final int charToNibble(char c) {
243    if (c >= '0' && c <= '9') {
244      return c - '0';
245    } else if (c >= 'a' && c <= 'f') {
246      return 0xa + (c - 'a');
247    } else if (c >= 'A' && c <= 'F') {
248      return 0xA + (c - 'A');
249    } else {
250      throw new RuntimeException("Not a hex character: " + c);
251    }
252  }
253
254
255}