/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.io;

import java.io.IOException;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.InputStream;
import java.util.Arrays;
import java.security.*;

import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;

/**
 * A Writable for MD5 hash values.
 *
 * <p>An instance wraps a {@link #MD5_LEN}-byte digest. The wrapped array is
 * mutable: {@link #readFields(DataInput)} and {@link #set(MD5Hash)} overwrite
 * it in place, and {@link #getDigest()} exposes it without copying.
 */
@InterfaceAudience.Public
@InterfaceStability.Stable
public class MD5Hash implements WritableComparable<MD5Hash> {
  /** Length of an MD5 digest, in bytes. */
  public static final int MD5_LEN = 16;

  /**
   * Per-thread MD5 {@link MessageDigest}, created lazily on first use by each
   * thread.
   */
  private static final ThreadLocal<MessageDigest> DIGESTER_FACTORY =
      new ThreadLocal<MessageDigest>() {
        @Override
        protected MessageDigest initialValue() {
          try {
            return MessageDigest.getInstance("MD5");
          } catch (NoSuchAlgorithmException e) {
            throw new RuntimeException(e);
          }
        }
      };

  /** The digest bytes wrapped by this instance. */
  private byte[] digest;

  /** Constructs an MD5Hash whose digest is {@link #MD5_LEN} zero bytes. */
  public MD5Hash() {
    this.digest = new byte[MD5_LEN];
  }

  /**
   * Constructs an MD5Hash from a hex string.
   *
   * @param hex the digest as {@code MD5_LEN * 2} hexadecimal characters
   * @throws IllegalArgumentException if {@code hex} has the wrong length or
   *         contains a character that is not a hexadecimal digit
   * @see #setDigest(String)
   */
  public MD5Hash(String hex) {
    setDigest(hex);
  }

  /**
   * Constructs an MD5Hash with a specified value.
   *
   * <p>The array is stored by reference, not copied, so later changes to it
   * are visible through this instance.
   *
   * @param digest the digest bytes; must be exactly {@link #MD5_LEN} long
   * @throws IllegalArgumentException if {@code digest} has the wrong length
   */
  public MD5Hash(byte[] digest) {
    if (digest.length != MD5_LEN) {
      throw new IllegalArgumentException("Wrong length: " + digest.length);
    }
    this.digest = digest;
  }

  /**
   * Reads the digest bytes from {@code in}, overwriting the current digest
   * array in place.
   *
   * @param in the input to read the digest from
   * @throws IOException if the underlying read fails
   */
  @Override
  public void readFields(DataInput in) throws IOException {
    in.readFully(digest);
  }

  /**
   * Constructs, reads and returns an instance.
   *
   * @param in the input to read the digest from
   * @return a new instance holding the digest read from {@code in}
   * @throws IOException if the underlying read fails
   */
  public static MD5Hash read(DataInput in) throws IOException {
    MD5Hash result = new MD5Hash();
    result.readFields(in);
    return result;
  }

  /**
   * Writes the raw digest bytes to {@code out}.
   *
   * @param out the output to write the digest to
   * @throws IOException if the underlying write fails
   */
  @Override
  public void write(DataOutput out) throws IOException {
    out.write(digest);
  }

  /**
   * Copy the contents of another instance into this instance.
   *
   * @param that the instance whose digest bytes are copied
   */
  public void set(MD5Hash that) {
    System.arraycopy(that.digest, 0, this.digest, 0, MD5_LEN);
  }

  /**
   * Returns the digest bytes.
   *
   * @return the internal digest array itself (not a copy)
   */
  public byte[] getDigest() {
    return digest;
  }

  /**
   * Construct a hash value for a byte array.
   *
   * @param data the bytes to hash
   * @return the MD5 hash of all of {@code data}
   */
  public static MD5Hash digest(byte[] data) {
    return digest(data, 0, data.length);
  }

  /**
   * Returns the calling thread's MD5 digester, reset and ready for use.
   *
   * <p>The same instance is handed out on every call made from a given
   * thread; it is reset before being returned.
   *
   * @return the thread-local MD5 {@link MessageDigest}
   */
  public static MessageDigest getDigester() {
    MessageDigest digester = DIGESTER_FACTORY.get();
    digester.reset();
    return digester;
  }

  /**
   * Construct a hash value for the content from the InputStream.
   *
   * <p>The stream is read until end-of-stream; it is not closed here.
   *
   * @param in the stream to hash
   * @return the MD5 hash of every byte read from {@code in}
   * @throws IOException if reading from {@code in} fails
   */
  public static MD5Hash digest(InputStream in) throws IOException {
    final byte[] buffer = new byte[4 * 1024];

    final MessageDigest digester = getDigester();
    for (int n; (n = in.read(buffer)) != -1; ) {
      digester.update(buffer, 0, n);
    }

    return new MD5Hash(digester.digest());
  }

  /**
   * Construct a hash value for a range of a byte array.
   *
   * @param data the source bytes
   * @param start offset of the first byte to hash
   * @param len number of bytes to hash
   * @return the MD5 hash of the selected range
   */
  public static MD5Hash digest(byte[] data, int start, int len) {
    byte[] digest;
    MessageDigest digester = getDigester();
    digester.update(data, start, len);
    digest = digester.digest();
    return new MD5Hash(digest);
  }

  /**
   * Construct a hash value for an array of byte array.
   *
   * <p>The same {@code start}/{@code len} range is taken from every array in
   * {@code dataArr}, and the ranges are fed to the digester in array order.
   *
   * @param dataArr the source arrays
   * @param start offset, within each array, of the first byte to hash
   * @param len number of bytes to hash from each array
   * @return the MD5 hash of the concatenated ranges
   */
  public static MD5Hash digest(byte[][] dataArr, int start, int len) {
    byte[] digest;
    MessageDigest digester = getDigester();
    for (byte[] data : dataArr) {
      digester.update(data, start, len);
    }
    digest = digester.digest();
    return new MD5Hash(digest);
  }

  /**
   * Construct a hash value for a String.
   *
   * @param string the string to hash; it is converted to bytes with
   *        {@code UTF8.getBytes(String)}
   * @return the MD5 hash of the resulting bytes
   */
  public static MD5Hash digest(String string) {
    return digest(UTF8.getBytes(string));
  }

  /**
   * Construct a hash value for a {@code UTF8} value.
   *
   * @param utf8 the value to hash
   * @return the MD5 hash of the first {@code utf8.getLength()} bytes of
   *         {@code utf8.getBytes()}
   */
  public static MD5Hash digest(UTF8 utf8) {
    return digest(utf8.getBytes(), 0, utf8.getLength());
  }

  /**
   * Construct a half-sized version of this MD5. Fits in a long.
   *
   * @return the first 8 digest bytes packed big-endian into a long
   */
  public long halfDigest() {
    long value = 0;
    for (int i = 0; i < 8; i++) {
      // Mask with a long literal so the shift happens in 64 bits.
      value |= ((digest[i] & 0xffL) << (8 * (7 - i)));
    }
    return value;
  }

  /**
   * Return a 32-bit digest of the MD5.
   *
   * @return the first 4 bytes of the md5, packed big-endian into an int
   */
  public int quarterDigest() {
    int value = 0;
    for (int i = 0; i < 4; i++) {
      value |= ((digest[i] & 0xff) << (8 * (3 - i)));
    }
    return value;
  }

  /**
   * Returns true iff <code>o</code> is an MD5Hash whose digest contains the
   * same values.
   */
  @Override
  public boolean equals(Object o) {
    if (!(o instanceof MD5Hash)) {
      return false;
    }
    MD5Hash other = (MD5Hash) o;
    return Arrays.equals(this.digest, other.digest);
  }

  /**
   * Returns a hash code value for this object.
   * Only uses the first 4 bytes, since md5s are evenly distributed.
   */
  @Override
  public int hashCode() {
    return quarterDigest();
  }

  /**
   * Compares this object with the specified object for order, byte by byte
   * over the {@link #MD5_LEN} digest bytes.
   */
  @Override
  public int compareTo(MD5Hash that) {
    return WritableComparator.compareBytes(this.digest, 0, MD5_LEN,
        that.digest, 0, MD5_LEN);
  }

  /** A WritableComparator optimized for MD5Hash keys. */
  public static class Comparator extends WritableComparator {
    public Comparator() {
      super(MD5Hash.class);
    }

    /**
     * Compares two serialized MD5Hash values.
     *
     * <p>The supplied lengths {@code l1} and {@code l2} are ignored; exactly
     * {@link #MD5_LEN} bytes are compared from each start offset.
     */
    @Override
    public int compare(byte[] b1, int s1, int l1,
                       byte[] b2, int s2, int l2) {
      return compareBytes(b1, s1, MD5_LEN, b2, s2, MD5_LEN);
    }
  }

  static {                                        // register this comparator
    WritableComparator.define(MD5Hash.class, new Comparator());
  }

  /** Lowercase hexadecimal digits, indexed by nibble value. */
  private static final char[] HEX_DIGITS =
      {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'};

  /**
   * Returns a string representation of this object.
   *
   * @return the digest as {@code MD5_LEN * 2} lowercase hexadecimal characters
   */
  @Override
  public String toString() {
    StringBuilder buf = new StringBuilder(MD5_LEN * 2);
    for (int i = 0; i < MD5_LEN; i++) {
      int b = digest[i];
      // The byte is sign-extended into b; masking keeps only the wanted nibble.
      buf.append(HEX_DIGITS[(b >> 4) & 0xf]);
      buf.append(HEX_DIGITS[b & 0xf]);
    }
    return buf.toString();
  }

  /**
   * Sets the digest value from a hex string.
   *
   * <p>Both uppercase and lowercase hex digits are accepted. The new value is
   * parsed into a fresh array that replaces the current one only after the
   * whole string has been parsed, so a failed call leaves this instance
   * unchanged.
   *
   * @param hex the digest as {@code MD5_LEN * 2} hexadecimal characters
   * @throws IllegalArgumentException if {@code hex} has the wrong length or
   *         contains a character that is not a hexadecimal digit
   */
  public void setDigest(String hex) {
    if (hex.length() != MD5_LEN * 2) {
      throw new IllegalArgumentException("Wrong length: " + hex.length());
    }
    byte[] digest = new byte[MD5_LEN];
    for (int i = 0; i < MD5_LEN; i++) {
      int j = i << 1;
      digest[i] = (byte) (charToNibble(hex.charAt(j)) << 4 |
                          charToNibble(hex.charAt(j + 1)));
    }
    this.digest = digest;
  }

  /**
   * Converts one hexadecimal character to its 4-bit value.
   *
   * @param c a character in {@code 0-9}, {@code a-f} or {@code A-F}
   * @return the value of {@code c}, in the range 0 to 15
   * @throws IllegalArgumentException if {@code c} is not a hexadecimal digit
   */
  private static int charToNibble(char c) {
    if (c >= '0' && c <= '9') {
      return c - '0';
    } else if (c >= 'a' && c <= 'f') {
      return 0xa + (c - 'a');
    } else if (c >= 'A' && c <= 'F') {
      return 0xA + (c - 'A');
    } else {
      // Bad input, same category as the length check in setDigest.
      // IllegalArgumentException is a RuntimeException, so existing
      // catch sites keep working.
      throw new IllegalArgumentException("Not a hex character: " + c);
    }
  }

}