/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.fs;

import java.io.IOException;
import java.io.InvalidObjectException;
import java.io.ObjectInputValidation;
import java.io.Serializable;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.regex.Pattern;

import org.apache.avro.reflect.Stringable;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.HadoopIllegalArgumentException;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;

/**
 * Names a file or directory in a {@link FileSystem}.
 * Path strings use slash as the directory separator.
 *
 * <p>Internally a Path is a thin wrapper around a single hierarchical
 * {@link URI}; every accessor below derives its answer from that URI.
 */
@Stringable
@InterfaceAudience.Public
@InterfaceStability.Stable
public class Path implements Comparable, Serializable, ObjectInputValidation {

  /**
   * The directory separator, a slash.
   */
  public static final String SEPARATOR = "/";

  /**
   * The directory separator, a slash, as a character.
   */
  public static final char SEPARATOR_CHAR = '/';

  /**
   * The current directory, ".".
   */
  public static final String CUR_DIR = ".";

  /**
   * Whether the current host is a Windows machine, decided from the
   * {@code os.name} system property.
   */
  public static final boolean WINDOWS =
      System.getProperty("os.name").startsWith("Windows");

  /**
   * Pre-compiled regular expression that matches a drive-letter specifier
   * at the start of a string: an optional leading slash, one ASCII letter,
   * then a colon.
   */
  private static final Pattern HAS_DRIVE_LETTER_SPECIFIER =
      Pattern.compile("^/?[a-zA-Z]:");

  private static final long serialVersionUID = 0xad00f;

  private URI uri; // a hierarchical uri

  /**
   * Test whether this Path uses a scheme and is relative.
   * Pathnames with scheme and relative path are illegal.
   *
   * @throws HadoopIllegalArgumentException if the URI has a scheme but its
   *         path part is not absolute
   */
  void checkNotSchemeWithRelative() {
    if (toUri().isAbsolute() && !isUriPathAbsolute()) {
      throw new HadoopIllegalArgumentException(
          "Unsupported name: has scheme but relative path-part");
    }
  }

  /**
   * Reject a path that is neither absolute nor carries a scheme.
   *
   * @throws HadoopIllegalArgumentException if the path is not absolute and
   *         its URI has no scheme
   */
  void checkNotRelative() {
    if (!isAbsolute() && toUri().getScheme() == null) {
      throw new HadoopIllegalArgumentException("Path is relative");
    }
  }

  /**
   * Return a version of the given Path without the scheme and authority
   * information.
   *
   * @param path the source Path
   * @return a new Path built from only the URI path of {@code path} when
   *         that URI path is absolute; otherwise {@code path} itself,
   *         unchanged
   */
  public static Path getPathWithoutSchemeAndAuthority(Path path) {
    // This code depends on Path.toString() to remove the leading slash before
    // the drive specification on Windows.
    Path newPath = path.isUriPathAbsolute() ?
        new Path(null, null, path.toUri().getPath()) :
        path;
    return newPath;
  }

  /**
   * Create a new Path based on the child path resolved against the parent path.
   *
   * @param parent the parent path
   * @param child the child path
   */
  public Path(String parent, String child) {
    this(new Path(parent), new Path(child));
  }

  /**
   * Create a new Path based on the child path resolved against the parent path.
   *
   * @param parent the parent path
   * @param child the child path
   */
  public Path(Path parent, String child) {
    this(parent, new Path(child));
  }

  /**
   * Create a new Path based on the child path resolved against the parent path.
   *
   * @param parent the parent path
   * @param child the child path
   */
  public Path(String parent, Path child) {
    this(new Path(parent), child);
  }

  /**
   * Create a new Path based on the child path resolved against the parent path.
   *
   * @param parent the parent path
   * @param child the child path
   * @throws IllegalArgumentException if the adjusted parent URI or the
   *         resolved components do not form a valid URI
   */
  public Path(Path parent, Path child) {
    // Add a slash to parent's path so resolution is compatible with URI's
    URI parentUri = parent.uri;
    String parentPath = parentUri.getPath();
    if (!(parentPath.equals("/") || parentPath.isEmpty())) {
      try {
        parentUri = new URI(parentUri.getScheme(), parentUri.getAuthority(),
            parentUri.getPath() + "/", null, parentUri.getFragment());
      } catch (URISyntaxException e) {
        throw new IllegalArgumentException(e);
      }
    }
    URI resolved = parentUri.resolve(child.uri);
    initialize(resolved.getScheme(), resolved.getAuthority(),
        resolved.getPath(), resolved.getFragment());
  }

  /**
   * Reject path strings that a Path cannot be built from.
   *
   * @param path the candidate path string
   * @throws IllegalArgumentException if {@code path} is null or empty
   */
  private void checkPathArg(String path) throws IllegalArgumentException {
    // disallow construction of a Path from an empty string
    if (path == null) {
      throw new IllegalArgumentException(
          "Can not create a Path from a null string");
    }
    if (path.length() == 0) {
      throw new IllegalArgumentException(
          "Can not create a Path from an empty string");
    }
  }

  /**
   * Construct a path from a String. Path strings are URIs, but with
   * unescaped elements and some additional normalization.
   *
   * @param pathString the path string
   * @throws IllegalArgumentException if {@code pathString} is null or empty,
   *         or if the parsed components do not form a valid URI
   */
  public Path(String pathString) throws IllegalArgumentException {
    checkPathArg(pathString);

    // We can't use 'new URI(String)' directly, since it assumes things are
    // escaped, which we don't require of Paths.

    // add a slash in front of paths with Windows drive letters
    if (hasWindowsDrive(pathString) && pathString.charAt(0) != '/') {
      pathString = "/" + pathString;
    }

    // parse uri components
    String scheme = null;
    String authority = null;

    int start = 0;

    // parse uri scheme, if any: a colon that appears before the first slash
    // (or with no slash at all) terminates the scheme
    int colon = pathString.indexOf(':');
    int slash = pathString.indexOf('/');
    if ((colon != -1) &&
        ((slash == -1) || (colon < slash))) { // has a scheme
      scheme = pathString.substring(0, colon);
      start = colon + 1;
    }

    // parse uri authority, if any
    if (pathString.startsWith("//", start) &&
        (pathString.length() - start > 2)) { // has authority
      int nextSlash = pathString.indexOf('/', start + 2);
      int authEnd = nextSlash > 0 ? nextSlash : pathString.length();
      authority = pathString.substring(start + 2, authEnd);
      start = authEnd;
    }

    // uri path is the rest of the string -- query & fragment not supported
    String path = pathString.substring(start, pathString.length());

    initialize(scheme, authority, path, null);
  }

  /**
   * Construct a path from a URI. The URI is stored in its normalized form.
   *
   * @param aUri the source URI
   */
  public Path(URI aUri) {
    uri = aUri.normalize();
  }

  /**
   * Construct a Path from components.
   *
   * @param scheme the scheme
   * @param authority the authority
   * @param path the path
   * @throws IllegalArgumentException if {@code path} is null or empty, or if
   *         the components do not form a valid URI
   */
  public Path(String scheme, String authority, String path) {
    checkPathArg(path);

    // add a slash in front of paths with Windows drive letters
    if (hasWindowsDrive(path) && path.charAt(0) != '/') {
      path = "/" + path;
    }

    // add "./" in front of Linux relative paths so that a path containing
    // a colon e.g. "a:b" will not be interpreted as scheme "a".
    if (!WINDOWS && path.charAt(0) != '/') {
      path = "./" + path;
    }

    initialize(scheme, authority, path, null);
  }

  /**
   * Build and store this Path's URI from its components. The path component
   * is passed through {@link #normalizePath(String, String)} first and the
   * resulting URI is normalized; no query component is ever set.
   *
   * @param scheme the URI scheme, may be null
   * @param authority the URI authority, may be null
   * @param path the URI path
   * @param fragment the URI fragment, may be null
   * @throws IllegalArgumentException wrapping the {@link URISyntaxException}
   *         if the components do not form a valid URI
   */
  private void initialize(String scheme, String authority, String path,
      String fragment) {
    try {
      this.uri = new URI(scheme, authority, normalizePath(scheme, path), null, fragment)
          .normalize();
    } catch (URISyntaxException e) {
      throw new IllegalArgumentException(e);
    }
  }

  /**
   * Merge 2 paths such that the second path is appended relative to the first.
   * The returned path has the scheme and authority of the first path. On
   * Windows, the drive specification in the second path is discarded.
   *
   * @param path1 the first path
   * @param path2 the second path, to be appended relative to path1
   * @return the merged path
   */
  public static Path mergePaths(Path path1, Path path2) {
    String path2Str = path2.toUri().getPath();
    path2Str = path2Str.substring(startPositionWithoutWindowsDrive(path2Str));
    // Add path components explicitly, because simply concatenating two path
    // string is not safe, for example:
    // "/" + "/foo" yields "//foo", which will be parsed as authority in Path
    return new Path(path1.toUri().getScheme(),
        path1.toUri().getAuthority(),
        path1.toUri().getPath() + path2Str);
  }

  /**
   * Normalize a path string to use non-duplicated forward slashes as
   * the path separator and remove any trailing path separators.
   *
   * @param scheme the URI scheme. Used to deduce whether we
   *               should replace backslashes or not
   * @param path the scheme-specific part
   * @return the normalized path string
   */
  private static String normalizePath(String scheme, String path) {
    // Remove double forward slashes.
    path = StringUtils.replace(path, "//", "/");

    // Remove backslashes if this looks like a Windows path. Avoid
    // the substitution if it looks like a non-local URI.
    if (WINDOWS &&
        (hasWindowsDrive(path) ||
         (scheme == null) ||
         (scheme.isEmpty()) ||
         (scheme.equals("file")))) {
      path = StringUtils.replace(path, "\\", "/");
    }

    // trim trailing slash from non-root path (ignoring windows drive)
    int minLength = startPositionWithoutWindowsDrive(path) + 1;
    if (path.length() > minLength && path.endsWith(SEPARATOR)) {
      path = path.substring(0, path.length() - 1);
    }

    return path;
  }

  /**
   * Report whether a path string starts with a drive-letter specifier.
   * Always false when {@link #WINDOWS} is false.
   *
   * @param path the path string to test
   * @return true on Windows hosts when {@code path} begins with an optional
   *         slash, a letter and a colon
   */
  private static boolean hasWindowsDrive(String path) {
    return (WINDOWS && HAS_DRIVE_LETTER_SPECIFIER.matcher(path).find());
  }

  /**
   * Compute the index just past any drive-letter specifier.
   *
   * @param path the path string to inspect
   * @return 3 for a "/C:" style prefix, 2 for a "C:" style prefix, or 0 when
   *         the path has no drive specifier
   */
  private static int startPositionWithoutWindowsDrive(String path) {
    if (hasWindowsDrive(path)) {
      return path.charAt(0) == SEPARATOR_CHAR ? 3 : 2;
    } else {
      return 0;
    }
  }

  /**
   * Determine whether a given path string represents an absolute path on
   * Windows. e.g. "C:/a/b" is an absolute path. "C:a/b" is not.
   *
   * @param pathString the path string to evaluate
   * @param slashed true if the given path is prefixed with "/"; this
   *                implementation never reads the value and detects the
   *                leading slash from {@code pathString} itself
   * @return true if the supplied path looks like an absolute path with a Windows
   *         drive-specifier, i.e. the character right after the drive
   *         specifier is a slash or a backslash
   */
  public static boolean isWindowsAbsolutePath(final String pathString,
      final boolean slashed) {
    int start = startPositionWithoutWindowsDrive(pathString);
    return start > 0
        && pathString.length() > start
        && ((pathString.charAt(start) == SEPARATOR_CHAR) ||
            (pathString.charAt(start) == '\\'));
  }

  /**
   * Convert this Path to a URI.
   *
   * @return this Path as a URI
   */
  public URI toUri() {
    return uri;
  }

  /**
   * Return the FileSystem that owns this Path.
   *
   * @param conf the configuration to use when resolving the FileSystem
   * @return the FileSystem that owns this Path
   * @throws java.io.IOException thrown if there's an issue resolving the
   *         FileSystem
   */
  public FileSystem getFileSystem(Configuration conf) throws IOException {
    return FileSystem.get(this.toUri(), conf);
  }

  /**
   * Returns true if the path component (i.e. directory) of this URI is
   * absolute <strong>and</strong> the scheme is null, <b>and</b> the authority
   * is null.
   *
   * @return whether the path is absolute and the URI has no scheme nor
   *         authority parts
   */
  public boolean isAbsoluteAndSchemeAuthorityNull() {
    return (isUriPathAbsolute() &&
        uri.getScheme() == null && uri.getAuthority() == null);
  }

  /**
   * Returns true if the path component (i.e. directory) of this URI is
   * absolute. Any Windows drive specifier is skipped before looking for the
   * leading separator.
   *
   * @return whether this URI's path is absolute
   */
  public boolean isUriPathAbsolute() {
    int start = startPositionWithoutWindowsDrive(uri.getPath());
    return uri.getPath().startsWith(SEPARATOR, start);
  }

  /**
   * Returns true if the path component (i.e. directory) of this URI is
   * absolute. This method is a wrapper for {@link #isUriPathAbsolute()}.
   *
   * @return whether this URI's path is absolute
   */
  public boolean isAbsolute() {
    return isUriPathAbsolute();
  }

  /**
   * Returns true if and only if this path represents the root of a file system.
   * Implemented as "has no parent", see {@link #getParent()}.
   *
   * @return true if and only if this path represents the root of a file system
   */
  public boolean isRoot() {
    return getParent() == null;
  }

  /**
   * Returns the final component of this path, i.e. everything after the
   * last separator of the URI path (the whole URI path when it contains no
   * separator).
   *
   * @return the final component of this path
   */
  public String getName() {
    String path = uri.getPath();
    int slash = path.lastIndexOf(SEPARATOR);
    return path.substring(slash + 1);
  }

  /**
   * Returns the parent of a path or null if at root.
   *
   * @return the parent of a path; null if the URI path is empty or is the
   *         root (ignoring any Windows drive specifier); {@link #CUR_DIR}
   *         as a Path when the URI path contains no separator
   */
  public Path getParent() {
    String path = uri.getPath();
    int lastSlash = path.lastIndexOf('/');
    int start = startPositionWithoutWindowsDrive(path);
    if ((path.length() == start) || // empty path
        (lastSlash == start && path.length() == start + 1)) { // at root
      return null;
    }
    String parent;
    if (lastSlash == -1) {
      parent = CUR_DIR;
    } else {
      // keep the root separator itself when the only slash is the root one
      parent = path.substring(0, lastSlash == start ? start + 1 : lastSlash);
    }
    return new Path(uri.getScheme(), uri.getAuthority(), parent);
  }

  /**
   * Adds a suffix to the final name in the path.
   *
   * @param suffix the suffix to add
   * @return a new path with the suffix added
   */
  public Path suffix(String suffix) {
    return new Path(getParent(), getName() + suffix);
  }

  @Override
  public String toString() {
    // we can't use uri.toString(), which escapes everything, because we want
    // illegal characters unescaped in the string, for glob processing, etc.
    StringBuilder buffer = new StringBuilder();
    if (uri.getScheme() != null) {
      buffer.append(uri.getScheme());
      buffer.append(":");
    }
    if (uri.getAuthority() != null) {
      buffer.append("//");
      buffer.append(uri.getAuthority());
    }
    if (uri.getPath() != null) {
      String path = uri.getPath();
      if (path.indexOf('/') == 0 &&
          hasWindowsDrive(path) &&        // has windows drive
          uri.getScheme() == null &&      // but no scheme
          uri.getAuthority() == null) {   // or authority
        path = path.substring(1);         // remove slash before drive
      }
      buffer.append(path);
    }
    if (uri.getFragment() != null) {
      buffer.append("#");
      buffer.append(uri.getFragment());
    }
    return buffer.toString();
  }

  /**
   * Two Paths are equal when their underlying URIs are equal.
   */
  @Override
  public boolean equals(Object o) {
    if (!(o instanceof Path)) {
      return false;
    }
    Path that = (Path) o;
    return this.uri.equals(that.uri);
  }

  @Override
  public int hashCode() {
    return uri.hashCode();
  }

  /**
   * Orders Paths by their underlying URIs.
   *
   * @param o the object to compare with; cast to Path without a type check
   * @return the result of comparing the two URIs
   */
  @Override
  public int compareTo(Object o) {
    Path that = (Path) o;
    return this.uri.compareTo(that.uri);
  }

  /**
   * Returns the number of elements in this path. The single-slash root path
   * has depth 0; otherwise the result is one more than the number of
   * separators found after the first character of the URI path.
   *
   * @return the number of elements in this path
   */
  public int depth() {
    String path = uri.getPath();
    int depth = 0;
    int slash = path.length() == 1 && path.charAt(0) == '/' ? -1 : 0;
    while (slash != -1) {
      depth++;
      slash = path.indexOf(SEPARATOR, slash + 1);
    }
    return depth;
  }

  /**
   * Returns a qualified path object for the {@link FileSystem}'s working
   * directory.
   *
   * @param fs the target FileSystem
   * @return a qualified path object for the FileSystem's working directory
   * @deprecated use {@link #makeQualified(URI, Path)}
   */
  @Deprecated
  public Path makeQualified(FileSystem fs) {
    return makeQualified(fs.getUri(), fs.getWorkingDirectory());
  }

  /**
   * Returns a qualified path object.
   *
   * @param defaultUri if this path is missing the scheme or authority
   *                   components, borrow them from this URI
   * @param workingDir if this path isn't absolute, treat it as relative to this
   *                   working directory
   * @return the (possibly working-directory-resolved) path unchanged if it
   *         already has a scheme and either has an authority or the default
   *         URI has none; otherwise a new path whose missing scheme and
   *         authority are filled in from {@code defaultUri}
   * @throws IllegalArgumentException if the qualified components do not form
   *         a valid URI
   */
  @InterfaceAudience.LimitedPrivate({"HDFS", "MapReduce"})
  public Path makeQualified(URI defaultUri, Path workingDir) {
    Path path = this;
    if (!isAbsolute()) {
      path = new Path(workingDir, this);
    }

    URI pathUri = path.toUri();

    String scheme = pathUri.getScheme();
    String authority = pathUri.getAuthority();
    String fragment = pathUri.getFragment();

    if (scheme != null &&
        (authority != null || defaultUri.getAuthority() == null)) {
      return path;
    }

    if (scheme == null) {
      scheme = defaultUri.getScheme();
    }

    if (authority == null) {
      authority = defaultUri.getAuthority();
      if (authority == null) {
        authority = "";
      }
    }

    URI newUri = null;
    try {
      newUri = new URI(scheme, authority,
          normalizePath(scheme, pathUri.getPath()), null, fragment);
    } catch (URISyntaxException e) {
      throw new IllegalArgumentException(e);
    }
    return new Path(newUri);
  }

  /**
   * Validate the contents of a deserialized Path, so as
   * to defend against malicious object streams.
   *
   * @throws InvalidObjectException if there's no URI
   */
  @Override
  public void validateObject() throws InvalidObjectException {
    if (uri == null) {
      throw new InvalidObjectException("No URI in deserialized Path");
    }
  }
}