001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018
019package org.apache.hadoop.fs;
020
021import java.io.IOException;
022import java.io.InvalidObjectException;
023import java.io.ObjectInputValidation;
024import java.io.Serializable;
025import java.net.URI;
026import java.net.URISyntaxException;
027import java.util.regex.Pattern;
028
029import org.apache.avro.reflect.Stringable;
030import org.apache.commons.lang.StringUtils;
031import org.apache.hadoop.HadoopIllegalArgumentException;
032import org.apache.hadoop.classification.InterfaceAudience;
033import org.apache.hadoop.classification.InterfaceStability;
034import org.apache.hadoop.conf.Configuration;
035
036/**
037 * Names a file or directory in a {@link FileSystem}.
038 * Path strings use slash as the directory separator.
039 */
040@Stringable
041@InterfaceAudience.Public
042@InterfaceStability.Stable
043public class Path implements Comparable, Serializable, ObjectInputValidation {
044
  /**
   * The directory separator, a slash.
   */
  public static final String SEPARATOR = "/";

  /**
   * The directory separator, a slash, as a character.
   */
  public static final char SEPARATOR_CHAR = '/';
  
  /**
   * The current directory, ".".
   */
  public static final String CUR_DIR = ".";
  
  /**
   * Whether the current host is a Windows machine. Evaluated once, from
   * whether the {@code os.name} system property starts with "Windows".
   */
  public static final boolean WINDOWS =
      System.getProperty("os.name").startsWith("Windows");

  /**
   * Pre-compiled regular expression matching a drive-letter specifier at the
   * start of a path: an optional leading slash, a single ASCII letter and a
   * colon (for example "C:" or "/C:").
   */
  private static final Pattern HAS_DRIVE_LETTER_SPECIFIER =
      Pattern.compile("^/?[a-zA-Z]:");

  /** Version identifier used by Java serialization. */
  private static final long serialVersionUID = 0xad00f;

  /** The URI backing this path; assigned by the constructors. */
  private URI uri; // a hierarchical uri
075
076  /**
077   * Test whether this Path uses a scheme and is relative.
078   * Pathnames with scheme and relative path are illegal.
079   */
080  void checkNotSchemeWithRelative() {
081    if (toUri().isAbsolute() && !isUriPathAbsolute()) {
082      throw new HadoopIllegalArgumentException(
083          "Unsupported name: has scheme but relative path-part");
084    }
085  }
086
087  void checkNotRelative() {
088    if (!isAbsolute() && toUri().getScheme() == null) {
089      throw new HadoopIllegalArgumentException("Path is relative");
090    }
091  }
092
093  /**
094   * Return a version of the given Path without the scheme information.
095   *
096   * @param path the source Path
097   * @return a copy of this Path without the scheme information
098   */
099  public static Path getPathWithoutSchemeAndAuthority(Path path) {
100    // This code depends on Path.toString() to remove the leading slash before
101    // the drive specification on Windows.
102    Path newPath = path.isUriPathAbsolute() ?
103      new Path(null, null, path.toUri().getPath()) :
104      path;
105    return newPath;
106  }
107
  /**
   * Create a new Path based on the child path resolved against the parent path.
   * Both strings are first turned into Paths with {@link #Path(String)} and
   * then combined by {@link #Path(Path, Path)}.
   *
   * @param parent the parent path, as a path string
   * @param child the child path, as a path string
   */
  public Path(String parent, String child) {
    this(new Path(parent), new Path(child));
  }
117
  /**
   * Create a new Path based on the child path resolved against the parent path.
   * The child string is first turned into a Path with {@link #Path(String)}
   * and then combined with the parent by {@link #Path(Path, Path)}.
   *
   * @param parent the parent path
   * @param child the child path, as a path string
   */
  public Path(Path parent, String child) {
    this(parent, new Path(child));
  }
127
  /**
   * Create a new Path based on the child path resolved against the parent path.
   * The parent string is first turned into a Path with {@link #Path(String)}
   * and then combined with the child by {@link #Path(Path, Path)}.
   *
   * @param parent the parent path, as a path string
   * @param child the child path
   */
  public Path(String parent, Path child) {
    this(new Path(parent), child);
  }
137
138  /**
139   * Create a new Path based on the child path resolved against the parent path.
140   *
141   * @param parent the parent path
142   * @param child the child path
143   */
144  public Path(Path parent, Path child) {
145    // Add a slash to parent's path so resolution is compatible with URI's
146    URI parentUri = parent.uri;
147    String parentPath = parentUri.getPath();
148    if (!(parentPath.equals("/") || parentPath.isEmpty())) {
149      try {
150        parentUri = new URI(parentUri.getScheme(), parentUri.getAuthority(),
151                      parentUri.getPath()+"/", null, parentUri.getFragment());
152      } catch (URISyntaxException e) {
153        throw new IllegalArgumentException(e);
154      }
155    }
156    URI resolved = parentUri.resolve(child.uri);
157    initialize(resolved.getScheme(), resolved.getAuthority(),
158               resolved.getPath(), resolved.getFragment());
159  }
160
161  private void checkPathArg( String path ) throws IllegalArgumentException {
162    // disallow construction of a Path from an empty string
163    if ( path == null ) {
164      throw new IllegalArgumentException(
165          "Can not create a Path from a null string");
166    }
167    if( path.length() == 0 ) {
168       throw new IllegalArgumentException(
169           "Can not create a Path from an empty string");
170    }   
171  }
172  
173  /**
174   * Construct a path from a String.  Path strings are URIs, but with
175   * unescaped elements and some additional normalization.
176   *
177   * @param pathString the path string
178   */
179  public Path(String pathString) throws IllegalArgumentException {
180    checkPathArg( pathString );
181    
182    // We can't use 'new URI(String)' directly, since it assumes things are
183    // escaped, which we don't require of Paths. 
184    
185    // add a slash in front of paths with Windows drive letters
186    if (hasWindowsDrive(pathString) && pathString.charAt(0) != '/') {
187      pathString = "/" + pathString;
188    }
189
190    // parse uri components
191    String scheme = null;
192    String authority = null;
193
194    int start = 0;
195
196    // parse uri scheme, if any
197    int colon = pathString.indexOf(':');
198    int slash = pathString.indexOf('/');
199    if ((colon != -1) &&
200        ((slash == -1) || (colon < slash))) {     // has a scheme
201      scheme = pathString.substring(0, colon);
202      start = colon+1;
203    }
204
205    // parse uri authority, if any
206    if (pathString.startsWith("//", start) &&
207        (pathString.length()-start > 2)) {       // has authority
208      int nextSlash = pathString.indexOf('/', start+2);
209      int authEnd = nextSlash > 0 ? nextSlash : pathString.length();
210      authority = pathString.substring(start+2, authEnd);
211      start = authEnd;
212    }
213
214    // uri path is the rest of the string -- query & fragment not supported
215    String path = pathString.substring(start, pathString.length());
216
217    initialize(scheme, authority, path, null);
218  }
219
220  /**
221   * Construct a path from a URI
222   *
223   * @param aUri the source URI
224   */
225  public Path(URI aUri) {
226    uri = aUri.normalize();
227  }
228  
229  /**
230   * Construct a Path from components.
231   *
232   * @param scheme the scheme
233   * @param authority the authority
234   * @param path the path
235   */
236  public Path(String scheme, String authority, String path) {
237    checkPathArg( path );
238
239    // add a slash in front of paths with Windows drive letters
240    if (hasWindowsDrive(path) && path.charAt(0) != '/') {
241      path = "/" + path;
242    }
243
244    // add "./" in front of Linux relative paths so that a path containing
245    // a colon e.q. "a:b" will not be interpreted as scheme "a".
246    if (!WINDOWS && path.charAt(0) != '/') {
247      path = "./" + path;
248    }
249
250    initialize(scheme, authority, path, null);
251  }
252
253  private void initialize(String scheme, String authority, String path,
254      String fragment) {
255    try {
256      this.uri = new URI(scheme, authority, normalizePath(scheme, path), null, fragment)
257        .normalize();
258    } catch (URISyntaxException e) {
259      throw new IllegalArgumentException(e);
260    }
261  }
262
263  /**
264   * Merge 2 paths such that the second path is appended relative to the first.
265   * The returned path has the scheme and authority of the first path.  On
266   * Windows, the drive specification in the second path is discarded.
267   * 
268   * @param path1 the first path
269   * @param path2 the second path, to be appended relative to path1
270   * @return the merged path
271   */
272  public static Path mergePaths(Path path1, Path path2) {
273    String path2Str = path2.toUri().getPath();
274    path2Str = path2Str.substring(startPositionWithoutWindowsDrive(path2Str));
275    // Add path components explicitly, because simply concatenating two path
276    // string is not safe, for example:
277    // "/" + "/foo" yields "//foo", which will be parsed as authority in Path
278    return new Path(path1.toUri().getScheme(), 
279        path1.toUri().getAuthority(), 
280        path1.toUri().getPath() + path2Str);
281  }
282
283  /**
284   * Normalize a path string to use non-duplicated forward slashes as
285   * the path separator and remove any trailing path separators.
286   *
287   * @param scheme the URI scheme. Used to deduce whether we
288   * should replace backslashes or not
289   * @param path the scheme-specific part
290   * @return the normalized path string
291   */
292  private static String normalizePath(String scheme, String path) {
293    // Remove double forward slashes.
294    path = StringUtils.replace(path, "//", "/");
295
296    // Remove backslashes if this looks like a Windows path. Avoid
297    // the substitution if it looks like a non-local URI.
298    if (WINDOWS &&
299        (hasWindowsDrive(path) ||
300         (scheme == null) ||
301         (scheme.isEmpty()) ||
302         (scheme.equals("file")))) {
303      path = StringUtils.replace(path, "\\", "/");
304    }
305    
306    // trim trailing slash from non-root path (ignoring windows drive)
307    int minLength = startPositionWithoutWindowsDrive(path) + 1;
308    if (path.length() > minLength && path.endsWith(SEPARATOR)) {
309      path = path.substring(0, path.length()-1);
310    }
311    
312    return path;
313  }
314
315  private static boolean hasWindowsDrive(String path) {
316    return (WINDOWS && HAS_DRIVE_LETTER_SPECIFIER.matcher(path).find());
317  }
318
319  private static int startPositionWithoutWindowsDrive(String path) {
320    if (hasWindowsDrive(path)) {
321      return path.charAt(0) ==  SEPARATOR_CHAR ? 3 : 2;
322    } else {
323      return 0;
324    }
325  }
326  
327  /**
328   * Determine whether a given path string represents an absolute path on
329   * Windows. e.g. "C:/a/b" is an absolute path. "C:a/b" is not.
330   *
331   * @param pathString the path string to evaluate
332   * @param slashed true if the given path is prefixed with "/"
333   * @return true if the supplied path looks like an absolute path with a Windows
334   * drive-specifier
335   */
336  public static boolean isWindowsAbsolutePath(final String pathString,
337                                              final boolean slashed) {
338    int start = startPositionWithoutWindowsDrive(pathString);
339    return start > 0
340        && pathString.length() > start
341        && ((pathString.charAt(start) == SEPARATOR_CHAR) ||
342            (pathString.charAt(start) == '\\'));
343  }
344
345  /**
346   * Convert this Path to a URI.
347   *
348   * @return this Path as a URI
349   */
350  public URI toUri() { return uri; }
351
352  /**
353   * Return the FileSystem that owns this Path.
354   *
355   * @param conf the configuration to use when resolving the FileSystem
356   * @return the FileSystem that owns this Path
357   * @throws java.io.IOException thrown if there's an issue resolving the
358   * FileSystem
359   */
360  public FileSystem getFileSystem(Configuration conf) throws IOException {
361    return FileSystem.get(this.toUri(), conf);
362  }
363
364  /**
365   * Returns true if the path component (i.e. directory) of this URI is
366   * absolute <strong>and</strong> the scheme is null, <b>and</b> the authority
367   * is null.
368   *
369   * @return whether the path is absolute and the URI has no scheme nor
370   * authority parts
371   */
372  public boolean isAbsoluteAndSchemeAuthorityNull() {
373    return  (isUriPathAbsolute() && 
374        uri.getScheme() == null && uri.getAuthority() == null);
375  }
376  
377  /**
378   * Returns true if the path component (i.e. directory) of this URI is
379   * absolute.
380   *
381   * @return whether this URI's path is absolute
382   */
383  public boolean isUriPathAbsolute() {
384    int start = startPositionWithoutWindowsDrive(uri.getPath());
385    return uri.getPath().startsWith(SEPARATOR, start);
386   }
387  
388  /**
389   * Returns true if the path component (i.e. directory) of this URI is
390   * absolute.  This method is a wrapper for {@link #isUriPathAbsolute()}.
391   *
392   * @return whether this URI's path is absolute
393   */
394  public boolean isAbsolute() {
395     return isUriPathAbsolute();
396  }
397
398  /**
399   * Returns true if and only if this path represents the root of a file system.
400   *
401   * @return true if and only if this path represents the root of a file system
402   */
403  public boolean isRoot() {
404    return getParent() == null;
405  }
406
407  /**
408   * Returns the final component of this path.
409   *
410   * @return the final component of this path
411   */
412  public String getName() {
413    String path = uri.getPath();
414    int slash = path.lastIndexOf(SEPARATOR);
415    return path.substring(slash+1);
416  }
417
418  /**
419   * Returns the parent of a path or null if at root.
420   * @return the parent of a path or null if at root
421   */
422  public Path getParent() {
423    String path = uri.getPath();
424    int lastSlash = path.lastIndexOf('/');
425    int start = startPositionWithoutWindowsDrive(path);
426    if ((path.length() == start) ||               // empty path
427        (lastSlash == start && path.length() == start+1)) { // at root
428      return null;
429    }
430    String parent;
431    if (lastSlash==-1) {
432      parent = CUR_DIR;
433    } else {
434      parent = path.substring(0, lastSlash==start?start+1:lastSlash);
435    }
436    return new Path(uri.getScheme(), uri.getAuthority(), parent);
437  }
438
439  /**
440   * Adds a suffix to the final name in the path.
441   *
442   * @param suffix the suffix to add
443   * @return a new path with the suffix added
444   */
445  public Path suffix(String suffix) {
446    return new Path(getParent(), getName()+suffix);
447  }
448
449  @Override
450  public String toString() {
451    // we can't use uri.toString(), which escapes everything, because we want
452    // illegal characters unescaped in the string, for glob processing, etc.
453    StringBuilder buffer = new StringBuilder();
454    if (uri.getScheme() != null) {
455      buffer.append(uri.getScheme());
456      buffer.append(":");
457    }
458    if (uri.getAuthority() != null) {
459      buffer.append("//");
460      buffer.append(uri.getAuthority());
461    }
462    if (uri.getPath() != null) {
463      String path = uri.getPath();
464      if (path.indexOf('/')==0 &&
465          hasWindowsDrive(path) &&                // has windows drive
466          uri.getScheme() == null &&              // but no scheme
467          uri.getAuthority() == null)             // or authority
468        path = path.substring(1);                 // remove slash before drive
469      buffer.append(path);
470    }
471    if (uri.getFragment() != null) {
472      buffer.append("#");
473      buffer.append(uri.getFragment());
474    }
475    return buffer.toString();
476  }
477
478  @Override
479  public boolean equals(Object o) {
480    if (!(o instanceof Path)) {
481      return false;
482    }
483    Path that = (Path)o;
484    return this.uri.equals(that.uri);
485  }
486
487  @Override
488  public int hashCode() {
489    return uri.hashCode();
490  }
491
492  @Override
493  public int compareTo(Object o) {
494    Path that = (Path)o;
495    return this.uri.compareTo(that.uri);
496  }
497  
498  /**
499   * Returns the number of elements in this path.
500   * @return the number of elements in this path
501   */
502  public int depth() {
503    String path = uri.getPath();
504    int depth = 0;
505    int slash = path.length()==1 && path.charAt(0)=='/' ? -1 : 0;
506    while (slash != -1) {
507      depth++;
508      slash = path.indexOf(SEPARATOR, slash+1);
509    }
510    return depth;
511  }
512
513  /**
514   * Returns a qualified path object for the {@link FileSystem}'s working
515   * directory.
516   *  
517   * @param fs the target FileSystem
518   * @return a qualified path object for the FileSystem's working directory
519   * @deprecated use {@link #makeQualified(URI, Path)}
520   */
521  @Deprecated
522  public Path makeQualified(FileSystem fs) {
523    return makeQualified(fs.getUri(), fs.getWorkingDirectory());
524  }
525  
526  /**
527   * Returns a qualified path object.
528   *
529   * @param defaultUri if this path is missing the scheme or authority
530   * components, borrow them from this URI
531   * @param workingDir if this path isn't absolute, treat it as relative to this
532   * working directory
533   * @return this path if it contains a scheme and authority and is absolute, or
534   * a new path that includes a path and authority and is fully qualified
535   */
536  @InterfaceAudience.LimitedPrivate({"HDFS", "MapReduce"})
537  public Path makeQualified(URI defaultUri, Path workingDir ) {
538    Path path = this;
539    if (!isAbsolute()) {
540      path = new Path(workingDir, this);
541    }
542
543    URI pathUri = path.toUri();
544      
545    String scheme = pathUri.getScheme();
546    String authority = pathUri.getAuthority();
547    String fragment = pathUri.getFragment();
548
549    if (scheme != null &&
550        (authority != null || defaultUri.getAuthority() == null))
551      return path;
552
553    if (scheme == null) {
554      scheme = defaultUri.getScheme();
555    }
556
557    if (authority == null) {
558      authority = defaultUri.getAuthority();
559      if (authority == null) {
560        authority = "";
561      }
562    }
563    
564    URI newUri = null;
565    try {
566      newUri = new URI(scheme, authority , 
567        normalizePath(scheme, pathUri.getPath()), null, fragment);
568    } catch (URISyntaxException e) {
569      throw new IllegalArgumentException(e);
570    }
571    return new Path(newUri);
572  }
573
574  /**
575   * Validate the contents of a deserialized Path, so as
576   * to defend against malicious object streams.
577   * @throws InvalidObjectException if there's no URI
578   */
579  @Override
580  public void validateObject() throws InvalidObjectException {
581    if (uri == null) {
582      throw new InvalidObjectException("No URI in deserialized Path");
583    }
584
585  }
586}