glob.py 5.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174
  1. """Filename globbing utility."""
  2. import os
  3. import re
  4. import fnmatch
  5. import sys
  6. __all__ = ["glob", "iglob", "escape"]
  7. def glob(pathname, *, recursive=False):
  8. """Return a list of paths matching a pathname pattern.
  9. The pattern may contain simple shell-style wildcards a la
  10. fnmatch. However, unlike fnmatch, filenames starting with a
  11. dot are special cases that are not matched by '*' and '?'
  12. patterns.
  13. If recursive is true, the pattern '**' will match any files and
  14. zero or more directories and subdirectories.
  15. """
  16. return list(iglob(pathname, recursive=recursive))
  17. def iglob(pathname, *, recursive=False):
  18. """Return an iterator which yields the paths matching a pathname pattern.
  19. The pattern may contain simple shell-style wildcards a la
  20. fnmatch. However, unlike fnmatch, filenames starting with a
  21. dot are special cases that are not matched by '*' and '?'
  22. patterns.
  23. If recursive is true, the pattern '**' will match any files and
  24. zero or more directories and subdirectories.
  25. """
  26. sys.audit("glob.glob", pathname, recursive)
  27. it = _iglob(pathname, recursive, False)
  28. if recursive and _isrecursive(pathname):
  29. s = next(it) # skip empty string
  30. assert not s
  31. return it
  32. def _iglob(pathname, recursive, dironly):
  33. dirname, basename = os.path.split(pathname)
  34. if not has_magic(pathname):
  35. assert not dironly
  36. if basename:
  37. if os.path.lexists(pathname):
  38. yield pathname
  39. else:
  40. # Patterns ending with a slash should match only directories
  41. if os.path.isdir(dirname):
  42. yield pathname
  43. return
  44. if not dirname:
  45. if recursive and _isrecursive(basename):
  46. yield from _glob2(dirname, basename, dironly)
  47. else:
  48. yield from _glob1(dirname, basename, dironly)
  49. return
  50. # `os.path.split()` returns the argument itself as a dirname if it is a
  51. # drive or UNC path. Prevent an infinite recursion if a drive or UNC path
  52. # contains magic characters (i.e. r'\\?\C:').
  53. if dirname != pathname and has_magic(dirname):
  54. dirs = _iglob(dirname, recursive, True)
  55. else:
  56. dirs = [dirname]
  57. if has_magic(basename):
  58. if recursive and _isrecursive(basename):
  59. glob_in_dir = _glob2
  60. else:
  61. glob_in_dir = _glob1
  62. else:
  63. glob_in_dir = _glob0
  64. for dirname in dirs:
  65. for name in glob_in_dir(dirname, basename, dironly):
  66. yield os.path.join(dirname, name)
  67. # These 2 helper functions non-recursively glob inside a literal directory.
  68. # They return a list of basenames. _glob1 accepts a pattern while _glob0
  69. # takes a literal basename (so it only has to check for its existence).
  70. def _glob1(dirname, pattern, dironly):
  71. names = list(_iterdir(dirname, dironly))
  72. if not _ishidden(pattern):
  73. names = (x for x in names if not _ishidden(x))
  74. return fnmatch.filter(names, pattern)
  75. def _glob0(dirname, basename, dironly):
  76. if not basename:
  77. # `os.path.split()` returns an empty basename for paths ending with a
  78. # directory separator. 'q*x/' should match only directories.
  79. if os.path.isdir(dirname):
  80. return [basename]
  81. else:
  82. if os.path.lexists(os.path.join(dirname, basename)):
  83. return [basename]
  84. return []
  85. # Following functions are not public but can be used by third-party code.
  86. def glob0(dirname, pattern):
  87. return _glob0(dirname, pattern, False)
  88. def glob1(dirname, pattern):
  89. return _glob1(dirname, pattern, False)
  90. # This helper function recursively yields relative pathnames inside a literal
  91. # directory.
  92. def _glob2(dirname, pattern, dironly):
  93. assert _isrecursive(pattern)
  94. yield pattern[:0]
  95. yield from _rlistdir(dirname, dironly)
  96. # If dironly is false, yields all file names inside a directory.
  97. # If dironly is true, yields only directory names.
  98. def _iterdir(dirname, dironly):
  99. if not dirname:
  100. if isinstance(dirname, bytes):
  101. dirname = bytes(os.curdir, 'ASCII')
  102. else:
  103. dirname = os.curdir
  104. try:
  105. with os.scandir(dirname) as it:
  106. for entry in it:
  107. try:
  108. if not dironly or entry.is_dir():
  109. yield entry.name
  110. except OSError:
  111. pass
  112. except OSError:
  113. return
  114. # Recursively yields relative pathnames inside a literal directory.
  115. def _rlistdir(dirname, dironly):
  116. names = list(_iterdir(dirname, dironly))
  117. for x in names:
  118. if not _ishidden(x):
  119. yield x
  120. path = os.path.join(dirname, x) if dirname else x
  121. for y in _rlistdir(path, dironly):
  122. yield os.path.join(x, y)
  123. magic_check = re.compile('([*?[])')
  124. magic_check_bytes = re.compile(b'([*?[])')
  125. def has_magic(s):
  126. if isinstance(s, bytes):
  127. match = magic_check_bytes.search(s)
  128. else:
  129. match = magic_check.search(s)
  130. return match is not None
  131. def _ishidden(path):
  132. return path[0] in ('.', b'.'[0])
  133. def _isrecursive(pattern):
  134. if isinstance(pattern, bytes):
  135. return pattern == b'**'
  136. else:
  137. return pattern == '**'
  138. def escape(pathname):
  139. """Escape all special characters.
  140. """
  141. # Escaping is done by wrapping any of "*?[" between square brackets.
  142. # Metacharacters do not work in the drive part and shouldn't be escaped.
  143. drive, pathname = os.path.splitdrive(pathname)
  144. if isinstance(pathname, bytes):
  145. pathname = magic_check_bytes.sub(br'[\1]', pathname)
  146. else:
  147. pathname = magic_check.sub(r'[\1]', pathname)
  148. return drive + pathname