4
4
import os
5
5
import re
6
6
import fnmatch
7
+ import functools
7
8
import itertools
9
+ import operator
8
10
import stat
9
11
import sys
10
12
11
- __all__ = ["glob" , "iglob" , "escape" ]
13
+
14
+ __all__ = ["glob" , "iglob" , "escape" , "translate" ]
12
15
13
16
def glob (pathname , * , root_dir = None , dir_fd = None , recursive = False ,
14
17
include_hidden = False ):
@@ -104,8 +107,8 @@ def _iglob(pathname, root_dir, dir_fd, recursive, dironly,
104
107
105
108
def _glob1(dirname, pattern, dir_fd, dironly, include_hidden=False):
    """Return the names listed for *dirname* that match *pattern*.

    The candidate names come from ``_listdir(dirname, dir_fd, dironly)``.
    Names that ``_ishidden`` reports as hidden are discarded unless
    *include_hidden* is true or *pattern* itself is reported as hidden.
    The surviving names are filtered with ``fnmatch.filter``.
    """
    names = _listdir(dirname, dir_fd, dironly)
    # Only strip hidden names when neither the caller nor the pattern asks
    # for them; otherwise every listed name is offered to fnmatch.
    if not (include_hidden or _ishidden(pattern)):
        names = (x for x in names if not _ishidden(x))
    return fnmatch.filter(names, pattern)
110
113
111
114
def _glob0 (dirname , basename , dir_fd , dironly , include_hidden = False ):
@@ -119,12 +122,19 @@ def _glob0(dirname, basename, dir_fd, dironly, include_hidden=False):
119
122
return [basename ]
120
123
return []
121
124
122
- # Following functions are not public but can be used by third-party code.
125
# Template handed to warnings._deprecated() by the legacy helpers below.
_deprecated_function_message = (
    "{name} is deprecated and will be removed in Python {remove}. Use "
    "glob.glob and pass a directory to its root_dir argument instead."
)


def glob0(dirname, pattern):
    """Deprecated shim around ``_glob0``.

    Emits a deprecation notice via ``warnings._deprecated`` (removal version
    3.15) and then delegates to ``_glob0(dirname, pattern, None, False)``.
    """
    # Imported lazily so that importing this module does not pull in warnings.
    import warnings
    warnings._deprecated("glob.glob0", _deprecated_function_message, remove=(3, 15))
    return _glob0(dirname, pattern, None, False)


def glob1(dirname, pattern):
    """Deprecated shim around ``_glob1``.

    Emits a deprecation notice via ``warnings._deprecated`` (removal version
    3.15) and then delegates to ``_glob1(dirname, pattern, None, False)``.
    """
    # Imported lazily so that importing this module does not pull in warnings.
    import warnings
    warnings._deprecated("glob.glob1", _deprecated_function_message, remove=(3, 15))
    return _glob1(dirname, pattern, None, False)
129
139
130
140
# This helper function recursively yields relative pathnames inside a literal
@@ -249,4 +259,287 @@ def escape(pathname):
249
259
return drive + pathname
250
260
251
261
262
+ _special_parts = ('' , '.' , '..' )
252
263
_dir_open_flags = os .O_RDONLY | getattr (os , 'O_DIRECTORY' , 0 )
264
+ _no_recurse_symlinks = object ()
265
+
266
+
267
def translate(pat, *, recursive=False, include_hidden=False, seps=None):
    """Translate a pathname with shell wildcards to a regular expression.

    If `recursive` is true, the pattern segment '**' will match any number of
    path segments.

    If `include_hidden` is true, wildcards can match path segments beginning
    with a dot ('.').

    If a sequence of separator characters is given to `seps`, they will be
    used to split the pattern into segments and match path separators. If not
    given, os.path.sep and os.path.altsep (where available) are used.

    Returns the regular expression as a string of the form ``(?s:...)\\Z``.
    """
    if not seps:
        if os.path.altsep:
            seps = (os.path.sep, os.path.altsep)
        else:
            seps = os.path.sep
    escaped_seps = ''.join(map(re.escape, seps))
    # A single separator is matched literally; several need a character class.
    any_sep = f'[{escaped_seps}]' if len(seps) > 1 else escaped_seps
    not_sep = f'[^{escaped_seps}]'
    if include_hidden:
        one_last_segment = f'{not_sep}+'
        one_segment = f'{one_last_segment}{any_sep}'
        any_segments = f'(?:.+{any_sep})?'
        any_last_segments = '.*'
    else:
        # The first character of every matched segment must not be a dot.
        one_last_segment = f'[^{escaped_seps}.]{not_sep}*'
        one_segment = f'{one_last_segment}{any_sep}'
        any_segments = f'(?:{one_segment})*'
        any_last_segments = f'{any_segments}(?:{one_last_segment})?'

    results = []
    parts = re.split(any_sep, pat)
    last_part_idx = len(parts) - 1
    for idx, part in enumerate(parts):
        if part == '*':
            results.append(one_segment if idx < last_part_idx else one_last_segment)
        elif recursive and part == '**':
            if idx < last_part_idx:
                # Consecutive '**' segments collapse: only the last one of a
                # run contributes to the expression.
                if parts[idx + 1] != '**':
                    results.append(any_segments)
            else:
                results.append(any_last_segments)
        else:
            if part:
                if not include_hidden and part[0] in '*?':
                    # A leading wildcard must not match a leading dot.
                    results.append(r'(?!\.)')
                results.extend(fnmatch._translate(part, f'{not_sep}*', not_sep))
            if idx < last_part_idx:
                results.append(any_sep)
    res = ''.join(results)
    return fr'(?s:{res})\Z'
320
+
321
+
322
@functools.lru_cache(maxsize=512)
def _compile_pattern(pat, sep, case_sensitive, recursive=True):
    """Compile given glob pattern to a re.Pattern object (observing case
    sensitivity).

    The pattern is translated with ``include_hidden=True`` and *sep* as the
    separator set. The return value is the bound ``match`` method of the
    compiled pattern, not the pattern object itself. Results are memoized
    (up to 512 entries), so all arguments must be hashable.
    """
    flags = re.NOFLAG if case_sensitive else re.IGNORECASE
    regex = translate(pat, recursive=recursive, include_hidden=True, seps=sep)
    return re.compile(regex, flags=flags).match
329
+
330
+
331
class _Globber:
    """Class providing shell-style pattern matching and globbing.

    A globber is configured with a path separator, case handling flags and a
    `recursive` setting. selector() turns a list of pattern segments into a
    chain of generator functions; calling the resulting function with a
    starting path yields the matching paths.

    The low-level methods (lstat, add_slash, scandir, concat_path and
    parse_entry) as defined here call methods on the path objects they are
    given (lstat(), joinpath(), iterdir(), with_segments()); subclasses may
    replace them to work with other path representations.
    """

    def __init__(self, sep, case_sensitive, case_pedantic=False, recursive=False):
        self.sep = sep
        self.case_sensitive = case_sensitive
        self.case_pedantic = case_pedantic
        self.recursive = recursive

    # Low-level methods

    # methodcaller objects are not descriptors, so `self.lstat(path)` calls
    # `path.lstat()` and `self.add_slash(path)` calls `path.joinpath('')`.
    lstat = operator.methodcaller('lstat')
    add_slash = operator.methodcaller('joinpath', '')

    @staticmethod
    def scandir(path):
        """Emulates os.scandir(), which returns an object that can be used as
        a context manager. This method is called by the wildcard and
        recursive selectors.
        """
        return contextlib.nullcontext(path.iterdir())

    @staticmethod
    def concat_path(path, text):
        """Appends text to the given path.
        """
        return path.with_segments(path._raw_path + text)

    @staticmethod
    def parse_entry(entry):
        """Returns the path of an entry yielded from scandir().
        """
        return entry

    # High-level methods

    def compile(self, pat):
        """Returns a match function for *pat*, compiled with this globber's
        separator, case sensitivity and recursive setting.
        """
        return _compile_pattern(pat, self.sep, self.case_sensitive, self.recursive)

    def selector(self, parts):
        """Returns a function that selects from a given path, walking and
        filtering according to the glob-style pattern parts in *parts*.

        *parts* is consumed from its end (via pop()), so the list is mutated
        and the next segment to process is expected to be its last item.
        """
        if not parts:
            return self.select_exists
        part = parts.pop()
        if self.recursive and part == '**':
            selector = self.recursive_selector
        elif part in _special_parts:
            selector = self.special_selector
        elif not self.case_pedantic and magic_check.search(part) is None:
            selector = self.literal_selector
        else:
            selector = self.wildcard_selector
        return selector(part, parts)

    def special_selector(self, part, parts):
        """Returns a function that selects special children of the given path.
        """
        select_next = self.selector(parts)

        def select_special(path, exists=False):
            path = self.concat_path(self.add_slash(path), part)
            return select_next(path, exists)
        return select_special

    def literal_selector(self, part, parts):
        """Returns a function that selects a literal descendant of a path.
        """

        # Optimization: consume and join any subsequent literal parts here,
        # rather than leaving them for the next selector. This reduces the
        # number of string concatenation operations and calls to add_slash().
        while parts and magic_check.search(parts[-1]) is None:
            part += self.sep + parts.pop()

        select_next = self.selector(parts)

        def select_literal(path, exists=False):
            path = self.concat_path(self.add_slash(path), part)
            # The literal was appended without touching the filesystem, so the
            # resulting path is never passed on as already known to exist.
            return select_next(path, exists=False)
        return select_literal

    def wildcard_selector(self, part, parts):
        """Returns a function that selects direct children of a given path,
        filtering by pattern.
        """

        # A bare '*' accepts every entry name, so no regex is needed.
        match = None if part == '*' else self.compile(part)
        dir_only = bool(parts)
        if dir_only:
            select_next = self.selector(parts)

        def select_wildcard(path, exists=False):
            try:
                # We must close the scandir() object before proceeding to
                # avoid exhausting file descriptors when globbing deep trees.
                with self.scandir(path) as scandir_it:
                    entries = list(scandir_it)
            except OSError:
                pass
            else:
                for entry in entries:
                    if match is None or match(entry.name):
                        if dir_only:
                            try:
                                if not entry.is_dir():
                                    continue
                            except OSError:
                                continue
                        entry_path = self.parse_entry(entry)
                        if dir_only:
                            yield from select_next(entry_path, exists=True)
                        else:
                            yield entry_path
        return select_wildcard

    def recursive_selector(self, part, parts):
        """Returns a function that selects a given path and all its children,
        recursively, filtering by pattern.
        """
        # Optimization: consume following '**' parts, which have no effect.
        while parts and parts[-1] == '**':
            parts.pop()

        # Optimization: consume and join any following non-special parts here,
        # rather than leaving them for the next selector. They're used to
        # build a regular expression, which we use to filter the results of
        # the recursive walk. As a result, non-special pattern segments
        # following a '**' wildcard don't require additional filesystem access
        # to expand.
        follow_symlinks = self.recursive is not _no_recurse_symlinks
        if follow_symlinks:
            while parts and parts[-1] not in _special_parts:
                part += self.sep + parts.pop()

        match = None if part == '**' else self.compile(part)
        dir_only = bool(parts)
        select_next = self.selector(parts)

        def select_recursive(path, exists=False):
            path = self.add_slash(path)
            # Matching starts after the starting path's own text, so the
            # pattern is applied to the portion below it only.
            match_pos = len(str(path))
            if match is None or match(str(path), match_pos):
                yield from select_next(path, exists)
            stack = [path]
            while stack:
                yield from select_recursive_step(stack, match_pos)

        def select_recursive_step(stack, match_pos):
            path = stack.pop()
            try:
                # We must close the scandir() object before proceeding to
                # avoid exhausting file descriptors when globbing deep trees.
                with self.scandir(path) as scandir_it:
                    entries = list(scandir_it)
            except OSError:
                pass
            else:
                for entry in entries:
                    is_dir = False
                    try:
                        if entry.is_dir(follow_symlinks=follow_symlinks):
                            is_dir = True
                    except OSError:
                        pass

                    if is_dir or not dir_only:
                        entry_path = self.parse_entry(entry)
                        if match is None or match(str(entry_path), match_pos):
                            if dir_only:
                                yield from select_next(entry_path, exists=True)
                            else:
                                # Optimization: directly yield the path if this is
                                # last pattern part.
                                yield entry_path
                        if is_dir:
                            stack.append(entry_path)

        return select_recursive

    def select_exists(self, path, exists=False):
        """Yields the given path, if it exists.
        """
        if exists:
            # Optimization: this path is already known to exist, e.g. because
            # it was returned from os.scandir(), so we skip calling lstat().
            yield path
        else:
            try:
                self.lstat(path)
                yield path
            except OSError:
                pass
525
+
526
+
527
class _StringGlobber(_Globber):
    """_Globber variant whose paths are plain strings.

    The low-level hooks are replaced with os functions: entries come from
    os.scandir(), an entry's path is its `path` attribute, and text is
    appended with ordinary string concatenation.
    """
    lstat = staticmethod(os.lstat)
    scandir = staticmethod(os.scandir)
    parse_entry = operator.attrgetter('path')
    concat_path = operator.add

    if os.name == 'nt':
        @staticmethod
        def add_slash(pathname):
            """Returns *pathname* with a trailing backslash, unless the part
            after its root is empty or already ends with a separator.
            """
            tail = os.path.splitroot(pathname)[2]
            if not tail or tail[-1] in '\\/':
                return pathname
            return f'{pathname}\\'
    else:
        @staticmethod
        def add_slash(pathname):
            """Returns *pathname* with a trailing slash, unless it is empty
            or already ends with one.
            """
            if not pathname or pathname[-1] == '/':
                return pathname
            return f'{pathname}/'
0 commit comments