Skip to content

Commit

Permalink
gh-106752: Sync with zipp 3.16.2 (#106757)
Browse files Browse the repository at this point in the history
* gh-106752: Sync with zipp 3.16.2

* Add blurb
  • Loading branch information
jaraco authored Jul 15, 2023
1 parent 2566b74 commit 22980dc
Show file tree
Hide file tree
Showing 6 changed files with 204 additions and 27 deletions.
81 changes: 80 additions & 1 deletion Lib/test/test_zipfile/_path/test_complexity.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
import unittest
import io
import itertools
import math
import re
import string
import unittest
import zipfile

from ._functools import compose
Expand All @@ -9,9 +13,11 @@


big_o = import_or_skip('big_o')
pytest = import_or_skip('pytest')


class TestComplexity(unittest.TestCase):
@pytest.mark.flaky
def test_implied_dirs_performance(self):
best, others = big_o.big_o(
compose(consume, zipfile.CompleteDirs._implied_dirs),
Expand All @@ -22,3 +28,76 @@ def test_implied_dirs_performance(self):
min_n=1,
)
assert best <= big_o.complexities.Linear

def make_zip_path(self, depth=1, width=1) -> zipfile.Path:
    """
    Build an in-memory archive containing ``width`` empty ``.txt``
    members under each of ``depth`` directory prefixes, and return
    it wrapped in a Path.
    """
    archive = zipfile.ZipFile(io.BytesIO(), mode='w')
    # Resolve both axes up front, then write one member per
    # (directory, name) combination, directories varying slowest.
    directories = list(self.make_deep_paths(depth))
    stems = list(self.make_names(width))
    for directory in directories:
        for stem in stems:
            archive.writestr(f"{directory}{stem}.txt", b'')
    archive.filename = "big un.zip"
    return zipfile.Path(archive)

@classmethod
def make_names(cls, width, letters=string.ascii_lowercase):
"""
>>> list(TestComplexity.make_names(2))
['a', 'b']
>>> list(TestComplexity.make_names(30))
['aa', 'ab', ..., 'bd']
"""
# determine how many products are needed to produce width
n_products = math.ceil(math.log(width, len(letters)))
inputs = (letters,) * n_products
combinations = itertools.product(*inputs)
names = map(''.join, combinations)
return itertools.islice(names, width)

@classmethod
def make_deep_paths(cls, depth):
return map(cls.make_deep_path, range(depth))

@classmethod
def make_deep_path(cls, depth):
return ''.join(('d/',) * depth)

def test_baseline_regex_complexity(self):
    """
    Measure the regex for the glob ``*.txt`` on its own, applied to
    increasingly deep directory prefixes, and require constant time.
    """
    # In a raw string a single backslash escapes the dot. The doubled
    # form (r'\\.') would instead require a literal backslash followed
    # by any character, which is not the regex used for '*.txt'.
    pattern = r'[^/]*\.txt'
    best, others = big_o.big_o(
        lambda path: re.fullmatch(pattern, path),
        self.make_deep_path,
        max_n=100,
        min_n=1,
    )
    assert best <= big_o.complexities.Constant

@pytest.mark.flaky
def test_glob_depth(self):
    """
    Globbing ``*.txt`` must scale no worse than quadratically as
    the archive gains directory levels.
    """

    def exhaust(path):
        return consume(path.glob('*.txt'))

    # make_zip_path receives the size positionally, i.e. as depth.
    best, _ = big_o.big_o(exhaust, self.make_zip_path, max_n=100, min_n=1)
    assert best <= big_o.complexities.Quadratic

@pytest.mark.flaky
def test_glob_width(self):
    """
    Globbing ``*.txt`` must scale no worse than linearly as the
    number of files per directory grows.
    """

    def exhaust(path):
        return consume(path.glob('*.txt'))

    def build(size):
        return self.make_zip_path(width=size)

    best, _ = big_o.big_o(exhaust, build, max_n=100, min_n=1)
    assert best <= big_o.complexities.Linear

@pytest.mark.flaky
def test_glob_width_and_depth(self):
    """
    Globbing ``*.txt`` must measure no worse than linear when depth
    and width grow together.
    """

    def exhaust(path):
        return consume(path.glob('*.txt'))

    def build(size):
        return self.make_zip_path(depth=size, width=size)

    best, _ = big_o.big_o(exhaust, build, max_n=10, min_n=1)
    assert best <= big_o.complexities.Linear
70 changes: 62 additions & 8 deletions Lib/test/test_zipfile/_path/test_path.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,16 +41,21 @@ def build_alpharep_fixture():
│ ├── d
│ │ └── e.txt
│ └── f.txt
└── g
└── h
└── i.txt
├── g
│ └── h
│ └── i.txt
└── j
├── k.bin
├── l.baz
└── m.bar
This fixture has the following key characteristics:
- a file at the root (a)
- a file two levels deep (b/d/e)
- multiple files in a directory (b/c, b/f)
- a directory containing only a directory (g/h)
- a directory with files of different extensions (j/klm)
"alpha" because it uses alphabet
"rep" because it's a representative example
Expand All @@ -62,6 +67,9 @@ def build_alpharep_fixture():
zf.writestr("b/d/e.txt", b"content of e")
zf.writestr("b/f.txt", b"content of f")
zf.writestr("g/h/i.txt", b"content of i")
zf.writestr("j/k.bin", b"content of k")
zf.writestr("j/l.baz", b"content of l")
zf.writestr("j/m.bar", b"content of m")
zf.filename = "alpharep.zip"
return zf

Expand Down Expand Up @@ -92,7 +100,7 @@ def zipfile_ondisk(self, alpharep):
def test_iterdir_and_types(self, alpharep):
root = zipfile.Path(alpharep)
assert root.is_dir()
a, b, g = root.iterdir()
a, b, g, j = root.iterdir()
assert a.is_file()
assert b.is_dir()
assert g.is_dir()
Expand All @@ -112,7 +120,7 @@ def test_is_file_missing(self, alpharep):
@pass_alpharep
def test_iterdir_on_file(self, alpharep):
root = zipfile.Path(alpharep)
a, b, g = root.iterdir()
a, b, g, j = root.iterdir()
with self.assertRaises(ValueError):
a.iterdir()

Expand All @@ -127,7 +135,7 @@ def test_subdir_is_dir(self, alpharep):
@pass_alpharep
def test_open(self, alpharep):
root = zipfile.Path(alpharep)
a, b, g = root.iterdir()
a, b, g, j = root.iterdir()
with a.open(encoding="utf-8") as strm:
data = strm.read()
self.assertEqual(data, "content of a")
Expand Down Expand Up @@ -229,7 +237,7 @@ def test_open_missing_directory(self):
@pass_alpharep
def test_read(self, alpharep):
root = zipfile.Path(alpharep)
a, b, g = root.iterdir()
a, b, g, j = root.iterdir()
assert a.read_text(encoding="utf-8") == "content of a"
# Also check positional encoding arg (gh-101144).
assert a.read_text("utf-8") == "content of a"
Expand Down Expand Up @@ -295,7 +303,7 @@ def test_mutability(self, alpharep):
reflect that change.
"""
root = zipfile.Path(alpharep)
a, b, g = root.iterdir()
a, b, g, j = root.iterdir()
alpharep.writestr('foo.txt', 'foo')
alpharep.writestr('bar/baz.txt', 'baz')
assert any(child.name == 'foo.txt' for child in root.iterdir())
Expand Down Expand Up @@ -394,6 +402,13 @@ def test_suffixes(self, alpharep):
e = root / '.hgrc'
assert e.suffixes == []

@pass_alpharep
def test_suffix_no_filename(self, alpharep):
    """
    suffix and suffixes of a non-root path are computable even when
    the underlying archive has no filename.
    """
    alpharep.filename = None
    child = zipfile.Path(alpharep).joinpath('example')
    assert child.suffix == ""
    assert child.suffixes == []

@pass_alpharep
def test_stem(self, alpharep):
"""
Expand All @@ -411,6 +426,8 @@ def test_stem(self, alpharep):
d = root / "d"
assert d.stem == "d"

assert (root / ".gitignore").stem == ".gitignore"

@pass_alpharep
def test_root_parent(self, alpharep):
root = zipfile.Path(alpharep)
Expand Down Expand Up @@ -442,12 +459,49 @@ def test_match_and_glob(self, alpharep):
assert not root.match("*.txt")

assert list(root.glob("b/c.*")) == [zipfile.Path(alpharep, "b/c.txt")]
assert list(root.glob("b/*.txt")) == [
zipfile.Path(alpharep, "b/c.txt"),
zipfile.Path(alpharep, "b/f.txt"),
]

@pass_alpharep
def test_glob_recursive(self, alpharep):
    """
    A ``**/`` glob finds only matching files and agrees with rglob.
    """
    root = zipfile.Path(alpharep)
    files = list(root.glob("**/*.txt"))
    # all() is vacuously true for an empty iterable; require real
    # matches so a glob that finds nothing cannot pass silently.
    assert files
    assert all(each.match("*.txt") for each in files)

    assert files == list(root.rglob("*.txt"))

@pass_alpharep
def test_glob_subdirs(self, alpharep):
    """
    ``*`` spans exactly one directory level in glob, while rglob
    reaches the nested file.
    """
    root = zipfile.Path(alpharep)
    nested = [zipfile.Path(alpharep, "g/h/i.txt")]

    shallow_hits = list(root.glob("*/i.txt"))
    assert shallow_hits == []

    recursive_hits = list(root.rglob("*/i.txt"))
    assert recursive_hits == nested

@pass_alpharep
def test_glob_does_not_overmatch_dot(self, alpharep):
    """
    A literal ``.`` in the pattern matches only a dot, so ``*.xt``
    must not pick up ``.txt`` files.
    """
    root = zipfile.Path(alpharep)
    hits = list(root.glob("*.xt"))

    assert hits == []

@pass_alpharep
def test_glob_single_char(self, alpharep):
    """
    ``?`` matches one character, a bracketed set matches its members
    literally, and ``[?]`` matches only a literal question mark.
    """
    root = zipfile.Path(alpharep)
    a_txt = [zipfile.Path(alpharep, "a.txt")]

    wildcard_hits = list(root.glob("a?txt"))
    assert wildcard_hits == a_txt

    dot_set_hits = list(root.glob("a[.]txt"))
    assert dot_set_hits == a_txt

    literal_question_hits = list(root.glob("a[?]txt"))
    assert literal_question_hits == []

@pass_alpharep
def test_glob_chars(self, alpharep):
    """
    Consecutive character sets each constrain one character:
    ``k.bin`` and ``l.baz`` match, ``m.bar`` does not.
    """
    root = zipfile.Path(alpharep)
    expected = [
        zipfile.Path(alpharep, member) for member in ("j/k.bin", "j/l.baz")
    ]

    found = list(root.glob("j/?.b[ai][nz]"))
    assert found == expected

def test_glob_empty(self):
root = zipfile.Path(zipfile.ZipFile(io.BytesIO(), 'w'))
with self.assertRaises(ValueError):
Expand Down
4 changes: 4 additions & 0 deletions Lib/test/test_zipfile/_path/write-alpharep.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
from . import test_path


# When run as a script, extract the alpharep fixture into ./alpharep.
if __name__ == '__main__':
    test_path.build_alpharep_fixture().extractall('alpharep')
31 changes: 13 additions & 18 deletions Lib/zipfile/_path/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@
import contextlib
import pathlib
import re
import fnmatch

from .glob import translate


__all__ = ['Path']
Expand Down Expand Up @@ -296,21 +297,24 @@ def open(self, mode='r', *args, pwd=None, **kwargs):
encoding, args, kwargs = _extract_text_encoding(*args, **kwargs)
return io.TextIOWrapper(stream, encoding, *args, **kwargs)

def _base(self):
    """
    Return the path object from which name, suffix, suffixes and stem
    are derived.
    """
    # Member paths inside an archive always use '/' separators, so they
    # are parsed as POSIX paths on every platform. At the root there is
    # no member path; defer to self.filename rather than parsing the
    # archive's own location as POSIX, which would mis-split a path
    # written with Windows separators.
    # NOTE(review): assumes self.filename is a platform-aware path
    # object exposing .name/.suffix/.suffixes/.stem - confirm against
    # the filename property.
    return pathlib.PurePosixPath(self.at) if self.at else self.filename

@property
def name(self):
return pathlib.Path(self.at).name or self.filename.name
return self._base().name

@property
def suffix(self):
return pathlib.Path(self.at).suffix or self.filename.suffix
return self._base().suffix

@property
def suffixes(self):
return pathlib.Path(self.at).suffixes or self.filename.suffixes
return self._base().suffixes

@property
def stem(self):
return pathlib.Path(self.at).stem or self.filename.stem
return self._base().stem

@property
def filename(self):
Expand Down Expand Up @@ -347,30 +351,21 @@ def iterdir(self):
return filter(self._is_child, subs)

def match(self, path_pattern):
return pathlib.Path(self.at).match(path_pattern)
return pathlib.PurePosixPath(self.at).match(path_pattern)

def is_symlink(self):
    """
    Return whether this path is a symlink. Always false (python/cpython#82102).

    The result is a constant: nothing about the path or the archive
    is inspected.
    """
    return False

def _descendants(self):
for child in self.iterdir():
yield child
if child.is_dir():
yield from child._descendants()

def glob(self, pattern):
if not pattern:
raise ValueError(f"Unacceptable pattern: {pattern!r}")

matches = re.compile(fnmatch.translate(pattern)).fullmatch
return (
child
for child in self._descendants()
if matches(str(child.relative_to(self)))
)
prefix = re.escape(self.at)
matches = re.compile(prefix + translate(pattern)).fullmatch
return map(self._next, filter(matches, self.root.namelist()))

def rglob(self, pattern):
    """
    Glob for ``pattern`` prefixed with ``**/``, delegating the
    matching to glob.
    """
    recursive_pattern = f'**/{pattern}'
    return self.glob(recursive_pattern)
Expand Down
40 changes: 40 additions & 0 deletions Lib/zipfile/_path/glob.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
import re


def translate(pattern):
    r"""
    Given a glob pattern, produce a regex that matches it.

    ``*`` and ``?`` never match the path separator ``/``; ``**``
    matches any run of characters, separators included. Bracketed
    character sets are passed through to the regex unchanged.

    >>> translate('*.txt')
    '[^/]*\\.txt'
    >>> translate('a?txt')
    'a[^/]txt'
    >>> translate('**/*')
    '.*/[^/]*'
    """
    return ''.join(map(replace, separate(pattern)))


def separate(pattern):
    """
    Separate out character sets to avoid translating their contents.

    Returns an iterator of match objects. The named group ``set`` is
    populated only for a complete bracketed set; plain text and an
    unterminated trailing ``[`` leave it empty.

    >>> [m.group(0) for m in separate('*.txt')]
    ['*.txt']
    >>> [m.group(0) for m in separate('a[?]txt')]
    ['a', '[?]', 'txt']
    """
    return re.finditer(r'([^\[]+)|(?P<set>[\[].*?[\]])|([\[][^\]]*$)', pattern)


def replace(match):
    """
    Perform the replacements for a match from :func:`separate`.

    A character set is returned verbatim. Any other text is escaped
    and its wildcards are then expanded.
    """
    # Order matters: '**' must be expanded before the single '*'.
    # '?' maps to '[^/]' rather than '.', so that a single-character
    # wildcard cannot match a path separator and span directories.
    return match.group('set') or (
        re.escape(match.group(0))
        .replace('\\*\\*', r'.*')
        .replace('\\*', r'[^/]*')
        .replace('\\?', r'[^/]')
    )
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
Fixed several bugs in zipfile.Path, including: in ``Path.match``, Windows
separators are no longer honored (and never were meant to be); fixed
``name``/``suffix``/``suffixes``/``stem`` operations when no filename is
present and the Path is not at the root of the zipfile; reworked glob for
performance and more correct matching behavior.

0 comments on commit 22980dc

Please sign in to comment.