Skip to content

Commit

Permalink
gh-57141: Add dircmp shallow option.
Browse files Browse the repository at this point in the history
Co-authored-by: Steve Ward <[email protected]>
Co-authored-by: Sanyam Khurana <[email protected]>
  • Loading branch information
3 people committed Sep 27, 2023
1 parent 0e28d0f commit 8671e21
Show file tree
Hide file tree
Showing 4 changed files with 116 additions and 31 deletions.
5 changes: 3 additions & 2 deletions Doc/library/filecmp.rst
Original file line number Diff line number Diff line change
Expand Up @@ -70,15 +70,16 @@ The :mod:`filecmp` module defines the following functions:
The :class:`dircmp` class
-------------------------

.. class:: dircmp(a, b, ignore=None, hide=None)
.. class:: dircmp(a, b, ignore=None, hide=None, shallow=True)

Construct a new directory comparison object, to compare the directories *a*
and *b*. *ignore* is a list of names to ignore, and defaults to
:const:`filecmp.DEFAULT_IGNORES`. *hide* is a list of names to hide, and
defaults to ``[os.curdir, os.pardir]``.

The :class:`dircmp` class compares files by doing *shallow* comparisons
as described for :func:`filecmp.cmp`.
as described for :func:`filecmp.cmp` by default; pass ``shallow=False`` via
the *shallow* parameter to compare file contents instead.

The :class:`dircmp` class provides the following methods:

Expand Down
13 changes: 9 additions & 4 deletions Lib/filecmp.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,12 +88,15 @@ def _do_cmp(f1, f2):
class dircmp:
"""A class that manages the comparison of 2 directories.
dircmp(a, b, ignore=None, hide=None)
dircmp(a, b, ignore=None, hide=None, shallow=True)
A and B are directories.
IGNORE is a list of names to ignore,
defaults to DEFAULT_IGNORES.
HIDE is a list of names to hide,
defaults to [os.curdir, os.pardir].
SHALLOW specifies whether to just check the stat signature (do not read
the files).
defaults to True.
High level usage:
x = dircmp(dir1, dir2)
Expand Down Expand Up @@ -121,7 +124,7 @@ class dircmp:
in common_dirs.
"""

def __init__(self, a, b, ignore=None, hide=None): # Initialize
def __init__(self, a, b, ignore=None, hide=None, shallow=True): # Initialize
self.left = a
self.right = b
if hide is None:
Expand All @@ -132,6 +135,7 @@ def __init__(self, a, b, ignore=None, hide=None): # Initialize
self.ignore = DEFAULT_IGNORES
else:
self.ignore = ignore
self.shallow = shallow

def phase0(self): # Compare everything except common subdirectories
self.left_list = _filter(os.listdir(self.left),
Expand Down Expand Up @@ -184,7 +188,7 @@ def phase2(self): # Distinguish files, directories, funnies
self.common_funny.append(x)

def phase3(self): # Find out differences between common files
xx = cmpfiles(self.left, self.right, self.common_files)
xx = cmpfiles(self.left, self.right, self.common_files, self.shallow)
self.same_files, self.diff_files, self.funny_files = xx

def phase4(self): # Find out differences between common subdirectories
Expand All @@ -196,7 +200,8 @@ def phase4(self): # Find out differences between common subdirectories
for x in self.common_dirs:
a_x = os.path.join(self.left, x)
b_x = os.path.join(self.right, x)
self.subdirs[x] = self.__class__(a_x, b_x, self.ignore, self.hide)
self.subdirs[x] = self.__class__(a_x, b_x, self.ignore, self.hide,
self.shallow)

def phase4_closure(self): # Recursively call phase4() on subdirectories
self.phase4()
Expand Down
126 changes: 101 additions & 25 deletions Lib/test/test_filecmp.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,24 +8,44 @@
from test.support import os_helper


def _create_file_shallow_equal(template_path, new_path):
    """Create *new_path* with the same size and mtime as *template_path*
    but different content.

    The template is copied, the first byte of the copy is incremented
    (wrapping 255 -> 0), and the template's stat information is then
    re-applied so that both files share the same stat signature.

    Raises ValueError if the template is empty: a zero-length file cannot
    differ in content while keeping the same size.
    """
    # Check up front so that no stray copy is left behind on failure.
    if os.stat(template_path).st_size == 0:
        raise ValueError(
            "template file must not be empty: %r" % (template_path,))

    shutil.copy2(template_path, new_path)
    with open(new_path, 'r+b') as f:
        first_byte = f.read(1)
        f.seek(0)
        f.write(bytes([(first_byte[0] + 1) % 256]))
    # Rewriting the byte updated the mtime; restore the template's values.
    shutil.copystat(template_path, new_path)

    template_stat = os.stat(template_path)
    new_stat = os.stat(new_path)
    assert new_stat.st_size == template_stat.st_size
    assert new_stat.st_mtime == template_stat.st_mtime

class FileCompareTestCase(unittest.TestCase):
def setUp(self):
self.name = os_helper.TESTFN
self.name_same = os_helper.TESTFN + '-same'
self.name_diff = os_helper.TESTFN + '-diff'
self.name_same_shallow = os_helper.TESTFN + '-same-shallow'
data = 'Contents of file go here.\n'
for name in [self.name, self.name_same, self.name_diff]:
with open(name, 'w', encoding="utf-8") as output:
output.write(data)

with open(self.name_diff, 'a+', encoding="utf-8") as output:
output.write('An extra line.\n')

for name in [self.name_same, self.name_diff]:
shutil.copystat(self.name, name)

_create_file_shallow_equal(self.name, self.name_same_shallow)

self.dir = tempfile.gettempdir()

def tearDown(self):
os.unlink(self.name)
os.unlink(self.name_same)
os.unlink(self.name_diff)
os.unlink(self.name_same_shallow)

def test_matching(self):
self.assertTrue(filecmp.cmp(self.name, self.name),
Expand All @@ -36,12 +56,17 @@ def test_matching(self):
"Comparing file to identical file fails")
self.assertTrue(filecmp.cmp(self.name, self.name_same, shallow=False),
"Comparing file to identical file fails")
self.assertTrue(filecmp.cmp(self.name, self.name_same_shallow),
"Shallow identical files should be considered equal")

def test_different(self):
self.assertFalse(filecmp.cmp(self.name, self.name_diff),
"Mismatched files compare as equal")
self.assertFalse(filecmp.cmp(self.name, self.dir),
"File and directory compare as equal")
self.assertFalse(filecmp.cmp(self.name, self.name_same_shallow,
shallow=False),
"Mismatched file to shallow identical file compares as equal")

def test_cache_clear(self):
first_compare = filecmp.cmp(self.name, self.name_same, shallow=False)
Expand All @@ -56,14 +81,26 @@ def setUp(self):
self.dir = os.path.join(tmpdir, 'dir')
self.dir_same = os.path.join(tmpdir, 'dir-same')
self.dir_diff = os.path.join(tmpdir, 'dir-diff')
self.dir_diff_file = os.path.join(tmpdir, 'dir-diff-file')
self.dir_same_shallow = os.path.join(tmpdir, 'dir-same-shallow')

# Another dir is created under dir_same, but it has a name from the
# ignored list so it should not affect testing results.
self.dir_ignored = os.path.join(self.dir_same, '.hg')

self.caseinsensitive = os.path.normcase('A') == os.path.normcase('a')
data = 'Contents of file go here.\n'
for dir in (self.dir, self.dir_same, self.dir_diff, self.dir_ignored):

shutil.rmtree(self.dir, True)
os.mkdir(self.dir)
subdir_path = os.path.join(self.dir, 'subdir')
os.mkdir(subdir_path)
dir_file_path = os.path.join(self.dir, "file")
with open(dir_file_path, 'w', encoding="utf-8") as output:
output.write(data)

for dir in (self.dir_same, self.dir_same_shallow,
self.dir_diff, self.dir_diff_file):
shutil.rmtree(dir, True)
os.mkdir(dir)
subdir_path = os.path.join(dir, 'subdir')
Expand All @@ -72,14 +109,25 @@ def setUp(self):
fn = 'FiLe' # Verify case-insensitive comparison
else:
fn = 'file'
with open(os.path.join(dir, fn), 'w', encoding="utf-8") as output:
output.write(data)

file_path = os.path.join(dir, fn)

if dir is self.dir_same_shallow:
_create_file_shallow_equal(dir_file_path, file_path)
else:
shutil.copy2(dir_file_path, file_path)

with open(os.path.join(self.dir_diff, 'file2'), 'w', encoding="utf-8") as output:
output.write('An extra file.\n')

# Add different file2 with respect to dir_diff
with open(os.path.join(self.dir_diff_file, 'file2'), 'w', encoding="utf-8") as output:
output.write('Different contents.\n')


def tearDown(self):
for dir in (self.dir, self.dir_same, self.dir_diff):
for dir in (self.dir, self.dir_same, self.dir_diff,
self.dir_same_shallow, self.dir_diff_file):
shutil.rmtree(dir)

def test_default_ignores(self):
Expand All @@ -102,11 +150,7 @@ def test_cmpfiles(self):
shallow=False),
"Comparing directory to same fails")

# Add different file2
with open(os.path.join(self.dir, 'file2'), 'w', encoding="utf-8") as output:
output.write('Different contents.\n')

self.assertFalse(filecmp.cmpfiles(self.dir, self.dir_same,
self.assertFalse(filecmp.cmpfiles(self.dir, self.dir_diff_file,
['file', 'file2']) ==
(['file'], ['file2'], []),
"Comparing mismatched directories fails")
Expand All @@ -116,11 +160,22 @@ def _assert_lists(self, actual, expected):
"""Assert that two lists are equal, up to ordering."""
self.assertEqual(sorted(actual), sorted(expected))

def test_dircmp_identical_directories(self):
self._assert_dircmp_identical_directories()
self._assert_dircmp_identical_directories(shallow=False)

def test_dircmp(self):
def test_dircmp_different_file(self):
self._assert_dircmp_different_file()
self._assert_dircmp_different_file(shallow=False)

def test_dircmp_different_directories(self):
self._assert_dircmp_different_directories()
self._assert_dircmp_different_directories(shallow=False)

def _assert_dircmp_identical_directories(self, **options):
# Check attributes for comparison of two identical directories
left_dir, right_dir = self.dir, self.dir_same
d = filecmp.dircmp(left_dir, right_dir)
d = filecmp.dircmp(left_dir, right_dir, **options)
self.assertEqual(d.left, left_dir)
self.assertEqual(d.right, right_dir)
if self.caseinsensitive:
Expand All @@ -142,9 +197,10 @@ def test_dircmp(self):
]
self._assert_report(d.report, expected_report)

def _assert_dircmp_different_directories(self, **options):
# Check attributes for comparison of two different directories (right)
left_dir, right_dir = self.dir, self.dir_diff
d = filecmp.dircmp(left_dir, right_dir)
d = filecmp.dircmp(left_dir, right_dir, **options)
self.assertEqual(d.left, left_dir)
self.assertEqual(d.right, right_dir)
self._assert_lists(d.left_list, ['file', 'subdir'])
Expand All @@ -164,12 +220,8 @@ def test_dircmp(self):
self._assert_report(d.report, expected_report)

# Check attributes for comparison of two different directories (left)
left_dir, right_dir = self.dir, self.dir_diff
shutil.move(
os.path.join(self.dir_diff, 'file2'),
os.path.join(self.dir, 'file2')
)
d = filecmp.dircmp(left_dir, right_dir)
left_dir, right_dir = self.dir_diff, self.dir
d = filecmp.dircmp(left_dir, right_dir, **options)
self.assertEqual(d.left, left_dir)
self.assertEqual(d.right, right_dir)
self._assert_lists(d.left_list, ['file', 'file2', 'subdir'])
Expand All @@ -180,27 +232,51 @@ def test_dircmp(self):
self.assertEqual(d.same_files, ['file'])
self.assertEqual(d.diff_files, [])
expected_report = [
"diff {} {}".format(self.dir, self.dir_diff),
"Only in {} : ['file2']".format(self.dir),
"diff {} {}".format(self.dir_diff, self.dir),
"Only in {} : ['file2']".format(self.dir_diff),
"Identical files : ['file']",
"Common subdirectories : ['subdir']",
]
self._assert_report(d.report, expected_report)

# Add different file2
with open(os.path.join(self.dir_diff, 'file2'), 'w', encoding="utf-8") as output:
output.write('Different contents.\n')
d = filecmp.dircmp(self.dir, self.dir_diff)

def _assert_dircmp_different_file(self, **options):
# A different file2
d = filecmp.dircmp(self.dir_diff, self.dir_diff_file, **options)
self.assertEqual(d.same_files, ['file'])
self.assertEqual(d.diff_files, ['file2'])
expected_report = [
"diff {} {}".format(self.dir, self.dir_diff),
"diff {} {}".format(self.dir_diff, self.dir_diff_file),
"Identical files : ['file']",
"Differing files : ['file2']",
"Common subdirectories : ['subdir']",
]
self._assert_report(d.report, expected_report)

def test_dircmp_no_shallow_different_file(self):
# With shallow=False, 'file' in dir_same_shallow (same size and mtime as
# the original but different content) must be reported as differing.
d = filecmp.dircmp(self.dir, self.dir_same_shallow, shallow=False)
self.assertEqual(d.same_files, [])
self.assertEqual(d.diff_files, ['file'])
expected_report = [
"diff {} {}".format(self.dir, self.dir_same_shallow),
"Differing files : ['file']",
"Common subdirectories : ['subdir']",
]
self._assert_report(d.report, expected_report)

def test_dircmp_shallow_same_file(self):
# With the default shallow comparison, 'file' in dir_same_shallow (same
# size and mtime as the original but different content) is reported as
# identical.
d = filecmp.dircmp(self.dir, self.dir_same_shallow)
self.assertEqual(d.same_files, ['file'])
self.assertEqual(d.diff_files, [])
expected_report = [
"diff {} {}".format(self.dir, self.dir_same_shallow),
"Identical files : ['file']",
"Common subdirectories : ['subdir']",
]
self._assert_report(d.report, expected_report)

def test_dircmp_subdirs_type(self):
"""Check that dircmp.subdirs respects subclassing."""
class MyDirCmp(filecmp.dircmp):
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Add option for *non-shallow* comparisons to :class:`filecmp.dircmp` like
:func:`filecmp.cmp`. Original patch by Steven Ward. Enhanced by
Tobias Rautenkranz.

0 comments on commit 8671e21

Please sign in to comment.