From d77765a23e10adf6f8d89988cdd63fbed491b683 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?G=C3=BCnther=20Nu=C3=9Fm=C3=BCller?= Date: Fri, 25 Jul 2025 12:28:23 +0200 Subject: [PATCH] Fix UnboundLocalError with plugins and largefiles When plugins are used in a repository that contains largefiles, the following exception is thrown as soon as the first largefile is converted: ``` Traceback (most recent call last): File "fast-export/hg-fast-export.py", line 728, in <module> sys.exit(hg2git(options.repourl,m,options.marksfile,options.mappingfile, File "fast-export/hg-fast-export.py", line 581, in hg2git c=export_commit(ui,repo,rev,old_marks,max,c,authors,branchesmap, File "fast-export/hg-fast-export.py", line 366, in export_commit export_file_contents(ctx,man,modified,hgtags,fn_encoding,plugins) File "fast-export/hg-fast-export.py", line 222, in export_file_contents file_data = {'filename':filename,'file_ctx':file_ctx,'data':d} UnboundLocalError: local variable 'file_ctx' referenced before assignment ``` This commit fixes the error by: * initializing the file_ctx before the largefile handling takes place * providing a new `is_largefile` value for plugins so they can detect if largefile handling was applied (and therefore the file_ctx object may no longer be in sync with the git version of the file) --- README.md | 8 ++- hg-fast-export.py | 7 +- t/largefile_plugin.expected | 20 ++++++ t/largefile_plugin.t | 69 +++++++++++++++++++ t/largefile_plugin_file_info.expected | 12 ++++ .../echo_file_data_test_plugin/__init__.py | 18 +++++ 6 files changed, 131 insertions(+), 3 deletions(-) create mode 100644 t/largefile_plugin.expected create mode 100755 t/largefile_plugin.t create mode 100644 t/largefile_plugin_file_info.expected create mode 100644 t/plugins/echo_file_data_test_plugin/__init__.py diff --git a/README.md b/README.md index 77f0231..9bc113e 100644 --- a/README.md +++ b/README.md @@ -188,7 +188,7 @@ values in the dictionary after filters have been run are used to create the git 
commit. ``` -file_data = {'filename':filename,'file_ctx':file_ctx,'data':file_contents} +file_data = {'filename':filename,'file_ctx':file_ctx,'data':file_contents, 'is_largefile':largefile_status} def file_data_filter(self,file_data): ``` @@ -203,6 +203,12 @@ but in this case the `data` and `file_ctx` keys map to None. This is so that a filter which modifies file names can apply the same name transformations when files are deleted. +The `is_largefile` entry within the `file_data` dictionary will contain +`True` if the original file was a largefile and has been converted +to a normal file before the plugins were invoked. In this case, the `file_ctx` +will still point to the filecontext for the original, unconverted file, while +`filename` and `data` will contain the already converted information. + Submodules ---------- See README-SUBMODULES.md for how to convert subrepositories into git diff --git a/hg-fast-export.py b/hg-fast-export.py index f00943c..9405468 100755 --- a/hg-fast-export.py +++ b/hg-fast-export.py @@ -211,15 +211,18 @@ def export_file_contents(ctx,manifest,files,hgtags,encoding='',plugins={}): b'Ignoring file %s which cannot be tracked by git\n' % filename ) continue + + largefile = False + file_ctx=ctx.filectx(file) if is_largefile(filename): + largefile = True filename = largefile_orig_name(filename) d = largefile_data(ctx, file, filename) else: - file_ctx=ctx.filectx(file) d=file_ctx.data() if plugins and plugins['file_data_filters']: - file_data = {'filename':filename,'file_ctx':file_ctx,'data':d} + file_data = {'filename':filename,'file_ctx':file_ctx,'data':d, 'is_largefile':largefile} for filter in plugins['file_data_filters']: filter(file_data) d=file_data['data'] diff --git a/t/largefile_plugin.expected b/t/largefile_plugin.expected new file mode 100644 index 0000000..5d2b193 --- /dev/null +++ b/t/largefile_plugin.expected @@ -0,0 +1,20 @@ +blob +mark :1 +data 7 +a_file + +blob +mark :2 +data 6 +large + +reset refs/heads/master +commit 
refs/heads/master +mark :3 +author Grevious Bodily Harmsworth 1679014800 +0000 +committer Grevious Bodily Harmsworth 1679014800 +0000 +data 3 +r0 +M 100644 :1 a.txt +M 100644 :2 b.txt + diff --git a/t/largefile_plugin.t b/t/largefile_plugin.t new file mode 100755 index 0000000..9bbe29e --- /dev/null +++ b/t/largefile_plugin.t @@ -0,0 +1,69 @@ +#!/bin/bash +# +# Copyright (c) 2023 Felipe Contreras +# Copyright (c) 2023 Frej Drejhammar +# Copyright (c) 2025 Günther Nußmüller +# +# Check that plugin invocation works with largefiles. +# This test uses the echo_file_data_test_plugin to verify that the +# file data is passed correctly, including the largefile status. +# + +test_description='Largefiles and plugin test' + +. "${SHARNESS_TEST_SRCDIR-$(dirname "$0")/sharness}"/sharness.sh || exit 1 + + +git_create() { + git init -q "$1" && + git -C "$1" config core.ignoreCase false +} + +git_convert() { + ( + cd "$2" && + hg-fast-export.sh --repo "../$1" \ + -s --hgtags -n \ + --plugin ../../plugins/echo_file_data_test_plugin + ) +} + +setup() { + cat > "$HOME"/.hgrc <<-EOF + [ui] + username = Grevious Bodily Harmsworth + [extensions] + largefiles = + EOF +} + +commit0() { + ( + cd hgrepo && + echo "a_file" > a.txt && + echo "large" > b.txt && + hg add a.txt && + hg add --large b.txt && + hg commit -d "2023-03-17 01:00Z" -m "r0" + ) +} + +setup + +test_expect_success 'largefile and plugin' ' + test_when_finished "rm -rf hgrepo gitrepo" && + + ( + hg init hgrepo && + commit0 + ) && + git_create gitrepo && + git_convert hgrepo gitrepo && + + git -C gitrepo fast-export --all > actual && + + test_cmp "$SHARNESS_TEST_DIRECTORY"/largefile_plugin.expected actual && + test_cmp "$SHARNESS_TEST_DIRECTORY"/largefile_plugin_file_info.expected gitrepo/largefile_info.txt +' + +test_done diff --git a/t/largefile_plugin_file_info.expected b/t/largefile_plugin_file_info.expected new file mode 100644 index 0000000..54df99e --- /dev/null +++ b/t/largefile_plugin_file_info.expected @@ -0,0 +1,12 
@@ +filename: b'b.txt' +data size: 6 bytes +ctx rev: 0 +ctx binary: False +is largefile: True + +filename: b'a.txt' +data size: 7 bytes +ctx rev: 0 +ctx binary: False +is largefile: False + diff --git a/t/plugins/echo_file_data_test_plugin/__init__.py b/t/plugins/echo_file_data_test_plugin/__init__.py new file mode 100644 index 0000000..759da70 --- /dev/null +++ b/t/plugins/echo_file_data_test_plugin/__init__.py @@ -0,0 +1,18 @@ +import sys +from mercurial import node + +def build_filter(args): + return Filter(args) + +class Filter: + def __init__(self, _): + pass + + def file_data_filter(self,file_data): + with open('largefile_info.txt', 'a') as f: + f.write(f"filename: {file_data['filename']}\n") + f.write(f"data size: {len(file_data['data'])} bytes\n") + f.write(f"ctx rev: {file_data['file_ctx'].rev()}\n") + f.write(f"ctx binary: {file_data['file_ctx'].isbinary()}\n") + f.write(f"is largefile: {file_data.get('is_largefile', False)}\n") + f.write("\n") \ No newline at end of file