From bd707b5d6ecfc3d6cf6b0f35a182043caa594e5b Mon Sep 17 00:00:00 2001
From: Frank Zingsheim
Date: Wed, 11 Sep 2024 15:48:26 +0200
Subject: [PATCH] Fix: Largefiles ignored #141

Import mercurial large files as ordinary files into git

The basic idea of this fix is based on
https://github.com/planestraveler/fast-export/tree/add-lfs-support-v2
from PR #65

Closes #141
---
Review fixups relative to the original submission:
 - export_file_contents: resolve file_ctx for every file, so that
   file_data_filters plugins no longer see an unset or stale file_ctx
   when the exported file is a largefile
 - is_largefile/largefile_orig_name: use startswith()/len() instead of
   the magic number 6; docstrings added to the new helpers
 - t/main.t: complete the &&-chain and check the content of file2
 - the post-image blob ids on the index lines were not regenerated

 README.md         |  9 +++++++++
 hg-fast-export.py | 47 ++++++++++++++++++++++++++++++++++++++++++++++-
 t/main.t          | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 107 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 87e3095..77f0231 100644
--- a/README.md
+++ b/README.md
@@ -138,6 +138,15 @@ if [ "$3" == "1" ]; then cat; else dos2unix -q; fi
 -- End of crlf-filter.sh --
 ```
 
+Mercurial Largefiles Extension
+------------------------------
+
+Mercurial largefiles are exported as ordinary files into git, i.e. not
+as git lfs files. In order to make the export work, make sure that
+you have all largefiles of all mercurial commits available locally.
+This can be ensured by either cloning the mercurial repository with
+the option --all-largefiles or by executing the command
+'hg lfpull --rev "all()"' inside the mercurial repository.
 
 Plugins
 -----------------
diff --git a/hg-fast-export.py b/hg-fast-export.py
index 46ccea1..b67110b 100755
--- a/hg-fast-export.py
+++ b/hg-fast-export.py
@@ -1,6 +1,7 @@
 #!/usr/bin/env python3
 
 # Copyright (c) 2007, 2008 Rocco Rutte and others.
+# Copyright (c) 2025 Siemens
 # License: MIT
 
 from hg2git import setup_repo,fixup_user,get_branch,get_changeset
@@ -11,6 +12,7 @@ import sys
 import os
 from binascii import hexlify
 import pluginloader
+from hgext.largefiles import lfutil
 
 # silly regex to catch Signed-off-by lines in log message
 sob_re=re.compile(b'^Signed-[Oo]ff-[Bb]y: (.+)$')
@@ -162,6 +164,40 @@ def refresh_gitmodules(ctx):
     wr(b'M 100644 inline .gitmodules')
     wr_data(gitmodules)
 
+def is_largefile(filename):
+  """Return True if filename is a largefiles standin, i.e. below .hglf/"""
+  return filename.startswith(b'.hglf/')
+
+def largefile_orig_name(filename):
+  """Return the standin path filename without its leading .hglf/"""
+  return filename[len(b'.hglf/'):]
+
+def largefile_data(ctx, file, filename):
+  """Return the content of the large file behind the standin `file`
+
+  The standin only holds the hash of the real content; the content itself
+  is located with lfutil.findfile(). If it is not available locally the
+  export is aborted with exit status 3. `filename` is currently unused.
+  """
+  lf_file_ctx=ctx.filectx(file)
+  lf_hash=lf_file_ctx.data().strip(b'\n')
+  sys.stderr.write("Detected large file hash %s\n" % lf_hash.decode())
+  #should detect where the large files are located
+  file_with_data = lfutil.findfile(ctx.repo(), lf_hash)
+  if file_with_data is None:
+    # Autodownloading from the mercurial repository would be an issue as there
+    # is a good chance that we may need to input some username and password.
+    # This will surely break fast-export as there will be some unexpected
+    # output.
+    sys.stderr.write("Large file wasn't found in local cache.\n")
+    sys.stderr.write("Please clone with --all-largefiles\n")
+    sys.stderr.write("or pull all large files with 'hg lfpull --rev "
+                     "\"all()\"'\n")
+    # closing in the middle of import will revert everything to the last checkpoint
+    sys.exit(3)
+  with open(os.path.normpath(file_with_data), 'rb') as file_with_data_handle:
+    return file_with_data_handle.read()
+
 def export_file_contents(ctx,manifest,files,hgtags,encoding='',plugins={}):
   count=0
   max=len(files)
@@ -183,8 +219,14 @@ def export_file_contents(ctx,manifest,files,hgtags,encoding='',plugins={}):
         b'Ignoring file %s which cannot be tracked by git\n' % filename
       )
       continue
+    # Resolve file_ctx for every file: the file_data_filters plugins below are
+    # handed it as well (for a largefile it is the context of the standin).
     file_ctx=ctx.filectx(file)
-    d=file_ctx.data()
+    if is_largefile(filename):
+      filename = largefile_orig_name(filename)
+      d = largefile_data(ctx, file, filename)
+    else:
+      d=file_ctx.data()
 
     if plugins and plugins['file_data_filters']:
       file_data = {'filename':filename,'file_ctx':file_ctx,'data':d}
@@ -327,6 +369,9 @@ def export_commit(ui,repo,revision,old_marks,max,count,authors,
     filename=strip_leading_slash(filename)
     if filename==b'.hgsub':
       remove_gitmodules(ctx)
+    # a largefile is exported under its original name, so delete that name
+    if is_largefile(filename):
+      filename=largefile_orig_name(filename)
     wr(b'D %s' % filename)
 
   export_file_contents(ctx,man,modified,hgtags,fn_encoding,plugins)
diff --git a/t/main.t b/t/main.t
index 659cabe..a2e0d3a 100755
--- a/t/main.t
+++ b/t/main.t
@@ -92,4 +92,56 @@ test_expect_success 'merge' '
 	test_cmp expected actual
 '
 
+# largefiles are exported as plain files: original name, real content
+test_expect_success 'hg large file' '
+	test_when_finished "rm -rf hgrepo gitrepo" &&
+
+	(
+	hg init hgrepo &&
+	cd hgrepo &&
+	echo "[extensions]" >> .hg/hgrc &&
+	echo "largefiles =" >> .hg/hgrc &&
+	echo a > content &&
+	echo a > file1 &&
+	hg add content &&
+	hg add --large file1 &&
+	hg commit -m "origin" &&
+
+	echo b > content &&
+	echo b > file2 &&
+	hg add --large file2 &&
+	hg rm file1 &&
+	hg commit -m "right" &&
+
+	hg update -r0 &&
+	echo c > content &&
+	hg commit -m "left" &&
+
+	HGMERGE=true hg merge -r1 &&
+	hg commit -m "merge"
+	) &&
+
+	git_clone hgrepo gitrepo &&
+
+	cat > expected <<-EOF &&
+	left
+	c
+	b
+	tree @:
+
+	content
+	file2
+	EOF
+
+	(
+	cd gitrepo &&
+	git show -q --format='%s' @^ &&
+	git show @:content &&
+	git show @:file2 &&
+	git show @:
+	) > actual &&
+
+	test_cmp expected actual
+'
+
 test_done