Allow specifying a repository root commit for conversion

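The conversion currently requires an empty destination git repository, because
the first Mercurial revision is always exported as a parentless root commit.
This rules out optimizations that depend on content already committed to the
destination repository.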

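This change adds a `--first-commit-hash` option that takes the hash of an
existing git commit; the first converted Mercurial revision (revision 0) is
then exported with that commit as its parent.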

This is necessary to support the immediate next commit (Incremental LFS conversion),
which uses a `.gitattributes` file and LFS pointers to bypass the slow, full-history
rewriting often required on large non-empty monorepos (100GiB+, 1M+ files).

The immediate benefit is allowing conversion to start when a non-empty repo
already contains an orphan commit, laying the groundwork for the optimized LFS
conversion feature.
This commit is contained in:
Kévin Lévesque
2025-12-10 10:31:59 -05:00
parent 8e1ba281d4
commit f6b72d248f
3 changed files with 129 additions and 3 deletions

View File

@@ -284,7 +284,7 @@ def strip_leading_slash(filename):
def export_commit(ui,repo,revision,old_marks,max,count,authors,
branchesmap,sob,brmap,hgtags,encoding='',fn_encoding='',
plugins={}):
first_commit_hash="",plugins={}):
def get_branchname(name):
if name in brmap:
return brmap[name]
@@ -332,6 +332,9 @@ def export_commit(ui,repo,revision,old_marks,max,count,authors,
if not parents:
type='full'
if revision == 0 and first_commit_hash:
wr(b'from %s' % first_commit_hash.encode())
type='simple delta'
else:
wr(b'from %s' % revnum_to_revref(parents[0], old_marks))
if len(parents) == 1:
@@ -526,7 +529,8 @@ def verify_heads(ui,repo,cache,force,ignore_unnamed_heads,branchesmap):
def hg2git(repourl,m,marksfile,mappingfile,headsfile,tipfile,
authors={},branchesmap={},tagsmap={},
sob=False,force=False,ignore_unnamed_heads=False,hgtags=False,notes=False,encoding='',fn_encoding='',
sob=False,force=False,ignore_unnamed_heads=False,hgtags=False,
notes=False,encoding='',fn_encoding='',first_commit_hash='',
plugins={}):
def check_cache(filename, contents):
if len(contents) == 0:
@@ -582,7 +586,7 @@ def hg2git(repourl,m,marksfile,mappingfile,headsfile,tipfile,
brmap={}
for rev in range(min,max):
c=export_commit(ui,repo,rev,old_marks,max,c,authors,branchesmap,
sob,brmap,hgtags,encoding,fn_encoding,
sob,brmap,hgtags,encoding,fn_encoding,first_commit_hash,
plugins)
if notes:
for rev in range(min,max):
@@ -656,6 +660,8 @@ if __name__=='__main__':
help="Add a plugin with the given init string <name=init>")
parser.add_option("--subrepo-map", type="string", dest="subrepo_map",
help="Provide a mapping file between the subrepository name and the submodule name")
parser.add_option("--first-commit-hash", type="string", dest="first_commit_hash",
help="Allow importing into an existing git repository by specifying the hash of the first commit")
(options,args)=parser.parse_args()
@@ -735,4 +741,5 @@ if __name__=='__main__':
ignore_unnamed_heads=options.ignore_unnamed_heads,
hgtags=options.hgtags,
notes=options.notes,encoding=encoding,fn_encoding=fn_encoding,
first_commit_hash=options.first_commit_hash,
plugins=plugins_dict))

View File

@@ -87,6 +87,8 @@ Options:
with <file-path> <hg-hash> <is-binary> as arguments
--plugin <plugin=init> Add a plugin with the given init string (repeatable)
--plugin-path <plugin-path> Add an additional plugin lookup path
--first-commit-hash <git-commit-hash> Use the given git commit hash as the
first commit's parent (for grafting)
"
case "$1" in
-h|--help)

117
t/first_commit_hash_option.t Executable file
View File

@@ -0,0 +1,117 @@
#!/bin/bash
#
# Copyright (c) 2025
#
# Shown in the harness output; names the feature this file actually exercises.
test_description='hg-fast-export --first-commit-hash option tests'
# Load sharness from SHARNESS_TEST_SRCDIR if set, otherwise from the
# "sharness" directory next to this script.
. "${SHARNESS_TEST_SRCDIR-$(dirname "$0")/sharness}"/sharness.sh || exit 1
# Write the Mercurial and git identities used by the commits created in the
# tests below.
setup() {
	# Mercurial identity for the source repository commits
	printf '%s\n' '[ui]' 'username = Test User <test@example.com>' >"$HOME/.hgrc"
	# Git identity for the destination repository commits
	git config --global user.name "Test User"
	git config --global user.email "test@example.com"
}

setup
# Graft case: the destination repository already holds one commit that only
# adds .gitattributes. The Mercurial history must be imported on top of that
# commit, keeping its file and adding the Mercurial one.
# Nothing here uses LFS, so the test does not require git-lfs to be installed.
test_expect_success 'Mercurial history is imported over the provided commit' '
	test_when_finished "rm -rf hgrepo gitrepo gitattributes_check.txt readme_check.txt" &&

	# 1. Create the source Mercurial repository with a single text file
	(
		hg init hgrepo &&
		cd hgrepo &&
		echo "regular text file" >readme.txt &&
		hg add readme.txt &&
		hg commit -m "initial commit"
	) &&

	# 2. Prepare a non-empty destination git repository
	mkdir gitrepo &&
	(
		cd gitrepo &&
		git init -q &&
		git config core.ignoreCase false &&
		git switch --create master &&
		echo "* -text" >.gitattributes &&
		git add .gitattributes &&
		git commit -q -m "Initialize Git configuration"
	) &&
	FIRST_HASH=$(git -C gitrepo rev-parse HEAD) &&

	# 3. Run hg-fast-export, grafting onto the existing commit
	(
		cd gitrepo &&
		hg-fast-export.sh \
			-r "../hgrepo" \
			--first-commit-hash "$FIRST_HASH" --force \
			-M master
	) &&

	# 4. Verify the imported commit is a child of the provided commit
	test "$(git -C gitrepo rev-parse HEAD^)" = "$FIRST_HASH" &&

	# 5. Verify the git file is still present
	git -C gitrepo show HEAD:.gitattributes >gitattributes_check.txt &&
	test "$(cat gitattributes_check.txt)" = "* -text" &&

	# 6. Verify the hg file is imported
	git -C gitrepo show HEAD:readme.txt >readme_check.txt &&
	test "$(cat readme_check.txt)" = "regular text file"
'
# Conflict case: the provided git commit and the Mercurial history both
# contain readme.txt. After the import, HEAD must carry the Mercurial version.
# Nothing here uses LFS, so the test does not require git-lfs to be installed.
test_expect_success 'Mercurial history has priority over git' '
	test_when_finished "rm -rf hgrepo gitrepo readme_check.txt" &&

	# 1. Create the source Mercurial repository with its own readme.txt
	(
		hg init hgrepo &&
		cd hgrepo &&
		echo "hg readme file" >readme.txt &&
		hg add readme.txt &&
		hg commit -m "initial commit"
	) &&

	# 2. Prepare a destination git repository with a conflicting readme.txt
	mkdir gitrepo &&
	(
		cd gitrepo &&
		git init -q &&
		git config core.ignoreCase false &&
		git switch --create master &&
		echo "git readme file" >readme.txt &&
		git add readme.txt &&
		git commit -q -m "Initialize Git readme file"
	) &&
	FIRST_HASH=$(git -C gitrepo rev-parse HEAD) &&

	# 3. Run hg-fast-export, grafting onto the existing commit
	(
		cd gitrepo &&
		hg-fast-export.sh \
			-r "../hgrepo" \
			--first-commit-hash "$FIRST_HASH" --force \
			-M master
	) &&

	# 4. Verify the hg version of the file replaced the git one
	git -C gitrepo show HEAD:readme.txt >readme_check.txt &&
	test "$(cat readme_check.txt)" = "hg readme file"
'

test_done