mirror of
https://github.com/frej/fast-export.git
synced 2026-02-24 21:40:43 +01:00
Merge branch 'frej/gh347'
This commit is contained in:
3
.github/requirements-earliest.txt
vendored
3
.github/requirements-earliest.txt
vendored
@@ -1 +1,4 @@
|
||||
mercurial==5.2
|
||||
|
||||
# Required for git_lfs_importer plugin
|
||||
pathspec==0.11.2
|
||||
2
.github/requirements-latest.txt
vendored
2
.github/requirements-latest.txt
vendored
@@ -1,2 +1,4 @@
|
||||
mercurial
|
||||
|
||||
# Required for git_lfs_importer plugin
|
||||
pathspec==0.12.1
|
||||
70
README.md
70
README.md
@@ -141,12 +141,48 @@ if [ "$3" == "1" ]; then cat; else dos2unix -q; fi
|
||||
Mercurial Largefiles Extension
|
||||
------------------------------
|
||||
|
||||
Mercurial largefiles are exported as ordinary files into git, i.e. not
|
||||
as git lfs files. In order to make the export work, make sure that
|
||||
you have all largefiles of all mercurial commits available locally.
|
||||
This can be ensured by either cloning the mercurial repository with
|
||||
the option --all-largefiles or by executing the command
|
||||
'hg lfpull --rev "all()"' inside the mercurial repository.
|
||||
### Handling Mercurial Largefiles during Migration
|
||||
|
||||
When migrating from Mercurial to Git, largefiles are exported as ordinary
|
||||
files by default. To ensure a successful migration and manage repository
|
||||
size, follow the requirements below.
|
||||
|
||||
#### 1. Pre-Export: Ensure File Availability
|
||||
|
||||
Before starting the export, you must have all largefiles from all
|
||||
Mercurial commits available locally. Use one of these methods:
|
||||
|
||||
* **For a new clone:** `hg clone --all-largefiles <repo-url>`
|
||||
* **For an existing repo:** `hg lfpull --rev "all()"`
|
||||
|
||||
#### 2. Choosing Your LFS Strategy
|
||||
|
||||
If you want your files to be versioned in Git LFS rather than as standard
|
||||
Git blobs, you have two primary paths:
|
||||
|
||||
* **[git_lfs_importer plugin](./plugins/git_lfs_importer/README.md)
|
||||
(During Conversion)**
|
||||
Recommended for large repos. This performs Just-In-Time (JIT) conversion
|
||||
by identifying large files during the export and writing LFS pointers
|
||||
immediately, skipping the need for a second pass. This also supports
|
||||
**incremental conversion**, making it much more efficient for ongoing
|
||||
migrations.
|
||||
* **[git lfs migrate import](https://github.com/git-lfs/git-lfs/blob/main/docs/man/git-lfs-migrate.adoc)
|
||||
(After Conversion)**
|
||||
A standard two-step process: first, export the full history from Mercurial
|
||||
to Git, then run a separate full history rewrite to move files into LFS.
|
||||
|
||||
### Why use the git_lfs_importer plugin?
|
||||
|
||||
For "monorepos" or very large repositories (100GiB+), the traditional
|
||||
two-step process can take days. By integrating the LFS conversion
|
||||
directly into the history export, the plugin eliminates the massive
|
||||
time overhead of a secondary history rewrite and allows for incremental
|
||||
progress.
|
||||
|
||||
For detailed setup, see the
|
||||
[git_lfs_importer](./plugins/git_lfs_importer/README.md)
|
||||
plugin documentation.
|
||||
|
||||
Plugins
|
||||
-----------------
|
||||
@@ -177,9 +213,18 @@ defined filter methods in the [dos2unix](./plugins/dos2unix) and
|
||||
[branch_name_in_commit](./plugins/branch_name_in_commit) plugins.
|
||||
|
||||
```
|
||||
commit_data = {'branch': branch, 'parents': parents, 'author': author, 'desc': desc, 'revision': revision, 'hg_hash': hg_hash, 'committer': 'committer', 'extra': extra}
|
||||
commit_data = {
|
||||
'author': author,
|
||||
'branch': branch,
|
||||
'committer': committer,
|
||||
'desc': desc,
|
||||
'extra': extra,
|
||||
'hg_hash': hg_hash,
|
||||
'parents': parents,
|
||||
'revision': revision,
|
||||
}
|
||||
|
||||
def commit_message_filter(self,commit_data):
|
||||
def commit_message_filter(self, commit_data):
|
||||
```
|
||||
The `commit_message_filter` method is called for each commit, after parsing
|
||||
from hg, but before outputting to git. The dictionary `commit_data` contains the
|
||||
@@ -188,9 +233,14 @@ values in the dictionary after filters have been run are used to create the git
|
||||
commit.
|
||||
|
||||
```
|
||||
file_data = {'filename':filename,'file_ctx':file_ctx,'data':file_contents, 'is_largefile':largefile_status}
|
||||
file_data = {
|
||||
'data': file_contents,
|
||||
'file_ctx': file_ctx,
|
||||
'filename': filename,
|
||||
'is_largefile': largefile_status,
|
||||
}
|
||||
|
||||
def file_data_filter(self,file_data):
|
||||
def file_data_filter(self, file_data):
|
||||
```
|
||||
The `file_data_filter` method is called for each file within each commit.
|
||||
The dictionary `file_data` contains the above attributes about the file, and
|
||||
|
||||
@@ -284,7 +284,7 @@ def strip_leading_slash(filename):
|
||||
|
||||
def export_commit(ui,repo,revision,old_marks,max,count,authors,
|
||||
branchesmap,sob,brmap,hgtags,encoding='',fn_encoding='',
|
||||
plugins={}):
|
||||
first_commit_hash="",plugins={}):
|
||||
def get_branchname(name):
|
||||
if name in brmap:
|
||||
return brmap[name]
|
||||
@@ -332,6 +332,9 @@ def export_commit(ui,repo,revision,old_marks,max,count,authors,
|
||||
|
||||
if not parents:
|
||||
type='full'
|
||||
if revision == 0 and first_commit_hash:
|
||||
wr(b'from %s' % first_commit_hash.encode())
|
||||
type='simple delta'
|
||||
else:
|
||||
wr(b'from %s' % revnum_to_revref(parents[0], old_marks))
|
||||
if len(parents) == 1:
|
||||
@@ -526,7 +529,8 @@ def verify_heads(ui,repo,cache,force,ignore_unnamed_heads,branchesmap):
|
||||
|
||||
def hg2git(repourl,m,marksfile,mappingfile,headsfile,tipfile,
|
||||
authors={},branchesmap={},tagsmap={},
|
||||
sob=False,force=False,ignore_unnamed_heads=False,hgtags=False,notes=False,encoding='',fn_encoding='',
|
||||
sob=False,force=False,ignore_unnamed_heads=False,hgtags=False,
|
||||
notes=False,encoding='',fn_encoding='',first_commit_hash='',
|
||||
plugins={}):
|
||||
def check_cache(filename, contents):
|
||||
if len(contents) == 0:
|
||||
@@ -582,7 +586,7 @@ def hg2git(repourl,m,marksfile,mappingfile,headsfile,tipfile,
|
||||
brmap={}
|
||||
for rev in range(min,max):
|
||||
c=export_commit(ui,repo,rev,old_marks,max,c,authors,branchesmap,
|
||||
sob,brmap,hgtags,encoding,fn_encoding,
|
||||
sob,brmap,hgtags,encoding,fn_encoding,first_commit_hash,
|
||||
plugins)
|
||||
if notes:
|
||||
for rev in range(min,max):
|
||||
@@ -656,6 +660,8 @@ if __name__=='__main__':
|
||||
help="Add a plugin with the given init string <name=init>")
|
||||
parser.add_option("--subrepo-map", type="string", dest="subrepo_map",
|
||||
help="Provide a mapping file between the subrepository name and the submodule name")
|
||||
parser.add_option("--first-commit-hash", type="string", dest="first_commit_hash",
|
||||
help="Allow importing into an existing git repository by specifying the hash of the first commit")
|
||||
|
||||
(options,args)=parser.parse_args()
|
||||
|
||||
@@ -735,4 +741,5 @@ if __name__=='__main__':
|
||||
ignore_unnamed_heads=options.ignore_unnamed_heads,
|
||||
hgtags=options.hgtags,
|
||||
notes=options.notes,encoding=encoding,fn_encoding=fn_encoding,
|
||||
first_commit_hash=options.first_commit_hash,
|
||||
plugins=plugins_dict))
|
||||
|
||||
@@ -87,6 +87,8 @@ Options:
|
||||
with <file-path> <hg-hash> <is-binary> as arguments
|
||||
--plugin <plugin=init> Add a plugin with the given init string (repeatable)
|
||||
--plugin-path <plugin-path> Add an additional plugin lookup path
|
||||
--first-commit-hash <git-commit-hash> Use the given git commit hash as the
|
||||
first commit's parent (for grafting)
|
||||
"
|
||||
case "$1" in
|
||||
-h|--help)
|
||||
|
||||
218
plugins/git_lfs_importer/README.md
Normal file
218
plugins/git_lfs_importer/README.md
Normal file
@@ -0,0 +1,218 @@
|
||||
# git_lfs_importer Plugin
|
||||
|
||||
This plugin automatically converts matching files to use Git LFS
|
||||
(Large File Storage) during the Mercurial to Git conversion process.
|
||||
|
||||
## Overview
|
||||
|
||||
The git_lfs_importer plugin intercepts file data during the hg-fast-export
|
||||
process and converts files matching specified patterns into Git LFS pointers.
|
||||
This allows you to seamlessly migrate a Mercurial repository to Git while
|
||||
simultaneously adopting LFS for large files.
|
||||
|
||||
### Why use git_lfs_importer?
|
||||
For large repositories, traditional migration requires two sequential,
|
||||
long-running steps:
|
||||
|
||||
1. Full history conversion from Mercurial to Git.
|
||||
2. Full history rewrite using `git lfs migrate import`.
|
||||
|
||||
This two-step process can take hours or even days for massive
|
||||
monorepos (e.g., 100GiB+).
|
||||
|
||||
This plugin eliminates the second, time-consuming history rewrite. It performs
|
||||
the LFS conversion incrementally (Just-In-Time). During the initial export, the
|
||||
plugin identifies large files and immediately writes LFS pointers into the Git
|
||||
history. This results in significantly faster conversions and allows for
|
||||
efficient incremental imports of new changesets.
|
||||
|
||||
## Prerequisites
|
||||
|
||||
### Dependencies
|
||||
|
||||
This plugin requires the `pathspec` package:
|
||||
|
||||
```bash
|
||||
pip install pathspec
|
||||
```
|
||||
|
||||
### Git Repository Setup
|
||||
|
||||
The destination Git repository must be pre-initialized with:
|
||||
|
||||
1. A `.gitattributes` file configured for LFS tracking
|
||||
2. Git LFS properly installed and initialized
|
||||
|
||||
Example `.gitattributes`:
|
||||
```
|
||||
*.bin filter=lfs diff=lfs merge=lfs -text
|
||||
*.iso filter=lfs diff=lfs merge=lfs -text
|
||||
large_files/** filter=lfs diff=lfs merge=lfs -text
|
||||
```
|
||||
|
||||
## Usage
|
||||
|
||||
### Step 1: Create the Destination Git Repository
|
||||
|
||||
```bash
|
||||
# Create a new git repository
|
||||
git init my-repo
|
||||
cd my-repo
|
||||
|
||||
# Initialize Git LFS
|
||||
git lfs install
|
||||
|
||||
# Create and commit a .gitattributes file
|
||||
cat > .gitattributes << EOF
|
||||
*.bin filter=lfs diff=lfs merge=lfs -text
|
||||
*.iso filter=lfs diff=lfs merge=lfs -text
|
||||
EOF
|
||||
git add .gitattributes
|
||||
git commit -m "Initialize Git LFS configuration"
|
||||
|
||||
# Get the commit hash (needed for --first-commit-hash)
|
||||
git rev-parse HEAD
|
||||
```
|
||||
|
||||
### Step 2: Create an LFS Specification File
|
||||
|
||||
Create a file (e.g., `lfs-spec.txt`) listing the patterns of files to convert
|
||||
to LFS. This uses gitignore-style glob patterns:
|
||||
|
||||
```
|
||||
*.bin
|
||||
*.iso
|
||||
*.tar.gz
|
||||
large_files/**
|
||||
*.mp4
|
||||
```
|
||||
|
||||
### Step 3: Run hg-fast-export with the Plugin
|
||||
|
||||
```bash
|
||||
hg-fast-export.sh \
|
||||
-r <mercurial-repo-path> \
|
||||
--plugin git_lfs_importer=lfs-spec.txt \
|
||||
--first-commit-hash <git-commit-hash> \
|
||||
--force
|
||||
```
|
||||
|
||||
Replace `<git-commit-hash>` with the hash obtained from Step 1.
|
||||
|
||||
## How It Works
|
||||
|
||||
1. **Pattern Matching**: Files are matched against patterns in the
|
||||
LFS specification file using gitignore-style matching
|
||||
2. **File Processing**: For each matching file:
|
||||
- Calculates SHA256 hash of the file content
|
||||
- Stores the actual file content in `.git/lfs/objects/<hash[0:2]>/<hash[2:4]>/<hash>`
|
||||
- Replaces the file data with an LFS pointer containing:
|
||||
- LFS version specification
|
||||
- SHA256 hash of the original content
|
||||
- Original file size
|
||||
3. **Git Fast-Import**: The LFS pointer is committed instead of the actual
|
||||
file content
|
||||
|
||||
## Important Notes
|
||||
|
||||
### First Commit Hash Requirement
|
||||
|
||||
The `--first-commit-hash` option must be provided with the Git commit hash that
|
||||
contains your `.gitattributes` file. This allows the plugin to chain from the
|
||||
existing Git history rather than creating a completely new history.
|
||||
|
||||
### Deletions
|
||||
|
||||
The plugin safely handles file deletions (data=None) and does not process them.
|
||||
|
||||
### Large Files and Largefiles
|
||||
|
||||
If the Mercurial repository uses Mercurial's largefiles extension, those files
|
||||
are already converted to their original content before reaching this plugin,
|
||||
allowing the plugin to apply LFS conversion if they match the patterns.
|
||||
|
||||
## Example Workflow
|
||||
|
||||
```bash
|
||||
# Configuration variables
|
||||
HG_REPO=/path/to/mercurial/repo
|
||||
GIT_DIR_NAME=my-project-git
|
||||
LFS_PATTERN_FILE=../lfs-patterns.txt
|
||||
|
||||
# 1. Prepare destination git repo
|
||||
mkdir "$GIT_DIR_NAME"
|
||||
cd "$GIT_DIR_NAME"
|
||||
git init
|
||||
git lfs install
|
||||
|
||||
# Create .gitattributes
|
||||
cat > .gitattributes << EOF
|
||||
*.bin filter=lfs diff=lfs merge=lfs -text
|
||||
*.iso filter=lfs diff=lfs merge=lfs -text
|
||||
EOF
|
||||
|
||||
git add .gitattributes
|
||||
git commit -m "Add LFS configuration"
|
||||
FIRST_HASH=$(git rev-parse HEAD)
|
||||
|
||||
# 2. Create LFS patterns file
|
||||
cat > "$LFS_PATTERN_FILE" << EOF
|
||||
*.bin
|
||||
*.iso
|
||||
build/artifacts/**
|
||||
EOF
|
||||
|
||||
# 3. Run conversion
|
||||
/path/to/hg-fast-export.sh \
|
||||
-r "$HG_REPO" \
|
||||
--plugin "git_lfs_importer=$LFS_PATTERN_FILE" \
|
||||
--first-commit-hash $FIRST_HASH \
|
||||
--force
|
||||
|
||||
# 4. Verify
|
||||
git log --oneline
|
||||
git lfs ls-files
|
||||
```
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### LFS Files Not Tracked
|
||||
Verify that:
|
||||
- The `.gitattributes` file exists in the destination repository
|
||||
- Patterns in `.gitattributes` match the files being converted
|
||||
- `git lfs install` was run in the repository
|
||||
|
||||
### "pathspec" Module Not Found
|
||||
Install the required dependency:
|
||||
```bash
|
||||
pip install pathspec
|
||||
```
|
||||
|
||||
### Conversion Fails at Import
|
||||
Ensure the `--first-commit-hash` value is:
|
||||
- A valid commit hash in the destination repository
|
||||
- From a commit that exists before the conversion starts
|
||||
- The hash of the commit containing `.gitattributes`
|
||||
|
||||
|
||||
### Force Requirement
|
||||
|
||||
You only need to pass the `--force` option when converting the *first*
|
||||
Mercurial commit into a non-empty Git repository. By default, `hg-fast-export`
|
||||
prevents importing Mercurial commits onto a non-empty Git repo to avoid
|
||||
creating conflicting histories. Passing `--force` overrides that safety check
|
||||
and allows the exporter to write the LFS pointer objects and integrate the
|
||||
converted data with the existing Git history.
|
||||
|
||||
If you are doing an incremental conversion (i.e., running the script a second
|
||||
time to import new changesets into an already converted repository),
|
||||
the `--force` flag is not required.
|
||||
|
||||
Omitting `--force` when attempting to import the first Mercurial commit into a
|
||||
non-empty repository will cause the importer to refuse the operation.
|
||||
|
||||
## See Also
|
||||
|
||||
- [Git LFS Documentation](https://git-lfs.github.com/)
|
||||
- [gitignore Pattern Format](https://git-scm.com/docs/gitignore)
|
||||
- [hg-fast-export Documentation](../../README.md)
|
||||
49
plugins/git_lfs_importer/__init__.py
Normal file
49
plugins/git_lfs_importer/__init__.py
Normal file
@@ -0,0 +1,49 @@
|
||||
import pathlib
|
||||
import hashlib
|
||||
import pathspec
|
||||
|
||||
|
||||
def build_filter(args):
    """Create the plugin's filter from the plugin init string.

    ``args`` is used as the path of the LFS specification file: a text file
    of gitignore-style patterns, one per line, naming the files that must be
    converted to Git LFS pointers.

    Returns a ``Filter`` bound to the compiled pattern set.
    Raises ``OSError`` if the specification file cannot be opened.
    """
    # Filenames are decoded as UTF-8 before being matched, so the patterns
    # are read as UTF-8 too instead of with the locale's default encoding.
    with open(args, encoding="utf-8") as f:
        lfs_spec = pathspec.PathSpec.from_lines(pathspec.patterns.GitWildMatchPattern, f)
    return Filter(lfs_spec)
|
||||
|
||||
|
||||
class Filter:
    """File-data filter that replaces matching files with Git LFS pointers.

    ``lfs_spec`` is any object exposing ``match_file(path) -> bool`` (a
    ``pathspec.PathSpec`` when built through ``build_filter``).
    """

    def __init__(self, lfs_spec):
        self.lfs_spec = lfs_spec

    def file_data_filter(self, file_data):
        """Turn ``file_data['data']`` into an LFS pointer when the file matches.

        file_data: {
            'filename': <bytes>,
            'file_ctx': <mercurial.filectx or None>,
            'data': <bytes or None>,
            'is_largefile': <bool>
        }

        May be called for deletions (data=None, file_ctx=None); those are
        left untouched, as are files that do not match the LFS spec.

        For a matching file the content is stored under
        ``.git/lfs/objects/<oid[0:2]>/<oid[2:4]>/<oid>`` (relative to the
        current working directory) and ``file_data['data']`` is replaced,
        in place, by the pointer text. Returns ``None``.
        """
        filename = file_data.get('filename')
        data = file_data.get('data')

        # Deletions carry no content, so there is nothing to convert.
        if data is None:
            return

        # surrogateescape keeps valid UTF-8 names unchanged while preventing
        # a single non-UTF-8 filename from aborting the whole export.
        if isinstance(filename, bytes):
            filename = filename.decode("utf-8", errors="surrogateescape")
        if not self.lfs_spec.match_file(filename):
            return

        sha256hash = hashlib.sha256(data).hexdigest()
        lfs_path = pathlib.Path(".git/lfs/objects") / sha256hash[0:2] / sha256hash[2:4]
        lfs_file_path = lfs_path / sha256hash

        # Objects are content-addressed: an existing file already holds
        # exactly this content and must not be rewritten.
        if not lfs_file_path.is_file():
            lfs_path.mkdir(parents=True, exist_ok=True)
            # Write to a temporary name and rename into place, so an
            # interrupted run never leaves a truncated object that a later
            # run would mistake for a complete one.
            tmp_path = lfs_path / (sha256hash + ".tmp")
            tmp_path.write_bytes(data)
            tmp_path.replace(lfs_file_path)

        # Replace the content with the LFS pointer.
        file_data['data'] = (
            "version https://git-lfs.github.com/spec/v1\n"
            f"oid sha256:{sha256hash}\n"
            f"size {len(data)}\n"
        ).encode("utf-8")
|
||||
117
t/first_commit_hash_option.t
Executable file
117
t/first_commit_hash_option.t
Executable file
@@ -0,0 +1,117 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# Copyright (c) 2025
|
||||
#
|
||||
|
||||
# Describes this file's subject: grafting the Mercurial history onto an
# existing git commit via --first-commit-hash (no plugin is loaded here).
test_description='hg-fast-export --first-commit-hash option tests'
|
||||
|
||||
. "${SHARNESS_TEST_SRCDIR-$(dirname "$0")/sharness}"/sharness.sh || exit 1
|
||||
|
||||
setup() {
|
||||
cat > "$HOME"/.hgrc <<-EOF
|
||||
[ui]
|
||||
username = Test User <test@example.com>
|
||||
EOF
|
||||
|
||||
# Git config for the destination repo commits
|
||||
git config --global user.email "test@example.com"
|
||||
git config --global user.name "Test User"
|
||||
}
|
||||
|
||||
setup
|
||||
|
||||
test_expect_success 'Mercurial history is imported over the provided commit' '
|
||||
test_when_finished "rm -rf hgrepo gitrepo lfs-patterns.txt" &&
|
||||
|
||||
# 1. Create source Mercurial repository with a text file
|
||||
(
|
||||
hg init hgrepo &&
|
||||
cd hgrepo &&
|
||||
echo "regular text file" > readme.txt &&
|
||||
hg add readme.txt &&
|
||||
hg commit -m "initial commit"
|
||||
) &&
|
||||
|
||||
# 2. Prepare destination git repo with LFS setup
|
||||
mkdir gitrepo &&
|
||||
(
|
||||
cd gitrepo &&
|
||||
git init -q &&
|
||||
git config core.ignoreCase false &&
|
||||
git lfs install --local &&
|
||||
git switch --create master &&
|
||||
|
||||
cat > .gitattributes <<-EOF &&
|
||||
* -text
|
||||
EOF
|
||||
|
||||
git add .gitattributes &&
|
||||
git commit -q -m "Initialize Git configuration"
|
||||
) &&
|
||||
|
||||
FIRST_HASH=$(git -C gitrepo rev-parse HEAD) &&
|
||||
|
||||
# 3. Run hg-fast-export
|
||||
(
|
||||
cd gitrepo &&
|
||||
hg-fast-export.sh \
|
||||
-r "../hgrepo" \
|
||||
--first-commit-hash "$FIRST_HASH" --force \
|
||||
-M master
|
||||
) &&
|
||||
|
||||
# 4. Verify git file is still present
|
||||
git -C gitrepo show HEAD:.gitattributes > gitattributes_check.txt &&
|
||||
test "$(cat gitattributes_check.txt)" = "* -text" &&
|
||||
|
||||
# 5. Verify hg file is imported
|
||||
git -C gitrepo show HEAD:readme.txt > readme_check.txt &&
|
||||
test "$(cat readme_check.txt)" = "regular text file"
|
||||
'
|
||||
|
||||
test_expect_success 'Mercurial history has priority over git' '
|
||||
test_when_finished "rm -rf hgrepo gitrepo lfs-patterns.txt" &&
|
||||
|
||||
# 1. Create source Mercurial repository with a text file
|
||||
(
|
||||
hg init hgrepo &&
|
||||
cd hgrepo &&
|
||||
echo "hg readme file" > readme.txt &&
|
||||
hg add readme.txt &&
|
||||
hg commit -m "initial commit"
|
||||
) &&
|
||||
|
||||
# 2. Prepare destination git repo with LFS setup
|
||||
mkdir gitrepo &&
|
||||
(
|
||||
cd gitrepo &&
|
||||
git init -q &&
|
||||
git config core.ignoreCase false &&
|
||||
git lfs install --local &&
|
||||
git switch --create master &&
|
||||
|
||||
cat > readme.txt <<-EOF &&
|
||||
git readme file
|
||||
EOF
|
||||
|
||||
git add readme.txt &&
|
||||
git commit -q -m "Initialize Git readme file"
|
||||
) &&
|
||||
|
||||
FIRST_HASH=$(git -C gitrepo rev-parse HEAD) &&
|
||||
|
||||
# 3. Run hg-fast-export
|
||||
(
|
||||
cd gitrepo &&
|
||||
hg-fast-export.sh \
|
||||
-r "../hgrepo" \
|
||||
--first-commit-hash "$FIRST_HASH" --force \
|
||||
-M master
|
||||
) &&
|
||||
|
||||
# 4. Verify the hg version of the file wins over the git one
|
||||
git -C gitrepo show HEAD:readme.txt > readme_check.txt &&
|
||||
test "$(cat readme_check.txt)" = "hg readme file"
|
||||
'
|
||||
|
||||
test_done
|
||||
189
t/git_lfs_importer_plugin.t
Executable file
189
t/git_lfs_importer_plugin.t
Executable file
@@ -0,0 +1,189 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# Copyright (c) 2025
|
||||
#
|
||||
|
||||
test_description='git_lfs_importer plugin integration tests'
|
||||
|
||||
. "${SHARNESS_TEST_SRCDIR-$(dirname "$0")/sharness}"/sharness.sh || exit 1
|
||||
|
||||
setup() {
|
||||
cat > "$HOME"/.hgrc <<-EOF
|
||||
[ui]
|
||||
username = Test User <test@example.com>
|
||||
EOF
|
||||
|
||||
# Git config for the destination repo commits
|
||||
git config --global user.email "test@example.com"
|
||||
git config --global user.name "Test User"
|
||||
}
|
||||
|
||||
setup
|
||||
|
||||
test_expect_success 'git_lfs_importer converts matched binary files to LFS pointers and pointers are properly smudged when checkouting' '
|
||||
test_when_finished "rm -rf hgrepo gitrepo lfs-patterns.txt" &&
|
||||
|
||||
# 1. Create source Mercurial repository with binary files
|
||||
(
|
||||
hg init hgrepo &&
|
||||
cd hgrepo &&
|
||||
echo "regular text file" > readme.txt &&
|
||||
echo "binary payload" > payload.bin &&
|
||||
hg add readme.txt payload.bin &&
|
||||
hg commit -m "initial commit with binary"
|
||||
) &&
|
||||
|
||||
# 2. Prepare destination git repo with LFS setup
|
||||
mkdir gitrepo &&
|
||||
(
|
||||
cd gitrepo &&
|
||||
git init -q &&
|
||||
git config core.ignoreCase false &&
|
||||
git lfs install --local &&
|
||||
|
||||
cat > .gitattributes <<-EOF &&
|
||||
*.bin filter=lfs diff=lfs merge=lfs -text
|
||||
EOF
|
||||
|
||||
git add .gitattributes &&
|
||||
git commit -q -m "Initialize Git LFS configuration"
|
||||
) &&
|
||||
|
||||
FIRST_HASH=$(git -C gitrepo rev-parse HEAD) &&
|
||||
|
||||
# 3. Create LFS patterns file
|
||||
cat > lfs-patterns.txt <<-EOF &&
|
||||
*.bin
|
||||
EOF
|
||||
|
||||
# 4. Run hg-fast-export with git_lfs_importer plugin
|
||||
(
|
||||
cd gitrepo &&
|
||||
hg-fast-export.sh \
|
||||
-r "../hgrepo" \
|
||||
--plugin "git_lfs_importer=../lfs-patterns.txt" \
|
||||
--first-commit-hash "$FIRST_HASH" --force
|
||||
) &&
|
||||
|
||||
# 5. Verify conversion: payload.bin should be an LFS pointer
|
||||
git -C gitrepo show HEAD:payload.bin > lfs_pointer.txt &&
|
||||
grep -q "version https://git-lfs.github.com/spec/v1" lfs_pointer.txt &&
|
||||
grep -q "oid sha256:" lfs_pointer.txt &&
|
||||
grep -q "size" lfs_pointer.txt &&
|
||||
|
||||
# 6. Verify non-matched file is unchanged
|
||||
git -C gitrepo show HEAD:readme.txt > readme_check.txt &&
|
||||
test "$(cat readme_check.txt)" = "regular text file" &&
|
||||
|
||||
# 7. Make sure the LFS pointer is smudged back into the real content when checked out
|
||||
git -C gitrepo reset --hard HEAD &&
|
||||
ls gitrepo &&
|
||||
test "$(cat gitrepo/payload.bin)" = "binary payload"
|
||||
'
|
||||
|
||||
test_expect_success 'git_lfs_importer skips files not matching patterns' '
|
||||
test_when_finished "rm -rf hgrepo gitrepo lfs-patterns.txt" &&
|
||||
|
||||
# 1. Create source with various files
|
||||
(
|
||||
hg init hgrepo &&
|
||||
cd hgrepo &&
|
||||
echo "text" > file.txt &&
|
||||
echo "data" > file.dat &&
|
||||
echo "iso content" > image.iso &&
|
||||
hg add . &&
|
||||
hg commit -m "multiple files"
|
||||
) &&
|
||||
|
||||
# 2. Prepare git repo with LFS
|
||||
mkdir gitrepo &&
|
||||
(
|
||||
cd gitrepo &&
|
||||
git init -q &&
|
||||
git config core.ignoreCase false &&
|
||||
git lfs install --local &&
|
||||
|
||||
cat > .gitattributes <<-EOF &&
|
||||
*.iso filter=lfs diff=lfs merge=lfs -text
|
||||
EOF
|
||||
|
||||
git add .gitattributes &&
|
||||
git commit -q -m "Initialize Git LFS configuration"
|
||||
) &&
|
||||
|
||||
FIRST_HASH=$(git -C gitrepo rev-parse HEAD) &&
|
||||
|
||||
# 3. Only .iso files should be converted
|
||||
cat > lfs-patterns.txt <<-EOF &&
|
||||
*.iso
|
||||
EOF
|
||||
|
||||
(
|
||||
cd gitrepo &&
|
||||
hg-fast-export.sh \
|
||||
-r "../hgrepo" \
|
||||
--plugin "git_lfs_importer=../lfs-patterns.txt" \
|
||||
--first-commit-hash "$FIRST_HASH" --force
|
||||
) &&
|
||||
|
||||
# 4. Verify .iso is LFS pointer
|
||||
git -C gitrepo show HEAD:image.iso | grep -q "oid sha256:" &&
|
||||
|
||||
# 5. Verify .txt and .dat are unchanged
|
||||
test "$(git -C gitrepo show HEAD:file.txt)" = "text" &&
|
||||
test "$(git -C gitrepo show HEAD:file.dat)" = "data"
|
||||
'
|
||||
|
||||
test_expect_success 'git_lfs_importer handles directory patterns' '
|
||||
test_when_finished "rm -rf hgrepo gitrepo lfs-patterns.txt" &&
|
||||
|
||||
# 1. Create repo with files in directory
|
||||
(
|
||||
hg init hgrepo &&
|
||||
cd hgrepo &&
|
||||
mkdir -p assets/images &&
|
||||
echo "logo data" > assets/images/logo.bin &&
|
||||
echo "regular" > readme.txt &&
|
||||
hg add . &&
|
||||
hg commit -m "files in directories"
|
||||
) &&
|
||||
|
||||
# 2. Prepare git repo
|
||||
mkdir gitrepo &&
|
||||
(
|
||||
cd gitrepo &&
|
||||
git init -q &&
|
||||
git config core.ignoreCase false &&
|
||||
git lfs install --local &&
|
||||
|
||||
cat > .gitattributes <<-EOF &&
|
||||
assets/** filter=lfs diff=lfs merge=lfs -text
|
||||
EOF
|
||||
|
||||
git add .gitattributes &&
|
||||
git commit -q -m "Initialize Git LFS configuration"
|
||||
) &&
|
||||
|
||||
FIRST_HASH=$(git -C gitrepo rev-parse HEAD) &&
|
||||
|
||||
# 3. Match directory pattern
|
||||
cat > lfs-patterns.txt <<-EOF &&
|
||||
assets/**
|
||||
EOF
|
||||
|
||||
(
|
||||
cd gitrepo &&
|
||||
hg-fast-export.sh \
|
||||
-r "../hgrepo" \
|
||||
--plugin "git_lfs_importer=../lfs-patterns.txt" \
|
||||
--first-commit-hash "$FIRST_HASH" --force
|
||||
) &&
|
||||
|
||||
# 4. Verify directory file is converted
|
||||
git -C gitrepo show HEAD:assets/images/logo.bin | grep -q "oid sha256:" &&
|
||||
|
||||
# 5. Verify file outside directory is unchanged
|
||||
test "$(git -C gitrepo show HEAD:readme.txt)" = "regular"
|
||||
'
|
||||
|
||||
test_done
|
||||
156
tests/test_git_lfs_importer_plugin.py
Normal file
156
tests/test_git_lfs_importer_plugin.py
Normal file
@@ -0,0 +1,156 @@
|
||||
import sys
|
||||
|
||||
sys.path.append("./plugins")
|
||||
|
||||
import hashlib
|
||||
import pathlib
|
||||
import time
|
||||
import unittest
|
||||
import tempfile
|
||||
import os
|
||||
import pathspec
|
||||
|
||||
from git_lfs_importer import Filter, build_filter
|
||||
|
||||
|
||||
class TestGitLfsImporterPlugin(unittest.TestCase):
|
||||
def setUp(self):
|
||||
# create an isolated temp dir and chdir into it for each test
|
||||
self._orig_cwd = os.getcwd()
|
||||
self._tmpdir = tempfile.TemporaryDirectory()
|
||||
self.tmp_path = pathlib.Path(self._tmpdir.name)
|
||||
os.chdir(self.tmp_path)
|
||||
|
||||
def tearDown(self):
|
||||
# restore cwd and cleanup
|
||||
os.chdir(self._orig_cwd)
|
||||
self._tmpdir.cleanup()
|
||||
|
||||
def empty_spec(self):
|
||||
return pathspec.PathSpec.from_lines(pathspec.patterns.GitWildMatchPattern, [])
|
||||
|
||||
# --------------------------------------------------------
|
||||
# GIVEN-WHEN-THEN TESTS for Filter.file_data_filter
|
||||
# --------------------------------------------------------
|
||||
|
||||
def test_skips_deletions(self):
|
||||
flt = Filter(self.empty_spec())
|
||||
file_data = {"filename": b"file.txt", "data": None}
|
||||
|
||||
flt.file_data_filter(file_data)
|
||||
|
||||
self.assertIsNone(file_data["data"])
|
||||
self.assertFalse((self.tmp_path / ".git").exists())
|
||||
|
||||
def test_skips_files_that_do_not_match_spec(self):
|
||||
spec = pathspec.PathSpec.from_lines(pathspec.patterns.GitWildMatchPattern, ["*.bin"])
|
||||
flt = Filter(spec)
|
||||
original = b"not matched"
|
||||
file_data = {"filename": b"file.txt", "data": original}
|
||||
|
||||
flt.file_data_filter(file_data)
|
||||
|
||||
self.assertEqual(file_data["data"], original)
|
||||
self.assertFalse((self.tmp_path / ".git").exists())
|
||||
|
||||
def test_converts_only_matched_files_to_lfs_pointer(self):
|
||||
spec = pathspec.PathSpec.from_lines(pathspec.patterns.GitWildMatchPattern, ["*.bin"])
|
||||
flt = Filter(spec)
|
||||
data = b"hello world"
|
||||
sha = hashlib.sha256(data).hexdigest()
|
||||
expected_pointer = (
|
||||
f"version https://git-lfs.github.com/spec/v1\n"
|
||||
f"oid sha256:{sha}\n"
|
||||
f"size {len(data)}\n"
|
||||
).encode("utf-8")
|
||||
file_data = {"filename": b"payload.bin", "data": data}
|
||||
|
||||
flt.file_data_filter(file_data)
|
||||
|
||||
self.assertEqual(file_data["data"], expected_pointer)
|
||||
lfs_file = pathlib.Path(".git/lfs/objects") / sha[:2] / sha[2:4] / sha
|
||||
self.assertTrue(lfs_file.is_file())
|
||||
self.assertEqual(lfs_file.read_bytes(), data)
|
||||
|
||||
def test_does_not_convert_unmatched_directory(self):
|
||||
spec = pathspec.PathSpec.from_lines(pathspec.patterns.GitWildMatchPattern, ["assets/**"])
|
||||
flt = Filter(spec)
|
||||
data = b"outside directory"
|
||||
file_data = {"filename": b"src/images/logo.png", "data": data}
|
||||
|
||||
flt.file_data_filter(file_data)
|
||||
|
||||
self.assertEqual(file_data["data"], data)
|
||||
self.assertFalse((self.tmp_path / ".git").exists())
|
||||
|
||||
def test_converts_matched_directory(self):
|
||||
spec = pathspec.PathSpec.from_lines(pathspec.patterns.GitWildMatchPattern, ["assets/**"])
|
||||
flt = Filter(spec)
|
||||
data = b"inside directory"
|
||||
sha = hashlib.sha256(data).hexdigest()
|
||||
file_data = {"filename": b"assets/images/logo.png", "data": data}
|
||||
|
||||
flt.file_data_filter(file_data)
|
||||
|
||||
self.assertIn(b"version https://git-lfs.github.com/spec/v1", file_data["data"])
|
||||
lfs_file = pathlib.Path(".git/lfs/objects") / sha[:2] / sha[2:4] / sha
|
||||
self.assertTrue(lfs_file.is_file())
|
||||
self.assertEqual(lfs_file.read_bytes(), data)
|
||||
|
||||
def test_does_not_overwrite_existing_blob(self):
|
||||
spec = pathspec.PathSpec.from_lines(pathspec.patterns.GitWildMatchPattern, ["*.bin"])
|
||||
flt = Filter(spec)
|
||||
data = b"abc"
|
||||
sha = hashlib.sha256(data).hexdigest()
|
||||
lfs_dir = pathlib.Path(".git/lfs/objects") / sha[:2] / sha[2:4]
|
||||
lfs_dir.mkdir(parents=True, exist_ok=True)
|
||||
lfs_file = lfs_dir / sha
|
||||
lfs_file.write_bytes(data)
|
||||
before_mtime = lfs_file.stat().st_mtime_ns
|
||||
time.sleep(0.01) # Ensure timestamp difference
|
||||
|
||||
file_data = {"filename": b"abc.bin", "data": data}
|
||||
|
||||
flt.file_data_filter(file_data)
|
||||
|
||||
expected_pointer_prefix = b"version https://git-lfs.github.com/spec/v1"
|
||||
self.assertTrue(file_data["data"].startswith(expected_pointer_prefix))
|
||||
after_mtime = lfs_file.stat().st_mtime_ns
|
||||
self.assertEqual(after_mtime, before_mtime)
|
||||
|
||||
def test_empty_file_converted_when_matched(self):
|
||||
spec = pathspec.PathSpec.from_lines(pathspec.patterns.GitWildMatchPattern, ["*.bin"])
|
||||
flt = Filter(spec)
|
||||
data = b""
|
||||
sha = hashlib.sha256(data).hexdigest()
|
||||
file_data = {"filename": b"empty.bin", "data": data}
|
||||
|
||||
flt.file_data_filter(file_data)
|
||||
|
||||
self.assertIn(b"size 0", file_data["data"])
|
||||
lfs_file = pathlib.Path(".git/lfs/objects") / sha[:2] / sha[2:4] / sha
|
||||
self.assertTrue(lfs_file.is_file())
|
||||
self.assertEqual(lfs_file.read_bytes(), data)
|
||||
|
||||
# --------------------------------------------------------
|
||||
# Optional: GIVEN-WHEN-THEN for build_filter
|
||||
# --------------------------------------------------------
|
||||
|
||||
def test_build_filter_reads_patterns_file(self):
|
||||
patterns_file = self.tmp_path / "lfs_patterns.txt"
|
||||
patterns_file.write_text("*.bin\nassets/**\n", encoding="utf-8")
|
||||
|
||||
flt = build_filter(str(patterns_file))
|
||||
|
||||
data_match = b"match me"
|
||||
sha_match = hashlib.sha256(data_match).hexdigest()
|
||||
fd_match = {"filename": b"assets/payload.bin", "data": data_match}
|
||||
flt.file_data_filter(fd_match)
|
||||
self.assertIn(b"oid sha256:", fd_match["data"])
|
||||
lfs_file = pathlib.Path(".git/lfs/objects") / sha_match[:2] / sha_match[2:4] / sha_match
|
||||
self.assertTrue(lfs_file.is_file())
|
||||
|
||||
data_skip = b"skip me"
|
||||
fd_skip = {"filename": b"docs/readme.md", "data": data_skip}
|
||||
flt.file_data_filter(fd_skip)
|
||||
self.assertEqual(fd_skip["data"], data_skip)
|
||||
Reference in New Issue
Block a user