mirror of
https://github.com/frej/fast-export.git
synced 2026-02-26 22:40:42 +01:00
Compare commits
14 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
c87b66ed7d | ||
|
|
76db75d963 | ||
|
|
5c9068a1f1 | ||
|
|
42d1c89e73 | ||
|
|
9d71921ed8 | ||
|
|
f6b72d248f | ||
|
|
8e1ba281d4 | ||
|
|
d77765a23e | ||
|
|
95459e5599 | ||
|
|
de5c8d9d97 | ||
|
|
ad96531587 | ||
|
|
4af9a33bd6 | ||
|
|
f71385ec14 | ||
|
|
ae21cbf1a2 |
3
.github/requirements-earliest.txt
vendored
3
.github/requirements-earliest.txt
vendored
@@ -1 +1,4 @@
|
||||
mercurial==5.2
|
||||
|
||||
# Required for git_lfs_importer plugin
|
||||
pathspec==0.11.2
|
||||
2
.github/requirements-latest.txt
vendored
2
.github/requirements-latest.txt
vendored
@@ -1,2 +1,4 @@
|
||||
mercurial
|
||||
|
||||
# Required for git_lfs_importer plugin
|
||||
pathspec==0.12.1
|
||||
2
.github/workflows/ci.yml
vendored
2
.github/workflows/ci.yml
vendored
@@ -10,7 +10,7 @@ on:
|
||||
jobs:
|
||||
test-earliest:
|
||||
name: Run test suite on the earliest supported Python version
|
||||
runs-on: ubuntu-20.04
|
||||
runs-on: ubuntu-22.04
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
76
README.md
76
README.md
@@ -141,12 +141,48 @@ if [ "$3" == "1" ]; then cat; else dos2unix -q; fi
|
||||
Mercurial Largefiles Extension
|
||||
------------------------------
|
||||
|
||||
Mercurial largefiles are exported as ordinary files into git, i.e. not
|
||||
as git lfs files. In order to make the export work, make sure that
|
||||
you have all largefiles of all mercurial commits available locally.
|
||||
This can be ensured by either cloning the mercurial repository with
|
||||
the option --all-largefiles or by executing the command
|
||||
'hg lfpull --rev "all()"' inside the mercurial repository.
|
||||
### Handling Mercurial Largefiles during Migration
|
||||
|
||||
When migrating from Mercurial to Git, largefiles are exported as ordinary
|
||||
files by default. To ensure a successful migration and manage repository
|
||||
size, follow the requirements below.
|
||||
|
||||
#### 1. Pre-Export: Ensure File Availability
|
||||
|
||||
Before starting the export, you must have all largefiles from all
|
||||
Mercurial commits available locally. Use one of these methods:
|
||||
|
||||
* **For a new clone:** `hg clone --all-largefiles <repo-url>`
|
||||
* **For an existing repo:** `hg lfpull --rev "all()"`
|
||||
|
||||
#### 2. Choosing Your LFS Strategy
|
||||
|
||||
If you want your files to be versioned in Git LFS rather than as standard
|
||||
Git blobs, you have two primary paths:
|
||||
|
||||
* **[git_lfs_importer plugin](./plugins/git_lfs_importer/README.md)
|
||||
(During Conversion)**
|
||||
Recommended for large repos. This performs Just-In-Time (JIT) conversion
|
||||
by identifying large files during the export and writing LFS pointers
|
||||
immediately, skipping the need for a second pass. This also supports
|
||||
**incremental conversion**, making it much more efficient for ongoing
|
||||
migrations.
|
||||
* **[git lfs migrate import](https://github.com/git-lfs/git-lfs/blob/main/docs/man/git-lfs-migrate.adoc)
|
||||
(After Conversion)**
|
||||
A standard two-step process: first, export the full history from Mercurial
|
||||
to Git, then run a separate full history rewrite to move files into LFS.
|
||||
|
||||
### Why use the git_lfs_importer plugin?
|
||||
|
||||
For "monorepos" or very large repositories (100GiB+), the traditional
|
||||
two-step process can take days. By integrating the LFS conversion
|
||||
directly into the history export, the plugin eliminates the massive
|
||||
time overhead of a secondary history rewrite and allows for incremental
|
||||
progress.
|
||||
|
||||
For detailed setup, see the
|
||||
[git_lfs_importer](./plugins/git_lfs_importer/README.md)
|
||||
plugin documentation.
|
||||
|
||||
Plugins
|
||||
-----------------
|
||||
@@ -177,9 +213,18 @@ defined filter methods in the [dos2unix](./plugins/dos2unix) and
|
||||
[branch_name_in_commit](./plugins/branch_name_in_commit) plugins.
|
||||
|
||||
```
|
||||
commit_data = {'branch': branch, 'parents': parents, 'author': author, 'desc': desc, 'revision': revision, 'hg_hash': hg_hash, 'committer': 'committer', 'extra': extra}
|
||||
commit_data = {
|
||||
'author': author,
|
||||
'branch': branch,
|
||||
'committer': 'committer',
|
||||
'desc': desc,
|
||||
'extra': extra,
|
||||
'hg_hash': hg_hash,
|
||||
'parents': parents,
|
||||
'revision': revision,
|
||||
}
|
||||
|
||||
def commit_message_filter(self,commit_data):
|
||||
def commit_message_filter(self, commit_data):
|
||||
```
|
||||
The `commit_message_filter` method is called for each commit, after parsing
|
||||
from hg, but before outputting to git. The dictionary `commit_data` contains the
|
||||
@@ -188,9 +233,14 @@ values in the dictionary after filters have been run are used to create the git
|
||||
commit.
|
||||
|
||||
```
|
||||
file_data = {'filename':filename,'file_ctx':file_ctx,'data':file_contents}
|
||||
file_data = {
|
||||
'data': file_contents,
|
||||
'file_ctx': file_ctx,
|
||||
'filename': filename,
|
||||
'is_largefile': largefile_status,
|
||||
}
|
||||
|
||||
def file_data_filter(self,file_data):
|
||||
def file_data_filter(self, file_data):
|
||||
```
|
||||
The `file_data_filter` method is called for each file within each commit.
|
||||
The dictionary `file_data` contains the above attributes about the file, and
|
||||
@@ -203,6 +253,12 @@ but in this case the `data` and `file_ctx` keys map to None. This is
|
||||
so that a filter which modifies file names can apply the same name
|
||||
transformations when files are deleted.
|
||||
|
||||
The `is_largefile` entry within the `file_data` dictionary will contain
|
||||
`True` if the original file was a largefile and has been converted
|
||||
to a normal file before the plugins were invoked. In this case, the `file_ctx`
|
||||
will still point to the filecontext for the original, unconverted file, while
|
||||
`filename` and `data` will contain the already converted information.
|
||||
|
||||
Submodules
|
||||
----------
|
||||
See README-SUBMODULES.md for how to convert subrepositories into git
|
||||
|
||||
@@ -211,15 +211,18 @@ def export_file_contents(ctx,manifest,files,hgtags,encoding='',plugins={}):
|
||||
b'Ignoring file %s which cannot be tracked by git\n' % filename
|
||||
)
|
||||
continue
|
||||
|
||||
largefile = False
|
||||
file_ctx=ctx.filectx(file)
|
||||
if is_largefile(filename):
|
||||
largefile = True
|
||||
filename = largefile_orig_name(filename)
|
||||
d = largefile_data(ctx, file, filename)
|
||||
else:
|
||||
file_ctx=ctx.filectx(file)
|
||||
d=file_ctx.data()
|
||||
|
||||
if plugins and plugins['file_data_filters']:
|
||||
file_data = {'filename':filename,'file_ctx':file_ctx,'data':d}
|
||||
file_data = {'filename':filename,'file_ctx':file_ctx,'data':d, 'is_largefile':largefile}
|
||||
for filter in plugins['file_data_filters']:
|
||||
filter(file_data)
|
||||
d=file_data['data']
|
||||
@@ -281,7 +284,7 @@ def strip_leading_slash(filename):
|
||||
|
||||
def export_commit(ui,repo,revision,old_marks,max,count,authors,
|
||||
branchesmap,sob,brmap,hgtags,encoding='',fn_encoding='',
|
||||
plugins={}):
|
||||
first_commit_hash="",plugins={}):
|
||||
def get_branchname(name):
|
||||
if name in brmap:
|
||||
return brmap[name]
|
||||
@@ -329,6 +332,9 @@ def export_commit(ui,repo,revision,old_marks,max,count,authors,
|
||||
|
||||
if not parents:
|
||||
type='full'
|
||||
if revision == 0 and first_commit_hash:
|
||||
wr(b'from %s' % first_commit_hash.encode())
|
||||
type='simple delta'
|
||||
else:
|
||||
wr(b'from %s' % revnum_to_revref(parents[0], old_marks))
|
||||
if len(parents) == 1:
|
||||
@@ -482,7 +488,9 @@ def branchtip(repo, heads):
|
||||
|
||||
def verify_heads(ui,repo,cache,force,ignore_unnamed_heads,branchesmap):
|
||||
branches={}
|
||||
for bn, heads in repo.branchmap().iteritems():
|
||||
|
||||
for bn in repo.branchmap():
|
||||
heads = repo.branchmap().branchheads(bn)
|
||||
branches[bn] = branchtip(repo, heads)
|
||||
l=[(-repo.changelog.rev(n), n, t) for t, n in branches.items()]
|
||||
l.sort()
|
||||
@@ -523,7 +531,8 @@ def verify_heads(ui,repo,cache,force,ignore_unnamed_heads,branchesmap):
|
||||
|
||||
def hg2git(repourl,m,marksfile,mappingfile,headsfile,tipfile,
|
||||
authors={},branchesmap={},tagsmap={},
|
||||
sob=False,force=False,ignore_unnamed_heads=False,hgtags=False,notes=False,encoding='',fn_encoding='',
|
||||
sob=False,force=False,ignore_unnamed_heads=False,hgtags=False,
|
||||
notes=False,encoding='',fn_encoding='',first_commit_hash='',
|
||||
plugins={}):
|
||||
def check_cache(filename, contents):
|
||||
if len(contents) == 0:
|
||||
@@ -539,7 +548,7 @@ def hg2git(repourl,m,marksfile,mappingfile,headsfile,tipfile,
|
||||
if len(state_cache) != 0:
|
||||
for (name, data) in [(marksfile, old_marks),
|
||||
(mappingfile, mapping_cache),
|
||||
(headsfile, state_cache)]:
|
||||
(headsfile, heads_cache)]:
|
||||
check_cache(name, data)
|
||||
|
||||
ui,repo=setup_repo(repourl)
|
||||
@@ -579,7 +588,7 @@ def hg2git(repourl,m,marksfile,mappingfile,headsfile,tipfile,
|
||||
brmap={}
|
||||
for rev in range(min,max):
|
||||
c=export_commit(ui,repo,rev,old_marks,max,c,authors,branchesmap,
|
||||
sob,brmap,hgtags,encoding,fn_encoding,
|
||||
sob,brmap,hgtags,encoding,fn_encoding,first_commit_hash,
|
||||
plugins)
|
||||
if notes:
|
||||
for rev in range(min,max):
|
||||
@@ -653,6 +662,8 @@ if __name__=='__main__':
|
||||
help="Add a plugin with the given init string <name=init>")
|
||||
parser.add_option("--subrepo-map", type="string", dest="subrepo_map",
|
||||
help="Provide a mapping file between the subrepository name and the submodule name")
|
||||
parser.add_option("--first-commit-hash", type="string", dest="first_commit_hash",
|
||||
help="Allow importing into an existing git repository by specifying the hash of the first commit")
|
||||
|
||||
(options,args)=parser.parse_args()
|
||||
|
||||
@@ -732,4 +743,5 @@ if __name__=='__main__':
|
||||
ignore_unnamed_heads=options.ignore_unnamed_heads,
|
||||
hgtags=options.hgtags,
|
||||
notes=options.notes,encoding=encoding,fn_encoding=fn_encoding,
|
||||
first_commit_hash=options.first_commit_hash,
|
||||
plugins=plugins_dict))
|
||||
|
||||
@@ -87,6 +87,8 @@ Options:
|
||||
with <file-path> <hg-hash> <is-binary> as arguments
|
||||
--plugin <plugin=init> Add a plugin with the given init string (repeatable)
|
||||
--plugin-path <plugin-path> Add an additional plugin lookup path
|
||||
--first-commit-hash <git-commit-hash> Use the given git commit hash as the
|
||||
first commit's parent (for grafting)
|
||||
"
|
||||
case "$1" in
|
||||
-h|--help)
|
||||
|
||||
12
hg2git.py
12
hg2git.py
@@ -23,13 +23,21 @@ user_clean_re=re.compile(b'^["]([^"]+)["]$')
|
||||
|
||||
def set_default_branch(name):
|
||||
global cfg_master
|
||||
cfg_master = name.encode('utf8') if not isinstance(name, bytes) else name
|
||||
cfg_master = name.encode('utf8')
|
||||
|
||||
def set_origin_name(name):
|
||||
global origin_name
|
||||
origin_name = name
|
||||
origin_name = name.encode('utf8')
|
||||
|
||||
def setup_repo(url):
|
||||
try:
|
||||
# Mercurial >= 7.2 requires explicit initialization for largefile
|
||||
# support to work.
|
||||
from mercurial import initialization
|
||||
initialization.init()
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
try:
|
||||
myui=ui.ui(interactive=False)
|
||||
except TypeError:
|
||||
|
||||
218
plugins/git_lfs_importer/README.md
Normal file
218
plugins/git_lfs_importer/README.md
Normal file
@@ -0,0 +1,218 @@
|
||||
# git_lfs_importer Plugin
|
||||
|
||||
This plugin automatically converts matching files to use Git LFS
|
||||
(Large File Storage) during the Mercurial to Git conversion process.
|
||||
|
||||
## Overview
|
||||
|
||||
The git_lfs_importer plugin intercepts file data during the hg-fast-export
|
||||
process and converts files matching specified patterns into Git LFS pointers.
|
||||
This allows you to seamlessly migrate a Mercurial repository to Git while
|
||||
simultaneously adopting LFS for large files.
|
||||
|
||||
Why use git_lfs_importer?
|
||||
For large repositories, traditional migration requires two sequential,
|
||||
long-running steps:
|
||||
|
||||
1. Full history conversion from Mercurial to Git.
|
||||
2. Full history rewrite using git lfs import.
|
||||
|
||||
This two-step process can take hours or even days for massive
|
||||
monorepos (e.g., 100GiB+).
|
||||
|
||||
This plugin eliminates the second, time-consuming history rewrite. It performs
|
||||
the LFS conversion incrementally (Just-In-Time). During the initial export, the
|
||||
plugin identifies large files and immediately writes LFS pointers into the Git
|
||||
history. This results in significantly faster conversions and allows for
|
||||
efficient incremental imports of new changesets.
|
||||
|
||||
## Prerequisites
|
||||
|
||||
### Dependencies
|
||||
|
||||
This plugin requires the `pathspec` package:
|
||||
|
||||
```bash
|
||||
pip install pathspec
|
||||
```
|
||||
|
||||
### Git Repository Setup
|
||||
|
||||
The destination Git repository must be pre-initialized with:
|
||||
|
||||
1. A `.gitattributes` file configured for LFS tracking
|
||||
2. Git LFS properly installed and initialized
|
||||
|
||||
Example `.gitattributes`:
|
||||
```
|
||||
*.bin filter=lfs diff=lfs merge=lfs -text
|
||||
*.iso filter=lfs diff=lfs merge=lfs -text
|
||||
large_files/** filter=lfs diff=lfs merge=lfs -text
|
||||
```
|
||||
|
||||
## Usage
|
||||
|
||||
### Step 1: Create the Destination Git Repository
|
||||
|
||||
```bash
|
||||
# Create a new git repository
|
||||
git init my-repo
|
||||
cd my-repo
|
||||
|
||||
# Initialize Git LFS
|
||||
git lfs install
|
||||
|
||||
# Create and commit a .gitattributes file
|
||||
cat > .gitattributes << EOF
|
||||
*.bin binary diff=lfs merge=lfs -text
|
||||
*.iso binary diff=lfs merge=lfs -text
|
||||
EOF
|
||||
git add .gitattributes
|
||||
git commit -m "Initialize Git LFS configuration"
|
||||
|
||||
# Get the commit hash (needed for --first-commit-hash)
|
||||
git rev-parse HEAD
|
||||
```
|
||||
|
||||
### Step 2: Create an LFS Specification File
|
||||
|
||||
Create a file (e.g., `lfs-spec.txt`) listing the patterns of files to convert
|
||||
to LFS. This uses gitignore-style glob patterns:
|
||||
|
||||
```
|
||||
*.bin
|
||||
*.iso
|
||||
*.tar.gz
|
||||
large_files/**
|
||||
*.mp4
|
||||
```
|
||||
|
||||
### Step 3: Run hg-fast-export with the Plugin
|
||||
|
||||
```bash
|
||||
hg-fast-export.sh \
|
||||
-r <mercurial-repo-path> \
|
||||
--plugin git_lfs_importer=lfs-spec.txt \
|
||||
--first-commit-hash <git-commit-hash> \
|
||||
--force
|
||||
```
|
||||
|
||||
Replace `<git-commit-hash>` with the hash obtained from Step 1.
|
||||
|
||||
## How It Works
|
||||
|
||||
1. **Pattern Matching**: Files are matched against patterns in the
|
||||
LFS specification file using gitignore-style matching
|
||||
2. **File Processing**: For each matching file:
|
||||
- Calculates SHA256 hash of the file content
|
||||
- Stores the actual file content in `.git/lfs/objects/<hash-prefix>/<hash>`
|
||||
- Replaces the file data with an LFS pointer containing:
|
||||
- LFS version specification
|
||||
- SHA256 hash of the original content
|
||||
- Original file size
|
||||
3. **Git Fast-Import**: The LFS pointer is committed instead of the actual
|
||||
file content
|
||||
|
||||
## Important Notes
|
||||
|
||||
### First Commit Hash Requirement
|
||||
|
||||
The `--first-commit-hash` option must be provided with the Git commit hash that
|
||||
contains your `.gitattributes` file. This allows the plugin to chain from the
|
||||
existing Git history rather than creating a completely new history.
|
||||
|
||||
### Deletions
|
||||
|
||||
The plugin safely handles file deletions (data=None) and does not process them.
|
||||
|
||||
### Large Files and Largefiles
|
||||
|
||||
If the Mercurial repository uses Mercurial's largefiles extension, those files
|
||||
are already converted to their original content before reaching this plugin,
|
||||
allowing the plugin to apply LFS conversion if they match the patterns.
|
||||
|
||||
## Example Workflow
|
||||
|
||||
```bash
|
||||
# Configuration variables
|
||||
HG_REPO=/path/to/mercurial/repo
|
||||
GIT_DIR_NAME=my-project-git
|
||||
LFS_PATTERN_FILE=../lfs-patterns.txt
|
||||
|
||||
# 1. Prepare destination git repo
|
||||
mkdir "$GIT_DIR_NAME"
|
||||
cd "$GIT_DIR_NAME"
|
||||
git init
|
||||
git lfs install
|
||||
|
||||
# Create .gitattributes
|
||||
cat > .gitattributes << EOF
|
||||
*.bin filter=lfs diff=lfs merge=lfs -text
|
||||
*.iso filter=lfs diff=lfs merge=lfs -text
|
||||
EOF
|
||||
|
||||
git add .gitattributes
|
||||
git commit -m "Add LFS configuration"
|
||||
FIRST_HASH=$(git rev-parse HEAD)
|
||||
|
||||
# 2. Create LFS patterns file
|
||||
cat > "$LFS_PATTERN_FILE" << EOF
|
||||
*.bin
|
||||
*.iso
|
||||
build/artifacts/**
|
||||
EOF
|
||||
|
||||
# 3. Run conversion
|
||||
/path/to/hg-fast-export.sh \
|
||||
-r "$HG_REPO" \
|
||||
--plugin "git_lfs_importer=$LFS_PATTERN_FILE" \
|
||||
--first-commit-hash $FIRST_HASH \
|
||||
--force
|
||||
|
||||
# 4. Verify
|
||||
git log --oneline
|
||||
git lfs ls-files
|
||||
```
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### LFS Files Not Tracked
|
||||
Verify that:
|
||||
- The `.gitattributes` file exists in the destination repository
|
||||
- Patterns in `.gitattributes` match the files being converted
|
||||
- `git lfs install` was run in the repository
|
||||
|
||||
### "pathspec" Module Not Found
|
||||
Install the required dependency:
|
||||
```bash
|
||||
pip install pathspec
|
||||
```
|
||||
|
||||
### Conversion Fails at Import
|
||||
Ensure the `--first-commit-hash` value is:
|
||||
- A valid commit hash in the destination repository
|
||||
- From a commit that exists before the conversion starts
|
||||
- The hash of the commit containing `.gitattributes`
|
||||
|
||||
|
||||
### Force Requirement
|
||||
|
||||
You only need to pass the `--force` option when converting the *first*
|
||||
Mercurial commit into a non-empty Git repository. By default, `hg-fast-export`
|
||||
prevents importing Mercurial commits onto a non-empty Git repo to avoid
|
||||
creating conflicting histories. Passing `--force` overrides that safety check
|
||||
and allows the exporter to write the LFS pointer objects and integrate the
|
||||
converted data with the existing Git history.
|
||||
|
||||
If you are doing an incremental conversion (i.e., running the script a second
|
||||
time to import new changesets into an already converted repository),
|
||||
the --force flag is not required.
|
||||
|
||||
Omitting `--force` when attempting to import the first Mercurial commit into a
|
||||
non-empty repository will cause the importer to refuse the operation.
|
||||
|
||||
## See Also
|
||||
|
||||
- [Git LFS Documentation](https://git-lfs.github.com/)
|
||||
- [gitignore Pattern Format](https://git-scm.com/docs/gitignore)
|
||||
- [hg-fast-export Documentation](../README.md)
|
||||
49
plugins/git_lfs_importer/__init__.py
Normal file
49
plugins/git_lfs_importer/__init__.py
Normal file
@@ -0,0 +1,49 @@
|
||||
import pathlib
|
||||
import hashlib
|
||||
import pathspec
|
||||
|
||||
|
||||
def build_filter(args):
|
||||
with open(args) as f:
|
||||
lfs_spec = pathspec.PathSpec.from_lines(pathspec.patterns.GitWildMatchPattern, f)
|
||||
return Filter(lfs_spec)
|
||||
|
||||
|
||||
class Filter:
|
||||
def __init__(self, lfs_spec):
|
||||
self.lfs_spec = lfs_spec
|
||||
|
||||
def file_data_filter(self, file_data):
|
||||
"""
|
||||
file_data: {
|
||||
'filename': <str>,
|
||||
'file_ctx': <mercurial.filectx or None>,
|
||||
'data': <bytes or None>,
|
||||
'is_largefile': <bool>
|
||||
}
|
||||
|
||||
May be called for deletions (data=None, file_ctx=None).
|
||||
"""
|
||||
filename = file_data.get('filename')
|
||||
data = file_data.get('data')
|
||||
|
||||
# Skip deletions or filtered files early
|
||||
if data is None or not self.lfs_spec.match_file(filename.decode("utf-8")):
|
||||
return
|
||||
|
||||
# Get the file path
|
||||
sha256hash = hashlib.sha256(data).hexdigest()
|
||||
lfs_path = pathlib.Path(f".git/lfs/objects/{sha256hash[0:2]}/{sha256hash[2:4]}")
|
||||
lfs_path.mkdir(parents=True, exist_ok=True)
|
||||
lfs_file_path = lfs_path / sha256hash
|
||||
|
||||
# The binary blob is already in LFS
|
||||
if not lfs_file_path.is_file():
|
||||
(lfs_path / sha256hash).write_bytes(data)
|
||||
|
||||
# Write the LFS pointer
|
||||
file_data['data'] = (
|
||||
f"version https://git-lfs.github.com/spec/v1\n"
|
||||
f"oid sha256:{sha256hash}\n"
|
||||
f"size {len(data)}\n"
|
||||
).encode("utf-8")
|
||||
117
t/first_commit_hash_option.t
Executable file
117
t/first_commit_hash_option.t
Executable file
@@ -0,0 +1,117 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# Copyright (c) 2025
|
||||
#
|
||||
|
||||
test_description='git_lfs_importer plugin integration tests'
|
||||
|
||||
. "${SHARNESS_TEST_SRCDIR-$(dirname "$0")/sharness}"/sharness.sh || exit 1
|
||||
|
||||
setup() {
|
||||
cat > "$HOME"/.hgrc <<-EOF
|
||||
[ui]
|
||||
username = Test User <test@example.com>
|
||||
EOF
|
||||
|
||||
# Git config for the destination repo commits
|
||||
git config --global user.email "test@example.com"
|
||||
git config --global user.name "Test User"
|
||||
}
|
||||
|
||||
setup
|
||||
|
||||
test_expect_success 'Mercurial history is imported over the provided commit' '
|
||||
test_when_finished "rm -rf hgrepo gitrepo lfs-patterns.txt" &&
|
||||
|
||||
# 1. Create source Mercurial repository with binary files
|
||||
(
|
||||
hg init hgrepo &&
|
||||
cd hgrepo &&
|
||||
echo "regular text file" > readme.txt &&
|
||||
hg add readme.txt &&
|
||||
hg commit -m "initial commit"
|
||||
) &&
|
||||
|
||||
# 2. Prepare destination git repo with LFS setup
|
||||
mkdir gitrepo &&
|
||||
(
|
||||
cd gitrepo &&
|
||||
git init -q &&
|
||||
git config core.ignoreCase false &&
|
||||
git lfs install --local &&
|
||||
git switch --create master &&
|
||||
|
||||
cat > .gitattributes <<-EOF &&
|
||||
* -text
|
||||
EOF
|
||||
|
||||
git add .gitattributes &&
|
||||
git commit -q -m "Initialize Git configuration"
|
||||
) &&
|
||||
|
||||
FIRST_HASH=$(git -C gitrepo rev-parse HEAD) &&
|
||||
|
||||
# 3. Run hg-fast-export
|
||||
(
|
||||
cd gitrepo &&
|
||||
hg-fast-export.sh \
|
||||
-r "../hgrepo" \
|
||||
--first-commit-hash "$FIRST_HASH" --force \
|
||||
-M master
|
||||
) &&
|
||||
|
||||
# 4. Verify git file is still present
|
||||
git -C gitrepo show HEAD:.gitattributes > gitattributes_check.txt &&
|
||||
test "$(cat gitattributes_check.txt)" = "* -text" &&
|
||||
|
||||
# 5. Verify hg file is imported
|
||||
git -C gitrepo show HEAD:readme.txt > readme_check.txt &&
|
||||
test "$(cat readme_check.txt)" = "regular text file"
|
||||
'
|
||||
|
||||
test_expect_success 'Mercurial history has priority over git' '
|
||||
test_when_finished "rm -rf hgrepo gitrepo lfs-patterns.txt" &&
|
||||
|
||||
# 1. Create source Mercurial repository with binary files
|
||||
(
|
||||
hg init hgrepo &&
|
||||
cd hgrepo &&
|
||||
echo "hg readme file" > readme.txt &&
|
||||
hg add readme.txt &&
|
||||
hg commit -m "initial commit"
|
||||
) &&
|
||||
|
||||
# 2. Prepare destination git repo with LFS setup
|
||||
mkdir gitrepo &&
|
||||
(
|
||||
cd gitrepo &&
|
||||
git init -q &&
|
||||
git config core.ignoreCase false &&
|
||||
git lfs install --local &&
|
||||
git switch --create master &&
|
||||
|
||||
cat > readme.txt <<-EOF &&
|
||||
git readme file
|
||||
EOF
|
||||
|
||||
git add readme.txt &&
|
||||
git commit -q -m "Initialize Git readme file"
|
||||
) &&
|
||||
|
||||
FIRST_HASH=$(git -C gitrepo rev-parse HEAD) &&
|
||||
|
||||
# 3. Run hg-fast-export
|
||||
(
|
||||
cd gitrepo &&
|
||||
hg-fast-export.sh \
|
||||
-r "../hgrepo" \
|
||||
--first-commit-hash "$FIRST_HASH" --force \
|
||||
-M master
|
||||
) &&
|
||||
|
||||
# 5. Verify hg file is imported
|
||||
git -C gitrepo show HEAD:readme.txt > readme_check.txt &&
|
||||
test "$(cat readme_check.txt)" = "hg readme file"
|
||||
'
|
||||
|
||||
test_done
|
||||
189
t/git_lfs_importer_plugin.t
Executable file
189
t/git_lfs_importer_plugin.t
Executable file
@@ -0,0 +1,189 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# Copyright (c) 2025
|
||||
#
|
||||
|
||||
test_description='git_lfs_importer plugin integration tests'
|
||||
|
||||
. "${SHARNESS_TEST_SRCDIR-$(dirname "$0")/sharness}"/sharness.sh || exit 1
|
||||
|
||||
setup() {
|
||||
cat > "$HOME"/.hgrc <<-EOF
|
||||
[ui]
|
||||
username = Test User <test@example.com>
|
||||
EOF
|
||||
|
||||
# Git config for the destination repo commits
|
||||
git config --global user.email "test@example.com"
|
||||
git config --global user.name "Test User"
|
||||
}
|
||||
|
||||
setup
|
||||
|
||||
test_expect_success 'git_lfs_importer converts matched binary files to LFS pointers and pointers are properly smudged when checkouting' '
|
||||
test_when_finished "rm -rf hgrepo gitrepo lfs-patterns.txt" &&
|
||||
|
||||
# 1. Create source Mercurial repository with binary files
|
||||
(
|
||||
hg init hgrepo &&
|
||||
cd hgrepo &&
|
||||
echo "regular text file" > readme.txt &&
|
||||
echo "binary payload" > payload.bin &&
|
||||
hg add readme.txt payload.bin &&
|
||||
hg commit -m "initial commit with binary"
|
||||
) &&
|
||||
|
||||
# 2. Prepare destination git repo with LFS setup
|
||||
mkdir gitrepo &&
|
||||
(
|
||||
cd gitrepo &&
|
||||
git init -q &&
|
||||
git config core.ignoreCase false &&
|
||||
git lfs install --local &&
|
||||
|
||||
cat > .gitattributes <<-EOF &&
|
||||
*.bin filter=lfs diff=lfs merge=lfs -text
|
||||
EOF
|
||||
|
||||
git add .gitattributes &&
|
||||
git commit -q -m "Initialize Git LFS configuration"
|
||||
) &&
|
||||
|
||||
FIRST_HASH=$(git -C gitrepo rev-parse HEAD) &&
|
||||
|
||||
# 3. Create LFS patterns file
|
||||
cat > lfs-patterns.txt <<-EOF &&
|
||||
*.bin
|
||||
EOF
|
||||
|
||||
# 4. Run hg-fast-export with git_lfs_importer plugin
|
||||
(
|
||||
cd gitrepo &&
|
||||
hg-fast-export.sh \
|
||||
-r "../hgrepo" \
|
||||
--plugin "git_lfs_importer=../lfs-patterns.txt" \
|
||||
--first-commit-hash "$FIRST_HASH" --force
|
||||
) &&
|
||||
|
||||
# 5. Verify conversion: payload.bin should be an LFS pointer
|
||||
git -C gitrepo show HEAD:payload.bin > lfs_pointer.txt &&
|
||||
grep -q "version https://git-lfs.github.com/spec/v1" lfs_pointer.txt &&
|
||||
grep -q "oid sha256:" lfs_pointer.txt &&
|
||||
grep -q "size" lfs_pointer.txt &&
|
||||
|
||||
# 6. Verify non-matched file is unchanged
|
||||
git -C gitrepo show HEAD:readme.txt > readme_check.txt &&
|
||||
test "$(cat readme_check.txt)" = "regular text file" &&
|
||||
|
||||
# 7. Make sure the LFS pointer file is unsmeared when checked out
|
||||
git -C gitrepo reset --hard HEAD &&
|
||||
ls gitrepo &&
|
||||
test "$(cat gitrepo/payload.bin)" = "binary payload"
|
||||
'
|
||||
|
||||
test_expect_success 'git_lfs_importer skips files not matching patterns' '
|
||||
test_when_finished "rm -rf hgrepo gitrepo lfs-patterns.txt" &&
|
||||
|
||||
# 1. Create source with various files
|
||||
(
|
||||
hg init hgrepo &&
|
||||
cd hgrepo &&
|
||||
echo "text" > file.txt &&
|
||||
echo "data" > file.dat &&
|
||||
echo "iso content" > image.iso &&
|
||||
hg add . &&
|
||||
hg commit -m "multiple files"
|
||||
) &&
|
||||
|
||||
# 2. Prepare git repo with LFS
|
||||
mkdir gitrepo &&
|
||||
(
|
||||
cd gitrepo &&
|
||||
git init -q &&
|
||||
git config core.ignoreCase false &&
|
||||
git lfs install --local &&
|
||||
|
||||
cat > .gitattributes <<-EOF &&
|
||||
*.iso filter=lfs diff=lfs merge=lfs -text
|
||||
EOF
|
||||
|
||||
git add .gitattributes &&
|
||||
git commit -q -m "Initialize Git LFS configuration"
|
||||
) &&
|
||||
|
||||
FIRST_HASH=$(git -C gitrepo rev-parse HEAD) &&
|
||||
|
||||
# 3. Only .iso files should be converted
|
||||
cat > lfs-patterns.txt <<-EOF &&
|
||||
*.iso
|
||||
EOF
|
||||
|
||||
(
|
||||
cd gitrepo &&
|
||||
hg-fast-export.sh \
|
||||
-r "../hgrepo" \
|
||||
--plugin "git_lfs_importer=../lfs-patterns.txt" \
|
||||
--first-commit-hash "$FIRST_HASH" --force
|
||||
) &&
|
||||
|
||||
# 4. Verify .iso is LFS pointer
|
||||
git -C gitrepo show HEAD:image.iso | grep -q "oid sha256:" &&
|
||||
|
||||
# 5. Verify .txt and .dat are unchanged
|
||||
test "$(git -C gitrepo show HEAD:file.txt)" = "text" &&
|
||||
test "$(git -C gitrepo show HEAD:file.dat)" = "data"
|
||||
'
|
||||
|
||||
test_expect_success 'git_lfs_importer handles directory patterns' '
|
||||
test_when_finished "rm -rf hgrepo gitrepo lfs-patterns.txt" &&
|
||||
|
||||
# 1. Create repo with files in directory
|
||||
(
|
||||
hg init hgrepo &&
|
||||
cd hgrepo &&
|
||||
mkdir -p assets/images &&
|
||||
echo "logo data" > assets/images/logo.bin &&
|
||||
echo "regular" > readme.txt &&
|
||||
hg add . &&
|
||||
hg commit -m "files in directories"
|
||||
) &&
|
||||
|
||||
# 2. Prepare git repo
|
||||
mkdir gitrepo &&
|
||||
(
|
||||
cd gitrepo &&
|
||||
git init -q &&
|
||||
git config core.ignoreCase false &&
|
||||
git lfs install --local &&
|
||||
|
||||
cat > .gitattributes <<-EOF &&
|
||||
assets/** filter=lfs diff=lfs merge=lfs -text
|
||||
EOF
|
||||
|
||||
git add .gitattributes &&
|
||||
git commit -q -m "Initialize Git LFS configuration"
|
||||
) &&
|
||||
|
||||
FIRST_HASH=$(git -C gitrepo rev-parse HEAD) &&
|
||||
|
||||
# 3. Match directory pattern
|
||||
cat > lfs-patterns.txt <<-EOF &&
|
||||
assets/**
|
||||
EOF
|
||||
|
||||
(
|
||||
cd gitrepo &&
|
||||
hg-fast-export.sh \
|
||||
-r "../hgrepo" \
|
||||
--plugin "git_lfs_importer=../lfs-patterns.txt" \
|
||||
--first-commit-hash "$FIRST_HASH" --force
|
||||
) &&
|
||||
|
||||
# 4. Verify directory file is converted
|
||||
git -C gitrepo show HEAD:assets/images/logo.bin | grep -q "oid sha256:" &&
|
||||
|
||||
# 5. Verify file outside directory is unchanged
|
||||
test "$(git -C gitrepo show HEAD:readme.txt)" = "regular"
|
||||
'
|
||||
|
||||
test_done
|
||||
20
t/largefile_plugin.expected
Normal file
20
t/largefile_plugin.expected
Normal file
@@ -0,0 +1,20 @@
|
||||
blob
|
||||
mark :1
|
||||
data 7
|
||||
a_file
|
||||
|
||||
blob
|
||||
mark :2
|
||||
data 6
|
||||
large
|
||||
|
||||
reset refs/heads/master
|
||||
commit refs/heads/master
|
||||
mark :3
|
||||
author Grevious Bodily Harmsworth <gbh@example.com> 1679014800 +0000
|
||||
committer Grevious Bodily Harmsworth <gbh@example.com> 1679014800 +0000
|
||||
data 3
|
||||
r0
|
||||
M 100644 :1 a.txt
|
||||
M 100644 :2 b.txt
|
||||
|
||||
69
t/largefile_plugin.t
Executable file
69
t/largefile_plugin.t
Executable file
@@ -0,0 +1,69 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# Copyright (c) 2023 Felipe Contreras
|
||||
# Copyright (c) 2023 Frej Drejhammar
|
||||
# Copyright (c) 2025 Günther Nußmüller
|
||||
#
|
||||
# Check that plugin invocation works with largefiles.
|
||||
# This test uses the echo_file_data_test_plugin to verify that the
|
||||
# file data is passed correctly, including the largefile status.
|
||||
#
|
||||
|
||||
test_description='Largefiles and plugin test'
|
||||
|
||||
. "${SHARNESS_TEST_SRCDIR-$(dirname "$0")/sharness}"/sharness.sh || exit 1
|
||||
|
||||
|
||||
git_create() {
|
||||
git init -q "$1" &&
|
||||
git -C "$1" config core.ignoreCase false
|
||||
}
|
||||
|
||||
git_convert() {
|
||||
(
|
||||
cd "$2" &&
|
||||
hg-fast-export.sh --repo "../$1" \
|
||||
-s --hgtags -n \
|
||||
--plugin ../../plugins/echo_file_data_test_plugin
|
||||
)
|
||||
}
|
||||
|
||||
setup() {
|
||||
cat > "$HOME"/.hgrc <<-EOF
|
||||
[ui]
|
||||
username = Grevious Bodily Harmsworth <gbh@example.com>
|
||||
[extensions]
|
||||
largefiles =
|
||||
EOF
|
||||
}
|
||||
|
||||
commit0() {
|
||||
(
|
||||
cd hgrepo &&
|
||||
echo "a_file" > a.txt &&
|
||||
echo "large" > b.txt
|
||||
hg add a.txt &&
|
||||
hg add --large b.txt &&
|
||||
hg commit -d "2023-03-17 01:00Z" -m "r0"
|
||||
)
|
||||
}
|
||||
|
||||
setup
|
||||
|
||||
test_expect_success 'largefile and plugin' '
|
||||
test_when_finished "rm -rf hgrepo gitrepo" &&
|
||||
|
||||
(
|
||||
hg init hgrepo &&
|
||||
commit0
|
||||
) &&
|
||||
git_create gitrepo &&
|
||||
git_convert hgrepo gitrepo &&
|
||||
|
||||
git -C gitrepo fast-export --all > actual &&
|
||||
|
||||
test_cmp "$SHARNESS_TEST_DIRECTORY"/largefile_plugin.expected actual &&
|
||||
test_cmp "$SHARNESS_TEST_DIRECTORY"/largefile_plugin_file_info.expected gitrepo/largefile_info.txt
|
||||
'
|
||||
|
||||
test_done
|
||||
12
t/largefile_plugin_file_info.expected
Normal file
12
t/largefile_plugin_file_info.expected
Normal file
@@ -0,0 +1,12 @@
|
||||
filename: b'b.txt'
|
||||
data size: 6 bytes
|
||||
ctx rev: 0
|
||||
ctx binary: False
|
||||
is largefile: True
|
||||
|
||||
filename: b'a.txt'
|
||||
data size: 7 bytes
|
||||
ctx rev: 0
|
||||
ctx binary: False
|
||||
is largefile: False
|
||||
|
||||
18
t/plugins/echo_file_data_test_plugin/__init__.py
Normal file
18
t/plugins/echo_file_data_test_plugin/__init__.py
Normal file
@@ -0,0 +1,18 @@
|
||||
import sys
|
||||
from mercurial import node
|
||||
|
||||
def build_filter(args):
    """Plugin entry point: return the echo filter (arguments are ignored)."""
    return Filter(args)


class Filter:
    """Test plugin that appends a human-readable record of every file it
    sees to ``largefile_info.txt`` in the current directory.

    The record includes the filename, data size, the file context's
    revision and binary flag, and the ``is_largefile`` marker, so the
    test suite can verify what fast-export handed to the plugin.  The
    file data itself is passed through unchanged.
    """

    def __init__(self, _):
        # No configuration is needed for this plugin.
        pass

    def file_data_filter(self, file_data):
        """Append one record describing ``file_data`` to the report file."""
        ctx = file_data['file_ctx']
        record = (
            f"filename: {file_data['filename']}\n"
            f"data size: {len(file_data['data'])} bytes\n"
            f"ctx rev: {ctx.rev()}\n"
            f"ctx binary: {ctx.isbinary()}\n"
            f"is largefile: {file_data.get('is_largefile', False)}\n"
            "\n"
        )
        with open('largefile_info.txt', 'a') as report:
            report.write(record)
|
||||
42
t/set_origin.expected
Normal file
42
t/set_origin.expected
Normal file
@@ -0,0 +1,42 @@
|
||||
blob
|
||||
mark :1
|
||||
data 5
|
||||
zero
|
||||
|
||||
reset refs/heads/prefix/master
|
||||
commit refs/heads/prefix/master
|
||||
mark :2
|
||||
author H G Wells <wells@example.com> 1679014800 +0000
|
||||
committer H G Wells <wells@example.com> 1679014800 +0000
|
||||
data 5
|
||||
zero
|
||||
M 100644 :1 content
|
||||
|
||||
blob
|
||||
mark :3
|
||||
data 8
|
||||
branch1
|
||||
|
||||
commit refs/heads/prefix/branch1
|
||||
mark :4
|
||||
author H G Wells <wells@example.com> 1679018400 +0000
|
||||
committer H G Wells <wells@example.com> 1679018400 +0000
|
||||
data 29
|
||||
Added file in branch branch1
|
||||
from :2
|
||||
M 100644 :3 b8486c4feca589a4237a1ee428322d7109ede12e
|
||||
|
||||
blob
|
||||
mark :5
|
||||
data 8
|
||||
branch2
|
||||
|
||||
commit refs/heads/prefix/branch2
|
||||
mark :6
|
||||
author H G Wells <wells@example.com> 1679022000 +0000
|
||||
committer H G Wells <wells@example.com> 1679022000 +0000
|
||||
data 29
|
||||
Added file in branch branch2
|
||||
from :4
|
||||
M 100644 :5 fe786baee0d76603092c25609f2967b9c28a2cf2
|
||||
|
||||
59
t/set_origin.t
Executable file
59
t/set_origin.t
Executable file
@@ -0,0 +1,59 @@
|
||||
#!/bin/bash
#
# Copyright (c) 2023 Felipe Contreras
# Copyright (c) 2025 Günther Nußmüller
#

test_description='Set origin tests'

. "${SHARNESS_TEST_SRCDIR-$(dirname "$0")/sharness}"/sharness.sh || exit 1

# Compare the fast-export stream of git repository $1 against the
# golden file.  A failing fast-export must fail the test, so the two
# commands are chained with &&.
check() {
	git -C "$1" fast-export --all > actual &&
	test_cmp "$SHARNESS_TEST_DIRECTORY"/set_origin.expected actual
}

# Convert hg repository $1 into a new git repository $2, prefixing
# every branch name with origin $3.
git_clone() {
	(
	git init -q "$2" &&
	cd "$2" &&
	git config core.ignoreCase false &&
	hg-fast-export.sh --repo "../$1" --origin "$3"
	)
}

setup() {
	cat > "$HOME"/.hgrc <<-EOF
	[ui]
	username = H G Wells <wells@example.com>
	EOF
}

# Create hg branch $1 containing one file (named after the sha1 of the
# branch name) committed at hour $2.  All steps are &&-chained so a
# failure anywhere propagates to the caller.
make-branch() {
	hg branch "$1" &&
	FILE=$(echo "$1" | sha1sum | cut -d " " -f 1) &&
	echo "$1" > "$FILE" &&
	hg add "$FILE" &&
	hg commit -d "2023-03-17 $2:00Z" -m "Added file in branch $1"
}

setup

test_expect_success 'basic' '
	test_when_finished "rm -rf hgrepo gitrepo" &&

	(
	hg init hgrepo &&
	cd hgrepo &&
	echo zero > content &&
	hg add content &&
	hg commit -m zero -d "2023-03-17 01:00Z" &&
	make-branch branch1 02 &&
	make-branch branch2 03
	) &&

	git_clone hgrepo gitrepo prefix &&
	check gitrepo
'

test_done
|
||||
156
tests/test_git_lfs_importer_plugin.py
Normal file
156
tests/test_git_lfs_importer_plugin.py
Normal file
@@ -0,0 +1,156 @@
|
||||
import sys
|
||||
|
||||
sys.path.append("./plugins")
|
||||
|
||||
import hashlib
|
||||
import pathlib
|
||||
import time
|
||||
import unittest
|
||||
import tempfile
|
||||
import os
|
||||
import pathspec
|
||||
|
||||
from git_lfs_importer import Filter, build_filter
|
||||
|
||||
|
||||
class TestGitLfsImporterPlugin(unittest.TestCase):
    """Unit tests for the git_lfs_importer plugin's ``Filter``.

    Every test runs inside a fresh temporary working directory, because
    the plugin stores LFS blobs under ``.git/lfs/objects`` relative to
    the current directory (verified by the path assertions below).
    """

    def setUp(self):
        # create an isolated temp dir and chdir into it for each test
        self._orig_cwd = os.getcwd()
        self._tmpdir = tempfile.TemporaryDirectory()
        self.tmp_path = pathlib.Path(self._tmpdir.name)
        os.chdir(self.tmp_path)

    def tearDown(self):
        # restore cwd and cleanup
        os.chdir(self._orig_cwd)
        self._tmpdir.cleanup()

    def empty_spec(self):
        """Return a PathSpec that matches no path at all."""
        return pathspec.PathSpec.from_lines(pathspec.patterns.GitWildMatchPattern, [])

    # --------------------------------------------------------
    # GIVEN-WHEN-THEN TESTS for Filter.file_data_filter
    # --------------------------------------------------------

    def test_skips_deletions(self):
        """A deletion (data is None) passes through and writes no LFS store."""
        flt = Filter(self.empty_spec())
        file_data = {"filename": b"file.txt", "data": None}

        flt.file_data_filter(file_data)

        self.assertIsNone(file_data["data"])
        self.assertFalse((self.tmp_path / ".git").exists())

    def test_skips_files_that_do_not_match_spec(self):
        """A file outside the pattern spec keeps its original data."""
        spec = pathspec.PathSpec.from_lines(pathspec.patterns.GitWildMatchPattern, ["*.bin"])
        flt = Filter(spec)
        original = b"not matched"
        file_data = {"filename": b"file.txt", "data": original}

        flt.file_data_filter(file_data)

        self.assertEqual(file_data["data"], original)
        self.assertFalse((self.tmp_path / ".git").exists())

    def test_converts_only_matched_files_to_lfs_pointer(self):
        """A matched file's data becomes a git-lfs pointer and the original
        bytes are stored under .git/lfs/objects/<aa>/<bb>/<sha256>."""
        spec = pathspec.PathSpec.from_lines(pathspec.patterns.GitWildMatchPattern, ["*.bin"])
        flt = Filter(spec)
        data = b"hello world"
        sha = hashlib.sha256(data).hexdigest()
        expected_pointer = (
            f"version https://git-lfs.github.com/spec/v1\n"
            f"oid sha256:{sha}\n"
            f"size {len(data)}\n"
        ).encode("utf-8")
        file_data = {"filename": b"payload.bin", "data": data}

        flt.file_data_filter(file_data)

        self.assertEqual(file_data["data"], expected_pointer)
        lfs_file = pathlib.Path(".git/lfs/objects") / sha[:2] / sha[2:4] / sha
        self.assertTrue(lfs_file.is_file())
        self.assertEqual(lfs_file.read_bytes(), data)

    def test_does_not_convert_unmatched_directory(self):
        """A file outside a matched directory pattern is left untouched."""
        spec = pathspec.PathSpec.from_lines(pathspec.patterns.GitWildMatchPattern, ["assets/**"])
        flt = Filter(spec)
        data = b"outside directory"
        file_data = {"filename": b"src/images/logo.png", "data": data}

        flt.file_data_filter(file_data)

        self.assertEqual(file_data["data"], data)
        self.assertFalse((self.tmp_path / ".git").exists())

    def test_converts_matched_directory(self):
        """A file inside a matched directory pattern is converted to LFS."""
        spec = pathspec.PathSpec.from_lines(pathspec.patterns.GitWildMatchPattern, ["assets/**"])
        flt = Filter(spec)
        data = b"inside directory"
        sha = hashlib.sha256(data).hexdigest()
        file_data = {"filename": b"assets/images/logo.png", "data": data}

        flt.file_data_filter(file_data)

        self.assertIn(b"version https://git-lfs.github.com/spec/v1", file_data["data"])
        lfs_file = pathlib.Path(".git/lfs/objects") / sha[:2] / sha[2:4] / sha
        self.assertTrue(lfs_file.is_file())
        self.assertEqual(lfs_file.read_bytes(), data)

    def test_does_not_overwrite_existing_blob(self):
        """A pre-existing blob with the same sha keeps its mtime (the
        filter must not rewrite an identical object)."""
        spec = pathspec.PathSpec.from_lines(pathspec.patterns.GitWildMatchPattern, ["*.bin"])
        flt = Filter(spec)
        data = b"abc"
        sha = hashlib.sha256(data).hexdigest()
        lfs_dir = pathlib.Path(".git/lfs/objects") / sha[:2] / sha[2:4]
        lfs_dir.mkdir(parents=True, exist_ok=True)
        lfs_file = lfs_dir / sha
        lfs_file.write_bytes(data)
        before_mtime = lfs_file.stat().st_mtime_ns
        time.sleep(0.01)  # Ensure timestamp difference

        file_data = {"filename": b"abc.bin", "data": data}

        flt.file_data_filter(file_data)

        expected_pointer_prefix = b"version https://git-lfs.github.com/spec/v1"
        self.assertTrue(file_data["data"].startswith(expected_pointer_prefix))
        after_mtime = lfs_file.stat().st_mtime_ns
        self.assertEqual(after_mtime, before_mtime)

    def test_empty_file_converted_when_matched(self):
        """An empty matched file still yields a pointer with size 0 and a
        (zero-byte) blob in the object store."""
        spec = pathspec.PathSpec.from_lines(pathspec.patterns.GitWildMatchPattern, ["*.bin"])
        flt = Filter(spec)
        data = b""
        sha = hashlib.sha256(data).hexdigest()
        file_data = {"filename": b"empty.bin", "data": data}

        flt.file_data_filter(file_data)

        self.assertIn(b"size 0", file_data["data"])
        lfs_file = pathlib.Path(".git/lfs/objects") / sha[:2] / sha[2:4] / sha
        self.assertTrue(lfs_file.is_file())
        self.assertEqual(lfs_file.read_bytes(), data)

    # --------------------------------------------------------
    # Optional: GIVEN-WHEN-THEN for build_filter
    # --------------------------------------------------------

    def test_build_filter_reads_patterns_file(self):
        """build_filter(path) loads gitignore-style patterns from the file
        and the resulting filter converts matches and skips non-matches."""
        patterns_file = self.tmp_path / "lfs_patterns.txt"
        patterns_file.write_text("*.bin\nassets/**\n", encoding="utf-8")

        flt = build_filter(str(patterns_file))

        data_match = b"match me"
        sha_match = hashlib.sha256(data_match).hexdigest()
        fd_match = {"filename": b"assets/payload.bin", "data": data_match}
        flt.file_data_filter(fd_match)
        self.assertIn(b"oid sha256:", fd_match["data"])
        lfs_file = pathlib.Path(".git/lfs/objects") / sha_match[:2] / sha_match[2:4] / sha_match
        self.assertTrue(lfs_file.is_file())

        data_skip = b"skip me"
        fd_skip = {"filename": b"docs/readme.md", "data": data_skip}
        flt.file_data_filter(fd_skip)
        self.assertEqual(fd_skip["data"], data_skip)
|
||||
Reference in New Issue
Block a user