mirror of
https://github.com/frej/fast-export.git
synced 2026-02-27 14:50:42 +01:00
Compare commits
14 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
c87b66ed7d | ||
|
|
76db75d963 | ||
|
|
5c9068a1f1 | ||
|
|
42d1c89e73 | ||
|
|
9d71921ed8 | ||
|
|
f6b72d248f | ||
|
|
8e1ba281d4 | ||
|
|
d77765a23e | ||
|
|
95459e5599 | ||
|
|
de5c8d9d97 | ||
|
|
ad96531587 | ||
|
|
4af9a33bd6 | ||
|
|
f71385ec14 | ||
|
|
ae21cbf1a2 |
3
.github/requirements-earliest.txt
vendored
3
.github/requirements-earliest.txt
vendored
@@ -1 +1,4 @@
|
|||||||
mercurial==5.2
|
mercurial==5.2
|
||||||
|
|
||||||
|
# Required for git_lfs_importer plugin
|
||||||
|
pathspec==0.11.2
|
||||||
2
.github/requirements-latest.txt
vendored
2
.github/requirements-latest.txt
vendored
@@ -1,2 +1,4 @@
|
|||||||
mercurial
|
mercurial
|
||||||
|
|
||||||
|
# Required for git_lfs_importer plugin
|
||||||
|
pathspec==0.12.1
|
||||||
2
.github/workflows/ci.yml
vendored
2
.github/workflows/ci.yml
vendored
@@ -10,7 +10,7 @@ on:
|
|||||||
jobs:
|
jobs:
|
||||||
test-earliest:
|
test-earliest:
|
||||||
name: Run test suite on the earliest supported Python version
|
name: Run test suite on the earliest supported Python version
|
||||||
runs-on: ubuntu-20.04
|
runs-on: ubuntu-22.04
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v4
|
- uses: actions/checkout@v4
|
||||||
|
|||||||
76
README.md
76
README.md
@@ -141,12 +141,48 @@ if [ "$3" == "1" ]; then cat; else dos2unix -q; fi
|
|||||||
Mercurial Largefiles Extension
|
Mercurial Largefiles Extension
|
||||||
------------------------------
|
------------------------------
|
||||||
|
|
||||||
Mercurial largefiles are exported as ordinary files into git, i.e. not
|
### Handling Mercurial Largefiles during Migration
|
||||||
as git lfs files. In order to make the export work, make sure that
|
|
||||||
you have all largefiles of all mercurial commits available locally.
|
When migrating from Mercurial to Git, largefiles are exported as ordinary
|
||||||
This can be ensured by either cloning the mercurial repository with
|
files by default. To ensure a successful migration and manage repository
|
||||||
the option --all-largefiles or by executing the command
|
size, follow the requirements below.
|
||||||
'hg lfpull --rev "all()"' inside the mercurial repository.
|
|
||||||
|
#### 1. Pre-Export: Ensure File Availability
|
||||||
|
|
||||||
|
Before starting the export, you must have all largefiles from all
|
||||||
|
Mercurial commits available locally. Use one of these methods:
|
||||||
|
|
||||||
|
* **For a new clone:** `hg clone --all-largefiles <repo-url>`
|
||||||
|
* **For an existing repo:** `hg lfpull --rev "all()"`
|
||||||
|
|
||||||
|
#### 2. Choosing Your LFS Strategy
|
||||||
|
|
||||||
|
If you want your files to be versioned in Git LFS rather than as standard
|
||||||
|
Git blobs, you have two primary paths:
|
||||||
|
|
||||||
|
* **[git_lfs_importer plugin](./plugins/git_lfs_importer/README.md)
|
||||||
|
(During Conversion)**
|
||||||
|
Recommended for large repos. This performs Just-In-Time (JIT) conversion
|
||||||
|
by identifying large files during the export and writing LFS pointers
|
||||||
|
immediately, skipping the need for a second pass. This also supports
|
||||||
|
**incremental conversion**, making it much more efficient for ongoing
|
||||||
|
migrations.
|
||||||
|
* **[git lfs migrate import](https://github.com/git-lfs/git-lfs/blob/main/docs/man/git-lfs-migrate.adoc)
|
||||||
|
(After Conversion)**
|
||||||
|
A standard two-step process: first, export the full history from Mercurial
|
||||||
|
to Git, then run a separate full history rewrite to move files into LFS.
|
||||||
|
|
||||||
|
### Why use the git_lfs_importer plugin?
|
||||||
|
|
||||||
|
For "monorepos" or very large repositories (100GiB+), the traditional
|
||||||
|
two-step process can take days. By integrating the LFS conversion
|
||||||
|
directly into the history export, the plugin eliminates the massive
|
||||||
|
time overhead of a secondary history rewrite and allows for incremental
|
||||||
|
progress.
|
||||||
|
|
||||||
|
For detailed setup, see the
|
||||||
|
[git_lfs_importer](./plugins/git_lfs_importer/README.md)
|
||||||
|
plugin documentation.
|
||||||
|
|
||||||
Plugins
|
Plugins
|
||||||
-----------------
|
-----------------
|
||||||
@@ -177,9 +213,18 @@ defined filter methods in the [dos2unix](./plugins/dos2unix) and
|
|||||||
[branch_name_in_commit](./plugins/branch_name_in_commit) plugins.
|
[branch_name_in_commit](./plugins/branch_name_in_commit) plugins.
|
||||||
|
|
||||||
```
|
```
|
||||||
commit_data = {'branch': branch, 'parents': parents, 'author': author, 'desc': desc, 'revision': revision, 'hg_hash': hg_hash, 'committer': 'committer', 'extra': extra}
|
commit_data = {
|
||||||
|
'author': author,
|
||||||
|
'branch': branch,
|
||||||
|
'committer': 'committer',
|
||||||
|
'desc': desc,
|
||||||
|
'extra': extra,
|
||||||
|
'hg_hash': hg_hash,
|
||||||
|
'parents': parents,
|
||||||
|
'revision': revision,
|
||||||
|
}
|
||||||
|
|
||||||
def commit_message_filter(self,commit_data):
|
def commit_message_filter(self, commit_data):
|
||||||
```
|
```
|
||||||
The `commit_message_filter` method is called for each commit, after parsing
|
The `commit_message_filter` method is called for each commit, after parsing
|
||||||
from hg, but before outputting to git. The dictionary `commit_data` contains the
|
from hg, but before outputting to git. The dictionary `commit_data` contains the
|
||||||
@@ -188,9 +233,14 @@ values in the dictionary after filters have been run are used to create the git
|
|||||||
commit.
|
commit.
|
||||||
|
|
||||||
```
|
```
|
||||||
file_data = {'filename':filename,'file_ctx':file_ctx,'data':file_contents}
|
file_data = {
|
||||||
|
'data': file_contents,
|
||||||
|
'file_ctx': file_ctx,
|
||||||
|
'filename': filename,
|
||||||
|
'is_largefile': largefile_status,
|
||||||
|
}
|
||||||
|
|
||||||
def file_data_filter(self,file_data):
|
def file_data_filter(self, file_data):
|
||||||
```
|
```
|
||||||
The `file_data_filter` method is called for each file within each commit.
|
The `file_data_filter` method is called for each file within each commit.
|
||||||
The dictionary `file_data` contains the above attributes about the file, and
|
The dictionary `file_data` contains the above attributes about the file, and
|
||||||
@@ -203,6 +253,12 @@ but in this case the `data` and `file_ctx` keys map to None. This is
|
|||||||
so that a filter which modifies file names can apply the same name
|
so that a filter which modifies file names can apply the same name
|
||||||
transformations when files are deleted.
|
transformations when files are deleted.
|
||||||
|
|
||||||
|
The `is_largefile` entry within the `file_data` dictionary will contain
|
||||||
|
`True` if the original file was a largefile and has been converted
|
||||||
|
to a normal file before the plugins were invoked. In this case, the `file_ctx`
|
||||||
|
will still point to the filecontext for the original, unconverted file, while
|
||||||
|
`filename` and `data` will contain the already converted information.
|
||||||
|
|
||||||
Submodules
|
Submodules
|
||||||
----------
|
----------
|
||||||
See README-SUBMODULES.md for how to convert subrepositories into git
|
See README-SUBMODULES.md for how to convert subrepositories into git
|
||||||
|
|||||||
@@ -211,15 +211,18 @@ def export_file_contents(ctx,manifest,files,hgtags,encoding='',plugins={}):
|
|||||||
b'Ignoring file %s which cannot be tracked by git\n' % filename
|
b'Ignoring file %s which cannot be tracked by git\n' % filename
|
||||||
)
|
)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
largefile = False
|
||||||
|
file_ctx=ctx.filectx(file)
|
||||||
if is_largefile(filename):
|
if is_largefile(filename):
|
||||||
|
largefile = True
|
||||||
filename = largefile_orig_name(filename)
|
filename = largefile_orig_name(filename)
|
||||||
d = largefile_data(ctx, file, filename)
|
d = largefile_data(ctx, file, filename)
|
||||||
else:
|
else:
|
||||||
file_ctx=ctx.filectx(file)
|
|
||||||
d=file_ctx.data()
|
d=file_ctx.data()
|
||||||
|
|
||||||
if plugins and plugins['file_data_filters']:
|
if plugins and plugins['file_data_filters']:
|
||||||
file_data = {'filename':filename,'file_ctx':file_ctx,'data':d}
|
file_data = {'filename':filename,'file_ctx':file_ctx,'data':d, 'is_largefile':largefile}
|
||||||
for filter in plugins['file_data_filters']:
|
for filter in plugins['file_data_filters']:
|
||||||
filter(file_data)
|
filter(file_data)
|
||||||
d=file_data['data']
|
d=file_data['data']
|
||||||
@@ -281,7 +284,7 @@ def strip_leading_slash(filename):
|
|||||||
|
|
||||||
def export_commit(ui,repo,revision,old_marks,max,count,authors,
|
def export_commit(ui,repo,revision,old_marks,max,count,authors,
|
||||||
branchesmap,sob,brmap,hgtags,encoding='',fn_encoding='',
|
branchesmap,sob,brmap,hgtags,encoding='',fn_encoding='',
|
||||||
plugins={}):
|
first_commit_hash="",plugins={}):
|
||||||
def get_branchname(name):
|
def get_branchname(name):
|
||||||
if name in brmap:
|
if name in brmap:
|
||||||
return brmap[name]
|
return brmap[name]
|
||||||
@@ -329,6 +332,9 @@ def export_commit(ui,repo,revision,old_marks,max,count,authors,
|
|||||||
|
|
||||||
if not parents:
|
if not parents:
|
||||||
type='full'
|
type='full'
|
||||||
|
if revision == 0 and first_commit_hash:
|
||||||
|
wr(b'from %s' % first_commit_hash.encode())
|
||||||
|
type='simple delta'
|
||||||
else:
|
else:
|
||||||
wr(b'from %s' % revnum_to_revref(parents[0], old_marks))
|
wr(b'from %s' % revnum_to_revref(parents[0], old_marks))
|
||||||
if len(parents) == 1:
|
if len(parents) == 1:
|
||||||
@@ -482,7 +488,9 @@ def branchtip(repo, heads):
|
|||||||
|
|
||||||
def verify_heads(ui,repo,cache,force,ignore_unnamed_heads,branchesmap):
|
def verify_heads(ui,repo,cache,force,ignore_unnamed_heads,branchesmap):
|
||||||
branches={}
|
branches={}
|
||||||
for bn, heads in repo.branchmap().iteritems():
|
|
||||||
|
for bn in repo.branchmap():
|
||||||
|
heads = repo.branchmap().branchheads(bn)
|
||||||
branches[bn] = branchtip(repo, heads)
|
branches[bn] = branchtip(repo, heads)
|
||||||
l=[(-repo.changelog.rev(n), n, t) for t, n in branches.items()]
|
l=[(-repo.changelog.rev(n), n, t) for t, n in branches.items()]
|
||||||
l.sort()
|
l.sort()
|
||||||
@@ -523,7 +531,8 @@ def verify_heads(ui,repo,cache,force,ignore_unnamed_heads,branchesmap):
|
|||||||
|
|
||||||
def hg2git(repourl,m,marksfile,mappingfile,headsfile,tipfile,
|
def hg2git(repourl,m,marksfile,mappingfile,headsfile,tipfile,
|
||||||
authors={},branchesmap={},tagsmap={},
|
authors={},branchesmap={},tagsmap={},
|
||||||
sob=False,force=False,ignore_unnamed_heads=False,hgtags=False,notes=False,encoding='',fn_encoding='',
|
sob=False,force=False,ignore_unnamed_heads=False,hgtags=False,
|
||||||
|
notes=False,encoding='',fn_encoding='',first_commit_hash='',
|
||||||
plugins={}):
|
plugins={}):
|
||||||
def check_cache(filename, contents):
|
def check_cache(filename, contents):
|
||||||
if len(contents) == 0:
|
if len(contents) == 0:
|
||||||
@@ -539,7 +548,7 @@ def hg2git(repourl,m,marksfile,mappingfile,headsfile,tipfile,
|
|||||||
if len(state_cache) != 0:
|
if len(state_cache) != 0:
|
||||||
for (name, data) in [(marksfile, old_marks),
|
for (name, data) in [(marksfile, old_marks),
|
||||||
(mappingfile, mapping_cache),
|
(mappingfile, mapping_cache),
|
||||||
(headsfile, state_cache)]:
|
(headsfile, heads_cache)]:
|
||||||
check_cache(name, data)
|
check_cache(name, data)
|
||||||
|
|
||||||
ui,repo=setup_repo(repourl)
|
ui,repo=setup_repo(repourl)
|
||||||
@@ -579,7 +588,7 @@ def hg2git(repourl,m,marksfile,mappingfile,headsfile,tipfile,
|
|||||||
brmap={}
|
brmap={}
|
||||||
for rev in range(min,max):
|
for rev in range(min,max):
|
||||||
c=export_commit(ui,repo,rev,old_marks,max,c,authors,branchesmap,
|
c=export_commit(ui,repo,rev,old_marks,max,c,authors,branchesmap,
|
||||||
sob,brmap,hgtags,encoding,fn_encoding,
|
sob,brmap,hgtags,encoding,fn_encoding,first_commit_hash,
|
||||||
plugins)
|
plugins)
|
||||||
if notes:
|
if notes:
|
||||||
for rev in range(min,max):
|
for rev in range(min,max):
|
||||||
@@ -653,6 +662,8 @@ if __name__=='__main__':
|
|||||||
help="Add a plugin with the given init string <name=init>")
|
help="Add a plugin with the given init string <name=init>")
|
||||||
parser.add_option("--subrepo-map", type="string", dest="subrepo_map",
|
parser.add_option("--subrepo-map", type="string", dest="subrepo_map",
|
||||||
help="Provide a mapping file between the subrepository name and the submodule name")
|
help="Provide a mapping file between the subrepository name and the submodule name")
|
||||||
|
parser.add_option("--first-commit-hash", type="string", dest="first_commit_hash",
|
||||||
|
help="Allow importing into an existing git repository by specifying the hash of the first commit")
|
||||||
|
|
||||||
(options,args)=parser.parse_args()
|
(options,args)=parser.parse_args()
|
||||||
|
|
||||||
@@ -732,4 +743,5 @@ if __name__=='__main__':
|
|||||||
ignore_unnamed_heads=options.ignore_unnamed_heads,
|
ignore_unnamed_heads=options.ignore_unnamed_heads,
|
||||||
hgtags=options.hgtags,
|
hgtags=options.hgtags,
|
||||||
notes=options.notes,encoding=encoding,fn_encoding=fn_encoding,
|
notes=options.notes,encoding=encoding,fn_encoding=fn_encoding,
|
||||||
|
first_commit_hash=options.first_commit_hash,
|
||||||
plugins=plugins_dict))
|
plugins=plugins_dict))
|
||||||
|
|||||||
@@ -87,6 +87,8 @@ Options:
|
|||||||
with <file-path> <hg-hash> <is-binary> as arguments
|
with <file-path> <hg-hash> <is-binary> as arguments
|
||||||
--plugin <plugin=init> Add a plugin with the given init string (repeatable)
|
--plugin <plugin=init> Add a plugin with the given init string (repeatable)
|
||||||
--plugin-path <plugin-path> Add an additional plugin lookup path
|
--plugin-path <plugin-path> Add an additional plugin lookup path
|
||||||
|
--first-commit-hash <git-commit-hash> Use the given git commit hash as the
|
||||||
|
first commit's parent (for grafting)
|
||||||
"
|
"
|
||||||
case "$1" in
|
case "$1" in
|
||||||
-h|--help)
|
-h|--help)
|
||||||
|
|||||||
12
hg2git.py
12
hg2git.py
@@ -23,13 +23,21 @@ user_clean_re=re.compile(b'^["]([^"]+)["]$')
|
|||||||
|
|
||||||
def set_default_branch(name):
|
def set_default_branch(name):
|
||||||
global cfg_master
|
global cfg_master
|
||||||
cfg_master = name.encode('utf8') if not isinstance(name, bytes) else name
|
cfg_master = name.encode('utf8')
|
||||||
|
|
||||||
def set_origin_name(name):
|
def set_origin_name(name):
|
||||||
global origin_name
|
global origin_name
|
||||||
origin_name = name
|
origin_name = name.encode('utf8')
|
||||||
|
|
||||||
def setup_repo(url):
|
def setup_repo(url):
|
||||||
|
try:
|
||||||
|
# Mercurial >= 7.2 requires explicit initialization for largefile
|
||||||
|
# support to work.
|
||||||
|
from mercurial import initialization
|
||||||
|
initialization.init()
|
||||||
|
except ImportError:
|
||||||
|
pass
|
||||||
|
|
||||||
try:
|
try:
|
||||||
myui=ui.ui(interactive=False)
|
myui=ui.ui(interactive=False)
|
||||||
except TypeError:
|
except TypeError:
|
||||||
|
|||||||
218
plugins/git_lfs_importer/README.md
Normal file
218
plugins/git_lfs_importer/README.md
Normal file
@@ -0,0 +1,218 @@
|
|||||||
|
# git_lfs_importer Plugin
|
||||||
|
|
||||||
|
This plugin automatically converts matching files to use Git LFS
|
||||||
|
(Large File Storage) during the Mercurial to Git conversion process.
|
||||||
|
|
||||||
|
## Overview
|
||||||
|
|
||||||
|
The git_lfs_importer plugin intercepts file data during the hg-fast-export
|
||||||
|
process and converts files matching specified patterns into Git LFS pointers.
|
||||||
|
This allows you to seamlessly migrate a Mercurial repository to Git while
|
||||||
|
simultaneously adopting LFS for large files.
|
||||||
|
|
||||||
|
### Why use git_lfs_importer?
|
||||||
|
For large repositories, traditional migration requires two sequential,
|
||||||
|
long-running steps:
|
||||||
|
|
||||||
|
1. Full history conversion from Mercurial to Git.
|
||||||
|
2. Full history rewrite using `git lfs migrate import`.
|
||||||
|
|
||||||
|
This two-step process can take hours or even days for massive
|
||||||
|
monorepos (e.g., 100GiB+).
|
||||||
|
|
||||||
|
This plugin eliminates the second, time-consuming history rewrite. It performs
|
||||||
|
the LFS conversion incrementally (Just-In-Time). During the initial export, the
|
||||||
|
plugin identifies large files and immediately writes LFS pointers into the Git
|
||||||
|
history. This results in significantly faster conversions and allows for
|
||||||
|
efficient incremental imports of new changesets.
|
||||||
|
|
||||||
|
## Prerequisites
|
||||||
|
|
||||||
|
### Dependencies
|
||||||
|
|
||||||
|
This plugin requires the `pathspec` package:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pip install pathspec
|
||||||
|
```
|
||||||
|
|
||||||
|
### Git Repository Setup
|
||||||
|
|
||||||
|
The destination Git repository must be pre-initialized with:
|
||||||
|
|
||||||
|
1. A `.gitattributes` file configured for LFS tracking
|
||||||
|
2. Git LFS properly installed and initialized
|
||||||
|
|
||||||
|
Example `.gitattributes`:
|
||||||
|
```
|
||||||
|
*.bin filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.iso filter=lfs diff=lfs merge=lfs -text
|
||||||
|
large_files/** filter=lfs diff=lfs merge=lfs -text
|
||||||
|
```
|
||||||
|
|
||||||
|
## Usage
|
||||||
|
|
||||||
|
### Step 1: Create the Destination Git Repository
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Create a new git repository
|
||||||
|
git init my-repo
|
||||||
|
cd my-repo
|
||||||
|
|
||||||
|
# Initialize Git LFS
|
||||||
|
git lfs install
|
||||||
|
|
||||||
|
# Create and commit a .gitattributes file
|
||||||
|
cat > .gitattributes << EOF
|
||||||
|
*.bin filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.iso filter=lfs diff=lfs merge=lfs -text
|
||||||
|
EOF
|
||||||
|
git add .gitattributes
|
||||||
|
git commit -m "Initialize Git LFS configuration"
|
||||||
|
|
||||||
|
# Get the commit hash (needed for --first-commit-hash)
|
||||||
|
git rev-parse HEAD
|
||||||
|
```
|
||||||
|
|
||||||
|
### Step 2: Create an LFS Specification File
|
||||||
|
|
||||||
|
Create a file (e.g., `lfs-spec.txt`) listing the patterns of files to convert
|
||||||
|
to LFS. This uses gitignore-style glob patterns:
|
||||||
|
|
||||||
|
```
|
||||||
|
*.bin
|
||||||
|
*.iso
|
||||||
|
*.tar.gz
|
||||||
|
large_files/**
|
||||||
|
*.mp4
|
||||||
|
```
|
||||||
|
|
||||||
|
### Step 3: Run hg-fast-export with the Plugin
|
||||||
|
|
||||||
|
```bash
|
||||||
|
hg-fast-export.sh \
|
||||||
|
-r <mercurial-repo-path> \
|
||||||
|
--plugin git_lfs_importer=lfs-spec.txt \
|
||||||
|
--first-commit-hash <git-commit-hash> \
|
||||||
|
--force
|
||||||
|
```
|
||||||
|
|
||||||
|
Replace `<git-commit-hash>` with the hash obtained from Step 1.
|
||||||
|
|
||||||
|
## How It Works
|
||||||
|
|
||||||
|
1. **Pattern Matching**: Files are matched against patterns in the
|
||||||
|
LFS specification file using gitignore-style matching
|
||||||
|
2. **File Processing**: For each matching file:
|
||||||
|
- Calculates SHA256 hash of the file content
|
||||||
|
- Stores the actual file content in `.git/lfs/objects/<hash[0:2]>/<hash[2:4]>/<hash>`
|
||||||
|
- Replaces the file data with an LFS pointer containing:
|
||||||
|
- LFS version specification
|
||||||
|
- SHA256 hash of the original content
|
||||||
|
- Original file size
|
||||||
|
3. **Git Fast-Import**: The LFS pointer is committed instead of the actual
|
||||||
|
file content
|
||||||
|
|
||||||
|
## Important Notes
|
||||||
|
|
||||||
|
### First Commit Hash Requirement
|
||||||
|
|
||||||
|
The `--first-commit-hash` option must be provided with the Git commit hash that
|
||||||
|
contains your `.gitattributes` file. This allows the plugin to chain from the
|
||||||
|
existing Git history rather than creating a completely new history.
|
||||||
|
|
||||||
|
### Deletions
|
||||||
|
|
||||||
|
The plugin safely handles file deletions (data=None) and does not process them.
|
||||||
|
|
||||||
|
### Large Files and Largefiles
|
||||||
|
|
||||||
|
If the Mercurial repository uses Mercurial's largefiles extension, those files
|
||||||
|
are already converted to their original content before reaching this plugin,
|
||||||
|
allowing the plugin to apply LFS conversion if they match the patterns.
|
||||||
|
|
||||||
|
## Example Workflow
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Configuration variables
|
||||||
|
HG_REPO=/path/to/mercurial/repo
|
||||||
|
GIT_DIR_NAME=my-project-git
|
||||||
|
LFS_PATTERN_FILE=../lfs-patterns.txt
|
||||||
|
|
||||||
|
# 1. Prepare destination git repo
|
||||||
|
mkdir "$GIT_DIR_NAME"
|
||||||
|
cd "$GIT_DIR_NAME"
|
||||||
|
git init
|
||||||
|
git lfs install
|
||||||
|
|
||||||
|
# Create .gitattributes
|
||||||
|
cat > .gitattributes << EOF
|
||||||
|
*.bin filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.iso filter=lfs diff=lfs merge=lfs -text
|
||||||
|
EOF
|
||||||
|
|
||||||
|
git add .gitattributes
|
||||||
|
git commit -m "Add LFS configuration"
|
||||||
|
FIRST_HASH=$(git rev-parse HEAD)
|
||||||
|
|
||||||
|
# 2. Create LFS patterns file
|
||||||
|
cat > "$LFS_PATTERN_FILE" << EOF
|
||||||
|
*.bin
|
||||||
|
*.iso
|
||||||
|
build/artifacts/**
|
||||||
|
EOF
|
||||||
|
|
||||||
|
# 3. Run conversion
|
||||||
|
/path/to/hg-fast-export.sh \
|
||||||
|
-r "$HG_REPO" \
|
||||||
|
--plugin "git_lfs_importer=$LFS_PATTERN_FILE" \
|
||||||
|
--first-commit-hash $FIRST_HASH \
|
||||||
|
--force
|
||||||
|
|
||||||
|
# 4. Verify
|
||||||
|
git log --oneline
|
||||||
|
git lfs ls-files
|
||||||
|
```
|
||||||
|
|
||||||
|
## Troubleshooting
|
||||||
|
|
||||||
|
### LFS Files Not Tracked
|
||||||
|
Verify that:
|
||||||
|
- The `.gitattributes` file exists in the destination repository
|
||||||
|
- Patterns in `.gitattributes` match the files being converted
|
||||||
|
- `git lfs install` was run in the repository
|
||||||
|
|
||||||
|
### "pathspec" Module Not Found
|
||||||
|
Install the required dependency:
|
||||||
|
```bash
|
||||||
|
pip install pathspec
|
||||||
|
```
|
||||||
|
|
||||||
|
### Conversion Fails at Import
|
||||||
|
Ensure the `--first-commit-hash` value is:
|
||||||
|
- A valid commit hash in the destination repository
|
||||||
|
- From a commit that exists before the conversion starts
|
||||||
|
- The hash of the commit containing `.gitattributes`
|
||||||
|
|
||||||
|
|
||||||
|
### Force Requirement
|
||||||
|
|
||||||
|
You only need to pass the `--force` option when converting the *first*
|
||||||
|
Mercurial commit into a non-empty Git repository. By default, `hg-fast-export`
|
||||||
|
prevents importing Mercurial commits onto a non-empty Git repo to avoid
|
||||||
|
creating conflicting histories. Passing `--force` overrides that safety check
|
||||||
|
and allows the exporter to write the LFS pointer objects and integrate the
|
||||||
|
converted data with the existing Git history.
|
||||||
|
|
||||||
|
If you are doing an incremental conversion (i.e., running the script a second
|
||||||
|
time to import new changesets into an already converted repository),
|
||||||
|
the `--force` flag is not required.
|
||||||
|
|
||||||
|
Omitting `--force` when attempting to import the first Mercurial commit into a
|
||||||
|
non-empty repository will cause the importer to refuse the operation.
|
||||||
|
|
||||||
|
## See Also
|
||||||
|
|
||||||
|
- [Git LFS Documentation](https://git-lfs.github.com/)
|
||||||
|
- [gitignore Pattern Format](https://git-scm.com/docs/gitignore)
|
||||||
|
- [hg-fast-export Documentation](../../README.md)
|
||||||
49
plugins/git_lfs_importer/__init__.py
Normal file
49
plugins/git_lfs_importer/__init__.py
Normal file
@@ -0,0 +1,49 @@
|
|||||||
|
import pathlib
|
||||||
|
import hashlib
|
||||||
|
import pathspec
|
||||||
|
|
||||||
|
|
||||||
|
def build_filter(args):
|
||||||
|
with open(args) as f:
|
||||||
|
lfs_spec = pathspec.PathSpec.from_lines(pathspec.patterns.GitWildMatchPattern, f)
|
||||||
|
return Filter(lfs_spec)
|
||||||
|
|
||||||
|
|
||||||
|
class Filter:
|
||||||
|
def __init__(self, lfs_spec):
|
||||||
|
self.lfs_spec = lfs_spec
|
||||||
|
|
||||||
|
def file_data_filter(self, file_data):
|
||||||
|
"""
|
||||||
|
file_data: {
|
||||||
|
'filename': <str>,
|
||||||
|
'file_ctx': <mercurial.filectx or None>,
|
||||||
|
'data': <bytes or None>,
|
||||||
|
'is_largefile': <bool>
|
||||||
|
}
|
||||||
|
|
||||||
|
May be called for deletions (data=None, file_ctx=None).
|
||||||
|
"""
|
||||||
|
filename = file_data.get('filename')
|
||||||
|
data = file_data.get('data')
|
||||||
|
|
||||||
|
# Skip deletions or filtered files early
|
||||||
|
if data is None or not self.lfs_spec.match_file(filename.decode("utf-8")):
|
||||||
|
return
|
||||||
|
|
||||||
|
# Get the file path
|
||||||
|
sha256hash = hashlib.sha256(data).hexdigest()
|
||||||
|
lfs_path = pathlib.Path(f".git/lfs/objects/{sha256hash[0:2]}/{sha256hash[2:4]}")
|
||||||
|
lfs_path.mkdir(parents=True, exist_ok=True)
|
||||||
|
lfs_file_path = lfs_path / sha256hash
|
||||||
|
|
||||||
|
# The binary blob is already in LFS
|
||||||
|
if not lfs_file_path.is_file():
|
||||||
|
(lfs_path / sha256hash).write_bytes(data)
|
||||||
|
|
||||||
|
# Write the LFS pointer
|
||||||
|
file_data['data'] = (
|
||||||
|
f"version https://git-lfs.github.com/spec/v1\n"
|
||||||
|
f"oid sha256:{sha256hash}\n"
|
||||||
|
f"size {len(data)}\n"
|
||||||
|
).encode("utf-8")
|
||||||
117
t/first_commit_hash_option.t
Executable file
117
t/first_commit_hash_option.t
Executable file
@@ -0,0 +1,117 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
#
|
||||||
|
# Copyright (c) 2025
|
||||||
|
#
|
||||||
|
|
||||||
|
test_description='git_lfs_importer plugin integration tests'
|
||||||
|
|
||||||
|
. "${SHARNESS_TEST_SRCDIR-$(dirname "$0")/sharness}"/sharness.sh || exit 1
|
||||||
|
|
||||||
|
setup() {
|
||||||
|
cat > "$HOME"/.hgrc <<-EOF
|
||||||
|
[ui]
|
||||||
|
username = Test User <test@example.com>
|
||||||
|
EOF
|
||||||
|
|
||||||
|
# Git config for the destination repo commits
|
||||||
|
git config --global user.email "test@example.com"
|
||||||
|
git config --global user.name "Test User"
|
||||||
|
}
|
||||||
|
|
||||||
|
setup
|
||||||
|
|
||||||
|
test_expect_success 'Mercurial history is imported over the provided commit' '
|
||||||
|
test_when_finished "rm -rf hgrepo gitrepo lfs-patterns.txt" &&
|
||||||
|
|
||||||
|
# 1. Create source Mercurial repository with binary files
|
||||||
|
(
|
||||||
|
hg init hgrepo &&
|
||||||
|
cd hgrepo &&
|
||||||
|
echo "regular text file" > readme.txt &&
|
||||||
|
hg add readme.txt &&
|
||||||
|
hg commit -m "initial commit"
|
||||||
|
) &&
|
||||||
|
|
||||||
|
# 2. Prepare destination git repo with LFS setup
|
||||||
|
mkdir gitrepo &&
|
||||||
|
(
|
||||||
|
cd gitrepo &&
|
||||||
|
git init -q &&
|
||||||
|
git config core.ignoreCase false &&
|
||||||
|
git lfs install --local &&
|
||||||
|
git switch --create master &&
|
||||||
|
|
||||||
|
cat > .gitattributes <<-EOF &&
|
||||||
|
* -text
|
||||||
|
EOF
|
||||||
|
|
||||||
|
git add .gitattributes &&
|
||||||
|
git commit -q -m "Initialize Git configuration"
|
||||||
|
) &&
|
||||||
|
|
||||||
|
FIRST_HASH=$(git -C gitrepo rev-parse HEAD) &&
|
||||||
|
|
||||||
|
# 3. Run hg-fast-export
|
||||||
|
(
|
||||||
|
cd gitrepo &&
|
||||||
|
hg-fast-export.sh \
|
||||||
|
-r "../hgrepo" \
|
||||||
|
--first-commit-hash "$FIRST_HASH" --force \
|
||||||
|
-M master
|
||||||
|
) &&
|
||||||
|
|
||||||
|
# 4. Verify git file is still present
|
||||||
|
git -C gitrepo show HEAD:.gitattributes > gitattributes_check.txt &&
|
||||||
|
test "$(cat gitattributes_check.txt)" = "* -text" &&
|
||||||
|
|
||||||
|
# 5. Verify hg file is imported
|
||||||
|
git -C gitrepo show HEAD:readme.txt > readme_check.txt &&
|
||||||
|
test "$(cat readme_check.txt)" = "regular text file"
|
||||||
|
'
|
||||||
|
|
||||||
|
test_expect_success 'Mercurial history has priority over git' '
|
||||||
|
test_when_finished "rm -rf hgrepo gitrepo lfs-patterns.txt" &&
|
||||||
|
|
||||||
|
# 1. Create source Mercurial repository with binary files
|
||||||
|
(
|
||||||
|
hg init hgrepo &&
|
||||||
|
cd hgrepo &&
|
||||||
|
echo "hg readme file" > readme.txt &&
|
||||||
|
hg add readme.txt &&
|
||||||
|
hg commit -m "initial commit"
|
||||||
|
) &&
|
||||||
|
|
||||||
|
# 2. Prepare destination git repo with LFS setup
|
||||||
|
mkdir gitrepo &&
|
||||||
|
(
|
||||||
|
cd gitrepo &&
|
||||||
|
git init -q &&
|
||||||
|
git config core.ignoreCase false &&
|
||||||
|
git lfs install --local &&
|
||||||
|
git switch --create master &&
|
||||||
|
|
||||||
|
cat > readme.txt <<-EOF &&
|
||||||
|
git readme file
|
||||||
|
EOF
|
||||||
|
|
||||||
|
git add readme.txt &&
|
||||||
|
git commit -q -m "Initialize Git readme file"
|
||||||
|
) &&
|
||||||
|
|
||||||
|
FIRST_HASH=$(git -C gitrepo rev-parse HEAD) &&
|
||||||
|
|
||||||
|
# 3. Run hg-fast-export
|
||||||
|
(
|
||||||
|
cd gitrepo &&
|
||||||
|
hg-fast-export.sh \
|
||||||
|
-r "../hgrepo" \
|
||||||
|
--first-commit-hash "$FIRST_HASH" --force \
|
||||||
|
-M master
|
||||||
|
) &&
|
||||||
|
|
||||||
|
# 5. Verify hg file is imported
|
||||||
|
git -C gitrepo show HEAD:readme.txt > readme_check.txt &&
|
||||||
|
test "$(cat readme_check.txt)" = "hg readme file"
|
||||||
|
'
|
||||||
|
|
||||||
|
test_done
|
||||||
189
t/git_lfs_importer_plugin.t
Executable file
189
t/git_lfs_importer_plugin.t
Executable file
@@ -0,0 +1,189 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
#
|
||||||
|
# Copyright (c) 2025
|
||||||
|
#
|
||||||
|
|
||||||
|
test_description='git_lfs_importer plugin integration tests'
|
||||||
|
|
||||||
|
. "${SHARNESS_TEST_SRCDIR-$(dirname "$0")/sharness}"/sharness.sh || exit 1
|
||||||
|
|
||||||
|
# Prepare the identities used by the tests in this file: write a minimal
# $HOME/.hgrc carrying the Mercurial user name, then set the matching global
# Git author name and e-mail.
setup() {
	# Mercurial identity for commits made in the source repositories
	printf '%s\n' \
		'[ui]' \
		'username = Test User <test@example.com>' \
		> "$HOME"/.hgrc

	# Git identity for commits made in the destination repositories
	git config --global user.email "test@example.com"
	git config --global user.name "Test User"
}
|
||||||
|
|
||||||
|
setup
|
||||||
|
|
||||||
|
test_expect_success 'git_lfs_importer converts matched binary files to LFS pointers and pointers are properly smudged when checkouting' '
|
||||||
|
test_when_finished "rm -rf hgrepo gitrepo lfs-patterns.txt" &&
|
||||||
|
|
||||||
|
# 1. Create source Mercurial repository with binary files
|
||||||
|
(
|
||||||
|
hg init hgrepo &&
|
||||||
|
cd hgrepo &&
|
||||||
|
echo "regular text file" > readme.txt &&
|
||||||
|
echo "binary payload" > payload.bin &&
|
||||||
|
hg add readme.txt payload.bin &&
|
||||||
|
hg commit -m "initial commit with binary"
|
||||||
|
) &&
|
||||||
|
|
||||||
|
# 2. Prepare destination git repo with LFS setup
|
||||||
|
mkdir gitrepo &&
|
||||||
|
(
|
||||||
|
cd gitrepo &&
|
||||||
|
git init -q &&
|
||||||
|
git config core.ignoreCase false &&
|
||||||
|
git lfs install --local &&
|
||||||
|
|
||||||
|
cat > .gitattributes <<-EOF &&
|
||||||
|
*.bin filter=lfs diff=lfs merge=lfs -text
|
||||||
|
EOF
|
||||||
|
|
||||||
|
git add .gitattributes &&
|
||||||
|
git commit -q -m "Initialize Git LFS configuration"
|
||||||
|
) &&
|
||||||
|
|
||||||
|
FIRST_HASH=$(git -C gitrepo rev-parse HEAD) &&
|
||||||
|
|
||||||
|
# 3. Create LFS patterns file
|
||||||
|
cat > lfs-patterns.txt <<-EOF &&
|
||||||
|
*.bin
|
||||||
|
EOF
|
||||||
|
|
||||||
|
# 4. Run hg-fast-export with git_lfs_importer plugin
|
||||||
|
(
|
||||||
|
cd gitrepo &&
|
||||||
|
hg-fast-export.sh \
|
||||||
|
-r "../hgrepo" \
|
||||||
|
--plugin "git_lfs_importer=../lfs-patterns.txt" \
|
||||||
|
--first-commit-hash "$FIRST_HASH" --force
|
||||||
|
) &&
|
||||||
|
|
||||||
|
# 5. Verify conversion: payload.bin should be an LFS pointer
|
||||||
|
git -C gitrepo show HEAD:payload.bin > lfs_pointer.txt &&
|
||||||
|
grep -q "version https://git-lfs.github.com/spec/v1" lfs_pointer.txt &&
|
||||||
|
grep -q "oid sha256:" lfs_pointer.txt &&
|
||||||
|
grep -q "size" lfs_pointer.txt &&
|
||||||
|
|
||||||
|
# 6. Verify non-matched file is unchanged
|
||||||
|
git -C gitrepo show HEAD:readme.txt > readme_check.txt &&
|
||||||
|
test "$(cat readme_check.txt)" = "regular text file" &&
|
||||||
|
|
||||||
|
# 7. Make sure the LFS pointer file is smudged (replaced by the real content) when checked out
|
||||||
|
git -C gitrepo reset --hard HEAD &&
|
||||||
|
ls gitrepo &&
|
||||||
|
test "$(cat gitrepo/payload.bin)" = "binary payload"
|
||||||
|
'
|
||||||
|
|
||||||
|
test_expect_success 'git_lfs_importer skips files not matching patterns' '
|
||||||
|
test_when_finished "rm -rf hgrepo gitrepo lfs-patterns.txt" &&
|
||||||
|
|
||||||
|
# 1. Create source with various files
|
||||||
|
(
|
||||||
|
hg init hgrepo &&
|
||||||
|
cd hgrepo &&
|
||||||
|
echo "text" > file.txt &&
|
||||||
|
echo "data" > file.dat &&
|
||||||
|
echo "iso content" > image.iso &&
|
||||||
|
hg add . &&
|
||||||
|
hg commit -m "multiple files"
|
||||||
|
) &&
|
||||||
|
|
||||||
|
# 2. Prepare git repo with LFS
|
||||||
|
mkdir gitrepo &&
|
||||||
|
(
|
||||||
|
cd gitrepo &&
|
||||||
|
git init -q &&
|
||||||
|
git config core.ignoreCase false &&
|
||||||
|
git lfs install --local &&
|
||||||
|
|
||||||
|
cat > .gitattributes <<-EOF &&
|
||||||
|
*.iso filter=lfs diff=lfs merge=lfs -text
|
||||||
|
EOF
|
||||||
|
|
||||||
|
git add .gitattributes &&
|
||||||
|
git commit -q -m "Initialize Git LFS configuration"
|
||||||
|
) &&
|
||||||
|
|
||||||
|
FIRST_HASH=$(git -C gitrepo rev-parse HEAD) &&
|
||||||
|
|
||||||
|
# 3. Only .iso files should be converted
|
||||||
|
cat > lfs-patterns.txt <<-EOF &&
|
||||||
|
*.iso
|
||||||
|
EOF
|
||||||
|
|
||||||
|
(
|
||||||
|
cd gitrepo &&
|
||||||
|
hg-fast-export.sh \
|
||||||
|
-r "../hgrepo" \
|
||||||
|
--plugin "git_lfs_importer=../lfs-patterns.txt" \
|
||||||
|
--first-commit-hash "$FIRST_HASH" --force
|
||||||
|
) &&
|
||||||
|
|
||||||
|
# 4. Verify .iso is LFS pointer
|
||||||
|
git -C gitrepo show HEAD:image.iso | grep -q "oid sha256:" &&
|
||||||
|
|
||||||
|
# 5. Verify .txt and .dat are unchanged
|
||||||
|
test "$(git -C gitrepo show HEAD:file.txt)" = "text" &&
|
||||||
|
test "$(git -C gitrepo show HEAD:file.dat)" = "data"
|
||||||
|
'
|
||||||
|
|
||||||
|
test_expect_success 'git_lfs_importer handles directory patterns' '
|
||||||
|
test_when_finished "rm -rf hgrepo gitrepo lfs-patterns.txt" &&
|
||||||
|
|
||||||
|
# 1. Create repo with files in directory
|
||||||
|
(
|
||||||
|
hg init hgrepo &&
|
||||||
|
cd hgrepo &&
|
||||||
|
mkdir -p assets/images &&
|
||||||
|
echo "logo data" > assets/images/logo.bin &&
|
||||||
|
echo "regular" > readme.txt &&
|
||||||
|
hg add . &&
|
||||||
|
hg commit -m "files in directories"
|
||||||
|
) &&
|
||||||
|
|
||||||
|
# 2. Prepare git repo
|
||||||
|
mkdir gitrepo &&
|
||||||
|
(
|
||||||
|
cd gitrepo &&
|
||||||
|
git init -q &&
|
||||||
|
git config core.ignoreCase false &&
|
||||||
|
git lfs install --local &&
|
||||||
|
|
||||||
|
cat > .gitattributes <<-EOF &&
|
||||||
|
assets/** filter=lfs diff=lfs merge=lfs -text
|
||||||
|
EOF
|
||||||
|
|
||||||
|
git add .gitattributes &&
|
||||||
|
git commit -q -m "Initialize Git LFS configuration"
|
||||||
|
) &&
|
||||||
|
|
||||||
|
FIRST_HASH=$(git -C gitrepo rev-parse HEAD) &&
|
||||||
|
|
||||||
|
# 3. Match directory pattern
|
||||||
|
cat > lfs-patterns.txt <<-EOF &&
|
||||||
|
assets/**
|
||||||
|
EOF
|
||||||
|
|
||||||
|
(
|
||||||
|
cd gitrepo &&
|
||||||
|
hg-fast-export.sh \
|
||||||
|
-r "../hgrepo" \
|
||||||
|
--plugin "git_lfs_importer=../lfs-patterns.txt" \
|
||||||
|
--first-commit-hash "$FIRST_HASH" --force
|
||||||
|
) &&
|
||||||
|
|
||||||
|
# 4. Verify directory file is converted
|
||||||
|
git -C gitrepo show HEAD:assets/images/logo.bin | grep -q "oid sha256:" &&
|
||||||
|
|
||||||
|
# 5. Verify file outside directory is unchanged
|
||||||
|
test "$(git -C gitrepo show HEAD:readme.txt)" = "regular"
|
||||||
|
'
|
||||||
|
|
||||||
|
test_done
|
||||||
20
t/largefile_plugin.expected
Normal file
20
t/largefile_plugin.expected
Normal file
@@ -0,0 +1,20 @@
|
|||||||
|
blob
|
||||||
|
mark :1
|
||||||
|
data 7
|
||||||
|
a_file
|
||||||
|
|
||||||
|
blob
|
||||||
|
mark :2
|
||||||
|
data 6
|
||||||
|
large
|
||||||
|
|
||||||
|
reset refs/heads/master
|
||||||
|
commit refs/heads/master
|
||||||
|
mark :3
|
||||||
|
author Grevious Bodily Harmsworth <gbh@example.com> 1679014800 +0000
|
||||||
|
committer Grevious Bodily Harmsworth <gbh@example.com> 1679014800 +0000
|
||||||
|
data 3
|
||||||
|
r0
|
||||||
|
M 100644 :1 a.txt
|
||||||
|
M 100644 :2 b.txt
|
||||||
|
|
||||||
69
t/largefile_plugin.t
Executable file
69
t/largefile_plugin.t
Executable file
@@ -0,0 +1,69 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
#
|
||||||
|
# Copyright (c) 2023 Felipe Contreras
|
||||||
|
# Copyright (c) 2023 Frej Drejhammar
|
||||||
|
# Copyright (c) 2025 Günther Nußmüller
|
||||||
|
#
|
||||||
|
# Check that plugin invocation works with largefiles.
|
||||||
|
# This test uses the echo_file_data_test_plugin to verify that the
|
||||||
|
# file data is passed correctly, including the largefile status.
|
||||||
|
#
|
||||||
|
|
||||||
|
test_description='Largefiles and plugin test'
|
||||||
|
|
||||||
|
. "${SHARNESS_TEST_SRCDIR-$(dirname "$0")/sharness}"/sharness.sh || exit 1
|
||||||
|
|
||||||
|
|
||||||
|
# Create an empty Git repository at path $1 (quietly) and force it to be
# case-sensitive by setting core.ignoreCase=false.
# Returns non-zero if either git command fails.
git_create() {
	local repo=$1

	git init -q "$repo" &&
	git -C "$repo" config core.ignoreCase false
}
|
||||||
|
|
||||||
|
# Run hg-fast-export.sh inside Git repository $2, importing the sibling
# Mercurial repository $1 with the echo_file_data_test_plugin loaded.
#   $1 - Mercurial repository, given relative to the parent of $2
#   $2 - existing destination Git repository
git_convert() {
	local hg_repo=$1 git_repo=$2

	# Subshell: keeps the directory change from leaking into the caller.
	(
		cd "$git_repo" &&
		hg-fast-export.sh \
			--repo "../$hg_repo" \
			-s --hgtags -n \
			--plugin ../../plugins/echo_file_data_test_plugin
	)
}
|
||||||
|
|
||||||
|
# Write a minimal $HOME/.hgrc that sets the Mercurial commit identity and
# enables the largefiles extension (needed for `hg add --large`).
setup() {
	printf '%s\n' \
		'[ui]' \
		'username = Grevious Bodily Harmsworth <gbh@example.com>' \
		'[extensions]' \
		'largefiles =' \
		> "$HOME"/.hgrc
}
|
||||||
|
|
||||||
|
# Create revision r0 in ./hgrepo: a.txt is added as a regular file and b.txt
# as a largefile, committed with a fixed date so the exported history is
# reproducible.
# Returns non-zero as soon as any step fails.
commit0() {
	(
		cd hgrepo &&
		echo "a_file" > a.txt &&
		# The trailing && here was missing, which hid failures of the
		# preceding steps (e.g. a failed `cd hgrepo`).
		echo "large" > b.txt &&
		hg add a.txt &&
		hg add --large b.txt &&
		hg commit -d "2023-03-17 01:00Z" -m "r0"
	)
}
|
||||||
|
|
||||||
|
setup
|
||||||
|
|
||||||
|
test_expect_success 'largefile and plugin' '
|
||||||
|
test_when_finished "rm -rf hgrepo gitrepo" &&
|
||||||
|
|
||||||
|
(
|
||||||
|
hg init hgrepo &&
|
||||||
|
commit0
|
||||||
|
) &&
|
||||||
|
git_create gitrepo &&
|
||||||
|
git_convert hgrepo gitrepo &&
|
||||||
|
|
||||||
|
git -C gitrepo fast-export --all > actual &&
|
||||||
|
|
||||||
|
test_cmp "$SHARNESS_TEST_DIRECTORY"/largefile_plugin.expected actual &&
|
||||||
|
test_cmp "$SHARNESS_TEST_DIRECTORY"/largefile_plugin_file_info.expected gitrepo/largefile_info.txt
|
||||||
|
'
|
||||||
|
|
||||||
|
test_done
|
||||||
12
t/largefile_plugin_file_info.expected
Normal file
12
t/largefile_plugin_file_info.expected
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
filename: b'b.txt'
|
||||||
|
data size: 6 bytes
|
||||||
|
ctx rev: 0
|
||||||
|
ctx binary: False
|
||||||
|
is largefile: True
|
||||||
|
|
||||||
|
filename: b'a.txt'
|
||||||
|
data size: 7 bytes
|
||||||
|
ctx rev: 0
|
||||||
|
ctx binary: False
|
||||||
|
is largefile: False
|
||||||
|
|
||||||
18
t/plugins/echo_file_data_test_plugin/__init__.py
Normal file
18
t/plugins/echo_file_data_test_plugin/__init__.py
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
import sys
|
||||||
|
from mercurial import node
|
||||||
|
|
||||||
|
def build_filter(args):
    """Return a new ``Filter`` built from the plugin argument ``args``.

    NOTE(review): presumably this is the entry point hg-fast-export looks up
    when the plugin is loaded via ``--plugin`` -- confirm against the plugin
    loader.
    """
    return Filter(args)
|
||||||
|
|
||||||
|
class Filter:
    """Test plugin that logs what it receives for every exported file.

    Each call to :meth:`file_data_filter` appends one blank-line-terminated
    record to ``largefile_info.txt`` in the current working directory.
    """

    def __init__(self, _):
        # The constructor argument is accepted but deliberately unused.
        pass

    def file_data_filter(self, file_data):
        """Append a record describing ``file_data`` to ``largefile_info.txt``.

        ``file_data`` is a mapping with the keys ``filename``, ``data`` and
        ``file_ctx`` (an object offering ``rev()`` and ``isbinary()``); the
        optional ``is_largefile`` key defaults to ``False``.
        """
        with open('largefile_info.txt', 'a') as log:
            # Lines are produced lazily, so they are evaluated and written
            # one after another in record order.
            log.writelines(self._record_lines(file_data))

    @staticmethod
    def _record_lines(file_data):
        """Yield the text lines that make up one record, in output order."""
        yield f"filename: {file_data['filename']}\n"
        yield f"data size: {len(file_data['data'])} bytes\n"
        file_ctx = file_data['file_ctx']
        yield f"ctx rev: {file_ctx.rev()}\n"
        yield f"ctx binary: {file_ctx.isbinary()}\n"
        yield f"is largefile: {file_data.get('is_largefile', False)}\n"
        yield "\n"
|
||||||
42
t/set_origin.expected
Normal file
42
t/set_origin.expected
Normal file
@@ -0,0 +1,42 @@
|
|||||||
|
blob
|
||||||
|
mark :1
|
||||||
|
data 5
|
||||||
|
zero
|
||||||
|
|
||||||
|
reset refs/heads/prefix/master
|
||||||
|
commit refs/heads/prefix/master
|
||||||
|
mark :2
|
||||||
|
author H G Wells <wells@example.com> 1679014800 +0000
|
||||||
|
committer H G Wells <wells@example.com> 1679014800 +0000
|
||||||
|
data 5
|
||||||
|
zero
|
||||||
|
M 100644 :1 content
|
||||||
|
|
||||||
|
blob
|
||||||
|
mark :3
|
||||||
|
data 8
|
||||||
|
branch1
|
||||||
|
|
||||||
|
commit refs/heads/prefix/branch1
|
||||||
|
mark :4
|
||||||
|
author H G Wells <wells@example.com> 1679018400 +0000
|
||||||
|
committer H G Wells <wells@example.com> 1679018400 +0000
|
||||||
|
data 29
|
||||||
|
Added file in branch branch1
|
||||||
|
from :2
|
||||||
|
M 100644 :3 b8486c4feca589a4237a1ee428322d7109ede12e
|
||||||
|
|
||||||
|
blob
|
||||||
|
mark :5
|
||||||
|
data 8
|
||||||
|
branch2
|
||||||
|
|
||||||
|
commit refs/heads/prefix/branch2
|
||||||
|
mark :6
|
||||||
|
author H G Wells <wells@example.com> 1679022000 +0000
|
||||||
|
committer H G Wells <wells@example.com> 1679022000 +0000
|
||||||
|
data 29
|
||||||
|
Added file in branch branch2
|
||||||
|
from :4
|
||||||
|
M 100644 :5 fe786baee0d76603092c25609f2967b9c28a2cf2
|
||||||
|
|
||||||
59
t/set_origin.t
Executable file
59
t/set_origin.t
Executable file
@@ -0,0 +1,59 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
#
|
||||||
|
# Copyright (c) 2023 Felipe Contreras
|
||||||
|
# Copyright (c) 2025 Günther Nußmüller
|
||||||
|
#
|
||||||
|
|
||||||
|
test_description='Set origin tests'
|
||||||
|
|
||||||
|
. "${SHARNESS_TEST_SRCDIR-$(dirname "$0")/sharness}"/sharness.sh || exit 1
|
||||||
|
|
||||||
|
# Export all refs of Git repository $1 with `git fast-export` into the file
# `actual` and compare it with the recorded set_origin.expected stream.
# Returns non-zero if the export itself fails or the streams differ.
check() {
	# Chain with && so a failing export is reported rather than masked by
	# the comparison that follows.
	git -C "$1" fast-export --all > actual &&
	test_cmp "$SHARNESS_TEST_DIRECTORY"/set_origin.expected actual
}
|
||||||
|
|
||||||
|
# Create Git repository $2 and import the sibling Mercurial repository $1
# into it, passing $3 to hg-fast-export.sh as the --origin value.
#   $1 - Mercurial repository, given relative to the parent of $2
#   $2 - Git repository to create
#   $3 - origin name handed to --origin
git_clone() {
	local hg_repo=$1 git_repo=$2 origin=$3

	# Subshell: keeps the directory change from leaking into the caller.
	(
		git init -q "$git_repo" &&
		git -C "$git_repo" config core.ignoreCase false &&
		cd "$git_repo" &&
		hg-fast-export.sh --repo "../$hg_repo" --origin "$origin"
	)
}
|
||||||
|
|
||||||
|
# Write a minimal $HOME/.hgrc that sets the Mercurial commit identity used
# by the tests in this file.
setup() {
	printf '%s\n' \
		'[ui]' \
		'username = H G Wells <wells@example.com>' \
		> "$HOME"/.hgrc
}
|
||||||
|
|
||||||
|
# In the current Mercurial repository, open named branch $1 and commit one
# new file on it.
#   $1 - branch name; the file is named after the SHA-1 of "$1\n" and
#        contains the branch name
#   $2 - hour (HH) of the fixed commit date 2023-03-17 HH:00Z
# Returns non-zero as soon as any step fails.
make-branch() {
	# Keep the helper variable out of the caller's scope.
	local file

	hg branch "$1" &&
	file=$(echo "$1" | sha1sum | cut -d " " -f 1) &&
	echo "$1" > "$file" &&
	hg add "$file" &&
	hg commit -d "2023-03-17 $2:00Z" -m "Added file in branch $1"
}
|
||||||
|
|
||||||
|
setup
|
||||||
|
|
||||||
|
test_expect_success 'basic' '
|
||||||
|
test_when_finished "rm -rf hgrepo gitrepo" &&
|
||||||
|
|
||||||
|
(
|
||||||
|
hg init hgrepo &&
|
||||||
|
cd hgrepo &&
|
||||||
|
echo zero > content &&
|
||||||
|
hg add content &&
|
||||||
|
hg commit -m zero -d "2023-03-17 01:00Z" &&
|
||||||
|
make-branch branch1 02 &&
|
||||||
|
make-branch branch2 03
|
||||||
|
) &&
|
||||||
|
|
||||||
|
git_clone hgrepo gitrepo prefix &&
|
||||||
|
check gitrepo
|
||||||
|
'
|
||||||
|
|
||||||
|
test_done
|
||||||
156
tests/test_git_lfs_importer_plugin.py
Normal file
156
tests/test_git_lfs_importer_plugin.py
Normal file
@@ -0,0 +1,156 @@
|
|||||||
|
import sys
|
||||||
|
|
||||||
|
sys.path.append("./plugins")
|
||||||
|
|
||||||
|
import hashlib
|
||||||
|
import pathlib
|
||||||
|
import time
|
||||||
|
import unittest
|
||||||
|
import tempfile
|
||||||
|
import os
|
||||||
|
import pathspec
|
||||||
|
|
||||||
|
from git_lfs_importer import Filter, build_filter
|
||||||
|
|
||||||
|
|
||||||
|
class TestGitLfsImporterPlugin(unittest.TestCase):
    """Unit tests for ``Filter`` and ``build_filter`` of git_lfs_importer.

    Every test runs with a fresh temporary directory as its working
    directory, because the assertions look for LFS objects under the
    relative path ``.git/lfs/objects``.
    """

    POINTER_VERSION_LINE = b"version https://git-lfs.github.com/spec/v1"

    def setUp(self):
        # Create an isolated temp dir and chdir into it for each test.
        # Cleanups are registered right after each resource is acquired so
        # they still run if a later setUp step raises; they run in reverse
        # order (restore cwd first, then delete the directory).
        self._orig_cwd = os.getcwd()
        self._tmpdir = tempfile.TemporaryDirectory()
        self.addCleanup(self._tmpdir.cleanup)
        self.tmp_path = pathlib.Path(self._tmpdir.name)
        os.chdir(self.tmp_path)
        self.addCleanup(os.chdir, self._orig_cwd)

    # --------------------------------------------------------
    # Helpers
    # --------------------------------------------------------

    @staticmethod
    def _spec(*patterns):
        """Build a gitwildmatch PathSpec from the given pattern strings."""
        return pathspec.PathSpec.from_lines(
            pathspec.patterns.GitWildMatchPattern, list(patterns)
        )

    def empty_spec(self):
        """Return a PathSpec built from no patterns at all."""
        return self._spec()

    @staticmethod
    def _lfs_object_path(sha):
        """Return the relative LFS object path for hex digest ``sha``."""
        return pathlib.Path(".git/lfs/objects") / sha[:2] / sha[2:4] / sha

    def _assert_no_git_dir(self):
        """Assert that the filter did not create a ``.git`` directory."""
        self.assertFalse((self.tmp_path / ".git").exists())

    def _assert_stored_in_lfs(self, data):
        """Assert that ``data`` is stored verbatim as an LFS object."""
        lfs_file = self._lfs_object_path(hashlib.sha256(data).hexdigest())
        self.assertTrue(lfs_file.is_file())
        self.assertEqual(lfs_file.read_bytes(), data)

    # --------------------------------------------------------
    # GIVEN-WHEN-THEN TESTS for Filter.file_data_filter
    # --------------------------------------------------------

    def test_skips_deletions(self):
        flt = Filter(self.empty_spec())
        file_data = {"filename": b"file.txt", "data": None}

        flt.file_data_filter(file_data)

        self.assertIsNone(file_data["data"])
        self._assert_no_git_dir()

    def test_skips_files_that_do_not_match_spec(self):
        flt = Filter(self._spec("*.bin"))
        original = b"not matched"
        file_data = {"filename": b"file.txt", "data": original}

        flt.file_data_filter(file_data)

        self.assertEqual(file_data["data"], original)
        self._assert_no_git_dir()

    def test_converts_only_matched_files_to_lfs_pointer(self):
        flt = Filter(self._spec("*.bin"))
        data = b"hello world"
        sha = hashlib.sha256(data).hexdigest()
        expected_pointer = (
            "version https://git-lfs.github.com/spec/v1\n"
            f"oid sha256:{sha}\n"
            f"size {len(data)}\n"
        ).encode("utf-8")
        file_data = {"filename": b"payload.bin", "data": data}

        flt.file_data_filter(file_data)

        self.assertEqual(file_data["data"], expected_pointer)
        self._assert_stored_in_lfs(data)

    def test_does_not_convert_unmatched_directory(self):
        flt = Filter(self._spec("assets/**"))
        data = b"outside directory"
        file_data = {"filename": b"src/images/logo.png", "data": data}

        flt.file_data_filter(file_data)

        self.assertEqual(file_data["data"], data)
        self._assert_no_git_dir()

    def test_converts_matched_directory(self):
        flt = Filter(self._spec("assets/**"))
        data = b"inside directory"
        file_data = {"filename": b"assets/images/logo.png", "data": data}

        flt.file_data_filter(file_data)

        self.assertIn(self.POINTER_VERSION_LINE, file_data["data"])
        self._assert_stored_in_lfs(data)

    def test_does_not_overwrite_existing_blob(self):
        flt = Filter(self._spec("*.bin"))
        data = b"abc"
        # Pre-populate the LFS object the filter would otherwise create.
        lfs_file = self._lfs_object_path(hashlib.sha256(data).hexdigest())
        lfs_file.parent.mkdir(parents=True, exist_ok=True)
        lfs_file.write_bytes(data)
        before_mtime = lfs_file.stat().st_mtime_ns
        time.sleep(0.01)  # Ensure a rewrite would yield a different timestamp

        file_data = {"filename": b"abc.bin", "data": data}

        flt.file_data_filter(file_data)

        self.assertTrue(file_data["data"].startswith(self.POINTER_VERSION_LINE))
        after_mtime = lfs_file.stat().st_mtime_ns
        self.assertEqual(after_mtime, before_mtime)

    def test_empty_file_converted_when_matched(self):
        flt = Filter(self._spec("*.bin"))
        data = b""
        file_data = {"filename": b"empty.bin", "data": data}

        flt.file_data_filter(file_data)

        self.assertIn(b"size 0", file_data["data"])
        self._assert_stored_in_lfs(data)

    # --------------------------------------------------------
    # Optional: GIVEN-WHEN-THEN for build_filter
    # --------------------------------------------------------

    def test_build_filter_reads_patterns_file(self):
        patterns_file = self.tmp_path / "lfs_patterns.txt"
        patterns_file.write_text("*.bin\nassets/**\n", encoding="utf-8")

        flt = build_filter(str(patterns_file))

        data_match = b"match me"
        sha_match = hashlib.sha256(data_match).hexdigest()
        fd_match = {"filename": b"assets/payload.bin", "data": data_match}
        flt.file_data_filter(fd_match)
        self.assertIn(b"oid sha256:", fd_match["data"])
        self.assertTrue(self._lfs_object_path(sha_match).is_file())

        data_skip = b"skip me"
        fd_skip = {"filename": b"docs/readme.md", "data": data_skip}
        flt.file_data_filter(fd_skip)
        self.assertEqual(fd_skip["data"], data_skip)
|
||||||
Reference in New Issue
Block a user