mirror of
https://github.com/frej/fast-export.git
synced 2026-02-26 06:20:42 +01:00
Compare commits
148 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
c87b66ed7d | ||
|
|
76db75d963 | ||
|
|
5c9068a1f1 | ||
|
|
42d1c89e73 | ||
|
|
9d71921ed8 | ||
|
|
f6b72d248f | ||
|
|
8e1ba281d4 | ||
|
|
d77765a23e | ||
|
|
95459e5599 | ||
|
|
de5c8d9d97 | ||
|
|
ad96531587 | ||
|
|
4af9a33bd6 | ||
|
|
f71385ec14 | ||
|
|
ae21cbf1a2 | ||
|
|
8762fee403 | ||
|
|
bd707b5d6e | ||
|
|
0afd336d6f | ||
|
|
dd1c8f219b | ||
|
|
f947189dcc | ||
|
|
2a3806576c | ||
|
|
08e2297853 | ||
|
|
893d6302b7 | ||
|
|
3de7bcfc18 | ||
|
|
d72e96b202 | ||
|
|
fb225c4700 | ||
|
|
997e8e1a8c | ||
|
|
ddb574004f | ||
|
|
e63feee1b9 | ||
|
|
7b4bb7ff1d | ||
|
|
53bbe05278 | ||
|
|
ddfc3a8300 | ||
|
|
21ab3f347b | ||
|
|
878ba44f48 | ||
|
|
2476d08517 | ||
|
|
d4298a0906 | ||
|
|
efe934e16b | ||
|
|
59675eca22 | ||
|
|
3c694243c4 | ||
|
|
1bbf7028b4 | ||
|
|
c8fa290adf | ||
|
|
c49dd0cf60 | ||
|
|
4f94d61d84 | ||
|
|
a3d0562737 | ||
|
|
0d0e90d328 | ||
|
|
64ee34dfb0 | ||
|
|
71834a584c | ||
|
|
4310e47760 | ||
|
|
278cc9966c | ||
|
|
cf66c36a32 | ||
|
|
269c23c5bb | ||
|
|
90c6ad5f87 | ||
|
|
51db3b4236 | ||
|
|
fba03b95fb | ||
|
|
2cc7db7556 | ||
|
|
a89033b5b1 | ||
|
|
fd5bd48a6c | ||
|
|
84a877d112 | ||
|
|
3f57c4340a | ||
|
|
1e872eb235 | ||
|
|
ecdbf0e42e | ||
|
|
9754a9f3f6 | ||
|
|
d2f11bd619 | ||
|
|
3582221efd | ||
|
|
0ae0d20496 | ||
|
|
e09a14a266 | ||
|
|
9df2f97f6c | ||
|
|
531fa9b3a2 | ||
|
|
a229b39d66 | ||
|
|
c666fd9c95 | ||
|
|
21fa443b4a | ||
|
|
fd6ba361c6 | ||
|
|
153ba2a5c1 | ||
|
|
df5278f755 | ||
|
|
6fbe4d0ad0 | ||
|
|
fa73d8dec9 | ||
|
|
e1e15b2091 | ||
|
|
534d2bdd92 | ||
|
|
23f41c0ff1 | ||
|
|
8b1fd408ca | ||
|
|
4a4d242e98 | ||
|
|
432254100b | ||
|
|
5e4bc6eb03 | ||
|
|
7886016978 | ||
|
|
18577f559d | ||
|
|
88defe7fd1 | ||
|
|
4edea927fb | ||
|
|
bbab981130 | ||
|
|
c3cbf1e04d | ||
|
|
4c10270302 | ||
|
|
723d8032ba | ||
|
|
268299a358 | ||
|
|
6700b164d0 | ||
|
|
13c273f10c | ||
|
|
667404e836 | ||
|
|
38e236962d | ||
|
|
dbb8158527 | ||
|
|
bb0bcda7ba | ||
|
|
838b654614 | ||
|
|
f179afce65 | ||
|
|
5b7ca5aaec | ||
|
|
4227621eed | ||
|
|
bdfc0c08c7 | ||
|
|
001749e69d | ||
|
|
20c22a3110 | ||
|
|
f741bf39f2 | ||
|
|
427663c766 | ||
|
|
056756f193 | ||
|
|
588e03bb23 | ||
|
|
89da4ad8af | ||
|
|
b0d5e56c8d | ||
|
|
787e8559b9 | ||
|
|
ab500a24a7 | ||
|
|
ead75895b0 | ||
|
|
bf5f14ddab | ||
|
|
7057ce2c2b | ||
|
|
2b6f735b8c | ||
|
|
71acb42a09 | ||
|
|
a7955bc49b | ||
|
|
9c6dea9fd4 | ||
|
|
21827a53f7 | ||
|
|
5c1cbf82b0 | ||
|
|
50631c4b34 | ||
|
|
2a9dd53d14 | ||
|
|
597093eaf1 | ||
|
|
3910044a97 | ||
|
|
44c50d0fae | ||
|
|
d29d30363b | ||
|
|
f102d2a69f | ||
|
|
cf0e5837b6 | ||
|
|
61d22307af | ||
|
|
3b3f86b71e | ||
|
|
e51844cd65 | ||
|
|
90eeef2ff4 | ||
|
|
7f4d9c3ad4 | ||
|
|
b37420f404 | ||
|
|
f2aa47fdf7 | ||
|
|
6361b44c33 | ||
|
|
afeb58ae95 | ||
|
|
48508ee299 | ||
|
|
56da62847a | ||
|
|
750fe6d3e1 | ||
|
|
e4d6d433ec | ||
|
|
058c791b75 | ||
|
|
13010f7a25 | ||
|
|
4071f720b0 | ||
|
|
160aa3c9ef | ||
|
|
883474184d | ||
|
|
b961f146df |
28
.github/contributing.md
vendored
Normal file
28
.github/contributing.md
vendored
Normal file
@@ -0,0 +1,28 @@
|
||||
When submitting a patch make sure the commits in your pull request:
|
||||
|
||||
* Have good commit messages
|
||||
|
||||
Please read Chris Beams' blog post [How to Write a Git Commit
|
||||
Message](https://chris.beams.io/posts/git-commit/) on how to write a
|
||||
good commit message. Although the article recommends at most 50
|
||||
characters for the subject, up to 72 characters are frequently
|
||||
accepted for fast-export.
|
||||
|
||||
* Adhere to good [commit
|
||||
hygiene](http://www.ericbmerritt.com/2011/09/21/commit-hygiene-and-git.html)
|
||||
|
||||
When developing a pull request for hg-fast-export, base your work on
|
||||
the current `master` branch and rebase your work if it no longer can
|
||||
be merged into the current `master` without conflicts. Never merge
|
||||
`master` into your development branch, rebase if your work needs
|
||||
updates from `master`.
|
||||
|
||||
When a pull request is modified due to review feedback, please
|
||||
incorporate the changes into the proper commit. A good reference on
|
||||
how to modify history is in the [Pro Git book, Section
|
||||
7.6](https://git-scm.com/book/en/v2/Git-Tools-Rewriting-History).
|
||||
|
||||
Please do not submit a pull request if you are not willing to spend
|
||||
the time required to address review comments or revise the patch until
|
||||
it follows the guidelines above. A _take it or leave it_ approach to
|
||||
contributing wastes both your and the maintainer's time.
|
||||
4
.github/requirements-earliest.txt
vendored
Normal file
4
.github/requirements-earliest.txt
vendored
Normal file
@@ -0,0 +1,4 @@
|
||||
mercurial==5.2
|
||||
|
||||
# Required for git_lfs_importer plugin
|
||||
pathspec==0.11.2
|
||||
4
.github/requirements-latest.txt
vendored
Normal file
4
.github/requirements-latest.txt
vendored
Normal file
@@ -0,0 +1,4 @@
|
||||
mercurial
|
||||
|
||||
# Required for git_lfs_importer plugin
|
||||
pathspec==0.12.1
|
||||
71
.github/workflows/ci.yml
vendored
Normal file
71
.github/workflows/ci.yml
vendored
Normal file
@@ -0,0 +1,71 @@
|
||||
name: CI
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [master]
|
||||
pull_request:
|
||||
# The branches below must be a subset of the branches above
|
||||
branches: [master]
|
||||
|
||||
jobs:
|
||||
test-earliest:
|
||||
name: Run test suite on the earliest supported Python version
|
||||
runs-on: ubuntu-22.04
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
name: Checkout repository
|
||||
with:
|
||||
fetch-depth: 1
|
||||
submodules: 'recursive'
|
||||
- uses: actions/setup-python@v5
|
||||
id: earliest
|
||||
with:
|
||||
python-version: '3.7.x'
|
||||
check-latest: true
|
||||
cache: 'pip'
|
||||
cache-dependency-path: '**/requirements-earliest.txt'
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
pip install -r .github/requirements-earliest.txt
|
||||
|
||||
- name: Report selected versions
|
||||
run: |
|
||||
echo Selected '${{ steps.earliest.outputs.python-version }}'
|
||||
./hg-fast-export.sh --debug
|
||||
|
||||
- name: Run tests on earliest supported Python version
|
||||
run: make -C t
|
||||
|
||||
test-latest:
|
||||
name: Run test suite on the latest supported python version
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
name: Checkout repository
|
||||
with:
|
||||
fetch-depth: 1
|
||||
submodules: 'recursive'
|
||||
- uses: actions/setup-python@v5
|
||||
id: latest
|
||||
with:
|
||||
python-version: '3.x'
|
||||
check-latest: true
|
||||
cache: 'pip'
|
||||
cache-dependency-path: '**/requirements-latest.txt'
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
pip install -r .github/requirements-latest.txt
|
||||
|
||||
- name: Report selected version
|
||||
run: |
|
||||
echo Selected '${{ steps.latest.outputs.python-version }}'
|
||||
./hg-fast-export.sh --debug
|
||||
|
||||
- name: Run tests on 3.x
|
||||
run: make -C t
|
||||
3
.gitmodules
vendored
Normal file
3
.gitmodules
vendored
Normal file
@@ -0,0 +1,3 @@
|
||||
[submodule "t/sharness"]
|
||||
path = t/sharness
|
||||
url = https://github.com/felipec/sharness.git
|
||||
@@ -27,10 +27,10 @@ command line option.
|
||||
|
||||
## Example
|
||||
|
||||
Example mercurial repo folder structure (~/mercurial):
|
||||
Example mercurial repo folder structure (~/mercurial) containing two subrepos:
|
||||
src/...
|
||||
subrepo/subrepo1
|
||||
subrepo/subrepo2
|
||||
subrepos/subrepo1
|
||||
subrepos/subrepo2
|
||||
|
||||
### Setup
|
||||
Create an empty new folder where all the converted git modules will be imported:
|
||||
@@ -41,18 +41,18 @@ Create an empty new folder where all the converted git modules will be imported:
|
||||
mkdir submodule1
|
||||
cd submodule1
|
||||
git init
|
||||
hg-fast-export.sh -r ~/mercurial/subrepo1
|
||||
hg-fast-export.sh -r ~/mercurial/subrepos/subrepo1
|
||||
cd ..
|
||||
mkdir submodule2
|
||||
cd submodule2
|
||||
git init
|
||||
hg-fast-export.sh -r ~/mercurial/subrepo2
|
||||
hg-fast-export.sh -r ~/mercurial/subrepos/subrepo2
|
||||
|
||||
### Create mapping file
|
||||
cd ~/imported-gits
|
||||
cat > submodule-mappings << EOF
|
||||
"subrepo/subrepo1"="../submodule1"
|
||||
"subrepo/subrepo2"="../submodule2"
|
||||
"subrepos/subrepo1"="../submodule1"
|
||||
"subrepos/subrepo2"="../submodule2"
|
||||
EOF
|
||||
|
||||
### Convert main repository
|
||||
@@ -60,16 +60,16 @@ Create an empty new folder where all the converted git modules will be imported:
|
||||
mkdir git-main-repo
|
||||
cd git-main-repo
|
||||
git init
|
||||
hg-fast-export.sh -r ~/mercurial --subrepo-map=../submodule-mappings
|
||||
hg-fast-export.sh -r ~/mercurial --subrepo-map=~/imported-gits/submodule-mappings
|
||||
|
||||
### Result
|
||||
The resulting repository will now contain the subrepo/subrepo1 and
|
||||
subrepo/subrepo1 submodules. The created .gitmodules file will look
|
||||
like:
|
||||
The resulting repository will now contain the submodules at the paths
|
||||
`subrepos/subrepo1` and `subrepos/subrepo2`. The created .gitmodules
|
||||
file will look like:
|
||||
|
||||
[submodule "subrepo/subrepo1"]
|
||||
path = subrepo/subrepo1
|
||||
[submodule "subrepos/subrepo1"]
|
||||
path = subrepos/subrepo1
|
||||
url = ../submodule1
|
||||
[submodule "subrepo/subrepo2"]
|
||||
path = subrepo/subrepo2
|
||||
[submodule "subrepos/subrepo2"]
|
||||
path = subrepos/subrepo2
|
||||
url = ../submodule2
|
||||
|
||||
170
README.md
170
README.md
@@ -1,4 +1,4 @@
|
||||
hg-fast-export.(sh|py) - mercurial to git converter using git-fast-import
|
||||
hg-fast-export.sh - mercurial to git converter using git-fast-import
|
||||
=========================================================================
|
||||
|
||||
Legal
|
||||
@@ -29,8 +29,8 @@ first time.
|
||||
System Requirements
|
||||
-------------------
|
||||
|
||||
This project depends on Python 2.7 and the Mercurial >= 4.6
|
||||
package. If Python is not installed, install it before proceeding. The
|
||||
This project depends on Python (>=3.7) and the Mercurial package (>=
|
||||
5.2). If Python is not installed, install it before proceeding. The
|
||||
Mercurial package can be installed with `pip install mercurial`.
|
||||
|
||||
On Windows the bash that comes with "Git for Windows" is known to work
|
||||
@@ -42,11 +42,10 @@ Usage
|
||||
Using hg-fast-export is quite simple for a mercurial repository <repo>:
|
||||
|
||||
```
|
||||
mkdir repo-git # or whatever
|
||||
git init repo-git # or whatever
|
||||
cd repo-git
|
||||
git init
|
||||
hg-fast-export.sh -r <local-repo>
|
||||
git checkout HEAD
|
||||
git checkout
|
||||
```
|
||||
|
||||
Please note that hg-fast-export does not automatically check out the
|
||||
@@ -79,10 +78,10 @@ author information than git, an author mapping file can be given to
|
||||
hg-fast-export to fix up malformed author strings. The file is
|
||||
specified using the -A option. The file should contain lines of the
|
||||
form `"<key>"="<value>"`. Inside the key and value strings, all escape
|
||||
sequences understood by the python `string_escape` encoding are
|
||||
supported. (Versions of fast-export prior to v171002 had a different
|
||||
syntax, the old syntax can be enabled by the flag
|
||||
`--mappings-are-raw`.)
|
||||
sequences understood by the python `unicode_escape` encoding are
|
||||
supported; strings are otherwise assumed to be UTF-8-encoded.
|
||||
(Versions of fast-export prior to v171002 had a different syntax, the
|
||||
old syntax can be enabled by the flag `--mappings-are-raw`.)
|
||||
|
||||
The example authors.map below will translate `User
|
||||
<garbage<tab><user@example.com>` to `User <user@example.com>`.
|
||||
@@ -93,6 +92,9 @@ The example authors.map below will translate `User
|
||||
-- End of authors.map --
|
||||
```
|
||||
|
||||
If you have many Mercurial repositories, Chris J Billington's
|
||||
[hg-export-tool] allows you to batch convert them.
|
||||
|
||||
Tag and Branch Naming
|
||||
---------------------
|
||||
|
||||
@@ -107,8 +109,8 @@ branch/tag names. In the future -n will become the default, but in
|
||||
order to not break existing incremental conversions, the default
|
||||
remains with the old behavior.
|
||||
|
||||
By default, the `default` mercurial branch is renamed to the `master`
|
||||
branch on git. If your mercurial repo contains both `default` and
|
||||
By default, the `default` mercurial branch is renamed to the `master`
|
||||
branch on git. If your mercurial repo contains both `default` and
|
||||
`master` branches, you'll need to override this behavior. Use
|
||||
`-M <newName>` to specify what name to give the `default` branch.
|
||||
|
||||
@@ -129,10 +131,58 @@ is to convert line endings in text files from CRLF to git's preferred LF:
|
||||
# $2 = Mercurial's hash of the file
|
||||
# $3 = "1" if Mercurial reports the file as binary, otherwise "0"
|
||||
|
||||
if [ "$3" == "1" ]; then cat; else dos2unix; fi
|
||||
if [ "$3" == "1" ]; then cat; else dos2unix -q; fi
|
||||
# The -q option in the call to dos2unix avoids returning an
|
||||
# error code when handling non-ascii based text files (like UTF-16
|
||||
# encoded text files)
|
||||
-- End of crlf-filter.sh --
|
||||
```
|
||||
|
||||
Mercurial Largefiles Extension
|
||||
------------------------------
|
||||
|
||||
### Handling Mercurial Largefiles during Migration
|
||||
|
||||
When migrating from Mercurial to Git, largefiles are exported as ordinary
|
||||
files by default. To ensure a successful migration and manage repository
|
||||
size, follow the requirements below.
|
||||
|
||||
#### 1. Pre-Export: Ensure File Availability
|
||||
|
||||
Before starting the export, you must have all largefiles from all
|
||||
Mercurial commits available locally. Use one of these methods:
|
||||
|
||||
* **For a new clone:** `hg clone --all-largefiles <repo-url>`
|
||||
* **For an existing repo:** `hg lfpull --rev "all()"`
|
||||
|
||||
#### 2. Choosing Your LFS Strategy
|
||||
|
||||
If you want your files to be versioned in Git LFS rather than as standard
|
||||
Git blobs, you have two primary paths:
|
||||
|
||||
* **[git_lfs_importer plugin](./plugins/git_lfs_importer/README.md)
|
||||
(During Conversion)**
|
||||
Recommended for large repos. This performs Just-In-Time (JIT) conversion
|
||||
by identifying large files during the export and writing LFS pointers
|
||||
immediately, skipping the need for a second pass. This also supports
|
||||
**incremental conversion**, making it much more efficient for ongoing
|
||||
migrations.
|
||||
* **[git lfs migrate import](https://github.com/git-lfs/git-lfs/blob/main/docs/man/git-lfs-migrate.adoc)
|
||||
(After Conversion)**
|
||||
A standard two-step process: first, export the full history from Mercurial
|
||||
to Git, then run a separate full history rewrite to move files into LFS.
|
||||
|
||||
### Why use the git_lfs_importer plugin?
|
||||
|
||||
For "monorepos" or very large repositories (100GiB+), the traditional
|
||||
two-step process can take days. By integrating the LFS conversion
|
||||
directly into the history export, the plugin eliminates the massive
|
||||
time overhead of a secondary history rewrite and allows for incremental
|
||||
progress.
|
||||
|
||||
For detailed setup, see the
|
||||
[git_lfs_importer](./plugins/git_lfs_importer/README.md)
|
||||
plugin documentation.
|
||||
|
||||
Plugins
|
||||
-----------------
|
||||
@@ -163,9 +213,18 @@ defined filter methods in the [dos2unix](./plugins/dos2unix) and
|
||||
[branch_name_in_commit](./plugins/branch_name_in_commit) plugins.
|
||||
|
||||
```
|
||||
commit_data = {'branch': branch, 'parents': parents, 'author': author, 'desc': desc}
|
||||
commit_data = {
|
||||
'author': author,
|
||||
'branch': branch,
|
||||
'committer': committer,
|
||||
'desc': desc,
|
||||
'extra': extra,
|
||||
'hg_hash': hg_hash,
|
||||
'parents': parents,
|
||||
'revision': revision,
|
||||
}
|
||||
|
||||
def commit_message_filter(self,commit_data):
|
||||
def commit_message_filter(self, commit_data):
|
||||
```
|
||||
The `commit_message_filter` method is called for each commit, after parsing
|
||||
from hg, but before outputting to git. The dictionary `commit_data` contains the
|
||||
@@ -174,9 +233,14 @@ values in the dictionary after filters have been run are used to create the git
|
||||
commit.
|
||||
|
||||
```
|
||||
file_data = {'filename':filename,'file_ctx':file_ctx,'d':d}
|
||||
file_data = {
|
||||
'data': file_contents,
|
||||
'file_ctx': file_ctx,
|
||||
'filename': filename,
|
||||
'is_largefile': largefile_status,
|
||||
}
|
||||
|
||||
def file_data_filter(self,file_data):
|
||||
def file_data_filter(self, file_data):
|
||||
```
|
||||
The `file_data_filter` method is called for each file within each commit.
|
||||
The dictionary `file_data` contains the above attributes about the file, and
|
||||
@@ -184,6 +248,17 @@ can be modified by any filter. `file_ctx` is the filecontext from the
|
||||
mercurial python library. After all filters have been run, the values
|
||||
are used to add the file to the git commit.
|
||||
|
||||
The `file_data_filter` method is also called when files are deleted,
|
||||
but in this case the `data` and `file_ctx` keys map to None. This is
|
||||
so that a filter which modifies file names can apply the same name
|
||||
transformations when files are deleted.
|
||||
|
||||
The `is_largefile` entry within the `file_data` dictionary will contain
|
||||
`True` if the original file was a largefile and has been converted
|
||||
to a normal file before the plugins were invoked. In this case, the `file_ctx`
|
||||
will still point to the filecontext for the original, unconverted file, while
|
||||
`filename` and `data` will contain the already converted information.
|
||||
|
||||
Submodules
|
||||
----------
|
||||
See README-SUBMODULES.md for how to convert subrepositories into git
|
||||
@@ -194,7 +269,15 @@ Notes/Limitations
|
||||
|
||||
hg-fast-export supports multiple branches but only named branches with
|
||||
exactly one head each. Otherwise commits to the tip of these heads
|
||||
within the branch will get flattened into merge commits.
|
||||
within the branch will get flattened into merge commits. There are a
|
||||
few options to deal with this:
|
||||
1. Chris J Billington's [hg-export-tool] can help you to handle branches with
|
||||
duplicate heads.
|
||||
2. Use the [head2branch plugin](./plugins/head2branch) to create a new named
|
||||
branch from an unnamed head.
|
||||
3. You can ignore unnamed heads with the `--ignore-unnamed-heads` option, which
|
||||
is appropriate in situations such as the extra heads being close commits
|
||||
(abandoned, unmerged changes).
|
||||
|
||||
hg-fast-export will ignore any files or directories tracked by mercurial
|
||||
called `.git`, and will print a warning if it encounters one. Git cannot
|
||||
@@ -213,8 +296,8 @@ possible to use hg-fast-export on remote repositories
|
||||
Design
|
||||
------
|
||||
|
||||
hg-fast-export.py was designed in a way that doesn't require a 2-pass
|
||||
mechanism or any prior repository analysis: if just feeds what it
|
||||
hg-fast-export was designed in a way that doesn't require a 2-pass
|
||||
mechanism or any prior repository analysis: it just feeds what it
|
||||
finds into git-fast-import. This also implies that it heavily relies
|
||||
on strictly linear ordering of changesets from hg, i.e. its
|
||||
append-only storage model so that changesets hg-fast-export already
|
||||
@@ -223,15 +306,37 @@ saw never get modified.
|
||||
Submitting Patches
|
||||
------------------
|
||||
|
||||
Please use the [issue-tracker](https://github.com/frej/fast-export) at
|
||||
github to report bugs and submit patches.
|
||||
Please create a pull request at
|
||||
[GitHub](https://github.com/frej/fast-export/pulls) to submit patches.
|
||||
|
||||
Please read
|
||||
[https://chris.beams.io/posts/git-commit/](https://chris.beams.io/posts/git-commit/)
|
||||
on how to write a good commit message before submitting a pull request
|
||||
for review. Although the article recommends at most 50 characters for
|
||||
the subject, up to 72 characters are frequently accepted for
|
||||
fast-export.
|
||||
When submitting a patch make sure the commits in your pull request:
|
||||
|
||||
* Have good commit messages
|
||||
|
||||
Please read Chris Beams' blog post [How to Write a Git Commit
|
||||
Message](https://chris.beams.io/posts/git-commit/) on how to write a
|
||||
good commit message. Although the article recommends at most 50
|
||||
characters for the subject, up to 72 characters are frequently
|
||||
accepted for fast-export.
|
||||
|
||||
* Adhere to good [commit
|
||||
hygiene](http://www.ericbmerritt.com/2011/09/21/commit-hygiene-and-git.html)
|
||||
|
||||
When developing a pull request for hg-fast-export, base your work on
|
||||
the current `master` branch and rebase your work if it no longer can
|
||||
be merged into the current `master` without conflicts. Never merge
|
||||
`master` into your development branch, rebase if your work needs
|
||||
updates from `master`.
|
||||
|
||||
When a pull request is modified due to review feedback, please
|
||||
incorporate the changes into the proper commit. A good reference on
|
||||
how to modify history is in the [Pro Git book, Section
|
||||
7.6](https://git-scm.com/book/en/v2/Git-Tools-Rewriting-History).
|
||||
|
||||
Please do not submit a pull request if you are not willing to spend
|
||||
the time required to address review comments or revise the patch until
|
||||
it follows the guidelines above. A _take it or leave it_ approach to
|
||||
contributing wastes both your and the maintainer's time.
|
||||
|
||||
Frequent Problems
|
||||
=================
|
||||
@@ -274,3 +379,12 @@ Frequent Problems
|
||||
By design fast export does not touch your working directory, so to
|
||||
git it looks like you have deleted all files, when in fact they have
|
||||
never been checked out. Just do a checkout of the branch you want.
|
||||
|
||||
* `Error: repository has at least one unnamed head: hg r<N>`
|
||||
|
||||
By design, hg-fast-export cannot deal with extra heads on a branch.
|
||||
There are a few options depending on whether the extra heads are
|
||||
in-use/open or normally closed. See the [Notes/Limitations](#noteslimitations)
|
||||
section for more details.
|
||||
|
||||
[hg-export-tool]: https://github.com/chrisjbillington/hg-export-tool
|
||||
|
||||
@@ -1,28 +1,21 @@
|
||||
#!/usr/bin/env python2
|
||||
#!/usr/bin/env python3
|
||||
|
||||
# Copyright (c) 2007, 2008 Rocco Rutte <pdmef@gmx.net> and others.
|
||||
# Copyright (c) 2025 Siemens
|
||||
# License: MIT <http://www.opensource.org/licenses/mit-license.php>
|
||||
|
||||
from mercurial import node
|
||||
from mercurial.scmutil import revsymbol
|
||||
from hg2git import setup_repo,fixup_user,get_branch,get_changeset
|
||||
from hg2git import load_cache,save_cache,get_git_sha1,set_default_branch,set_origin_name
|
||||
from optparse import OptionParser
|
||||
import re
|
||||
import sys
|
||||
import os
|
||||
from binascii import hexlify
|
||||
import pluginloader
|
||||
|
||||
if sys.platform == "win32":
|
||||
# On Windows, sys.stdout is initially opened in text mode, which means that
|
||||
# when a LF (\n) character is written to sys.stdout, it will be converted
|
||||
# into CRLF (\r\n). That makes git blow up, so use this platform-specific
|
||||
# code to change the mode of sys.stdout to binary.
|
||||
import msvcrt
|
||||
msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
|
||||
from hgext.largefiles import lfutil
|
||||
|
||||
# silly regex to catch Signed-off-by lines in log message
|
||||
sob_re=re.compile('^Signed-[Oo]ff-[Bb]y: (.+)$')
|
||||
sob_re=re.compile(b'^Signed-[Oo]ff-[Bb]y: (.+)$')
|
||||
# insert 'checkpoint' command after this many commits or none at all if 0
|
||||
cfg_checkpoint_count=0
|
||||
# write some progress message every this many file contents written
|
||||
@@ -36,63 +29,43 @@ submodule_mappings=None
|
||||
auto_sanitize = None
|
||||
|
||||
def gitmode(flags):
|
||||
return 'l' in flags and '120000' or 'x' in flags and '100755' or '100644'
|
||||
return b'l' in flags and b'120000' or b'x' in flags and b'100755' or b'100644'
|
||||
|
||||
def wr_no_nl(msg=''):
|
||||
def wr_no_nl(msg=b''):
|
||||
assert isinstance(msg, bytes)
|
||||
if msg:
|
||||
sys.stdout.write(msg)
|
||||
sys.stdout.buffer.write(msg)
|
||||
|
||||
def wr(msg=''):
|
||||
wr_no_nl(msg)
|
||||
sys.stdout.write('\n')
|
||||
def wr(msg=b''):
|
||||
wr_no_nl(msg + b'\n')
|
||||
#map(lambda x: sys.stderr.write('\t[%s]\n' % x),msg.split('\n'))
|
||||
|
||||
def wr_data(data):
|
||||
wr(b'data %d' % (len(data)))
|
||||
wr(data)
|
||||
|
||||
def checkpoint(count):
|
||||
count=count+1
|
||||
if cfg_checkpoint_count>0 and count%cfg_checkpoint_count==0:
|
||||
sys.stderr.write("Checkpoint after %d commits\n" % count)
|
||||
wr('checkpoint')
|
||||
sys.stderr.buffer.write(b"Checkpoint after %d commits\n" % count)
|
||||
wr(b'checkpoint')
|
||||
wr()
|
||||
return count
|
||||
|
||||
def revnum_to_revref(rev, old_marks):
|
||||
"""Convert an hg revnum to a git-fast-import rev reference (an SHA1
|
||||
or a mark)"""
|
||||
return old_marks.get(rev) or ':%d' % (rev+1)
|
||||
return old_marks.get(rev) or b':%d' % (rev+1)
|
||||
|
||||
def file_mismatch(f1,f2):
|
||||
"""See if two revisions of a file are not equal."""
|
||||
return node.hex(f1)!=node.hex(f2)
|
||||
|
||||
def split_dict(dleft,dright,l=[],c=[],r=[],match=file_mismatch):
|
||||
"""Loop over our repository and find all changed and missing files."""
|
||||
for left in dleft.keys():
|
||||
right=dright.get(left,None)
|
||||
if right==None:
|
||||
# we have the file but our parent hasn't: add to left set
|
||||
l.append(left)
|
||||
elif match(dleft[left],right) or gitmode(dleft.flags(left))!=gitmode(dright.flags(left)):
|
||||
# we have it but checksums mismatch: add to center set
|
||||
c.append(left)
|
||||
for right in dright.keys():
|
||||
left=dleft.get(right,None)
|
||||
if left==None:
|
||||
# if parent has file but we don't: add to right set
|
||||
r.append(right)
|
||||
# change is already handled when comparing child against parent
|
||||
return l,c,r
|
||||
|
||||
def get_filechanges(repo,revision,parents,mleft):
|
||||
def get_filechanges(repo,revision,parents,files):
|
||||
"""Given some repository and revision, find all changed/deleted files."""
|
||||
l,c,r=[],[],[]
|
||||
for p in parents:
|
||||
if p<0: continue
|
||||
mright=revsymbol(repo,str(p)).manifest()
|
||||
l,c,r=split_dict(mleft,mright,l,c,r)
|
||||
l.sort()
|
||||
c.sort()
|
||||
r.sort()
|
||||
return l,c,r
|
||||
if not parents:
|
||||
# first revision: feed in full manifest
|
||||
return files,[]
|
||||
else:
|
||||
# take the changes from the first parent
|
||||
f=repo.status(parents[0],revision)
|
||||
return f.modified+f.added,f.removed
|
||||
|
||||
def get_author(logmessage,committer,authors):
|
||||
"""As git distincts between author and committer of a patch, try to
|
||||
@@ -110,7 +83,7 @@ def get_author(logmessage,committer,authors):
|
||||
"Signed-off-by: foo" and thus matching our detection regex. Prevent
|
||||
that."""
|
||||
|
||||
loglines=logmessage.split('\n')
|
||||
loglines=logmessage.split(b'\n')
|
||||
i=len(loglines)
|
||||
# from tail walk to top skipping empty lines
|
||||
while i>=0:
|
||||
@@ -138,23 +111,23 @@ def remove_gitmodules(ctx):
|
||||
# be to only remove the submodules of the first parent.
|
||||
for parent_ctx in ctx.parents():
|
||||
for submodule in parent_ctx.substate.keys():
|
||||
wr('D %s' % submodule)
|
||||
wr('D .gitmodules')
|
||||
wr(b'D %s' % submodule)
|
||||
wr(b'D .gitmodules')
|
||||
|
||||
def refresh_git_submodule(name,subrepo_info):
|
||||
wr('M 160000 %s %s' % (subrepo_info[1],name))
|
||||
sys.stderr.write("Adding/updating submodule %s, revision %s\n"
|
||||
% (name,subrepo_info[1]))
|
||||
return '[submodule "%s"]\n\tpath = %s\n\turl = %s\n' % (name,name,
|
||||
subrepo_info[0])
|
||||
wr(b'M 160000 %s %s' % (subrepo_info[1],name))
|
||||
sys.stderr.buffer.write(
|
||||
b"Adding/updating submodule %s, revision %s\n" % (name, subrepo_info[1])
|
||||
)
|
||||
return b'[submodule "%s"]\n\tpath = %s\n\turl = %s\n' % (name, name, subrepo_info[0])
|
||||
|
||||
def refresh_hg_submodule(name,subrepo_info):
|
||||
gitRepoLocation=submodule_mappings[name] + "/.git"
|
||||
gitRepoLocation=submodule_mappings[name] + b"/.git"
|
||||
|
||||
# Populate the cache to map mercurial revision to git revision
|
||||
if not name in subrepo_cache:
|
||||
subrepo_cache[name]=(load_cache(gitRepoLocation+"/hg2git-mapping"),
|
||||
load_cache(gitRepoLocation+"/hg2git-marks",
|
||||
subrepo_cache[name]=(load_cache(gitRepoLocation+b"/hg2git-mapping"),
|
||||
load_cache(gitRepoLocation+b"/hg2git-marks",
|
||||
lambda s: int(s)-1))
|
||||
|
||||
(mapping_cache,marks_cache)=subrepo_cache[name]
|
||||
@@ -162,71 +135,110 @@ def refresh_hg_submodule(name,subrepo_info):
|
||||
if subrepo_hash in mapping_cache:
|
||||
revnum=mapping_cache[subrepo_hash]
|
||||
gitSha=marks_cache[int(revnum)]
|
||||
wr('M 160000 %s %s' % (gitSha,name))
|
||||
sys.stderr.write("Adding/updating submodule %s, revision %s->%s\n"
|
||||
% (name,subrepo_hash,gitSha))
|
||||
return '[submodule "%s"]\n\tpath = %s\n\turl = %s\n' % (name,name,
|
||||
wr(b'M 160000 %s %s' % (gitSha,name))
|
||||
sys.stderr.buffer.write(
|
||||
b"Adding/updating submodule %s, revision %s->%s\n"
|
||||
% (name, subrepo_hash, gitSha)
|
||||
)
|
||||
return b'[submodule "%s"]\n\tpath = %s\n\turl = %s\n' % (name,name,
|
||||
submodule_mappings[name])
|
||||
else:
|
||||
sys.stderr.write("Warning: Could not find hg revision %s for %s in git %s\n" %
|
||||
(subrepo_hash,name,gitRepoLocation))
|
||||
return ''
|
||||
sys.stderr.buffer.write(
|
||||
b"Warning: Could not find hg revision %s for %s in git %s\n"
|
||||
% (subrepo_hash, name, gitRepoLocation,)
|
||||
)
|
||||
return b''
|
||||
|
||||
def refresh_gitmodules(ctx):
|
||||
"""Updates list of ctx submodules according to .hgsubstate file"""
|
||||
remove_gitmodules(ctx)
|
||||
gitmodules=""
|
||||
gitmodules=b""
|
||||
# Create the .gitmodules file and all submodules
|
||||
for name,subrepo_info in ctx.substate.items():
|
||||
if subrepo_info[2]=='git':
|
||||
if subrepo_info[2]==b'git':
|
||||
gitmodules+=refresh_git_submodule(name,subrepo_info)
|
||||
elif submodule_mappings and name in submodule_mappings:
|
||||
gitmodules+=refresh_hg_submodule(name,subrepo_info)
|
||||
|
||||
if len(gitmodules):
|
||||
wr('M 100644 inline .gitmodules')
|
||||
wr('data %d' % (len(gitmodules)+1))
|
||||
wr(gitmodules)
|
||||
wr(b'M 100644 inline .gitmodules')
|
||||
wr_data(gitmodules)
|
||||
|
||||
def is_largefile(filename):
|
||||
return filename[:6] == b'.hglf/'
|
||||
|
||||
def largefile_orig_name(filename):
|
||||
return filename[6:]
|
||||
|
||||
def largefile_data(ctx, file, filename):
|
||||
lf_file_ctx=ctx.filectx(file)
|
||||
lf_hash=lf_file_ctx.data().strip(b'\n')
|
||||
sys.stderr.write("Detected large file hash %s\n" % lf_hash.decode())
|
||||
#should detect where the large files are located
|
||||
file_with_data = lfutil.findfile(ctx.repo(), lf_hash)
|
||||
if file_with_data is None:
|
||||
# Autodownloading from the mercurial repository would be an issue as there
|
||||
# is a good chance that we may need to input some username and password.
|
||||
# This will surely break fast-export as there will be some unexpected
|
||||
# output.
|
||||
sys.stderr.write("Large file wasn't found in local cache.\n")
|
||||
sys.stderr.write("Please clone with --all-largefiles\n")
|
||||
sys.stderr.write("or pull all large files with 'hg lfpull --rev "
|
||||
"\"all()\"'\n")
|
||||
# closing in the middle of import will revert everything to the last checkpoint
|
||||
sys.exit(3)
|
||||
with open(os.path.normpath(file_with_data), 'rb') as file_with_data_handle:
|
||||
return file_with_data_handle.read()
|
||||
|
||||
def export_file_contents(ctx,manifest,files,hgtags,encoding='',plugins={}):
|
||||
count=0
|
||||
max=len(files)
|
||||
is_submodules_refreshed=False
|
||||
for file in files:
|
||||
if not is_submodules_refreshed and (file=='.hgsub' or file=='.hgsubstate'):
|
||||
if not is_submodules_refreshed and (file==b'.hgsub' or file==b'.hgsubstate'):
|
||||
is_submodules_refreshed=True
|
||||
refresh_gitmodules(ctx)
|
||||
# Skip .hgtags files. They only get us in trouble.
|
||||
if not hgtags and file == ".hgtags":
|
||||
sys.stderr.write('Skip %s\n' % (file))
|
||||
if not hgtags and file == b".hgtags":
|
||||
sys.stderr.buffer.write(b'Skip %s\n' % file)
|
||||
continue
|
||||
if encoding:
|
||||
filename=file.decode(encoding).encode('utf8')
|
||||
else:
|
||||
filename=file
|
||||
if '.git' in filename.split(os.path.sep):
|
||||
sys.stderr.write('Ignoring file %s which cannot be tracked by git\n' % filename)
|
||||
if b'.git' in filename.split(b'/'): # Even on Windows, the path separator is / here.
|
||||
sys.stderr.buffer.write(
|
||||
b'Ignoring file %s which cannot be tracked by git\n' % filename
|
||||
)
|
||||
continue
|
||||
|
||||
largefile = False
|
||||
file_ctx=ctx.filectx(file)
|
||||
d=file_ctx.data()
|
||||
if is_largefile(filename):
|
||||
largefile = True
|
||||
filename = largefile_orig_name(filename)
|
||||
d = largefile_data(ctx, file, filename)
|
||||
else:
|
||||
d=file_ctx.data()
|
||||
|
||||
if plugins and plugins['file_data_filters']:
|
||||
file_data = {'filename':filename,'file_ctx':file_ctx,'data':d}
|
||||
file_data = {'filename':filename,'file_ctx':file_ctx,'data':d, 'is_largefile':largefile}
|
||||
for filter in plugins['file_data_filters']:
|
||||
filter(file_data)
|
||||
d=file_data['data']
|
||||
filename=file_data['filename']
|
||||
file_ctx=file_data['file_ctx']
|
||||
|
||||
wr('M %s inline %s' % (gitmode(manifest.flags(file)),
|
||||
strip_leading_slash(filename)))
|
||||
wr('data %d' % len(d)) # had some trouble with size()
|
||||
wr(d)
|
||||
count+=1
|
||||
if count%cfg_export_boundary==0:
|
||||
sys.stderr.write('Exported %d/%d files\n' % (count,max))
|
||||
if d is not None:
|
||||
wr(b'M %s inline %s' % (gitmode(manifest.flags(file)),
|
||||
strip_leading_slash(filename)))
|
||||
wr(b'data %d' % len(d)) # had some trouble with size()
|
||||
wr(d)
|
||||
count+=1
|
||||
if count%cfg_export_boundary==0:
|
||||
sys.stderr.buffer.write(b'Exported %d/%d files\n' % (count,max))
|
||||
if max>cfg_export_boundary:
|
||||
sys.stderr.write('Exported %d/%d files\n' % (count,max))
|
||||
sys.stderr.buffer.write(b'Exported %d/%d files\n' % (count,max))
|
||||
|
||||
def sanitize_name(name,what="branch", mapping={}):
|
||||
"""Sanitize input roughly according to git-check-ref-format(1)"""
|
||||
@@ -246,164 +258,172 @@ def sanitize_name(name,what="branch", mapping={}):
|
||||
|
||||
def dot(name):
|
||||
if not name: return name
|
||||
if name[0] == '.': return '_'+name[1:]
|
||||
if name[0:1] == b'.': return b'_'+name[1:]
|
||||
return name
|
||||
|
||||
if not auto_sanitize:
|
||||
return mapping.get(name,name)
|
||||
n=mapping.get(name,name)
|
||||
p=re.compile('([[ ~^:?\\\\*]|\.\.)')
|
||||
n=p.sub('_', n)
|
||||
if n[-1] in ('/', '.'): n=n[:-1]+'_'
|
||||
n='/'.join(map(dot,n.split('/')))
|
||||
p=re.compile('_+')
|
||||
n=p.sub('_', n)
|
||||
p=re.compile(b'([\\[ ~^:?\\\\*]|\\.\\.)')
|
||||
n=p.sub(b'_', n)
|
||||
if n[-1:] in (b'/', b'.'): n=n[:-1]+b'_'
|
||||
n=b'/'.join([dot(s) for s in n.split(b'/')])
|
||||
p=re.compile(b'_+')
|
||||
n=p.sub(b'_', n)
|
||||
|
||||
if n!=name:
|
||||
sys.stderr.write('Warning: sanitized %s [%s] to [%s]\n' % (what,name,n))
|
||||
sys.stderr.buffer.write(
|
||||
b'Warning: sanitized %s [%s] to [%s]\n' % (what.encode(), name, n)
|
||||
)
|
||||
return n
|
||||
|
||||
def strip_leading_slash(filename):
|
||||
if filename[0] == '/':
|
||||
if filename[0:1] == b'/':
|
||||
return filename[1:]
|
||||
return filename
|
||||
|
||||
def export_commit(ui,repo,revision,old_marks,max,count,authors,
|
||||
branchesmap,sob,brmap,hgtags,encoding='',fn_encoding='',
|
||||
plugins={}):
|
||||
first_commit_hash="",plugins={}):
|
||||
def get_branchname(name):
|
||||
if brmap.has_key(name):
|
||||
if name in brmap:
|
||||
return brmap[name]
|
||||
n=sanitize_name(name, "branch", branchesmap)
|
||||
brmap[name]=n
|
||||
return n
|
||||
|
||||
(revnode,_,user,(time,timezone),files,desc,branch,_)=get_changeset(ui,repo,revision,authors,encoding)
|
||||
if repo[revnode].hidden():
|
||||
ctx=repo[revision]
|
||||
|
||||
if ctx.hidden():
|
||||
return count
|
||||
|
||||
(_,user,(time,timezone),files,desc,branch,extra)=get_changeset(ui,repo,revision,authors,encoding)
|
||||
|
||||
branch=get_branchname(branch)
|
||||
|
||||
parents = [p for p in repo.changelog.parentrevs(revision) if p >= 0]
|
||||
author = get_author(desc,user,authors)
|
||||
hg_hash=ctx.hex()
|
||||
|
||||
if plugins and plugins['commit_message_filters']:
|
||||
commit_data = {'branch': branch, 'parents': parents, 'author': author, 'desc': desc}
|
||||
commit_data = {'branch': branch, 'parents': parents,
|
||||
'author': author, 'desc': desc,
|
||||
'revision': revision, 'hg_hash': hg_hash,
|
||||
'committer': user, 'extra': extra}
|
||||
for filter in plugins['commit_message_filters']:
|
||||
filter(commit_data)
|
||||
branch = commit_data['branch']
|
||||
parents = commit_data['parents']
|
||||
author = commit_data['author']
|
||||
user = commit_data['committer']
|
||||
desc = commit_data['desc']
|
||||
|
||||
if len(parents)==0 and revision != 0:
|
||||
wr('reset refs/heads/%s' % branch)
|
||||
wr(b'reset refs/heads/%s' % branch)
|
||||
|
||||
wr('commit refs/heads/%s' % branch)
|
||||
wr('mark :%d' % (revision+1))
|
||||
wr(b'commit refs/heads/%s' % branch)
|
||||
wr(b'mark :%d' % (revision+1))
|
||||
if sob:
|
||||
wr('author %s %d %s' % (author,time,timezone))
|
||||
wr('committer %s %d %s' % (user,time,timezone))
|
||||
wr('data %d' % (len(desc)+1)) # wtf?
|
||||
wr(desc)
|
||||
wr()
|
||||
wr(b'author %s %d %s' % (author,time,timezone))
|
||||
wr(b'committer %s %d %s' % (user,time,timezone))
|
||||
wr_data(desc + b'\n')
|
||||
|
||||
ctx=revsymbol(repo,str(revision))
|
||||
man=ctx.manifest()
|
||||
added,changed,removed,type=[],[],[],''
|
||||
|
||||
if len(parents) == 0:
|
||||
# first revision: feed in full manifest
|
||||
added=man.keys()
|
||||
added.sort()
|
||||
if not parents:
|
||||
type='full'
|
||||
if revision == 0 and first_commit_hash:
|
||||
wr(b'from %s' % first_commit_hash.encode())
|
||||
type='simple delta'
|
||||
else:
|
||||
wr('from %s' % revnum_to_revref(parents[0], old_marks))
|
||||
wr(b'from %s' % revnum_to_revref(parents[0], old_marks))
|
||||
if len(parents) == 1:
|
||||
# later non-merge revision: feed in changed manifest
|
||||
# if we have exactly one parent, just take the changes from the
|
||||
# manifest without expensively comparing checksums
|
||||
f=repo.status(parents[0],revnode)
|
||||
added,changed,removed=f.added,f.modified,f.removed
|
||||
type='simple delta'
|
||||
else: # a merge with two parents
|
||||
wr('merge %s' % revnum_to_revref(parents[1], old_marks))
|
||||
# later merge revision: feed in changed manifest
|
||||
# for many files comparing checksums is expensive so only do it for
|
||||
# merges where we really need it due to hg's revlog logic
|
||||
added,changed,removed=get_filechanges(repo,revision,parents,man)
|
||||
wr(b'merge %s' % revnum_to_revref(parents[1], old_marks))
|
||||
type='thorough delta'
|
||||
|
||||
sys.stderr.write('%s: Exporting %s revision %d/%d with %d/%d/%d added/changed/removed files\n' %
|
||||
(branch,type,revision+1,max,len(added),len(changed),len(removed)))
|
||||
modified,removed=get_filechanges(repo,revision,parents,files)
|
||||
|
||||
for filename in removed:
|
||||
sys.stderr.buffer.write(
|
||||
b'%s: Exporting %s revision %d/%d with %d/%d modified/removed files\n'
|
||||
% (branch, type.encode(), revision + 1, max, len(modified), len(removed))
|
||||
)
|
||||
|
||||
for file in removed:
|
||||
if fn_encoding:
|
||||
filename=filename.decode(fn_encoding).encode('utf8')
|
||||
filename=strip_leading_slash(filename)
|
||||
if filename=='.hgsub':
|
||||
remove_gitmodules(ctx)
|
||||
wr('D %s' % filename)
|
||||
filename=file.decode(fn_encoding).encode('utf8')
|
||||
else:
|
||||
filename=file
|
||||
|
||||
export_file_contents(ctx,man,added,hgtags,fn_encoding,plugins)
|
||||
export_file_contents(ctx,man,changed,hgtags,fn_encoding,plugins)
|
||||
if plugins and plugins['file_data_filters']:
|
||||
file_data = {'filename':filename, 'file_ctx':None, 'data':None}
|
||||
for filter in plugins['file_data_filters']:
|
||||
filter(file_data)
|
||||
filename=file_data['filename']
|
||||
|
||||
filename=strip_leading_slash(filename)
|
||||
if filename==b'.hgsub':
|
||||
remove_gitmodules(ctx)
|
||||
if is_largefile(filename):
|
||||
filename=largefile_orig_name(filename)
|
||||
wr(b'D %s' % filename)
|
||||
|
||||
export_file_contents(ctx,man,modified,hgtags,fn_encoding,plugins)
|
||||
wr()
|
||||
|
||||
return checkpoint(count)
|
||||
|
||||
def export_note(ui,repo,revision,count,authors,encoding,is_first):
|
||||
(revnode,_,user,(time,timezone),_,_,_,_)=get_changeset(ui,repo,revision,authors,encoding)
|
||||
if repo[revnode].hidden():
|
||||
ctx = repo[revision]
|
||||
|
||||
if ctx.hidden():
|
||||
return count
|
||||
|
||||
parents = [p for p in repo.changelog.parentrevs(revision) if p >= 0]
|
||||
(_,user,(time,timezone),_,_,_,_)=get_changeset(ui,repo,revision,authors,encoding)
|
||||
|
||||
wr('commit refs/notes/hg')
|
||||
wr('committer %s %d %s' % (user,time,timezone))
|
||||
wr('data 0')
|
||||
wr(b'commit refs/notes/hg')
|
||||
wr(b'committer %s %d %s' % (user,time,timezone))
|
||||
wr(b'data 0')
|
||||
if is_first:
|
||||
wr('from refs/notes/hg^0')
|
||||
wr('N inline :%d' % (revision+1))
|
||||
hg_hash=revsymbol(repo,str(revision)).hex()
|
||||
wr('data %d' % (len(hg_hash)))
|
||||
wr_no_nl(hg_hash)
|
||||
wr(b'from refs/notes/hg^0')
|
||||
wr(b'N inline :%d' % (revision+1))
|
||||
hg_hash=ctx.hex()
|
||||
wr_data(hg_hash)
|
||||
wr()
|
||||
return checkpoint(count)
|
||||
|
||||
wr('data %d' % (len(desc)+1)) # wtf?
|
||||
wr(desc)
|
||||
wr()
|
||||
|
||||
def export_tags(ui,repo,old_marks,mapping_cache,count,authors,tagsmap):
|
||||
l=repo.tagslist()
|
||||
for tag,node in l:
|
||||
# Remap the branch name
|
||||
tag=sanitize_name(tag,"tag",tagsmap)
|
||||
# ignore latest revision
|
||||
if tag=='tip': continue
|
||||
if tag==b'tip': continue
|
||||
# ignore tags to nodes that are missing (ie, 'in the future')
|
||||
if node.encode('hex_codec') not in mapping_cache:
|
||||
sys.stderr.write('Tag %s refers to unseen node %s\n' % (tag, node.encode('hex_codec')))
|
||||
if hexlify(node) not in mapping_cache:
|
||||
sys.stderr.buffer.write(b'Tag %s refers to unseen node %s\n' % (tag, hexlify(node)))
|
||||
continue
|
||||
|
||||
rev=int(mapping_cache[node.encode('hex_codec')])
|
||||
rev=int(mapping_cache[hexlify(node)])
|
||||
|
||||
ref=revnum_to_revref(rev, old_marks)
|
||||
if ref==None:
|
||||
sys.stderr.write('Failed to find reference for creating tag'
|
||||
' %s at r%d\n' % (tag,rev))
|
||||
sys.stderr.buffer.write(
|
||||
b'Failed to find reference for creating tag %s at r%d\n' % (tag, rev)
|
||||
)
|
||||
continue
|
||||
sys.stderr.write('Exporting tag [%s] at [hg r%d] [git %s]\n' % (tag,rev,ref))
|
||||
wr('reset refs/tags/%s' % tag)
|
||||
wr('from %s' % ref)
|
||||
sys.stderr.buffer.write(b'Exporting tag [%s] at [hg r%d] [git %s]\n' % (tag, rev, ref))
|
||||
wr(b'reset refs/tags/%s' % tag)
|
||||
wr(b'from %s' % ref)
|
||||
wr()
|
||||
count=checkpoint(count)
|
||||
return count
|
||||
|
||||
def load_mapping(name, filename, mapping_is_raw):
|
||||
raw_regexp=re.compile('^([^=]+)[ ]*=[ ]*(.+)$')
|
||||
string_regexp='"(((\\.)|(\\")|[^"])*)"'
|
||||
quoted_regexp=re.compile('^'+string_regexp+'[ ]*=[ ]*'+string_regexp+'$')
|
||||
raw_regexp=re.compile(b'^([^=]+)[ ]*=[ ]*(.+)$')
|
||||
string_regexp=b'"(((\\.)|(\\")|[^"])*)"'
|
||||
quoted_regexp=re.compile(b'^'+string_regexp+b'[ ]*=[ ]*'+string_regexp+b'$')
|
||||
|
||||
def parse_raw_line(line):
|
||||
m=raw_regexp.match(line)
|
||||
@@ -411,26 +431,40 @@ def load_mapping(name, filename, mapping_is_raw):
|
||||
return None
|
||||
return (m.group(1).strip(), m.group(2).strip())
|
||||
|
||||
def process_unicode_escape_sequences(s):
|
||||
# Replace unicode escape sequences in the otherwise UTF8-encoded bytestring s with
|
||||
# the UTF8-encoded characters they represent. We need to do an additional
|
||||
# .decode('utf8').encode('ascii', 'backslashreplace') to convert any non-ascii
|
||||
# characters into their escape sequences so that the subsequent
|
||||
# .decode('unicode-escape') succeeds:
|
||||
return (
|
||||
s.decode('utf8')
|
||||
.encode('ascii', 'backslashreplace')
|
||||
.decode('unicode-escape')
|
||||
.encode('utf8')
|
||||
)
|
||||
|
||||
def parse_quoted_line(line):
|
||||
m=quoted_regexp.match(line)
|
||||
if m==None:
|
||||
return None
|
||||
return (m.group(1).decode('string_escape'),
|
||||
m.group(5).decode('string_escape'))
|
||||
return
|
||||
|
||||
return (process_unicode_escape_sequences(m.group(1)),
|
||||
process_unicode_escape_sequences(m.group(5)))
|
||||
|
||||
cache={}
|
||||
if not os.path.exists(filename):
|
||||
sys.stderr.write('Could not open mapping file [%s]\n' % (filename))
|
||||
return cache
|
||||
f=open(filename,'r')
|
||||
f=open(filename,'rb')
|
||||
l=0
|
||||
a=0
|
||||
for line in f.readlines():
|
||||
l+=1
|
||||
line=line.strip()
|
||||
if l==1 and line[0]=='#' and line=='# quoted-escaped-strings':
|
||||
if l==1 and line[0:1]==b'#' and line==b'# quoted-escaped-strings':
|
||||
continue
|
||||
elif line=='' or line[0]=='#':
|
||||
elif line==b'' or line[0:1]==b'#':
|
||||
continue
|
||||
m=parse_raw_line(line) if mapping_is_raw else parse_quoted_line(line)
|
||||
if m==None:
|
||||
@@ -452,9 +486,11 @@ def branchtip(repo, heads):
|
||||
break
|
||||
return tip
|
||||
|
||||
def verify_heads(ui,repo,cache,force,branchesmap):
|
||||
def verify_heads(ui,repo,cache,force,ignore_unnamed_heads,branchesmap):
|
||||
branches={}
|
||||
for bn, heads in repo.branchmap().iteritems():
|
||||
|
||||
for bn in repo.branchmap():
|
||||
heads = repo.branchmap().branchheads(bn)
|
||||
branches[bn] = branchtip(repo, heads)
|
||||
l=[(-repo.changelog.rev(n), n, t) for t, n in branches.items()]
|
||||
l.sort()
|
||||
@@ -465,26 +501,38 @@ def verify_heads(ui,repo,cache,force,branchesmap):
|
||||
sanitized_name=sanitize_name(b,"branch",branchesmap)
|
||||
sha1=get_git_sha1(sanitized_name)
|
||||
c=cache.get(sanitized_name)
|
||||
if sha1!=c:
|
||||
sys.stderr.write('Error: Branch [%s] modified outside hg-fast-export:'
|
||||
'\n%s (repo) != %s (cache)\n' % (b,sha1,c))
|
||||
if not c and sha1:
|
||||
sys.stderr.buffer.write(
|
||||
b'Error: Branch [%s] already exists and was not created by hg-fast-export, '
|
||||
b'export would overwrite unrelated branch\n' % b)
|
||||
if not force: return False
|
||||
elif sha1!=c:
|
||||
sys.stderr.buffer.write(
|
||||
b'Error: Branch [%s] modified outside hg-fast-export:'
|
||||
b'\n%s (repo) != %s (cache)\n' % (b, b'<None>' if sha1 is None else sha1, c)
|
||||
)
|
||||
if not force: return False
|
||||
|
||||
# verify that branch has exactly one head
|
||||
t={}
|
||||
for h in repo.filtered('visible').heads():
|
||||
(_,_,_,_,_,_,branch,_)=get_changeset(ui,repo,h)
|
||||
unnamed_heads=False
|
||||
for h in repo.filtered(b'visible').heads():
|
||||
branch=get_branch(repo[h].branch())
|
||||
if t.get(branch,False):
|
||||
sys.stderr.write('Error: repository has at least one unnamed head: hg r%s\n' %
|
||||
repo.changelog.rev(h))
|
||||
if not force: return False
|
||||
sys.stderr.buffer.write(
|
||||
b'Error: repository has an unnamed head: hg r%d\n'
|
||||
% repo.changelog.rev(h)
|
||||
)
|
||||
unnamed_heads=True
|
||||
if not force and not ignore_unnamed_heads: return False
|
||||
t[branch]=True
|
||||
|
||||
if unnamed_heads and not force and not ignore_unnamed_heads: return False
|
||||
return True
|
||||
|
||||
def hg2git(repourl,m,marksfile,mappingfile,headsfile,tipfile,
|
||||
authors={},branchesmap={},tagsmap={},
|
||||
sob=False,force=False,hgtags=False,notes=False,encoding='',fn_encoding='',
|
||||
sob=False,force=False,ignore_unnamed_heads=False,hgtags=False,
|
||||
notes=False,encoding='',fn_encoding='',first_commit_hash='',
|
||||
plugins={}):
|
||||
def check_cache(filename, contents):
|
||||
if len(contents) == 0:
|
||||
@@ -500,12 +548,12 @@ def hg2git(repourl,m,marksfile,mappingfile,headsfile,tipfile,
|
||||
if len(state_cache) != 0:
|
||||
for (name, data) in [(marksfile, old_marks),
|
||||
(mappingfile, mapping_cache),
|
||||
(headsfile, state_cache)]:
|
||||
(headsfile, heads_cache)]:
|
||||
check_cache(name, data)
|
||||
|
||||
ui,repo=setup_repo(repourl)
|
||||
|
||||
if not verify_heads(ui,repo,heads_cache,force,branchesmap):
|
||||
if not verify_heads(ui,repo,heads_cache,force,ignore_unnamed_heads,branchesmap):
|
||||
return 1
|
||||
|
||||
try:
|
||||
@@ -513,26 +561,26 @@ def hg2git(repourl,m,marksfile,mappingfile,headsfile,tipfile,
|
||||
except AttributeError:
|
||||
tip=len(repo)
|
||||
|
||||
min=int(state_cache.get('tip',0))
|
||||
min=int(state_cache.get(b'tip',0))
|
||||
max=_max
|
||||
if _max<0 or max>tip:
|
||||
max=tip
|
||||
|
||||
for rev in range(0,max):
|
||||
(revnode,_,_,_,_,_,_,_)=get_changeset(ui,repo,rev,authors)
|
||||
if repo[revnode].hidden():
|
||||
continue
|
||||
mapping_cache[revnode.encode('hex_codec')] = str(rev)
|
||||
ctx=repo[rev]
|
||||
if ctx.hidden():
|
||||
continue
|
||||
mapping_cache[ctx.hex()] = b"%d" % rev
|
||||
|
||||
if submodule_mappings:
|
||||
# Make sure that all submodules are registered in the submodule-mappings file
|
||||
# Make sure that all mercurial submodules are registered in the submodule-mappings file
|
||||
for rev in range(0,max):
|
||||
ctx=revsymbol(repo,str(rev))
|
||||
ctx=repo[rev]
|
||||
if ctx.hidden():
|
||||
continue
|
||||
if ctx.substate:
|
||||
for key in ctx.substate:
|
||||
if key not in submodule_mappings:
|
||||
if ctx.substate[key][2]=='hg' and key not in submodule_mappings:
|
||||
sys.stderr.write("Error: %s not found in submodule-mappings\n" % (key))
|
||||
return 1
|
||||
|
||||
@@ -540,14 +588,14 @@ def hg2git(repourl,m,marksfile,mappingfile,headsfile,tipfile,
|
||||
brmap={}
|
||||
for rev in range(min,max):
|
||||
c=export_commit(ui,repo,rev,old_marks,max,c,authors,branchesmap,
|
||||
sob,brmap,hgtags,encoding,fn_encoding,
|
||||
sob,brmap,hgtags,encoding,fn_encoding,first_commit_hash,
|
||||
plugins)
|
||||
if notes:
|
||||
for rev in range(min,max):
|
||||
c=export_note(ui,repo,rev,c,authors, encoding, rev == min and min != 0)
|
||||
|
||||
state_cache['tip']=max
|
||||
state_cache['repo']=repourl
|
||||
state_cache[b'tip']=max
|
||||
state_cache[b'repo']=repourl
|
||||
save_cache(tipfile,state_cache)
|
||||
save_cache(mappingfile,mapping_cache)
|
||||
|
||||
@@ -591,7 +639,9 @@ if __name__=='__main__':
|
||||
parser.add_option("-T","--tags",dest="tagsfile",
|
||||
help="Read tags map from TAGSFILE")
|
||||
parser.add_option("-f","--force",action="store_true",dest="force",
|
||||
default=False,help="Ignore validation errors by force")
|
||||
default=False,help="Ignore validation errors by force, implies --ignore-unnamed-heads")
|
||||
parser.add_option("--ignore-unnamed-heads",action="store_true",dest="ignore_unnamed_heads",
|
||||
default=False,help="Ignore unnamed head errors")
|
||||
parser.add_option("-M","--default-branch",dest="default_branch",
|
||||
help="Set the default branch")
|
||||
parser.add_option("-o","--origin",dest="origin_name",
|
||||
@@ -612,6 +662,8 @@ if __name__=='__main__':
|
||||
help="Add a plugin with the given init string <name=init>")
|
||||
parser.add_option("--subrepo-map", type="string", dest="subrepo_map",
|
||||
help="Provide a mapping file between the subrepository name and the submodule name")
|
||||
parser.add_option("--first-commit-hash", type="string", dest="first_commit_hash",
|
||||
help="Allow importing into an existing git repository by specifying the hash of the first commit")
|
||||
|
||||
(options,args)=parser.parse_args()
|
||||
|
||||
@@ -687,6 +739,9 @@ if __name__=='__main__':
|
||||
sys.exit(hg2git(options.repourl,m,options.marksfile,options.mappingfile,
|
||||
options.headsfile, options.statusfile,
|
||||
authors=a,branchesmap=b,tagsmap=t,
|
||||
sob=options.sob,force=options.force,hgtags=options.hgtags,
|
||||
sob=options.sob,force=options.force,
|
||||
ignore_unnamed_heads=options.ignore_unnamed_heads,
|
||||
hgtags=options.hgtags,
|
||||
notes=options.notes,encoding=encoding,fn_encoding=fn_encoding,
|
||||
first_commit_hash=options.first_commit_hash,
|
||||
plugins=plugins_dict))
|
||||
|
||||
@@ -28,29 +28,32 @@ SFX_STATE="state"
|
||||
GFI_OPTS=""
|
||||
|
||||
if [ -z "${PYTHON}" ]; then
|
||||
# $PYTHON is not set, so we try to find a working python 2.7 to
|
||||
# use. PEP 394 tells us to use 'python2', otherwise try plain
|
||||
# 'python'.
|
||||
if command -v python2 > /dev/null; then
|
||||
PYTHON="python2"
|
||||
elif command -v python > /dev/null; then
|
||||
PYTHON="python"
|
||||
else
|
||||
echo "Could not find any python interpreter, please use the 'PYTHON'" \
|
||||
"environment variable to specify the interpreter to use."
|
||||
exit 1
|
||||
fi
|
||||
# $PYTHON is not set, so we try to find a working python with mercurial:
|
||||
for python_cmd in python3 python; do
|
||||
if command -v $python_cmd > /dev/null; then
|
||||
$python_cmd -c 'from mercurial.scmutil import revsymbol' 2> /dev/null
|
||||
if [ $? -eq 0 ]; then
|
||||
PYTHON=$python_cmd
|
||||
break
|
||||
fi
|
||||
fi
|
||||
done
|
||||
fi
|
||||
|
||||
# Check that the python specified by the user or autodetected above is
|
||||
# >= 2.7 and < 3.
|
||||
if ! ${PYTHON} -c 'import sys; v=sys.version_info; exit(0 if v.major == 2 and v.minor >= 7 else 1)' > /dev/null 2>&1 ; then
|
||||
echo "${PYTHON} is not a working python 2.7 interpreter, please use the" \
|
||||
"'PYTHON' environment variable to specify the interpreter to use."
|
||||
if [ -z "${PYTHON}" ]; then
|
||||
echo "Could not find a python interpreter with the mercurial module >= 4.6 available. " \
|
||||
"Please use the 'PYTHON' environment variable to specify the interpreter to use."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
USAGE="[--quiet] [-r <repo>] [--force] [-m <max>] [-s] [--hgtags] [-A <file>] [-B <file>] [-T <file>] [-M <name>] [-o <name>] [--hg-hash] [-e <encoding>]"
|
||||
"${PYTHON}" -c 'import sys; exit(sys.version_info.major==3 and sys.version_info.minor >= 7)'
|
||||
|
||||
if [ $? -eq 0 ]; then
|
||||
echo "Could not find an interpreter for a supported Python version (>= 3.7)" \
|
||||
"Please use the 'PYTHON' environment variable to specify the interpreter to use."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
USAGE="[--quiet] [-r <repo>] [--force] [--ignore-unnamed-heads] [-m <max>] [-s] [--hgtags] [-A <file>] [-B <file>] [-T <file>] [-M <name>] [-o <name>] [--hg-hash] [-e <encoding>]"
|
||||
LONG_USAGE="Import hg repository <repo> up to either tip or <max>
|
||||
If <repo> is omitted, use last hg repository as obtained from state file,
|
||||
GIT_DIR/$PFX-$SFX_STATE by default.
|
||||
@@ -84,6 +87,8 @@ Options:
|
||||
with <file-path> <hg-hash> <is-binary> as arguments
|
||||
--plugin <plugin=init> Add a plugin with the given init string (repeatable)
|
||||
--plugin-path <plugin-path> Add an additional plugin lookup path
|
||||
--first-commit-hash <git-commit-hash> Use the given git commit hash as the
|
||||
first commit's parent (for grafting)
|
||||
"
|
||||
case "$1" in
|
||||
-h|--help)
|
||||
@@ -91,6 +96,14 @@ case "$1" in
|
||||
echo ""
|
||||
echo "$LONG_USAGE"
|
||||
exit 0
|
||||
;;
|
||||
|
||||
--debug)
|
||||
echo -n "Using Python: "
|
||||
"${PYTHON}" --version
|
||||
echo -n "Using Mercurial: "
|
||||
hg --version
|
||||
exit 0
|
||||
esac
|
||||
|
||||
IS_BARE=$(git rev-parse --is-bare-repository) \
|
||||
|
||||
69
hg-reset.py
69
hg-reset.py
@@ -1,4 +1,4 @@
|
||||
#!/usr/bin/env python
|
||||
#!/usr/bin/env python3
|
||||
|
||||
# Copyright (c) 2007, 2008 Rocco Rutte <pdmef@gmx.net> and others.
|
||||
# License: GPLv2
|
||||
@@ -7,6 +7,7 @@ from mercurial import node
|
||||
from hg2git import setup_repo,load_cache,get_changeset,get_git_sha1
|
||||
from optparse import OptionParser
|
||||
import sys
|
||||
from binascii import hexlify
|
||||
|
||||
def heads(ui,repo,start=None,stop=None,max=None):
|
||||
# this is copied from mercurial/revlog.py and differs only in
|
||||
@@ -24,7 +25,7 @@ def heads(ui,repo,start=None,stop=None,max=None):
|
||||
heads = {startrev: 1}
|
||||
|
||||
parentrevs = repo.changelog.parentrevs
|
||||
for r in xrange(startrev + 1, max):
|
||||
for r in range(startrev + 1, max):
|
||||
for p in parentrevs(r):
|
||||
if p in reachable:
|
||||
if r not in stoprevs:
|
||||
@@ -33,7 +34,7 @@ def heads(ui,repo,start=None,stop=None,max=None):
|
||||
if p in heads and p not in stoprevs:
|
||||
del heads[p]
|
||||
|
||||
return [(repo.changelog.node(r),str(r)) for r in heads]
|
||||
return [(repo.changelog.node(r), b"%d" % r) for r in heads]
|
||||
|
||||
def get_branches(ui,repo,heads_cache,marks_cache,mapping_cache,max):
|
||||
h=heads(ui,repo,max=max)
|
||||
@@ -44,11 +45,11 @@ def get_branches(ui,repo,heads_cache,marks_cache,mapping_cache,max):
|
||||
_,_,user,(_,_),_,desc,branch,_=get_changeset(ui,repo,rev)
|
||||
del stale[branch]
|
||||
git_sha1=get_git_sha1(branch)
|
||||
cache_sha1=marks_cache.get(str(int(rev)+1))
|
||||
cache_sha1=marks_cache.get(b"%d" % (int(rev)+1))
|
||||
if git_sha1!=None and git_sha1==cache_sha1:
|
||||
unchanged.append([branch,cache_sha1,rev,desc.split('\n')[0],user])
|
||||
unchanged.append([branch,cache_sha1,rev,desc.split(b'\n')[0],user])
|
||||
else:
|
||||
changed.append([branch,cache_sha1,rev,desc.split('\n')[0],user])
|
||||
changed.append([branch,cache_sha1,rev,desc.split(b'\n')[0],user])
|
||||
changed.sort()
|
||||
unchanged.sort()
|
||||
return stale,changed,unchanged
|
||||
@@ -57,20 +58,20 @@ def get_tags(ui,repo,marks_cache,mapping_cache,max):
|
||||
l=repo.tagslist()
|
||||
good,bad=[],[]
|
||||
for tag,node in l:
|
||||
if tag=='tip': continue
|
||||
rev=int(mapping_cache[node.encode('hex_codec')])
|
||||
cache_sha1=marks_cache.get(str(int(rev)+1))
|
||||
if tag==b'tip': continue
|
||||
rev=int(mapping_cache[hexlify(node)])
|
||||
cache_sha1=marks_cache.get(b"%d" % (int(rev)+1))
|
||||
_,_,user,(_,_),_,desc,branch,_=get_changeset(ui,repo,rev)
|
||||
if int(rev)>int(max):
|
||||
bad.append([tag,branch,cache_sha1,rev,desc.split('\n')[0],user])
|
||||
bad.append([tag,branch,cache_sha1,rev,desc.split(b'\n')[0],user])
|
||||
else:
|
||||
good.append([tag,branch,cache_sha1,rev,desc.split('\n')[0],user])
|
||||
good.append([tag,branch,cache_sha1,rev,desc.split(b'\n')[0],user])
|
||||
good.sort()
|
||||
bad.sort()
|
||||
return good,bad
|
||||
|
||||
def mangle_mark(mark):
|
||||
return str(int(mark)-1)
|
||||
return b"%d" % (int(mark)-1)
|
||||
|
||||
if __name__=='__main__':
|
||||
def bail(parser,opt):
|
||||
@@ -107,7 +108,7 @@ if __name__=='__main__':
|
||||
state_cache=load_cache(options.statusfile)
|
||||
mapping_cache = load_cache(options.mappingfile)
|
||||
|
||||
l=int(state_cache.get('tip',options.revision))
|
||||
l=int(state_cache.get(b'tip',options.revision))
|
||||
if options.revision+1>l:
|
||||
sys.stderr.write('Revision is beyond last revision imported: %d>%d\n' % (options.revision,l))
|
||||
sys.exit(1)
|
||||
@@ -117,19 +118,39 @@ if __name__=='__main__':
|
||||
stale,changed,unchanged=get_branches(ui,repo,heads_cache,marks_cache,mapping_cache,options.revision+1)
|
||||
good,bad=get_tags(ui,repo,marks_cache,mapping_cache,options.revision+1)
|
||||
|
||||
print "Possibly stale branches:"
|
||||
map(lambda b: sys.stdout.write('\t%s\n' % b),stale.keys())
|
||||
print("Possibly stale branches:")
|
||||
for b in stale:
|
||||
sys.stdout.write('\t%s\n' % b.decode('utf8'))
|
||||
|
||||
print "Possibly stale tags:"
|
||||
map(lambda b: sys.stdout.write('\t%s on %s (r%s)\n' % (b[0],b[1],b[3])),bad)
|
||||
print("Possibly stale tags:")
|
||||
for b in bad:
|
||||
sys.stdout.write(
|
||||
'\t%s on %s (r%s)\n'
|
||||
% (b[0].decode('utf8'), b[1].decode('utf8'), b[3].decode('utf8'))
|
||||
)
|
||||
|
||||
print "Unchanged branches:"
|
||||
map(lambda b: sys.stdout.write('\t%s (r%s)\n' % (b[0],b[2])),unchanged)
|
||||
print("Unchanged branches:")
|
||||
for b in unchanged:
|
||||
sys.stdout.write('\t%s (r%s)\n' % (b[0].decode('utf8'),b[2].decode('utf8')))
|
||||
|
||||
print "Unchanged tags:"
|
||||
map(lambda b: sys.stdout.write('\t%s on %s (r%s)\n' % (b[0],b[1],b[3])),good)
|
||||
print("Unchanged tags:")
|
||||
for b in good:
|
||||
sys.stdout.write(
|
||||
'\t%s on %s (r%s)\n'
|
||||
% (b[0].decode('utf8'), b[1].decode('utf8'), b[3].decode('utf8'))
|
||||
)
|
||||
|
||||
print "Reset branches in '%s' to:" % options.headsfile
|
||||
map(lambda b: sys.stdout.write('\t:%s %s\n\t\t(r%s: %s: %s)\n' % (b[0],b[1],b[2],b[4],b[3])),changed)
|
||||
print("Reset branches in '%s' to:" % options.headsfile)
|
||||
for b in changed:
|
||||
sys.stdout.write(
|
||||
'\t:%s %s\n\t\t(r%s: %s: %s)\n'
|
||||
% (
|
||||
b[0].decode('utf8'),
|
||||
b[1].decode('utf8'),
|
||||
b[2].decode('utf8'),
|
||||
b[4].decode('utf8'),
|
||||
b[3].decode('utf8'),
|
||||
)
|
||||
)
|
||||
|
||||
print "Reset ':tip' in '%s' to '%d'" % (options.statusfile,options.revision)
|
||||
print("Reset ':tip' in '%s' to '%d'" % (options.statusfile,options.revision))
|
||||
|
||||
19
hg-reset.sh
19
hg-reset.sh
@@ -11,7 +11,24 @@ SFX_MAPPING="mapping"
|
||||
SFX_HEADS="heads"
|
||||
SFX_STATE="state"
|
||||
QUIET=""
|
||||
PYTHON=${PYTHON:-python}
|
||||
|
||||
if [ -z "${PYTHON}" ]; then
|
||||
# $PYTHON is not set, so we try to find a working python with mercurial:
|
||||
for python_cmd in python2 python python3; do
|
||||
if command -v $python_cmd > /dev/null; then
|
||||
$python_cmd -c 'import mercurial' 2> /dev/null
|
||||
if [ $? -eq 0 ]; then
|
||||
PYTHON=$python_cmd
|
||||
break
|
||||
fi
|
||||
fi
|
||||
done
|
||||
fi
|
||||
if [ -z "${PYTHON}" ]; then
|
||||
echo "Could not find a python interpreter with the mercurial module available. " \
|
||||
"Please use the 'PYTHON'environment variable to specify the interpreter to use."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
USAGE="[-r <repo>] -R <rev>"
|
||||
LONG_USAGE="Print SHA1s of latest changes per branch up to <rev> useful
|
||||
|
||||
85
hg2git.py
85
hg2git.py
@@ -1,11 +1,11 @@
|
||||
#!/usr/bin/env python2
|
||||
#!/usr/bin/env python3
|
||||
|
||||
# Copyright (c) 2007, 2008 Rocco Rutte <pdmef@gmx.net> and others.
|
||||
# License: MIT <http://www.opensource.org/licenses/mit-license.php>
|
||||
|
||||
from mercurial import hg,util,ui,templatefilters
|
||||
from mercurial import error as hgerror
|
||||
from mercurial.scmutil import revsymbol,binnode
|
||||
from mercurial.scmutil import binnode
|
||||
|
||||
import re
|
||||
import os
|
||||
@@ -13,47 +13,55 @@ import sys
|
||||
import subprocess
|
||||
|
||||
# default git branch name
|
||||
cfg_master='master'
|
||||
cfg_master=b'master'
|
||||
# default origin name
|
||||
origin_name=''
|
||||
origin_name=b''
|
||||
# silly regex to see if user field has email address
|
||||
user_re=re.compile('([^<]+) (<[^>]*>)$')
|
||||
user_re=re.compile(b'([^<]+) (<[^>]*>)$')
|
||||
# silly regex to clean out user names
|
||||
user_clean_re=re.compile('^["]([^"]+)["]$')
|
||||
user_clean_re=re.compile(b'^["]([^"]+)["]$')
|
||||
|
||||
def set_default_branch(name):
|
||||
global cfg_master
|
||||
cfg_master = name
|
||||
cfg_master = name.encode('utf8')
|
||||
|
||||
def set_origin_name(name):
|
||||
global origin_name
|
||||
origin_name = name
|
||||
origin_name = name.encode('utf8')
|
||||
|
||||
def setup_repo(url):
|
||||
try:
|
||||
# Mercurial >= 7.2 requires explicit initialization for largefile
|
||||
# support to work.
|
||||
from mercurial import initialization
|
||||
initialization.init()
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
try:
|
||||
myui=ui.ui(interactive=False)
|
||||
except TypeError:
|
||||
myui=ui.ui()
|
||||
myui.setconfig('ui', 'interactive', 'off')
|
||||
myui.setconfig(b'ui', b'interactive', b'off')
|
||||
# Avoids a warning when the repository has obsolete markers
|
||||
myui.setconfig('experimental', 'evolution.createmarkers', True)
|
||||
return myui,hg.repository(myui,url).unfiltered()
|
||||
myui.setconfig(b'experimental', b'evolution.createmarkers', True)
|
||||
return myui,hg.repository(myui, os.fsencode(url)).unfiltered()
|
||||
|
||||
def fixup_user(user,authors):
|
||||
user=user.strip("\"")
|
||||
user=user.strip(b"\"")
|
||||
if authors!=None:
|
||||
# if we have an authors table, try to get mapping
|
||||
# by defaulting to the current value of 'user'
|
||||
user=authors.get(user,user)
|
||||
name,mail,m='','',user_re.match(user)
|
||||
name,mail,m=b'',b'',user_re.match(user)
|
||||
if m==None:
|
||||
# if we don't have 'Name <mail>' syntax, extract name
|
||||
# and mail from hg helpers. this seems to work pretty well.
|
||||
# if email doesn't contain @, replace it with devnull@localhost
|
||||
name=templatefilters.person(user)
|
||||
mail='<%s>' % templatefilters.email(user)
|
||||
if '@' not in mail:
|
||||
mail = '<devnull@localhost>'
|
||||
mail=b'<%s>' % templatefilters.email(user)
|
||||
if b'@' not in mail:
|
||||
mail = b'<devnull@localhost>'
|
||||
else:
|
||||
# if we have 'Name <mail>' syntax, everything is fine :)
|
||||
name,mail=m.group(1),m.group(2)
|
||||
@@ -62,34 +70,25 @@ def fixup_user(user,authors):
|
||||
m2=user_clean_re.match(name)
|
||||
if m2!=None:
|
||||
name=m2.group(1)
|
||||
return '%s %s' % (name,mail)
|
||||
return b'%s %s' % (name,mail)
|
||||
|
||||
def get_branch(name):
|
||||
# 'HEAD' is the result of a bug in mutt's cvs->hg conversion,
|
||||
# other CVS imports may need it, too
|
||||
if name=='HEAD' or name=='default' or name=='':
|
||||
if name==b'HEAD' or name==b'default' or name==b'':
|
||||
name=cfg_master
|
||||
if origin_name:
|
||||
return origin_name + '/' + name
|
||||
return origin_name + b'/' + name
|
||||
return name
|
||||
|
||||
def get_changeset(ui,repo,revision,authors={},encoding=''):
|
||||
# Starting with Mercurial 4.6 lookup no longer accepts raw hashes
|
||||
# for lookups. Work around it by changing our behaviour depending on
|
||||
# how it fails
|
||||
try:
|
||||
node=repo.lookup(revision)
|
||||
except hgerror.ProgrammingError:
|
||||
node=binnode(revsymbol(repo,str(revision))) # We were given a numeric rev
|
||||
except hgerror.RepoLookupError:
|
||||
node=revision # We got a raw hash
|
||||
(manifest,user,(time,timezone),files,desc,extra)=repo.changelog.read(node)
|
||||
(manifest,user,(time,timezone),files,desc,extra)=repo.changelog.read(revision)
|
||||
if encoding:
|
||||
user=user.decode(encoding).encode('utf8')
|
||||
desc=desc.decode(encoding).encode('utf8')
|
||||
tz="%+03d%02d" % (-timezone / 3600, ((-timezone % 3600) / 60))
|
||||
branch=get_branch(extra.get('branch','master'))
|
||||
return (node,manifest,fixup_user(user,authors),(time,tz),files,desc,branch,extra)
|
||||
tz=b"%+03d%02d" % (-timezone // 3600, ((-timezone % 3600) // 60))
|
||||
branch=get_branch(extra.get(b'branch', b''))
|
||||
return (manifest,fixup_user(user,authors),(time,tz),files,desc,branch,extra)
|
||||
|
||||
def mangle_key(key):
|
||||
return key
|
||||
@@ -98,29 +97,35 @@ def load_cache(filename,get_key=mangle_key):
|
||||
cache={}
|
||||
if not os.path.exists(filename):
|
||||
return cache
|
||||
f=open(filename,'r')
|
||||
f=open(filename,'rb')
|
||||
l=0
|
||||
for line in f.readlines():
|
||||
l+=1
|
||||
fields=line.split(' ')
|
||||
if fields==None or not len(fields)==2 or fields[0][0]!=':':
|
||||
fields=line.split(b' ')
|
||||
if fields==None or not len(fields)==2 or fields[0][0:1]!=b':':
|
||||
sys.stderr.write('Invalid file format in [%s], line %d\n' % (filename,l))
|
||||
continue
|
||||
# put key:value in cache, key without ^:
|
||||
cache[get_key(fields[0][1:])]=fields[1].split('\n')[0]
|
||||
cache[get_key(fields[0][1:])]=fields[1].split(b'\n')[0]
|
||||
f.close()
|
||||
return cache
|
||||
|
||||
def save_cache(filename,cache):
|
||||
f=open(filename,'w+')
|
||||
map(lambda x: f.write(':%s %s\n' % (str(x),str(cache.get(x)))),cache.keys())
|
||||
f=open(filename,'wb')
|
||||
for key, value in cache.items():
|
||||
if not isinstance(key, bytes):
|
||||
key = str(key).encode('utf8')
|
||||
if not isinstance(value, bytes):
|
||||
value = str(value).encode('utf8')
|
||||
f.write(b':%s %s\n' % (key, value))
|
||||
f.close()
|
||||
|
||||
def get_git_sha1(name,type='heads'):
|
||||
try:
|
||||
# use git-rev-parse to support packed refs
|
||||
ref="refs/%s/%s" % (type,name)
|
||||
l=subprocess.check_output(["git", "rev-parse", "--verify", "--quiet", ref])
|
||||
ref="refs/%s/%s" % (type,name.decode('utf8'))
|
||||
l=subprocess.check_output(["git", "rev-parse", "--verify",
|
||||
"--quiet", ref.encode('utf8')])
|
||||
if l == None or len(l) == 0:
|
||||
return None
|
||||
return l[0:40]
|
||||
|
||||
@@ -1,19 +1,23 @@
|
||||
import os
|
||||
import imp
|
||||
import importlib.machinery
|
||||
import importlib.util
|
||||
PluginFolder = os.path.join(os.path.dirname(os.path.realpath(__file__)),"..","plugins")
|
||||
MainModule = "__init__"
|
||||
|
||||
def get_plugin(name, plugin_path):
|
||||
search_dirs = [PluginFolder]
|
||||
search_dirs = [PluginFolder, '.']
|
||||
if plugin_path:
|
||||
search_dirs = [plugin_path] + search_dirs
|
||||
for dir in search_dirs:
|
||||
location = os.path.join(dir, name)
|
||||
if not os.path.isdir(location) or not MainModule + ".py" in os.listdir(location):
|
||||
continue
|
||||
info = imp.find_module(MainModule, [location])
|
||||
return {"name": name, "info": info, "path": location}
|
||||
spec = importlib.machinery.PathFinder.find_spec(MainModule, [location])
|
||||
return {"name": name, "spec": spec, "path": location}
|
||||
raise Exception("Could not find plugin with name " + name)
|
||||
|
||||
def load_plugin(plugin):
|
||||
return imp.load_module(MainModule, *plugin["info"])
|
||||
spec = plugin["spec"]
|
||||
module = importlib.util.module_from_spec(spec)
|
||||
spec.loader.exec_module(module)
|
||||
return module
|
||||
|
||||
@@ -15,9 +15,11 @@ class Filter:
|
||||
raise ValueError("Unknown args: " + ','.join(args))
|
||||
|
||||
def commit_message_filter(self, commit_data):
|
||||
if not (self.skip_master and commit_data['branch'] == 'master'):
|
||||
if not (self.skip_master and commit_data['branch'] == b'master'):
|
||||
if self.start:
|
||||
sep = ': ' if self.sameline else '\n'
|
||||
sep = b': ' if self.sameline else b'\n'
|
||||
commit_data['desc'] = commit_data['branch'] + sep + commit_data['desc']
|
||||
if self.end:
|
||||
commit_data['desc'] = commit_data['desc'] + '\n' + commit_data['branch']
|
||||
commit_data['desc'] = (
|
||||
commit_data['desc'] + b'\n' + commit_data['branch']
|
||||
)
|
||||
|
||||
@@ -6,6 +6,8 @@ class Filter():
|
||||
pass
|
||||
|
||||
def file_data_filter(self,file_data):
|
||||
if file_data['file_ctx'] == None:
|
||||
return
|
||||
file_ctx = file_data['file_ctx']
|
||||
if not file_ctx.isbinary():
|
||||
file_data['data'] = file_data['data'].replace('\r\n', '\n')
|
||||
file_data['data'] = file_data['data'].replace(b'\r\n', b'\n')
|
||||
|
||||
12
plugins/drop/README.md
Normal file
12
plugins/drop/README.md
Normal file
@@ -0,0 +1,12 @@
|
||||
## Drop commits from output
|
||||
|
||||
To use the plugin, add the command line flag `--plugin drop=<spec>`.
|
||||
The flag can be given multiple times to drop more than one commit.
|
||||
|
||||
The `<spec>` value can be either
|
||||
|
||||
- a comma-separated list of hg hashes in the full form (40
|
||||
hexadecimal characters) to drop the corresponding changesets, or
|
||||
|
||||
- a regular expression pattern to drop all changesets with matching
|
||||
descriptions.
|
||||
61
plugins/drop/__init__.py
Normal file
61
plugins/drop/__init__.py
Normal file
@@ -0,0 +1,61 @@
|
||||
from __future__ import print_function
|
||||
|
||||
import sys, re
|
||||
|
||||
|
||||
def build_filter(args):
    """Create the drop filter selected by the plugin argument string.

    A comma-separated list of 40-character hexadecimal hashes yields a
    RevisionIdFilter; any other string is handed to DescriptionFilter as
    a description pattern.
    """
    is_hash_list = re.match(r'([A-Fa-f0-9]{40}(,|$))+$', args)
    if not is_hash_list:
        return DescriptionFilter(args)
    return RevisionIdFilter(args.split(','))
|
||||
|
||||
|
||||
def log(fmt, *args):
    """Write the %-formatted message plus a newline to stderr and flush it."""
    message = fmt % args
    sys.stderr.write(message + '\n')
    sys.stderr.flush()
|
||||
|
||||
|
||||
class FilterBase(object):
    """Shared machinery for filters that drop changesets.

    Subclasses choose the commits to drop by overriding
    should_drop_commit(); this class rewrites the parent links of the
    commits that pass through it.
    """

    def __init__(self):
        # Revision of each dropped commit -> the (already remapped)
        # parents that stand in for it.
        self.remapped_parents = {}

    def commit_message_filter(self, commit_data):
        """Remap commit_data['parents'] and detach the commit if it is dropped.

        Reads the 'revision' and 'parents' keys; writes 'parents' and,
        for dropped commits, 'branch'.
        """
        revision = commit_data['revision']

        # Replace every dropped parent by the parents recorded for it.
        new_parents = []
        for parent in commit_data['parents']:
            new_parents.extend(self.remapped_parents.get(parent, [parent]))
        commit_data['parents'] = new_parents

        if not self.should_drop_commit(commit_data):
            return

        log('Dropping revision %i.', revision)
        self.remapped_parents[revision] = new_parents

        # Head commits cannot be dropped because they have no
        # children, so detach them to a separate branch.
        commit_data['branch'] = b'dropped-hg-head'
        commit_data['parents'] = []

    def should_drop_commit(self, commit_data):
        """Return whether the commit is to be dropped; the base class drops none."""
        return False
|
||||
|
||||
|
||||
class RevisionIdFilter(FilterBase):
    """Drop the changesets whose hg hash appears in a given list."""

    def __init__(self, revision_hash_list):
        """Store the hashes to drop.

        revision_hash_list: iterable of hexadecimal hash strings; they
        must be pure ASCII (a non-ASCII entry raises UnicodeEncodeError).
        """
        super(RevisionIdFilter, self).__init__()
        # The argument check in build_filter() accepts upper-case hex
        # digits, so fold the hashes to lower case before storing them.
        # NOTE(review): assumes commit_data['hg_hash'] is lower-case hex
        # bytes - confirm against the exporter.
        self.unwanted_hg_hashes = {h.lower().encode('ascii', 'strict')
                                   for h in revision_hash_list}

    def should_drop_commit(self, commit_data):
        """Return True when the commit's 'hg_hash' is one of the unwanted hashes."""
        return commit_data['hg_hash'] in self.unwanted_hg_hashes
|
||||
|
||||
|
||||
class DescriptionFilter(FilterBase):
    """Drop the changesets whose description matches a regular expression."""

    def __init__(self, pattern):
        """Compile the pattern for matching against bytes descriptions.

        pattern: regular expression given as text. It is encoded as UTF-8
        so that patterns containing non-ASCII characters no longer raise
        UnicodeEncodeError.
        """
        super(DescriptionFilter, self).__init__()
        self.pattern = re.compile(pattern.encode('utf8'))

    def should_drop_commit(self, commit_data):
        """Return True when the pattern matches at the start of 'desc'."""
        # re.match anchors at the beginning of the description only.
        return self.pattern.match(commit_data['desc']) is not None
|
||||
218
plugins/git_lfs_importer/README.md
Normal file
218
plugins/git_lfs_importer/README.md
Normal file
@@ -0,0 +1,218 @@
|
||||
# git_lfs_importer Plugin
|
||||
|
||||
This plugin automatically converts matching files to use Git LFS
|
||||
(Large File Storage) during the Mercurial to Git conversion process.
|
||||
|
||||
## Overview
|
||||
|
||||
The git_lfs_importer plugin intercepts file data during the hg-fast-export
|
||||
process and converts files matching specified patterns into Git LFS pointers.
|
||||
This allows you to seamlessly migrate a Mercurial repository to Git while
|
||||
simultaneously adopting LFS for large files.
|
||||
|
||||
Why use git_lfs_importer?
|
||||
For large repositories, traditional migration requires two sequential,
|
||||
long-running steps:
|
||||
|
||||
1. Full history conversion from Mercurial to Git.
|
||||
2. Full history rewrite using git lfs import.
|
||||
|
||||
This two-step process can take hours or even days for massive
|
||||
monorepos (e.g., 100GiB+).
|
||||
|
||||
This plugin eliminates the second, time-consuming history rewrite. It performs
|
||||
the LFS conversion incrementally (Just-In-Time). During the initial export, the
|
||||
plugin identifies large files and immediately writes LFS pointers into the Git
|
||||
history. This results in significantly faster conversions and allows for
|
||||
efficient incremental imports of new changesets.
|
||||
|
||||
## Prerequisites
|
||||
|
||||
### Dependencies
|
||||
|
||||
This plugin requires the `pathspec` package:
|
||||
|
||||
```bash
|
||||
pip install pathspec
|
||||
```
|
||||
|
||||
### Git Repository Setup
|
||||
|
||||
The destination Git repository must be pre-initialized with:
|
||||
|
||||
1. A `.gitattributes` file configured for LFS tracking
|
||||
2. Git LFS properly installed and initialized
|
||||
|
||||
Example `.gitattributes`:
|
||||
```
|
||||
*.bin filter=lfs diff=lfs merge=lfs -text
|
||||
*.iso filter=lfs diff=lfs merge=lfs -text
|
||||
large_files/** filter=lfs diff=lfs merge=lfs -text
|
||||
```
|
||||
|
||||
## Usage
|
||||
|
||||
### Step 1: Create the Destination Git Repository
|
||||
|
||||
```bash
|
||||
# Create a new git repository
|
||||
git init my-repo
|
||||
cd my-repo
|
||||
|
||||
# Initialize Git LFS
|
||||
git lfs install
|
||||
|
||||
# Create and commit a .gitattributes file
|
||||
cat > .gitattributes << EOF
|
||||
*.bin filter=lfs diff=lfs merge=lfs -text
|
||||
*.iso filter=lfs diff=lfs merge=lfs -text
|
||||
EOF
|
||||
git add .gitattributes
|
||||
git commit -m "Initialize Git LFS configuration"
|
||||
|
||||
# Get the commit hash (needed for --first-commit-hash)
|
||||
git rev-parse HEAD
|
||||
```
|
||||
|
||||
### Step 2: Create an LFS Specification File
|
||||
|
||||
Create a file (e.g., `lfs-spec.txt`) listing the patterns of files to convert
|
||||
to LFS. This uses gitignore-style glob patterns:
|
||||
|
||||
```
|
||||
*.bin
|
||||
*.iso
|
||||
*.tar.gz
|
||||
large_files/**
|
||||
*.mp4
|
||||
```
|
||||
|
||||
### Step 3: Run hg-fast-export with the Plugin
|
||||
|
||||
```bash
|
||||
hg-fast-export.sh \
|
||||
-r <mercurial-repo-path> \
|
||||
--plugin git_lfs_importer=lfs-spec.txt \
|
||||
--first-commit-hash <git-commit-hash> \
|
||||
--force
|
||||
```
|
||||
|
||||
Replace `<git-commit-hash>` with the hash obtained from Step 1.
|
||||
|
||||
## How It Works
|
||||
|
||||
1. **Pattern Matching**: Files are matched against patterns in the
|
||||
LFS specification file using gitignore-style matching
|
||||
2. **File Processing**: For each matching file:
|
||||
- Calculates SHA256 hash of the file content
|
||||
- Stores the actual file content in `.git/lfs/objects/<first 2 hash chars>/<next 2 hash chars>/<hash>`
|
||||
- Replaces the file data with an LFS pointer containing:
|
||||
- LFS version specification
|
||||
- SHA256 hash of the original content
|
||||
- Original file size
|
||||
3. **Git Fast-Import**: The LFS pointer is committed instead of the actual
|
||||
file content
|
||||
|
||||
## Important Notes
|
||||
|
||||
### First Commit Hash Requirement
|
||||
|
||||
The `--first-commit-hash` option must be provided with the Git commit hash that
|
||||
contains your `.gitattributes` file. This allows the plugin to chain from the
|
||||
existing Git history rather than creating a completely new history.
|
||||
|
||||
### Deletions
|
||||
|
||||
The plugin safely handles file deletions (data=None) and does not process them.
|
||||
|
||||
### Large Files and Largefiles
|
||||
|
||||
If the Mercurial repository uses Mercurial's largefiles extension, those files
|
||||
are already converted to their original content before reaching this plugin,
|
||||
allowing the plugin to apply LFS conversion if they match the patterns.
|
||||
|
||||
## Example Workflow
|
||||
|
||||
```bash
|
||||
# Configuration variables
|
||||
HG_REPO=/path/to/mercurial/repo
|
||||
GIT_DIR_NAME=my-project-git
|
||||
LFS_PATTERN_FILE=../lfs-patterns.txt
|
||||
|
||||
# 1. Prepare destination git repo
|
||||
mkdir "$GIT_DIR_NAME"
|
||||
cd "$GIT_DIR_NAME"
|
||||
git init
|
||||
git lfs install
|
||||
|
||||
# Create .gitattributes
|
||||
cat > .gitattributes << EOF
|
||||
*.bin filter=lfs diff=lfs merge=lfs -text
|
||||
*.iso filter=lfs diff=lfs merge=lfs -text
|
||||
EOF
|
||||
|
||||
git add .gitattributes
|
||||
git commit -m "Add LFS configuration"
|
||||
FIRST_HASH=$(git rev-parse HEAD)
|
||||
|
||||
# 2. Create LFS patterns file
|
||||
cat > "$LFS_PATTERN_FILE" << EOF
|
||||
*.bin
|
||||
*.iso
|
||||
build/artifacts/**
|
||||
EOF
|
||||
|
||||
# 3. Run conversion
|
||||
/path/to/hg-fast-export.sh \
|
||||
-r "$HG_REPO" \
|
||||
--plugin "git_lfs_importer=$LFS_PATTERN_FILE" \
|
||||
--first-commit-hash $FIRST_HASH \
|
||||
--force
|
||||
|
||||
# 4. Verify
|
||||
git log --oneline
|
||||
git lfs ls-files
|
||||
```
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### LFS Files Not Tracked
|
||||
Verify that:
|
||||
- The `.gitattributes` file exists in the destination repository
|
||||
- Patterns in `.gitattributes` match the files being converted
|
||||
- `git lfs install` was run in the repository
|
||||
|
||||
### "pathspec" Module Not Found
|
||||
Install the required dependency:
|
||||
```bash
|
||||
pip install pathspec
|
||||
```
|
||||
|
||||
### Conversion Fails at Import
|
||||
Ensure the `--first-commit-hash` value is:
|
||||
- A valid commit hash in the destination repository
|
||||
- From a commit that exists before the conversion starts
|
||||
- The hash of the commit containing `.gitattributes`
|
||||
|
||||
|
||||
### Force Requirement
|
||||
|
||||
You only need to pass the `--force` option when converting the *first*
|
||||
Mercurial commit into a non-empty Git repository. By default, `hg-fast-export`
|
||||
prevents importing Mercurial commits onto a non-empty Git repo to avoid
|
||||
creating conflicting histories. Passing `--force` overrides that safety check
|
||||
and allows the exporter to write the LFS pointer objects and integrate the
|
||||
converted data with the existing Git history.
|
||||
|
||||
If you are doing an incremental conversion (i.e., running the script a second
|
||||
time to import new changesets into an already converted repository),
|
||||
the `--force` flag is not required.
|
||||
|
||||
Omitting `--force` when attempting to import the first Mercurial commit into a
|
||||
non-empty repository will cause the importer to refuse the operation.
|
||||
|
||||
## See Also
|
||||
|
||||
- [Git LFS Documentation](https://git-lfs.github.com/)
|
||||
- [gitignore Pattern Format](https://git-scm.com/docs/gitignore)
|
||||
- [hg-fast-export Documentation](../README.md)
|
||||
49
plugins/git_lfs_importer/__init__.py
Normal file
49
plugins/git_lfs_importer/__init__.py
Normal file
@@ -0,0 +1,49 @@
|
||||
import pathlib
|
||||
import hashlib
|
||||
import pathspec
|
||||
|
||||
|
||||
def build_filter(args):
    """Build the LFS filter from a pattern file.

    args: path of a file whose lines are parsed as GitWildMatchPattern
    (gitignore-style) patterns.
    """
    # Read the patterns as UTF-8, the same encoding the filter uses to
    # decode filenames, instead of depending on the locale default.
    with open(args, encoding="utf-8") as f:
        lfs_spec = pathspec.PathSpec.from_lines(pathspec.patterns.GitWildMatchPattern, f)
    return Filter(lfs_spec)
|
||||
|
||||
|
||||
class Filter:
    """Replace the contents of matching files with Git LFS pointers."""

    def __init__(self, lfs_spec):
        # Object exposing match_file(path); build_filter() passes a
        # pathspec.PathSpec here.
        self.lfs_spec = lfs_spec

    def file_data_filter(self, file_data):
        """
        Store matching file contents as LFS objects and substitute a pointer.

        file_data: {
            'filename': <bytes, decoded as UTF-8 for pattern matching>,
            'file_ctx': <mercurial.filectx or None>,
            'data': <bytes or None>,
            'is_largefile': <bool>
        }

        May be called for deletions (data=None, file_ctx=None).
        Modifies file_data['data'] in place; returns None.
        """
        filename = file_data.get('filename')
        data = file_data.get('data')

        # Skip deletions and files that do not match the LFS patterns.
        if data is None or not self.lfs_spec.match_file(filename.decode("utf-8")):
            return

        # Objects live under a two-level fan-out of the SHA256 hash,
        # relative to the current working directory.
        sha256hash = hashlib.sha256(data).hexdigest()
        lfs_path = pathlib.Path(f".git/lfs/objects/{sha256hash[0:2]}/{sha256hash[2:4]}")
        lfs_path.mkdir(parents=True, exist_ok=True)
        lfs_file_path = lfs_path / sha256hash

        # Store the blob unless it is already present. Write to a
        # temporary name and rename it, so an interrupted run cannot leave
        # a truncated object that later runs would treat as complete.
        if not lfs_file_path.is_file():
            tmp_path = lfs_path / (sha256hash + ".tmp")
            tmp_path.write_bytes(data)
            tmp_path.replace(lfs_file_path)

        # Hand the LFS pointer on in place of the real content.
        file_data['data'] = (
            "version https://git-lfs.github.com/spec/v1\n"
            f"oid sha256:{sha256hash}\n"
            f"size {len(data)}\n"
        ).encode("utf-8")
|
||||
13
plugins/head2branch/README.md
Normal file
13
plugins/head2branch/README.md
Normal file
@@ -0,0 +1,13 @@
|
||||
## Convert Head to Branch
|
||||
|
||||
`fast-export` can only handle one head per branch. This plugin makes it possible
|
||||
to create a new branch from a head by specifying the new branch name and
|
||||
the first divergent commit for that head.
|
||||
|
||||
Note: the hg hash must be in the full form, 40 hexadecimal characters.
|
||||
|
||||
Note: you must run `fast-export` with `--ignore-unnamed-heads` option,
|
||||
otherwise, the conversion will fail.
|
||||
|
||||
To use the plugin, add the command line flag `--plugin head2branch=name,<hg_hash>`.
|
||||
The flag can be given multiple times to name more than one head.
|
||||
24
plugins/head2branch/__init__.py
Normal file
24
plugins/head2branch/__init__.py
Normal file
@@ -0,0 +1,24 @@
|
||||
import sys
|
||||
|
||||
def build_filter(args):
    """Create the head2branch filter from the raw plugin argument string."""
    head_filter = Filter(args)
    return head_filter
|
||||
|
||||
class Filter:
    """Move a head, from a given commit onwards, onto a named branch."""

    def __init__(self, args):
        """Parse the plugin argument.

        args: text of the form '<branch name>,<hg hash>'.

        Raises ValueError when the comma-separated hash is missing
        (previously a bare IndexError).
        """
        parts = args.split(',')
        if len(parts) < 2:
            raise ValueError(
                "head2branch expects '<branch name>,<hg hash>', got: " + args)
        # Non-ASCII characters in the branch name are replaced by '?'.
        self.branch_name = parts[0].encode('ascii', 'replace')
        self.starting_commit_hash = parts[1].encode('ascii', 'strict')
        # Revisions already moved to the branch; their children follow.
        self.branch_parents = set()

    def commit_message_filter(self, commit_data):
        """Retarget the starting commit and every commit with a retargeted parent.

        Reads 'hg_hash', 'revision' and 'parents'; overwrites 'branch'
        for affected commits and reports the change on stderr.
        """
        hg_hash = commit_data['hg_hash']
        rev = commit_data['revision']
        rev_parents = commit_data['parents']
        if (hg_hash == self.starting_commit_hash
                or any(rp in self.branch_parents for rp in rev_parents)):
            self.branch_parents.add(rev)
            commit_data['branch'] = self.branch_name
            sys.stderr.write('\nchanging r%s to branch %r\n' % (rev, self.branch_name))
            sys.stderr.flush()
|
||||
@@ -7,9 +7,11 @@ def build_filter(args):
|
||||
|
||||
class Filter:
|
||||
def __init__(self, args):
|
||||
if not isinstance(args, bytes):
|
||||
args = args.encode('utf8')
|
||||
self.prefix = args
|
||||
|
||||
def commit_message_filter(self, commit_data):
|
||||
for match in re.findall('#[1-9][0-9]+', commit_data['desc']):
|
||||
for match in re.findall(b'#[1-9][0-9]+', commit_data['desc']):
|
||||
commit_data['desc'] = commit_data['desc'].replace(
|
||||
match, '#%s%s' % (self.prefix, match[1:]))
|
||||
match, b'#%s%s' % (self.prefix, match[1:]))
|
||||
|
||||
@@ -4,13 +4,13 @@ def build_filter(args):
|
||||
class Filter:
|
||||
def __init__(self, args):
|
||||
if args == '':
|
||||
message = '<empty commit message>'
|
||||
message = b'<empty commit message>'
|
||||
else:
|
||||
message = args
|
||||
message = args.encode('utf8')
|
||||
self.message = message
|
||||
|
||||
def commit_message_filter(self,commit_data):
|
||||
# Only write the commit message if the recorded commit
|
||||
# message is null.
|
||||
if commit_data['desc'] == '\x00':
|
||||
if commit_data['desc'] == b'\x00':
|
||||
commit_data['desc'] = self.message
|
||||
|
||||
@@ -15,6 +15,8 @@ class Filter:
|
||||
d = file_data['data']
|
||||
file_ctx = file_data['file_ctx']
|
||||
filename = file_data['filename']
|
||||
if file_ctx == None:
|
||||
return
|
||||
filter_cmd = self.filter_contents + [filename, node.hex(file_ctx.filenode()), '1' if file_ctx.isbinary() else '0']
|
||||
try:
|
||||
filter_proc = subprocess.Popen(filter_cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
|
||||
|
||||
1
t/.gitignore
vendored
Normal file
1
t/.gitignore
vendored
Normal file
@@ -0,0 +1 @@
|
||||
/test-results/
|
||||
12
t/Makefile
Normal file
12
t/Makefile
Normal file
@@ -0,0 +1,12 @@
|
||||
T = $(wildcard *.t)
|
||||
|
||||
test: $(T)
|
||||
@$(MAKE) --silent clean
|
||||
|
||||
$(T): clean
|
||||
./$@ $(TEST_OPTS)
|
||||
|
||||
clean:
|
||||
@rm -fr test-results
|
||||
|
||||
.PHONY: test $(T) clean
|
||||
30
t/file_data_filter-removefiles.expected
Normal file
30
t/file_data_filter-removefiles.expected
Normal file
@@ -0,0 +1,30 @@
|
||||
blob
|
||||
mark :1
|
||||
data 7
|
||||
good_a
|
||||
|
||||
reset refs/heads/master
|
||||
commit refs/heads/master
|
||||
mark :2
|
||||
author Grevious Bodily Harmsworth <gbh@example.com> 1679014800 +0000
|
||||
committer Grevious Bodily Harmsworth <gbh@example.com> 1679014800 +0000
|
||||
data 3
|
||||
r0
|
||||
M 100644 :1 good_a.txt
|
||||
|
||||
commit refs/heads/master
|
||||
mark :3
|
||||
author Grevious Bodily Harmsworth <gbh@example.com> 1679018400 +0000
|
||||
committer Grevious Bodily Harmsworth <gbh@example.com> 1679018400 +0000
|
||||
data 3
|
||||
r1
|
||||
from :2
|
||||
|
||||
commit refs/heads/master
|
||||
mark :4
|
||||
author Grevious Bodily Harmsworth <gbh@example.com> 1679022000 +0000
|
||||
committer Grevious Bodily Harmsworth <gbh@example.com> 1679022000 +0000
|
||||
data 3
|
||||
r2
|
||||
from :3
|
||||
|
||||
91
t/file_data_filter-removefiles.t
Executable file
91
t/file_data_filter-removefiles.t
Executable file
@@ -0,0 +1,91 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# Copyright (c) 2023 Felipe Contreras
|
||||
# Copyright (c) 2023 Frej Drejhammar
|
||||
# Copyright (c) 2024 Stephan Hohe
|
||||
#
|
||||
# Check that files that file_data_filter sets to None are removed from repository
|
||||
#
|
||||
|
||||
test_description='Remove files from file_data_filter plugin test'
|
||||
|
||||
. "${SHARNESS_TEST_SRCDIR-$(dirname "$0")/sharness}"/sharness.sh || exit 1
|
||||
|
||||
check() {
|
||||
echo "$3" > expected &&
|
||||
git -C "$1" show -q --format='%s' "$2" > actual &&
|
||||
test_cmp expected actual
|
||||
}
|
||||
|
||||
git_create() {
|
||||
git init -q "$1" &&
|
||||
git -C "$1" config core.ignoreCase false
|
||||
}
|
||||
|
||||
git_convert() {
|
||||
(
|
||||
cd "$2" &&
|
||||
hg-fast-export.sh --repo "../$1" \
|
||||
-s --hgtags -n \
|
||||
--plugin ../../plugins/removefiles_test_plugin
|
||||
)
|
||||
}
|
||||
|
||||
setup() {
|
||||
cat > "$HOME"/.hgrc <<-EOF
|
||||
[ui]
|
||||
username = Grevious Bodily Harmsworth <gbh@example.com>
|
||||
EOF
|
||||
}
|
||||
|
||||
commit0() {
|
||||
(
|
||||
# Test initial revision with suppressed file
|
||||
cd hgrepo &&
|
||||
echo "good_a" > good_a.txt &&
|
||||
echo "bad_a" > bad_a.txt &&
|
||||
hg add good_a.txt bad_a.txt &&
|
||||
hg commit -d "2023-03-17 01:00Z" -m "r0"
|
||||
)
|
||||
}
|
||||
|
||||
commit1() {
|
||||
(
|
||||
# Test modifying suppressed file
|
||||
# Test adding suppressed file
|
||||
cd hgrepo &&
|
||||
echo "bad_a_modif" > bad_a.txt &&
|
||||
echo "bad_b" > bad_b.txt &&
|
||||
hg add bad_b.txt &&
|
||||
hg commit -d "2023-03-17 02:00Z" -m "r1"
|
||||
)
|
||||
}
|
||||
|
||||
commit2() {
|
||||
(
|
||||
# Test removing suppressed file
|
||||
cd hgrepo &&
|
||||
hg rm bad_a.txt &&
|
||||
hg commit -d "2023-03-17 03:00Z" -m "r2"
|
||||
)
|
||||
}
|
||||
|
||||
setup
|
||||
|
||||
test_expect_success 'all in one' '
|
||||
test_when_finished "rm -rf hgrepo gitrepo" &&
|
||||
|
||||
(
|
||||
hg init hgrepo &&
|
||||
commit0 &&
|
||||
commit1 &&
|
||||
commit2
|
||||
) &&
|
||||
git_create gitrepo &&
|
||||
git_convert hgrepo gitrepo &&
|
||||
git -C gitrepo fast-export --all > actual &&
|
||||
|
||||
test_cmp "$SHARNESS_TEST_DIRECTORY"/file_data_filter-removefiles.expected actual
|
||||
'
|
||||
|
||||
test_done
|
||||
29
t/file_data_filter.expected
Normal file
29
t/file_data_filter.expected
Normal file
@@ -0,0 +1,29 @@
|
||||
blob
|
||||
mark :1
|
||||
data 7
|
||||
a_file
|
||||
|
||||
blob
|
||||
mark :2
|
||||
data 17
|
||||
a_file_to_rename
|
||||
|
||||
reset refs/heads/master
|
||||
commit refs/heads/master
|
||||
mark :3
|
||||
author Grevious Bodily Harmsworth <gbh@example.com> 1679014800 +0000
|
||||
committer Grevious Bodily Harmsworth <gbh@example.com> 1679014800 +0000
|
||||
data 3
|
||||
r0
|
||||
M 100644 :1 a.txt
|
||||
M 100644 :2 c.txt
|
||||
|
||||
commit refs/heads/master
|
||||
mark :4
|
||||
author Grevious Bodily Harmsworth <gbh@example.com> 1679018400 +0000
|
||||
committer Grevious Bodily Harmsworth <gbh@example.com> 1679018400 +0000
|
||||
data 3
|
||||
r1
|
||||
from :3
|
||||
D c.txt
|
||||
|
||||
84
t/file_data_filter.t
Executable file
84
t/file_data_filter.t
Executable file
@@ -0,0 +1,84 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# Copyright (c) 2023 Felipe Contreras
|
||||
# Copyright (c) 2023 Frej Drejhammar
|
||||
#
|
||||
# Check that the file_data_filter is called for removed files.
|
||||
#
|
||||
|
||||
test_description='Smoke test'
|
||||
|
||||
. "${SHARNESS_TEST_SRCDIR-$(dirname "$0")/sharness}"/sharness.sh || exit 1
|
||||
|
||||
check() {
|
||||
echo "$3" > expected &&
|
||||
git -C "$1" show -q --format='%s' "$2" > actual &&
|
||||
test_cmp expected actual
|
||||
}
|
||||
|
||||
git_create() {
|
||||
git init -q "$1" &&
|
||||
git -C "$1" config core.ignoreCase false
|
||||
}
|
||||
|
||||
git_convert() {
|
||||
(
|
||||
cd "$2" &&
|
||||
hg-fast-export.sh --repo "../$1" \
|
||||
-s --hgtags -n \
|
||||
--plugin ../../plugins/rename_file_test_plugin \
|
||||
--plugin dos2unix \
|
||||
--plugin shell_filter_file_contents=../../plugins/id
|
||||
)
|
||||
}
|
||||
|
||||
setup() {
|
||||
cat > "$HOME"/.hgrc <<-EOF
|
||||
[ui]
|
||||
username = Grevious Bodily Harmsworth <gbh@example.com>
|
||||
EOF
|
||||
}
|
||||
|
||||
commit0() {
|
||||
(
|
||||
cd hgrepo &&
|
||||
echo "a_file" > a.txt &&
|
||||
echo "a_file_to_rename" > b.txt &&
|
||||
hg add a.txt b.txt &&
|
||||
hg commit -d "2023-03-17 01:00Z" -m "r0"
|
||||
)
|
||||
}
|
||||
|
||||
commit1() {
|
||||
(
|
||||
cd hgrepo &&
|
||||
hg remove b.txt &&
|
||||
hg commit -d "2023-03-17 02:00Z" -m "r1"
|
||||
)
|
||||
}
|
||||
make-branch() {
|
||||
hg branch "$1"
|
||||
FILE=$(echo "$1" | sha1sum | cut -d " " -f 1)
|
||||
echo "$1" > $FILE
|
||||
hg add $FILE
|
||||
hg commit -d "2023-03-17 $2:00Z" -m "Added file in branch $1"
|
||||
}
|
||||
|
||||
setup
|
||||
|
||||
test_expect_success 'all in one' '
|
||||
test_when_finished "rm -rf hgrepo gitrepo" &&
|
||||
|
||||
(
|
||||
hg init hgrepo &&
|
||||
commit0 &&
|
||||
commit1
|
||||
) &&
|
||||
git_create gitrepo &&
|
||||
git_convert hgrepo gitrepo &&
|
||||
git -C gitrepo fast-export --all > actual &&
|
||||
|
||||
test_cmp "$SHARNESS_TEST_DIRECTORY"/file_data_filter.expected actual
|
||||
'
|
||||
|
||||
test_done
|
||||
117
t/first_commit_hash_option.t
Executable file
117
t/first_commit_hash_option.t
Executable file
@@ -0,0 +1,117 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# Copyright (c) 2025
|
||||
#
|
||||
|
||||
test_description='git_lfs_importer plugin integration tests'
|
||||
|
||||
. "${SHARNESS_TEST_SRCDIR-$(dirname "$0")/sharness}"/sharness.sh || exit 1
|
||||
|
||||
setup() {
|
||||
cat > "$HOME"/.hgrc <<-EOF
|
||||
[ui]
|
||||
username = Test User <test@example.com>
|
||||
EOF
|
||||
|
||||
# Git config for the destination repo commits
|
||||
git config --global user.email "test@example.com"
|
||||
git config --global user.name "Test User"
|
||||
}
|
||||
|
||||
setup
|
||||
|
||||
test_expect_success 'Mercurial history is imported over the provided commit' '
|
||||
test_when_finished "rm -rf hgrepo gitrepo lfs-patterns.txt" &&
|
||||
|
||||
# 1. Create source Mercurial repository with binary files
|
||||
(
|
||||
hg init hgrepo &&
|
||||
cd hgrepo &&
|
||||
echo "regular text file" > readme.txt &&
|
||||
hg add readme.txt &&
|
||||
hg commit -m "initial commit"
|
||||
) &&
|
||||
|
||||
# 2. Prepare destination git repo with LFS setup
|
||||
mkdir gitrepo &&
|
||||
(
|
||||
cd gitrepo &&
|
||||
git init -q &&
|
||||
git config core.ignoreCase false &&
|
||||
git lfs install --local &&
|
||||
git switch --create master &&
|
||||
|
||||
cat > .gitattributes <<-EOF &&
|
||||
* -text
|
||||
EOF
|
||||
|
||||
git add .gitattributes &&
|
||||
git commit -q -m "Initialize Git configuration"
|
||||
) &&
|
||||
|
||||
FIRST_HASH=$(git -C gitrepo rev-parse HEAD) &&
|
||||
|
||||
# 3. Run hg-fast-export
|
||||
(
|
||||
cd gitrepo &&
|
||||
hg-fast-export.sh \
|
||||
-r "../hgrepo" \
|
||||
--first-commit-hash "$FIRST_HASH" --force \
|
||||
-M master
|
||||
) &&
|
||||
|
||||
# 4. Verify git file is still present
|
||||
git -C gitrepo show HEAD:.gitattributes > gitattributes_check.txt &&
|
||||
test "$(cat gitattributes_check.txt)" = "* -text" &&
|
||||
|
||||
# 5. Verify hg file is imported
|
||||
git -C gitrepo show HEAD:readme.txt > readme_check.txt &&
|
||||
test "$(cat readme_check.txt)" = "regular text file"
|
||||
'
|
||||
|
||||
test_expect_success 'Mercurial history has priority over git' '
|
||||
test_when_finished "rm -rf hgrepo gitrepo lfs-patterns.txt" &&
|
||||
|
||||
# 1. Create source Mercurial repository with binary files
|
||||
(
|
||||
hg init hgrepo &&
|
||||
cd hgrepo &&
|
||||
echo "hg readme file" > readme.txt &&
|
||||
hg add readme.txt &&
|
||||
hg commit -m "initial commit"
|
||||
) &&
|
||||
|
||||
# 2. Prepare destination git repo with LFS setup
|
||||
mkdir gitrepo &&
|
||||
(
|
||||
cd gitrepo &&
|
||||
git init -q &&
|
||||
git config core.ignoreCase false &&
|
||||
git lfs install --local &&
|
||||
git switch --create master &&
|
||||
|
||||
cat > readme.txt <<-EOF &&
|
||||
git readme file
|
||||
EOF
|
||||
|
||||
git add readme.txt &&
|
||||
git commit -q -m "Initialize Git readme file"
|
||||
) &&
|
||||
|
||||
FIRST_HASH=$(git -C gitrepo rev-parse HEAD) &&
|
||||
|
||||
# 3. Run hg-fast-export
|
||||
(
|
||||
cd gitrepo &&
|
||||
hg-fast-export.sh \
|
||||
-r "../hgrepo" \
|
||||
--first-commit-hash "$FIRST_HASH" --force \
|
||||
-M master
|
||||
) &&
|
||||
|
||||
# 5. Verify hg file is imported
|
||||
git -C gitrepo show HEAD:readme.txt > readme_check.txt &&
|
||||
test "$(cat readme_check.txt)" = "hg readme file"
|
||||
'
|
||||
|
||||
test_done
|
||||
189
t/git_lfs_importer_plugin.t
Executable file
189
t/git_lfs_importer_plugin.t
Executable file
@@ -0,0 +1,189 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# Copyright (c) 2025
|
||||
#
|
||||
|
||||
test_description='git_lfs_importer plugin integration tests'
|
||||
|
||||
. "${SHARNESS_TEST_SRCDIR-$(dirname "$0")/sharness}"/sharness.sh || exit 1
|
||||
|
||||
# Write the Mercurial and git identities used by every test in this file.
setup() {
	# Mercurial identity for commits in the source repositories.
	printf "%s\n" \
		"[ui]" \
		"username = Test User <test@example.com>" \
		> "$HOME"/.hgrc

	# Git identity for the commits made directly in the destination repo.
	git config --global user.email "test@example.com"
	git config --global user.name "Test User"
}
|
||||
|
||||
setup
|
||||
|
||||
# End-to-end check of the git_lfs_importer plugin: a file matching the
# pattern list is stored as an LFS pointer, other files are untouched, and
# a checkout restores the original payload.
test_expect_success 'git_lfs_importer converts matched binary files to LFS pointers and pointers are properly smudged when checkouting' '
	# Also remove the scratch files written by the verification steps.
	test_when_finished "rm -rf hgrepo gitrepo lfs-patterns.txt lfs_pointer.txt readme_check.txt" &&

	# 1. Create source Mercurial repository with binary files
	(
		hg init hgrepo &&
		cd hgrepo &&
		echo "regular text file" > readme.txt &&
		echo "binary payload" > payload.bin &&
		hg add readme.txt payload.bin &&
		hg commit -m "initial commit with binary"
	) &&

	# 2. Prepare destination git repo with LFS setup
	mkdir gitrepo &&
	(
		cd gitrepo &&
		git init -q &&
		git config core.ignoreCase false &&
		git lfs install --local &&

		cat > .gitattributes <<-EOF &&
		*.bin filter=lfs diff=lfs merge=lfs -text
		EOF

		git add .gitattributes &&
		git commit -q -m "Initialize Git LFS configuration"
	) &&

	FIRST_HASH=$(git -C gitrepo rev-parse HEAD) &&

	# 3. Create LFS patterns file
	cat > lfs-patterns.txt <<-EOF &&
	*.bin
	EOF

	# 4. Run hg-fast-export with git_lfs_importer plugin
	(
		cd gitrepo &&
		hg-fast-export.sh \
			-r "../hgrepo" \
			--plugin "git_lfs_importer=../lfs-patterns.txt" \
			--first-commit-hash "$FIRST_HASH" --force
	) &&

	# 5. Verify conversion: payload.bin should be an LFS pointer
	git -C gitrepo show HEAD:payload.bin > lfs_pointer.txt &&
	grep -q "version https://git-lfs.github.com/spec/v1" lfs_pointer.txt &&
	grep -q "oid sha256:" lfs_pointer.txt &&
	grep -q "size" lfs_pointer.txt &&

	# 6. Verify non-matched file is unchanged
	git -C gitrepo show HEAD:readme.txt > readme_check.txt &&
	test "$(cat readme_check.txt)" = "regular text file" &&

	# 7. Make sure the LFS pointer is smudged back into the real
	#    payload when the working tree is checked out
	git -C gitrepo reset --hard HEAD &&
	# Presumably a diagnostic listing of the checked-out files.
	ls gitrepo &&
	test "$(cat gitrepo/payload.bin)" = "binary payload"
'
|
||||
|
||||
test_expect_success 'git_lfs_importer skips files not matching patterns' '
|
||||
test_when_finished "rm -rf hgrepo gitrepo lfs-patterns.txt" &&
|
||||
|
||||
# 1. Create source with various files
|
||||
(
|
||||
hg init hgrepo &&
|
||||
cd hgrepo &&
|
||||
echo "text" > file.txt &&
|
||||
echo "data" > file.dat &&
|
||||
echo "iso content" > image.iso &&
|
||||
hg add . &&
|
||||
hg commit -m "multiple files"
|
||||
) &&
|
||||
|
||||
# 2. Prepare git repo with LFS
|
||||
mkdir gitrepo &&
|
||||
(
|
||||
cd gitrepo &&
|
||||
git init -q &&
|
||||
git config core.ignoreCase false &&
|
||||
git lfs install --local &&
|
||||
|
||||
cat > .gitattributes <<-EOF &&
|
||||
*.iso filter=lfs diff=lfs merge=lfs -text
|
||||
EOF
|
||||
|
||||
git add .gitattributes &&
|
||||
git commit -q -m "Initialize Git LFS configuration"
|
||||
) &&
|
||||
|
||||
FIRST_HASH=$(git -C gitrepo rev-parse HEAD) &&
|
||||
|
||||
# 3. Only .iso files should be converted
|
||||
cat > lfs-patterns.txt <<-EOF &&
|
||||
*.iso
|
||||
EOF
|
||||
|
||||
(
|
||||
cd gitrepo &&
|
||||
hg-fast-export.sh \
|
||||
-r "../hgrepo" \
|
||||
--plugin "git_lfs_importer=../lfs-patterns.txt" \
|
||||
--first-commit-hash "$FIRST_HASH" --force
|
||||
) &&
|
||||
|
||||
# 4. Verify .iso is LFS pointer
|
||||
git -C gitrepo show HEAD:image.iso | grep -q "oid sha256:" &&
|
||||
|
||||
# 5. Verify .txt and .dat are unchanged
|
||||
test "$(git -C gitrepo show HEAD:file.txt)" = "text" &&
|
||||
test "$(git -C gitrepo show HEAD:file.dat)" = "data"
|
||||
'
|
||||
|
||||
test_expect_success 'git_lfs_importer handles directory patterns' '
|
||||
test_when_finished "rm -rf hgrepo gitrepo lfs-patterns.txt" &&
|
||||
|
||||
# 1. Create repo with files in directory
|
||||
(
|
||||
hg init hgrepo &&
|
||||
cd hgrepo &&
|
||||
mkdir -p assets/images &&
|
||||
echo "logo data" > assets/images/logo.bin &&
|
||||
echo "regular" > readme.txt &&
|
||||
hg add . &&
|
||||
hg commit -m "files in directories"
|
||||
) &&
|
||||
|
||||
# 2. Prepare git repo
|
||||
mkdir gitrepo &&
|
||||
(
|
||||
cd gitrepo &&
|
||||
git init -q &&
|
||||
git config core.ignoreCase false &&
|
||||
git lfs install --local &&
|
||||
|
||||
cat > .gitattributes <<-EOF &&
|
||||
assets/** filter=lfs diff=lfs merge=lfs -text
|
||||
EOF
|
||||
|
||||
git add .gitattributes &&
|
||||
git commit -q -m "Initialize Git LFS configuration"
|
||||
) &&
|
||||
|
||||
FIRST_HASH=$(git -C gitrepo rev-parse HEAD) &&
|
||||
|
||||
# 3. Match directory pattern
|
||||
cat > lfs-patterns.txt <<-EOF &&
|
||||
assets/**
|
||||
EOF
|
||||
|
||||
(
|
||||
cd gitrepo &&
|
||||
hg-fast-export.sh \
|
||||
-r "../hgrepo" \
|
||||
--plugin "git_lfs_importer=../lfs-patterns.txt" \
|
||||
--first-commit-hash "$FIRST_HASH" --force
|
||||
) &&
|
||||
|
||||
# 4. Verify directory file is converted
|
||||
git -C gitrepo show HEAD:assets/images/logo.bin | grep -q "oid sha256:" &&
|
||||
|
||||
# 5. Verify file outside directory is unchanged
|
||||
test "$(git -C gitrepo show HEAD:readme.txt)" = "regular"
|
||||
'
|
||||
|
||||
test_done
|
||||
20
t/largefile_plugin.expected
Normal file
20
t/largefile_plugin.expected
Normal file
@@ -0,0 +1,20 @@
|
||||
blob
|
||||
mark :1
|
||||
data 7
|
||||
a_file
|
||||
|
||||
blob
|
||||
mark :2
|
||||
data 6
|
||||
large
|
||||
|
||||
reset refs/heads/master
|
||||
commit refs/heads/master
|
||||
mark :3
|
||||
author Grevious Bodily Harmsworth <gbh@example.com> 1679014800 +0000
|
||||
committer Grevious Bodily Harmsworth <gbh@example.com> 1679014800 +0000
|
||||
data 3
|
||||
r0
|
||||
M 100644 :1 a.txt
|
||||
M 100644 :2 b.txt
|
||||
|
||||
69
t/largefile_plugin.t
Executable file
69
t/largefile_plugin.t
Executable file
@@ -0,0 +1,69 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# Copyright (c) 2023 Felipe Contreras
|
||||
# Copyright (c) 2023 Frej Drejhammar
|
||||
# Copyright (c) 2025 Günther Nußmüller
|
||||
#
|
||||
# Check that plugin invocation works with largefiles.
|
||||
# This test uses the echo_file_data_test_plugin to verify that the
|
||||
# file data is passed correctly, including the largefile status.
|
||||
#
|
||||
|
||||
test_description='Largefiles and plugin test'
|
||||
|
||||
. "${SHARNESS_TEST_SRCDIR-$(dirname "$0")/sharness}"/sharness.sh || exit 1
|
||||
|
||||
|
||||
# Initialise a git repository at $1 and set core.ignoreCase to false.
git_create() {
	local repo="$1"

	git init -q "$repo" &&
	git -C "$repo" config core.ignoreCase false
}
|
||||
|
||||
# Export Mercurial repo $1 into git repo $2 (sibling directories), with
# the echo_file_data_test_plugin enabled. Runs in a subshell so the
# caller's working directory is unchanged.
git_convert() {
	local hg_repo="$1" git_repo="$2"

	(
		cd "$git_repo" &&
		hg-fast-export.sh --repo "../$hg_repo" \
			-s --hgtags -n \
			--plugin ../../plugins/echo_file_data_test_plugin
	)
}
|
||||
|
||||
# Write $HOME/.hgrc with the commit identity and the largefiles
# extension enabled.
setup() {
	printf "%s\n" \
		"[ui]" \
		"username = Grevious Bodily Harmsworth <gbh@example.com>" \
		"[extensions]" \
		"largefiles =" \
		> "$HOME"/.hgrc
}
|
||||
|
||||
# Create revision r0 in hgrepo: a.txt is added as a normal file and b.txt
# as a largefile, committed with a fixed date so the exported stream is
# reproducible. Runs in a subshell so the caller's directory is unchanged.
commit0() {
	(
		cd hgrepo &&
		echo "a_file" > a.txt &&
		# Chained with && so a failed write fails the whole helper.
		echo "large" > b.txt &&
		hg add a.txt &&
		hg add --large b.txt &&
		hg commit -d "2023-03-17 01:00Z" -m "r0"
	)
}
|
||||
|
||||
setup
|
||||
|
||||
test_expect_success 'largefile and plugin' '
|
||||
test_when_finished "rm -rf hgrepo gitrepo" &&
|
||||
|
||||
(
|
||||
hg init hgrepo &&
|
||||
commit0
|
||||
) &&
|
||||
git_create gitrepo &&
|
||||
git_convert hgrepo gitrepo &&
|
||||
|
||||
git -C gitrepo fast-export --all > actual &&
|
||||
|
||||
test_cmp "$SHARNESS_TEST_DIRECTORY"/largefile_plugin.expected actual &&
|
||||
test_cmp "$SHARNESS_TEST_DIRECTORY"/largefile_plugin_file_info.expected gitrepo/largefile_info.txt
|
||||
'
|
||||
|
||||
test_done
|
||||
12
t/largefile_plugin_file_info.expected
Normal file
12
t/largefile_plugin_file_info.expected
Normal file
@@ -0,0 +1,12 @@
|
||||
filename: b'b.txt'
|
||||
data size: 6 bytes
|
||||
ctx rev: 0
|
||||
ctx binary: False
|
||||
is largefile: True
|
||||
|
||||
filename: b'a.txt'
|
||||
data size: 7 bytes
|
||||
ctx rev: 0
|
||||
ctx binary: False
|
||||
is largefile: False
|
||||
|
||||
144
t/main.t
Executable file
144
t/main.t
Executable file
@@ -0,0 +1,144 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# Copyright (c) 2023 Felipe Contreras
|
||||
#
|
||||
|
||||
test_description='Main tests'
|
||||
|
||||
. "${SHARNESS_TEST_SRCDIR-$(dirname "$0")/sharness}"/sharness.sh || exit 1
|
||||
|
||||
# Assert that the subject line of revision $2 in git repo $1 equals $3.
check() {
	local repo="$1" rev="$2" subject="$3"

	echo "$subject" > expected &&
	git -C "$repo" show -q --format='%s' "$rev" > actual &&
	test_cmp expected actual
}
|
||||
|
||||
# Create git repo $2 and import Mercurial repo $1 (a sibling directory)
# into it. Runs in a subshell so the caller's directory is unchanged.
git_clone() {
	local hg_repo="$1" git_repo="$2"

	(
		git init -q "$git_repo" &&
		cd "$git_repo" &&
		git config core.ignoreCase false &&
		hg-fast-export.sh --repo "../$hg_repo"
	)
}
|
||||
|
||||
# Write $HOME/.hgrc with the Mercurial commit identity used by the tests.
setup() {
	printf "%s\n" \
		"[ui]" \
		"username = H G Wells <wells@example.com>" \
		> "$HOME"/.hgrc
}
|
||||
|
||||
setup
|
||||
|
||||
test_expect_success 'basic' '
|
||||
test_when_finished "rm -rf hgrepo gitrepo" &&
|
||||
|
||||
(
|
||||
hg init hgrepo &&
|
||||
cd hgrepo &&
|
||||
echo zero > content &&
|
||||
hg add content &&
|
||||
hg commit -m zero
|
||||
) &&
|
||||
|
||||
git_clone hgrepo gitrepo &&
|
||||
check gitrepo @ zero
|
||||
'
|
||||
|
||||
# Convert a history with two diverging heads and a merge, then check the
# first parent of the merge, the merged file content and the merged tree.
test_expect_success 'merge' '
	test_when_finished "rm -rf hgrepo gitrepo" &&

	(
		hg init hgrepo &&
		cd hgrepo &&
		echo a > content &&
		echo a > file1 &&
		hg add content file1 &&
		hg commit -m "origin" &&

		echo b > content &&
		echo b > file2 &&
		hg add file2 &&
		hg rm file1 &&
		hg commit -m "right" &&

		hg update -r0 &&
		echo c > content &&
		hg commit -m "left" &&

		# HGMERGE=true makes the merge tool a no-op that reports success.
		HGMERGE=true hg merge -r1 &&
		hg commit -m "merge"
	) &&

	git_clone hgrepo gitrepo &&

	cat > expected <<-EOF &&
	left
	c
	tree @:

	content
	file2
	EOF

	(
		# Chained with && so a missing gitrepo fails here instead of
		# running the git commands in the wrong directory.
		cd gitrepo &&
		git show -q --format='%s' @^ &&
		git show @:content &&
		git show @:
	) > actual &&

	test_cmp expected actual
'
|
||||
|
||||
# Same diverge-and-merge history as the merge test, but file1 and file2
# are added as Mercurial largefiles.
test_expect_success 'hg large file' '
	test_when_finished "rm -rf hgrepo gitrepo" &&

	(
		hg init hgrepo &&
		cd hgrepo &&
		# Enable the largefiles extension for this repository; chained
		# with && so a failed write aborts the test.
		echo "[extensions]" >> .hg/hgrc &&
		echo "largefiles =" >> .hg/hgrc &&
		echo a > content &&
		echo a > file1 &&
		hg add content &&
		hg add --large file1 &&
		hg commit -m "origin" &&

		echo b > content &&
		echo b > file2 &&
		hg add --large file2 &&
		hg rm file1 &&
		hg commit -m "right" &&

		hg update -r0 &&
		echo c > content &&
		hg commit -m "left" &&

		# HGMERGE=true makes the merge tool a no-op that reports success.
		HGMERGE=true hg merge -r1 &&
		hg commit -m "merge"
	) &&

	git_clone hgrepo gitrepo &&

	cat > expected <<-EOF &&
	left
	c
	tree @:

	content
	file2
	EOF

	(
		# Chained with && so a missing gitrepo fails here instead of
		# running the git commands in the wrong directory.
		cd gitrepo &&
		git show -q --format='%s' @^ &&
		git show @:content &&
		git show @:
	) > actual &&

	test_cmp expected actual
'
|
||||
|
||||
test_done
|
||||
18
t/plugins/echo_file_data_test_plugin/__init__.py
Normal file
18
t/plugins/echo_file_data_test_plugin/__init__.py
Normal file
@@ -0,0 +1,18 @@
|
||||
import sys
|
||||
from mercurial import node
|
||||
|
||||
def build_filter(args):
    """Return a new ``Filter`` constructed from *args*."""
    plugin_filter = Filter(args)
    return plugin_filter
|
||||
|
||||
class Filter:
    """Test plugin that records what ``file_data_filter`` receives."""

    def __init__(self, _):
        # The plugin argument is accepted but not used.
        pass

    def file_data_filter(self, file_data):
        """Append a summary of *file_data* to ``largefile_info.txt``.

        The file is opened in append mode relative to the current working
        directory. One record (five lines plus a blank separator) is
        written per call; *file_data* itself is not modified.
        """
        with open('largefile_info.txt', 'a') as f:
            f.writelines([
                f"filename: {file_data['filename']}\n",
                f"data size: {len(file_data['data'])} bytes\n",
                f"ctx rev: {file_data['file_ctx'].rev()}\n",
                f"ctx binary: {file_data['file_ctx'].isbinary()}\n",
                f"is largefile: {file_data.get('is_largefile', False)}\n",
                "\n",
            ])
|
||||
2
t/plugins/id
Executable file
2
t/plugins/id
Executable file
@@ -0,0 +1,2 @@
|
||||
#!/bin/bash
# Identity filter: copies standard input to standard output unchanged.
cat
|
||||
15
t/plugins/removefiles_test_plugin/__init__.py
Normal file
15
t/plugins/removefiles_test_plugin/__init__.py
Normal file
@@ -0,0 +1,15 @@
|
||||
import subprocess
|
||||
import shlex
|
||||
import sys
|
||||
from mercurial import node
|
||||
|
||||
def build_filter(args):
    """Return a new ``Filter`` constructed from *args*."""
    plugin_filter = Filter(args)
    return plugin_filter
|
||||
|
||||
class Filter:
    """Test plugin that blanks the data of files whose name starts with ``bad``."""

    def __init__(self, args):
        # Keep the shell-style tokens of the plugin argument string.
        self.filter_contents = shlex.split(args)

    def file_data_filter(self, file_data):
        """Set ``file_data['data']`` to ``None`` for names starting with ``b'bad'``.

        *file_data* is modified in place; other files are left untouched.
        """
        name = file_data['filename']
        if not name.startswith(b'bad'):
            return
        file_data['data'] = None
|
||||
15
t/plugins/rename_file_test_plugin/__init__.py
Normal file
15
t/plugins/rename_file_test_plugin/__init__.py
Normal file
@@ -0,0 +1,15 @@
|
||||
import subprocess
|
||||
import shlex
|
||||
import sys
|
||||
from mercurial import node
|
||||
|
||||
def build_filter(args):
    """Return a new ``Filter`` constructed from *args*."""
    plugin_filter = Filter(args)
    return plugin_filter
|
||||
|
||||
class Filter:
    """Test plugin that renames ``b.txt`` to ``c.txt``."""

    def __init__(self, args):
        # Keep the shell-style tokens of the plugin argument string.
        self.filter_contents = shlex.split(args)

    def file_data_filter(self, file_data):
        """Rewrite ``file_data['filename']`` from ``b'b.txt'`` to ``b'c.txt'``.

        *file_data* is modified in place; any other filename is kept.
        """
        if file_data['filename'] != b'b.txt':
            return
        file_data['filename'] = b'c.txt'
|
||||
42
t/set_origin.expected
Normal file
42
t/set_origin.expected
Normal file
@@ -0,0 +1,42 @@
|
||||
blob
|
||||
mark :1
|
||||
data 5
|
||||
zero
|
||||
|
||||
reset refs/heads/prefix/master
|
||||
commit refs/heads/prefix/master
|
||||
mark :2
|
||||
author H G Wells <wells@example.com> 1679014800 +0000
|
||||
committer H G Wells <wells@example.com> 1679014800 +0000
|
||||
data 5
|
||||
zero
|
||||
M 100644 :1 content
|
||||
|
||||
blob
|
||||
mark :3
|
||||
data 8
|
||||
branch1
|
||||
|
||||
commit refs/heads/prefix/branch1
|
||||
mark :4
|
||||
author H G Wells <wells@example.com> 1679018400 +0000
|
||||
committer H G Wells <wells@example.com> 1679018400 +0000
|
||||
data 29
|
||||
Added file in branch branch1
|
||||
from :2
|
||||
M 100644 :3 b8486c4feca589a4237a1ee428322d7109ede12e
|
||||
|
||||
blob
|
||||
mark :5
|
||||
data 8
|
||||
branch2
|
||||
|
||||
commit refs/heads/prefix/branch2
|
||||
mark :6
|
||||
author H G Wells <wells@example.com> 1679022000 +0000
|
||||
committer H G Wells <wells@example.com> 1679022000 +0000
|
||||
data 29
|
||||
Added file in branch branch2
|
||||
from :4
|
||||
M 100644 :5 fe786baee0d76603092c25609f2967b9c28a2cf2
|
||||
|
||||
59
t/set_origin.t
Executable file
59
t/set_origin.t
Executable file
@@ -0,0 +1,59 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# Copyright (c) 2023 Felipe Contreras
|
||||
# Copyright (c) 2025 Günther Nußmüller
|
||||
#
|
||||
|
||||
test_description='Set origin tests'
|
||||
|
||||
. "${SHARNESS_TEST_SRCDIR-$(dirname "$0")/sharness}"/sharness.sh || exit 1
|
||||
|
||||
# Compare the complete fast-export stream of git repo $1 with
# set_origin.expected.
check() {
	# Chained with && so a failing export is reported as such rather than
	# surfacing only as a diff against a truncated file.
	git -C "$1" fast-export --all > actual &&
	test_cmp "$SHARNESS_TEST_DIRECTORY"/set_origin.expected actual
}
|
||||
|
||||
# Create git repo $2 and import Mercurial repo $1 (a sibling directory)
# into it, passing $3 as the --origin value. Runs in a subshell so the
# caller's directory is unchanged.
git_clone() {
	local hg_repo="$1" git_repo="$2" origin="$3"

	(
		git init -q "$git_repo" &&
		cd "$git_repo" &&
		git config core.ignoreCase false &&
		hg-fast-export.sh --repo "../$hg_repo" --origin "$origin"
	)
}
|
||||
|
||||
# Write $HOME/.hgrc with the Mercurial commit identity used by the tests.
setup() {
	printf "%s\n" \
		"[ui]" \
		"username = H G Wells <wells@example.com>" \
		> "$HOME"/.hgrc
}
|
||||
|
||||
# Start Mercurial branch $1 in the current repository and commit one new
# file on it. The file is named after the SHA-1 of the branch name, so
# branch names that are not valid file names still work. $2 is the hour
# (HH) of the fixed commit timestamp on 2023-03-17.
make-branch() {
	local file

	# Every step is chained so the first failure is returned to the caller.
	hg branch "$1" &&
	file=$(echo "$1" | sha1sum | cut -d " " -f 1) &&
	echo "$1" > "$file" &&
	hg add "$file" &&
	hg commit -d "2023-03-17 $2:00Z" -m "Added file in branch $1"
}
|
||||
|
||||
setup
|
||||
|
||||
test_expect_success 'basic' '
|
||||
test_when_finished "rm -rf hgrepo gitrepo" &&
|
||||
|
||||
(
|
||||
hg init hgrepo &&
|
||||
cd hgrepo &&
|
||||
echo zero > content &&
|
||||
hg add content &&
|
||||
hg commit -m zero -d "2023-03-17 01:00Z" &&
|
||||
make-branch branch1 02 &&
|
||||
make-branch branch2 03
|
||||
) &&
|
||||
|
||||
git_clone hgrepo gitrepo prefix &&
|
||||
check gitrepo
|
||||
'
|
||||
|
||||
test_done
|
||||
1
t/sharness
Submodule
1
t/sharness
Submodule
Submodule t/sharness added at e457513ae8
15
t/smoke-test.branchmap
Normal file
15
t/smoke-test.branchmap
Normal file
@@ -0,0 +1,15 @@
|
||||
"feature"="renamed-feature"
|
||||
"a?"="valid-0"
|
||||
"a/"="valid-1"
|
||||
"a/b"="valid-2"
|
||||
"a/?"="valid-3"
|
||||
"?a"="valid-4"
|
||||
"a."="valid-5"
|
||||
"a.b"="valid-6"
|
||||
".a"="valid-7"
|
||||
"/"="valid-8"
|
||||
"___3"="___a"
|
||||
"__2"="__b"
|
||||
"_1"="_c"
|
||||
"åäö"="abc"
|
||||
"Feature- 12V Vac \"Venom\""="venom"
|
||||
300
t/smoke-test.expected
Normal file
300
t/smoke-test.expected
Normal file
@@ -0,0 +1,300 @@
|
||||
blob
|
||||
mark :1
|
||||
data 5
|
||||
r0-a
|
||||
|
||||
blob
|
||||
mark :2
|
||||
data 5
|
||||
r0-b
|
||||
|
||||
reset refs/heads/master
|
||||
commit refs/heads/master
|
||||
mark :3
|
||||
author Grevious Bodily Harmsworth <gbh@example.com> 1679014800 +0000
|
||||
committer Grevious Bodily Harmsworth <gbh@example.com> 1679014800 +0000
|
||||
data 3
|
||||
r0
|
||||
M 100644 :1 a.txt
|
||||
M 100644 :2 b.txt
|
||||
|
||||
blob
|
||||
mark :4
|
||||
data 5
|
||||
r1-c
|
||||
|
||||
blob
|
||||
mark :5
|
||||
data 5
|
||||
r1-d
|
||||
|
||||
commit refs/tags/2019_Spring_R2
|
||||
mark :6
|
||||
author Grevious Bodily Harmsworth <gbh@example.com> 1679018400 +0000
|
||||
committer Grevious Bodily Harmsworth <gbh@example.com> 1679018400 +0000
|
||||
data 3
|
||||
r1
|
||||
from :3
|
||||
M 100644 :4 c.txt
|
||||
M 100644 :5 d.txt
|
||||
|
||||
blob
|
||||
mark :7
|
||||
data 56
|
||||
e92e41dde44f9dbbac08bbb83351a65b6728f128 2019 Spring R2
|
||||
|
||||
commit refs/heads/mainline
|
||||
mark :8
|
||||
author Grevious Bodily Harmsworth <gbh@example.com> 1679019000 +0000
|
||||
committer Grevious Bodily Harmsworth <gbh@example.com> 1679019000 +0000
|
||||
data 52
|
||||
Added tag 2019 Spring R2 for changeset e92e41dde44f
|
||||
from :6
|
||||
M 100644 :7 .hgtags
|
||||
|
||||
blob
|
||||
mark :9
|
||||
data 5
|
||||
r2-e
|
||||
|
||||
blob
|
||||
mark :10
|
||||
data 5
|
||||
r2-f
|
||||
|
||||
commit refs/heads/mainline
|
||||
mark :11
|
||||
author Grevious Bodily Harmsworth <gbh@example.com> 1679022000 +0000
|
||||
committer Grevious Bodily Harmsworth <gbh@example.com> 1679022000 +0000
|
||||
data 3
|
||||
r2
|
||||
from :8
|
||||
M 100644 :9 e.txt
|
||||
M 100644 :10 f.txt
|
||||
|
||||
commit refs/heads/mainline
|
||||
mark :12
|
||||
author badly-formed-user <devnull@localhost> 1679025600 +0000
|
||||
committer badly-formed-user <devnull@localhost> 1679025600 +0000
|
||||
data 3
|
||||
r3
|
||||
from :11
|
||||
M 100644 :9 g.txt
|
||||
M 100644 :10 h.txt
|
||||
|
||||
blob
|
||||
mark :13
|
||||
data 10
|
||||
feature-a
|
||||
|
||||
blob
|
||||
mark :14
|
||||
data 10
|
||||
feature-b
|
||||
|
||||
commit refs/heads/renamed-feature
|
||||
mark :15
|
||||
author Grevious Bodily Harmsworth <gbh@example.com> 1679029200 +0000
|
||||
committer Grevious Bodily Harmsworth <gbh@example.com> 1679029200 +0000
|
||||
data 8
|
||||
feature
|
||||
from :12
|
||||
M 100644 :13 feature-a.txt
|
||||
M 100644 :14 feature-b.txt
|
||||
|
||||
blob
|
||||
mark :16
|
||||
data 3
|
||||
a?
|
||||
|
||||
commit refs/heads/valid-0
|
||||
mark :17
|
||||
author Grevious Bodily Harmsworth <gbh@example.com> 1679032800 +0000
|
||||
committer Grevious Bodily Harmsworth <gbh@example.com> 1679032800 +0000
|
||||
data 24
|
||||
Added file in branch a?
|
||||
from :15
|
||||
M 100644 :16 c1086ce03e4f52aadd1c93b1d097da510138522a
|
||||
|
||||
blob
|
||||
mark :18
|
||||
data 3
|
||||
a/
|
||||
|
||||
commit refs/heads/valid-1
|
||||
mark :19
|
||||
author Grevious Bodily Harmsworth <gbh@example.com> 1679036400 +0000
|
||||
committer Grevious Bodily Harmsworth <gbh@example.com> 1679036400 +0000
|
||||
data 24
|
||||
Added file in branch a/
|
||||
from :17
|
||||
M 100644 :18 85ed6fbb96d655df9f194bc9107f2d86210b9263
|
||||
|
||||
blob
|
||||
mark :20
|
||||
data 4
|
||||
a/b
|
||||
|
||||
commit refs/heads/valid-2
|
||||
mark :21
|
||||
author Grevious Bodily Harmsworth <gbh@example.com> 1679040000 +0000
|
||||
committer Grevious Bodily Harmsworth <gbh@example.com> 1679040000 +0000
|
||||
data 25
|
||||
Added file in branch a/b
|
||||
from :19
|
||||
M 100644 :20 aae42d317509399fdda80c4d8e46774d152dbd04
|
||||
|
||||
blob
|
||||
mark :22
|
||||
data 4
|
||||
a/?
|
||||
|
||||
commit refs/heads/valid-3
|
||||
mark :23
|
||||
author Grevious Bodily Harmsworth <gbh@example.com> 1679043600 +0000
|
||||
committer Grevious Bodily Harmsworth <gbh@example.com> 1679043600 +0000
|
||||
data 25
|
||||
Added file in branch a/?
|
||||
from :21
|
||||
M 100644 :22 ba54a8de7fe91c5e6e0a2dd1b9b37de0976ff5a7
|
||||
|
||||
blob
|
||||
mark :24
|
||||
data 3
|
||||
?a
|
||||
|
||||
commit refs/heads/valid-4
|
||||
mark :25
|
||||
author Grevious Bodily Harmsworth <gbh@example.com> 1679047200 +0000
|
||||
committer Grevious Bodily Harmsworth <gbh@example.com> 1679047200 +0000
|
||||
data 24
|
||||
Added file in branch ?a
|
||||
from :23
|
||||
M 100644 :24 d4cde16119b586025976741e87775762a2598984
|
||||
|
||||
blob
|
||||
mark :26
|
||||
data 3
|
||||
a.
|
||||
|
||||
commit refs/heads/valid-5
|
||||
mark :27
|
||||
author Grevious Bodily Harmsworth <gbh@example.com> 1679050800 +0000
|
||||
committer Grevious Bodily Harmsworth <gbh@example.com> 1679050800 +0000
|
||||
data 24
|
||||
Added file in branch a.
|
||||
from :25
|
||||
M 100644 :26 b4ce96ddcee0706a8c51130917f910b2b29faf77
|
||||
|
||||
blob
|
||||
mark :28
|
||||
data 4
|
||||
a.b
|
||||
|
||||
commit refs/heads/valid-6
|
||||
mark :29
|
||||
author Grevious Bodily Harmsworth <gbh@example.com> 1679054400 +0000
|
||||
committer Grevious Bodily Harmsworth <gbh@example.com> 1679054400 +0000
|
||||
data 25
|
||||
Added file in branch a.b
|
||||
from :27
|
||||
M 100644 :28 97051191e1a92daa11165ef10770bf964268c58b
|
||||
|
||||
blob
|
||||
mark :30
|
||||
data 3
|
||||
.a
|
||||
|
||||
commit refs/heads/valid-7
|
||||
mark :31
|
||||
author Grevious Bodily Harmsworth <gbh@example.com> 1679058000 +0000
|
||||
committer Grevious Bodily Harmsworth <gbh@example.com> 1679058000 +0000
|
||||
data 24
|
||||
Added file in branch .a
|
||||
from :29
|
||||
M 100644 :30 a667f8feec02fdfa6649772f844a24cf1ad5ebec
|
||||
|
||||
blob
|
||||
mark :32
|
||||
data 2
|
||||
/
|
||||
|
||||
commit refs/heads/valid-8
|
||||
mark :33
|
||||
author Grevious Bodily Harmsworth <gbh@example.com> 1679061600 +0000
|
||||
committer Grevious Bodily Harmsworth <gbh@example.com> 1679061600 +0000
|
||||
data 23
|
||||
Added file in branch /
|
||||
from :31
|
||||
M 100644 :32 8f27084b6294ddbe28dbcbf98f798730e8a79289
|
||||
|
||||
blob
|
||||
mark :34
|
||||
data 5
|
||||
___3
|
||||
|
||||
commit refs/heads/___a
|
||||
mark :35
|
||||
author Grevious Bodily Harmsworth <gbh@example.com> 1679065200 +0000
|
||||
committer Grevious Bodily Harmsworth <gbh@example.com> 1679065200 +0000
|
||||
data 26
|
||||
Added file in branch ___3
|
||||
from :33
|
||||
M 100644 :34 9b171494eb6e5ce325934b1656e286ca0510a697
|
||||
|
||||
blob
|
||||
mark :36
|
||||
data 4
|
||||
__2
|
||||
|
||||
commit refs/heads/__b
|
||||
mark :37
|
||||
author Grevious Bodily Harmsworth <gbh@example.com> 1679068800 +0000
|
||||
committer Grevious Bodily Harmsworth <gbh@example.com> 1679068800 +0000
|
||||
data 25
|
||||
Added file in branch __2
|
||||
from :35
|
||||
M 100644 :36 5dca703b71d2613c6bb3262b9b1741d6165e4a2f
|
||||
|
||||
blob
|
||||
mark :38
|
||||
data 3
|
||||
_1
|
||||
|
||||
commit refs/heads/_c
|
||||
mark :39
|
||||
author Grevious Bodily Harmsworth <gbh@example.com> 1679072400 +0000
|
||||
committer Grevious Bodily Harmsworth <gbh@example.com> 1679072400 +0000
|
||||
data 24
|
||||
Added file in branch _1
|
||||
from :37
|
||||
M 100644 :38 2fee90e148a2afbd911b67ced9b6240151f904ec
|
||||
|
||||
blob
|
||||
mark :40
|
||||
data 25
|
||||
Feature- 12V Vac "Venom"
|
||||
|
||||
commit refs/heads/venom
|
||||
mark :41
|
||||
author Grevious Bodily Harmsworth <gbh@example.com> 1679076000 +0000
|
||||
committer Grevious Bodily Harmsworth <gbh@example.com> 1679076000 +0000
|
||||
data 46
|
||||
Added file in branch Feature- 12V Vac "Venom"
|
||||
from :39
|
||||
M 100644 :40 b01def8779aed4be2f4b7325a89992a9aa566fec
|
||||
|
||||
blob
|
||||
mark :42
|
||||
data 7
|
||||
åäö
|
||||
|
||||
commit refs/heads/abc
|
||||
mark :43
|
||||
author Grevious Bodily Harmsworth <gbh@example.com> 1679079600 +0000
|
||||
committer Grevious Bodily Harmsworth <gbh@example.com> 1679079600 +0000
|
||||
data 28
|
||||
Added file in branch åäö
|
||||
from :41
|
||||
M 100644 :42 a0d01fcbff5d86327d542687dcfd8b299d054147
|
||||
|
||||
163
t/smoke-test.t
Executable file
163
t/smoke-test.t
Executable file
@@ -0,0 +1,163 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# Copyright (c) 2023 Felipe Contreras
|
||||
# Copyright (c) 2023 Frej Drejhammar
|
||||
#
|
||||
# Smoke test used to sanity test changes to fast-export.
|
||||
#
|
||||
|
||||
test_description='Smoke test'
|
||||
|
||||
. "${SHARNESS_TEST_SRCDIR-$(dirname "$0")/sharness}"/sharness.sh || exit 1
|
||||
|
||||
# Assert that the subject line of revision $2 in git repo $1 equals $3.
check() {
	local repo="$1" rev="$2" subject="$3"

	echo "$subject" > expected &&
	git -C "$repo" show -q --format='%s' "$rev" > actual &&
	test_cmp expected actual
}
|
||||
|
||||
# Initialise a git repository at $1 and set core.ignoreCase to false.
git_create() {
	local repo="$1"

	git init -q "$repo" &&
	git -C "$repo" config core.ignoreCase false
}
|
||||
|
||||
# Export Mercurial repo $1 into git repo $2 (sibling directories) using
# the smoke-test branch and tag maps. Runs in a subshell so the caller's
# working directory is unchanged.
git_convert() {
	local hg_repo="$1" git_repo="$2"

	(
		cd "$git_repo" &&
		hg-fast-export.sh --repo "../$hg_repo" \
			-s --hgtags -n \
			-B "$SHARNESS_TEST_DIRECTORY"/smoke-test.branchmap \
			-T "$SHARNESS_TEST_DIRECTORY"/smoke-test.tagsmap
	)
}
|
||||
|
||||
# Write $HOME/.hgrc with the Mercurial commit identity used by the tests.
setup() {
	printf "%s\n" \
		"[ui]" \
		"username = Grevious Bodily Harmsworth <gbh@example.com>" \
		> "$HOME"/.hgrc
}
|
||||
|
||||
# Revision r0: add a.txt and b.txt on the default branch with a fixed
# date, then set bookmark bm0. Runs in a subshell so the caller's
# directory is unchanged.
commit0() {
	(
		cd hgrepo &&
		echo "r0-a" > a.txt &&
		echo "r0-b" > b.txt &&
		hg add a.txt b.txt &&
		hg commit -d "2023-03-17 01:00Z" -m "r0" &&
		hg bookmark bm0
	)
}
|
||||
|
||||
# Revision r1: switch to branch mainline, add c.txt and d.txt, then tag
# the commit "2019 Spring R2" (smoke-test.tagsmap maps that name to
# 2019_Spring_R2). The tag commit gets its own fixed date.
commit1() {
	(
		cd hgrepo &&
		echo "r1-c" > c.txt &&
		echo "r1-d" > d.txt &&
		hg branch mainline &&
		hg add c.txt d.txt &&
		hg commit -d "2023-03-17 02:00Z" -m "r1" &&
		hg tag -d "2023-03-17 02:10Z" "2019 Spring R2"
	)
}
|
||||
|
||||
# Revision r2: add e.txt and f.txt with a fixed date, then set
# bookmark bm1.
commit2() {
	(
		cd hgrepo &&
		echo "r2-e" > e.txt &&
		echo "r2-f" > f.txt &&
		hg add e.txt f.txt &&
		hg commit -d "2023-03-17 03:00Z" -m "r2" &&
		hg bookmark bm1
	)
}
|
||||
|
||||
# Revision r3: add g.txt and h.txt, committed as the user
# "badly-formed-user" (a name without an e-mail address).
commit3() {
	(
		cd hgrepo &&
		# The contents repeat those of e.txt and f.txt from r2;
		# smoke-test.expected relies on this, as g.txt and h.txt reuse
		# blob marks :9 and :10 there.
		echo "r2-e" > g.txt &&
		echo "r2-f" > h.txt &&
		hg add g.txt h.txt &&
		hg commit -d "2023-03-17 04:00Z" -u "badly-formed-user" -m "r3"
	)
}
|
||||
|
||||
# Remaining history: one commit on branch feature, followed by one commit
# on each of a series of oddly named branches. Each branch name has an
# entry in smoke-test.branchmap. The second argument to make-branch is
# the hour of the fixed commit timestamp, which keeps the commits ordered.
commit_rest() {
	(
		cd hgrepo &&

		hg branch feature &&
		echo "feature-a" > feature-a.txt &&
		echo "feature-b" > feature-b.txt &&
		hg add feature-a.txt feature-b.txt &&
		hg commit -d "2023-03-17 05:00Z" -m "feature" &&
		hg bookmark bm2 &&

		# Now create strangely named branches
		make-branch "a?" 06 &&
		make-branch "a/" 07 &&
		make-branch "a/b" 08 &&
		make-branch "a/?" 09 &&
		make-branch "?a" 10 &&
		make-branch "a." 11 &&
		make-branch "a.b" 12 &&
		make-branch ".a" 13 &&
		make-branch "/" 14 &&
		make-branch "___3" 15 &&
		make-branch "__2" 16 &&
		make-branch "_1" 17 &&
		make-branch "Feature- 12V Vac \"Venom\"" 18 &&
		make-branch "åäö" 19 &&

		hg bookmark bm-for-the-rest
	)
}
|
||||
|
||||
# Start Mercurial branch $1 in the current repository and commit one new
# file on it. The file is named after the SHA-1 of the branch name, so
# branch names that are not valid file names still work. $2 is the hour
# (HH) of the fixed commit timestamp on 2023-03-17.
make-branch() {
	local file

	# Every step is chained so the first failure is returned to the caller.
	hg branch "$1" &&
	file=$(echo "$1" | sha1sum | cut -d " " -f 1) &&
	echo "$1" > "$file" &&
	hg add "$file" &&
	hg commit -d "2023-03-17 $2:00Z" -m "Added file in branch $1"
}
|
||||
|
||||
setup
|
||||
|
||||
test_expect_success 'all in one' '
|
||||
test_when_finished "rm -rf hgrepo gitrepo" &&
|
||||
|
||||
(
|
||||
hg init hgrepo &&
|
||||
commit0 &&
|
||||
commit1 &&
|
||||
commit2 &&
|
||||
commit3 &&
|
||||
commit_rest
|
||||
) &&
|
||||
git_create gitrepo &&
|
||||
git_convert hgrepo gitrepo &&
|
||||
git -C gitrepo fast-export --all > actual &&
|
||||
|
||||
test_cmp "$SHARNESS_TEST_DIRECTORY"/smoke-test.expected actual
|
||||
'
|
||||
|
||||
test_expect_success 'incremental' '
|
||||
test_when_finished "rm -rf hgrepo gitrepo" &&
|
||||
|
||||
hg init hgrepo &&
|
||||
commit0 &&
|
||||
git_create gitrepo &&
|
||||
git_convert hgrepo gitrepo &&
|
||||
commit1 &&
|
||||
git_convert hgrepo gitrepo &&
|
||||
commit2 &&
|
||||
commit3 &&
|
||||
git_convert hgrepo gitrepo &&
|
||||
commit_rest &&
|
||||
git_convert hgrepo gitrepo &&
|
||||
git -C gitrepo fast-export --all > actual &&
|
||||
|
||||
test_cmp "$SHARNESS_TEST_DIRECTORY"/smoke-test.expected actual
|
||||
'
|
||||
|
||||
test_done
|
||||
1
t/smoke-test.tagsmap
Normal file
1
t/smoke-test.tagsmap
Normal file
@@ -0,0 +1 @@
|
||||
"2019 Spring R2"="2019_Spring_R2"
|
||||
0
tests/__init__.py
Normal file
0
tests/__init__.py
Normal file
223
tests/test_drop_plugin.py
Normal file
223
tests/test_drop_plugin.py
Normal file
@@ -0,0 +1,223 @@
|
||||
import sys, os, subprocess
|
||||
from tempfile import TemporaryDirectory
|
||||
from unittest import TestCase
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
class CommitDropTest(TestCase):
|
||||
def test_drop_single_commit_by_hash(self):
|
||||
hash1 = self.create_commit('commit 1')
|
||||
self.create_commit('commit 2')
|
||||
|
||||
self.drop(hash1)
|
||||
|
||||
self.assertEqual(['commit 2'], self.git.log())
|
||||
|
||||
def test_drop_commits_by_desc(self):
|
||||
self.create_commit('commit 1 is good')
|
||||
self.create_commit('commit 2 is bad')
|
||||
self.create_commit('commit 3 is good')
|
||||
self.create_commit('commit 4 is bad')
|
||||
|
||||
self.drop('.*bad')
|
||||
|
||||
expected = ['commit 1 is good', 'commit 3 is good']
|
||||
self.assertEqual(expected, self.git.log())
|
||||
|
||||
def test_drop_sequential_commits_in_single_plugin_instance(self):
|
||||
self.create_commit('commit 1')
|
||||
hash2 = self.create_commit('commit 2')
|
||||
hash3 = self.create_commit('commit 3')
|
||||
hash4 = self.create_commit('commit 4')
|
||||
self.create_commit('commit 5')
|
||||
|
||||
self.drop(','.join((hash2, hash3, hash4)))
|
||||
|
||||
expected = ['commit 1', 'commit 5']
|
||||
self.assertEqual(expected, self.git.log())
|
||||
|
||||
def test_drop_sequential_commits_in_multiple_plugin_instances(self):
|
||||
self.create_commit('commit 1')
|
||||
hash2 = self.create_commit('commit 2')
|
||||
hash3 = self.create_commit('commit 3')
|
||||
hash4 = self.create_commit('commit 4')
|
||||
self.create_commit('commit 5')
|
||||
|
||||
self.drop(hash2, hash3, hash4)
|
||||
|
||||
expected = ['commit 1', 'commit 5']
|
||||
self.assertEqual(expected, self.git.log())
|
||||
|
||||
def test_drop_nonsequential_commits(self):
|
||||
self.create_commit('commit 1')
|
||||
hash2 = self.create_commit('commit 2')
|
||||
self.create_commit('commit 3')
|
||||
hash4 = self.create_commit('commit 4')
|
||||
|
||||
self.drop(','.join((hash2, hash4)))
|
||||
|
||||
expected = ['commit 1', 'commit 3']
|
||||
self.assertEqual(expected, self.git.log())
|
||||
|
||||
def test_drop_head(self):
|
||||
self.create_commit('first')
|
||||
self.create_commit('middle')
|
||||
hash_last = self.create_commit('last')
|
||||
|
||||
self.drop(hash_last)
|
||||
|
||||
self.assertEqual(['first', 'middle'], self.git.log())
|
||||
|
||||
def test_drop_merge_commit(self):
    """Dropping a merge commit makes its child inherit both merge parents."""
    # Build two branches off the initial commit, then merge them.
    root = self.create_commit('initial')
    self.create_commit('branch A')
    self.hg.checkout(root)
    self.create_commit('branch B')
    self.hg.merge()
    dropped_merge = self.create_commit('merge to drop')
    self.create_commit('last')

    self.drop(dropped_merge)

    self.assertEqual(
        ['initial', 'branch A', 'branch B', 'last'],
        self.git.log(),
    )
    # With the merge gone, 'last' must list both branch tips as parents.
    self.assertEqual(['branch B', 'branch A'], self.git_parents('last'))
|
||||
|
||||
def test_drop_different_commits_in_multiple_plugin_instances(self):
    """Two specs (a description pattern and a commit id) each drop a commit."""
    self.create_commit('good commit')
    unwanted = self.create_commit('bad commit')
    self.create_commit('awful commit')
    self.create_commit('another good commit')

    # First spec targets 'awful commit' by description, second targets
    # 'bad commit' by the id returned when it was created.
    self.drop('^awful.*', unwanted)

    kept = ['good commit', 'another good commit']
    self.assertEqual(kept, self.git.log())
|
||||
|
||||
def test_drop_same_commit_in_multiple_plugin_instances(self):
    """Two specs that both select the same commit still export cleanly."""
    self.create_commit('good commit')
    unwanted = self.create_commit('bad commit')
    self.create_commit('another good commit')

    # The description pattern and the id both point at 'bad commit'.
    self.drop('^bad.*', unwanted)

    kept = ['good commit', 'another good commit']
    self.assertEqual(kept, self.git.log())
|
||||
|
||||
def setUp(self):
    """Create a scratch directory holding a fresh hg repo and git repo.

    Also prepares the export driver that converts from the former into
    the latter.
    """
    self.tempdir = TemporaryDirectory()
    scratch = Path(self.tempdir.name)

    self.hg = HgDriver(scratch / 'hgrepo')
    self.hg.init()

    self.git = GitDriver(scratch / 'gitrepo')
    self.git.init()

    self.export = ExportDriver(self.hg.repodir, self.git.repodir)
|
||||
|
||||
def tearDown(self):
    """Remove the scratch directory held in ``self.tempdir``."""
    self.tempdir.cleanup()
|
||||
|
||||
def create_commit(self, message):
    """Change the test file and commit it to the hg repo with *message*.

    The file content is derived from the message, so every distinct
    message produces a distinct change. Returns whatever
    ``self.hg.commit`` returns for the new commit.
    """
    content = 'Data for %r.' % message
    self.write_file_data(content)
    return self.hg.commit(message)
|
||||
|
||||
def write_file_data(self, data, filename='test_file.txt'):
    """Overwrite *filename* inside the hg repo with *data* plus a newline."""
    target = self.hg.repodir / filename
    with target.open('w') as handle:
        # Same text print() would emit: str(data) followed by '\n'.
        handle.write(str(data) + '\n')
|
||||
|
||||
def drop(self, *spec):
    """Run the export, forwarding each positional *spec* to ``run_with_drop``."""
    exporter = self.export
    exporter.run_with_drop(*spec)
|
||||
|
||||
def git_parents(self, message):
    """Return the subjects of the parents of the git commit matching *message*.

    Raises ``Exception`` unless exactly one commit in the git log matches.
    """
    found = self.git.grep_log(message)
    if len(found) != 1:
        raise Exception('No unique commit with message %r.' % message)

    # Only the parent hashes are needed; the commit's own subject is ignored.
    _, parent_hashes = self.git.details(found[0])
    subjects = []
    for parent_hash in parent_hashes:
        parent_subject = self.git.details(parent_hash)[0]
        subjects.append(parent_subject)
    return subjects
|
||||
|
||||
|
||||
class ExportDriver:
    """Runs ``hg-fast-export.sh`` from a source repo into a target directory."""

    def __init__(self, sourcedir, targetdir, *, quiet=True):
        """Remember the directories and locate the interpreter and script.

        sourcedir -- repository handed to the script via ``-r``.
        targetdir -- directory the script is executed in.
        quiet     -- when true, the script's stdout/stderr are discarded.
        """
        self.sourcedir = Path(sourcedir)
        self.targetdir = Path(targetdir)
        self.quiet = quiet
        # $PYTHON wins over the running interpreter; joining with the
        # current directory anchors a relative value.
        interpreter = os.environ.get('PYTHON', sys.executable)
        self.python_executable = str(Path.cwd() / interpreter)
        self.script = Path(__file__).parent / '../hg-fast-export.sh'

    def run_with_drop(self, *plugin_args):
        """Run the export with one ``--plugin drop=<arg>`` per argument.

        Raises ``subprocess.CalledProcessError`` when the script exits
        with a non-zero status.
        """
        cmd = [self.script, '-r', str(self.sourcedir)]
        for arg in plugin_args:
            cmd += ['--plugin', 'drop=' + arg]

        sink = subprocess.DEVNULL if self.quiet else None
        # NOTE(review): the child environment is replaced, not extended;
        # only PYTHON is passed on -- confirm this is intended.
        child_env = {'PYTHON': self.python_executable}
        subprocess.run(
            cmd,
            check=True,
            cwd=str(self.targetdir),
            env=child_env,
            stdout=sink,
            stderr=sink,
        )
|
||||
|
||||
|
||||
class HgDriver:
    """Thin wrapper that runs ``hg`` commands inside one repository directory."""

    def __init__(self, repodir):
        self.repodir = Path(repodir)

    def init(self):
        """Create the repository directory and run ``hg init`` in it."""
        self.repodir.mkdir()
        self.run_command('init')

    def commit(self, message):
        """Commit with ``-A`` and *message*; return the stripped ``hg id`` output."""
        self.run_command('commit', '-A', '-m', message)
        identity = self.run_command('id', '--id', '--debug')
        return identity.strip()

    def log(self):
        """Return the ``hg log`` descriptions in reverse of hg's output order."""
        listing = self.run_command('log', '-T', '{desc}\n')
        return listing.strip().splitlines()[::-1]

    def checkout(self, rev):
        """Run ``hg checkout -r`` for *rev*."""
        self.run_command('checkout', '-r', rev)

    def merge(self):
        """Run ``hg merge`` with the ``:local`` merge tool."""
        self.run_command('merge', '--tool', ':local')

    def run_command(self, *args):
        """Run ``hg -yq <args>`` in the repo directory and return its stdout.

        Raises ``subprocess.CalledProcessError`` on a non-zero exit status.
        """
        completed = subprocess.run(
            ('hg', '-yq', *args),
            cwd=str(self.repodir),
            check=True,
            text=True,
            capture_output=True,
        )
        return completed.stdout
|
||||
|
||||
|
||||
class GitDriver:
    """Thin wrapper that runs ``git`` commands inside one repository directory."""

    def __init__(self, repodir):
        self.repodir = Path(repodir)

    def init(self):
        """Create the repository directory and run ``git init`` in it."""
        self.repodir.mkdir()
        self.run_command('init')

    def log(self):
        """Return the commit subjects from ``git log --reverse``, one per entry."""
        listing = self.run_command('log', '--format=%s', '--reverse')
        return listing.strip().splitlines()

    def grep_log(self, pattern):
        """Return hashes of commits whose message contains the fixed string *pattern*."""
        listing = self.run_command(
            'log', '--format=%H', '-F', '--grep', pattern)
        return listing.strip().splitlines()

    def details(self, commit_hash):
        """Return ``(subject, [parent hashes])`` for *commit_hash*.

        The output of ``git show`` must consist of exactly two lines
        (subject, then space-separated parents); anything else makes the
        unpacking raise ``ValueError``.
        """
        shown = self.run_command(
            'show', '-s', '--format=' + '%s%n%P', commit_hash)
        subject, parent_line = shown.splitlines()
        return subject, parent_line.split()

    def run_command(self, *args):
        """Run ``git --no-pager <args>`` in the repo directory and return its stdout.

        Raises ``subprocess.CalledProcessError`` on a non-zero exit status.
        """
        completed = subprocess.run(
            ('git', '--no-pager', *args),
            cwd=str(self.repodir),
            check=True,
            text=True,
            capture_output=True,
        )
        return completed.stdout
|
||||
156
tests/test_git_lfs_importer_plugin.py
Normal file
156
tests/test_git_lfs_importer_plugin.py
Normal file
@@ -0,0 +1,156 @@
|
||||
import sys
|
||||
|
||||
sys.path.append("./plugins")
|
||||
|
||||
import hashlib
|
||||
import pathlib
|
||||
import time
|
||||
import unittest
|
||||
import tempfile
|
||||
import os
|
||||
import pathspec
|
||||
|
||||
from git_lfs_importer import Filter, build_filter
|
||||
|
||||
|
||||
class TestGitLfsImporterPlugin(unittest.TestCase):
    """Tests for ``Filter.file_data_filter`` and ``build_filter``.

    Every test runs with the working directory switched to a fresh temporary
    directory, so the relative ``.git/lfs/objects`` paths checked below
    resolve inside that directory.
    """

    def setUp(self):
        # Remember the starting directory, then work inside a throwaway one.
        self._orig_cwd = os.getcwd()
        self._tmpdir = tempfile.TemporaryDirectory()
        self.tmp_path = pathlib.Path(self._tmpdir.name)
        os.chdir(self.tmp_path)

    def tearDown(self):
        # Step out of the temporary directory before deleting it.
        os.chdir(self._orig_cwd)
        self._tmpdir.cleanup()

    def empty_spec(self):
        """Return a PathSpec built from an empty pattern list."""
        return self._spec()

    def _spec(self, *patterns):
        """Build a git-wildmatch PathSpec from the given pattern strings."""
        return pathspec.PathSpec.from_lines(
            pathspec.patterns.GitWildMatchPattern, list(patterns))

    @staticmethod
    def _lfs_object(sha):
        """Return the relative path the tests expect for the LFS object *sha*."""
        return pathlib.Path(".git/lfs/objects") / sha[:2] / sha[2:4] / sha

    # ------------------------------------------------------------------
    # Filter.file_data_filter
    # ------------------------------------------------------------------

    def test_skips_deletions(self):
        """An entry whose data is None is left untouched; no .git appears."""
        flt = Filter(self.empty_spec())
        entry = {"filename": b"file.txt", "data": None}

        flt.file_data_filter(entry)

        self.assertIsNone(entry["data"])
        self.assertFalse((self.tmp_path / ".git").exists())

    def test_skips_files_that_do_not_match_spec(self):
        """A filename outside the spec keeps its data; no .git appears."""
        flt = Filter(self._spec("*.bin"))
        payload = b"not matched"
        entry = {"filename": b"file.txt", "data": payload}

        flt.file_data_filter(entry)

        self.assertEqual(entry["data"], payload)
        self.assertFalse((self.tmp_path / ".git").exists())

    def test_converts_only_matched_files_to_lfs_pointer(self):
        """A matching file becomes an LFS pointer and its blob is stored."""
        flt = Filter(self._spec("*.bin"))
        payload = b"hello world"
        digest = hashlib.sha256(payload).hexdigest()
        pointer_text = (
            "version https://git-lfs.github.com/spec/v1\n"
            "oid sha256:%s\n"
            "size %d\n"
        ) % (digest, len(payload))
        expected_pointer = pointer_text.encode("utf-8")
        entry = {"filename": b"payload.bin", "data": payload}

        flt.file_data_filter(entry)

        self.assertEqual(entry["data"], expected_pointer)
        stored = self._lfs_object(digest)
        self.assertTrue(stored.is_file())
        self.assertEqual(stored.read_bytes(), payload)

    def test_does_not_convert_unmatched_directory(self):
        """A file outside the ``assets/**`` tree is passed through unchanged."""
        flt = Filter(self._spec("assets/**"))
        payload = b"outside directory"
        entry = {"filename": b"src/images/logo.png", "data": payload}

        flt.file_data_filter(entry)

        self.assertEqual(entry["data"], payload)
        self.assertFalse((self.tmp_path / ".git").exists())

    def test_converts_matched_directory(self):
        """A file under the ``assets/**`` tree is converted and stored."""
        flt = Filter(self._spec("assets/**"))
        payload = b"inside directory"
        digest = hashlib.sha256(payload).hexdigest()
        entry = {"filename": b"assets/images/logo.png", "data": payload}

        flt.file_data_filter(entry)

        self.assertIn(b"version https://git-lfs.github.com/spec/v1", entry["data"])
        stored = self._lfs_object(digest)
        self.assertTrue(stored.is_file())
        self.assertEqual(stored.read_bytes(), payload)

    def test_does_not_overwrite_existing_blob(self):
        """A blob already present in the LFS store keeps its mtime."""
        flt = Filter(self._spec("*.bin"))
        payload = b"abc"
        digest = hashlib.sha256(payload).hexdigest()

        # Pre-populate the LFS store with the blob the filter would write.
        stored = self._lfs_object(digest)
        stored.parent.mkdir(parents=True, exist_ok=True)
        stored.write_bytes(payload)
        mtime_before = stored.stat().st_mtime_ns
        time.sleep(0.01)  # Ensure a rewrite would yield a different timestamp

        entry = {"filename": b"abc.bin", "data": payload}

        flt.file_data_filter(entry)

        pointer_prefix = b"version https://git-lfs.github.com/spec/v1"
        self.assertTrue(entry["data"].startswith(pointer_prefix))
        mtime_after = stored.stat().st_mtime_ns
        self.assertEqual(mtime_after, mtime_before)

    def test_empty_file_converted_when_matched(self):
        """A matching zero-byte file yields a ``size 0`` pointer and empty blob."""
        flt = Filter(self._spec("*.bin"))
        payload = b""
        digest = hashlib.sha256(payload).hexdigest()
        entry = {"filename": b"empty.bin", "data": payload}

        flt.file_data_filter(entry)

        self.assertIn(b"size 0", entry["data"])
        stored = self._lfs_object(digest)
        self.assertTrue(stored.is_file())
        self.assertEqual(stored.read_bytes(), payload)

    # ------------------------------------------------------------------
    # build_filter
    # ------------------------------------------------------------------

    def test_build_filter_reads_patterns_file(self):
        """A filter built from a patterns file converts matches and skips the rest."""
        patterns_file = self.tmp_path / "lfs_patterns.txt"
        patterns_file.write_text("*.bin\nassets/**\n", encoding="utf-8")

        flt = build_filter(str(patterns_file))

        # Matching path: converted to a pointer and stored.
        matched_payload = b"match me"
        matched_digest = hashlib.sha256(matched_payload).hexdigest()
        matched_entry = {"filename": b"assets/payload.bin", "data": matched_payload}
        flt.file_data_filter(matched_entry)
        self.assertIn(b"oid sha256:", matched_entry["data"])
        self.assertTrue(self._lfs_object(matched_digest).is_file())

        # Non-matching path: data passes through unchanged.
        skipped_payload = b"skip me"
        skipped_entry = {"filename": b"docs/readme.md", "data": skipped_payload}
        flt.file_data_filter(skipped_entry)
        self.assertEqual(skipped_entry["data"], skipped_payload)
|
||||
Reference in New Issue
Block a user