Merge branch 'frej/fix-gh348'

Support Mercurial 7.2
In Mercurial 7.2 the iteritems() method of the branchmap has been removed. Switch to iterating over the branches and then fetching heads by the branchheads() method. In 7.2, a call to mercurial.initialization.init() is also needed to process a repo using the largefiles extension. Thanks to Michael Cho (@cho-m) for suggesting the initialization fix. Closes #348 Co-developed-by: Michael Cho <michael@michaelcho.dev>
2026-02-26 06:20:42 +01:00 · 2026-02-14 20:55:09 +01:00 · 2026-02-14 20:53:20 +01:00 · 2026-01-17 20:38:59 +01:00 · 2026-01-12 16:33:37 -05:00 · 2026-01-12 16:33:20 -05:00
50 changed files with 2873 additions and 341 deletions
--- a/.github/contributing.md
+++ b/.github/contributing.md
@@ -0,0 +1,28 @@
 When submitting a patch make sure the commits in your pull request:
 * Have good commit messages
  Please read Chris Beams' blog post [How to Write a Git Commit
  Message](https://chris.beams.io/posts/git-commit/) on how to write a
  good commit message. Although the article recommends at most 50
  characters for the subject, up to 72 characters are frequently
  accepted for fast-export.
 * Adhere to good [commit
 hygiene](http://www.ericbmerritt.com/2011/09/21/commit-hygiene-and-git.html)
  When developing a pull request for hg-fast-export, base your work on
  the current `master` branch and rebase your work if it no longer can
  be merged into the current `master` without conflicts. Never merge
  `master` into your development branch, rebase if your work needs
  updates from `master`.
  When a pull request is modified due to review feedback, please
  incorporate the changes into the proper commit. A good reference on
  how to modify history is in the [Pro Git book, Section
  7.6](https://git-scm.com/book/en/v2/Git-Tools-Rewriting-History).
 Please do not submit a pull request if you are not willing to spend
 the time required to address review comments or revise the patch until
 it follows the guidelines above. A _take it or leave it_ approach to
 contributing wastes both your and the maintainer's time.
--- a/.github/requirements-earliest.txt
+++ b/.github/requirements-earliest.txt
@@ -0,0 +1,4 @@
 mercurial==5.2
 # Required for git_lfs_importer plugin
 pathspec==0.11.2
--- a/.github/requirements-latest.txt
+++ b/.github/requirements-latest.txt
@@ -0,0 +1,4 @@
 mercurial
 # Required for git_lfs_importer plugin
 pathspec==0.12.1
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -0,0 +1,71 @@
 name: CI
 on:
  push:
    branches: [master]
  pull_request:
    # The branches below must be a subset of the branches above
    branches: [master]
 jobs:
  test-earliest:
    name: Run test suite on the earliest supported Python version
    runs-on: ubuntu-22.04
    steps:
      - uses: actions/checkout@v4
        name: Checkout repository
        with:
          fetch-depth: 1
          submodules: 'recursive'
      - uses: actions/setup-python@v5
        id: earliest
        with:
          python-version: '3.7.x'
          check-latest: true
          cache: 'pip'
          cache-dependency-path: '**/requirements-earliest.txt'
      - name: Install dependencies
        run: |
            python -m pip install --upgrade pip
            pip install -r .github/requirements-earliest.txt
      - name: Report selected versions
        run: |
          echo Selected '${{ steps.earliest.outputs.python-version }}'
          ./hg-fast-export.sh --debug
      - name: Run tests on earliest supported Python version
        run: make -C t
  test-latest:
    name: Run test suite on the latest supported python version
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        name: Checkout repository
        with:
          fetch-depth: 1
          submodules: 'recursive'
      - uses: actions/setup-python@v5
        id: latest
        with:
          python-version: '3.x'
          check-latest: true
          cache: 'pip'
          cache-dependency-path: '**/requirements-latest.txt'
      - name: Install dependencies
        run: |
            python -m pip install --upgrade pip
            pip install -r .github/requirements-latest.txt
      - name: Report selected version
        run: |
          echo Selected '${{ steps.latest.outputs.python-version }}'
          ./hg-fast-export.sh --debug
      - name: Run tests on 3.x
        run: make -C t
--- a/.gitmodules
+++ b/.gitmodules
@@ -0,0 +1,3 @@
 [submodule "t/sharness"]
 	path = t/sharness
 	url = https://github.com/felipec/sharness.git
--- a/README-SUBMODULES.md
+++ b/README-SUBMODULES.md
@@ -27,10 +27,10 @@ command line option.
 ## Example
-Example mercurial repo folder structure (~/mercurial):
+Example mercurial repo folder structure (~/mercurial) containing two subrepos:
    src/...
-    subrepo/subrepo1
+    subrepos/subrepo1
-    subrepo/subrepo2
+    subrepos/subrepo2
 ### Setup
 Create an empty new folder where all the converted git modules will be imported:
@@ -41,18 +41,18 @@ Create an empty new folder where all the converted git modules will be imported:
    mkdir submodule1
    cd submodule1
    git init
-    hg-fast-export.sh -r ~/mercurial/subrepo1
+    hg-fast-export.sh -r ~/mercurial/subrepos/subrepo1
    cd ..
    mkdir submodule2
    cd submodule2
    git init
-    hg-fast-export.sh -r ~/mercurial/subrepo2
+    hg-fast-export.sh -r ~/mercurial/subrepos/subrepo2
 ### Create mapping file
    cd ~/imported-gits
    cat > submodule-mappings << EOF
-    "subrepo/subrepo1"="../submodule1"
+    "subrepos/subrepo1"="../submodule1"
-    "subrepo/subrepo2"="../submodule2"
+    "subrepos/subrepo2"="../submodule2"
    EOF
 ### Convert main repository
@@ -60,16 +60,16 @@ Create an empty new folder where all the converted git modules will be imported:
    mkdir git-main-repo
    cd git-main-repo
    git init
-    hg-fast-export.sh -r ~/mercurial --subrepo-map=../submodule-mappings
+    hg-fast-export.sh -r ~/mercurial --subrepo-map=~/imported-gits/submodule-mappings
 ### Result
-The resulting repository will now contain the subrepo/subrepo1 and
+The resulting repository will now contain the submodules at the paths
-subrepo/subrepo1 submodules. The created .gitmodules file will look
+`subrepos/subrepo1` and `subrepos/subrepo2`. The created .gitmodules
-like:
+file will look like:
-    [submodule "subrepo/subrepo1"]
+    [submodule "subrepos/subrepo1"]
-          path = subrepo/subrepo1
+          path = subrepos/subrepo1
          url = ../submodule1
-    [submodule "subrepo/subrepo2"]
+    [submodule "subrepos/subrepo2"]
-          path = subrepo/subrepo2
+          path = subrepos/subrepo2
          url = ../submodule2
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-hg-fast-export.(sh|py) - mercurial to git converter using git-fast-import
+hg-fast-export.sh - mercurial to git converter using git-fast-import
 =========================================================================
 Legal
@@ -29,8 +29,8 @@ first time.
 System Requirements
 -------------------
-This project depends on Python 2.7 and the Mercurial >= 4.6
+This project depends on Python (>=3.7) and the Mercurial package (>=
-package. If Python is not installed, install it before proceeding. The
+5.2).  If Python is not installed, install it before proceeding. The
 Mercurial package can be installed with `pip install mercurial`.
 On Windows the bash that comes with "Git for Windows" is known to work
@@ -42,11 +42,10 @@ Usage
 Using hg-fast-export is quite simple for a mercurial repository <repo>:
 ```
-mkdir repo-git # or whatever
+git init repo-git # or whatever
 cd repo-git
 git init
 hg-fast-export.sh -r <local-repo>
-git checkout HEAD
+git checkout
 ```
 Please note that hg-fast-export does not automatically check out the
@@ -79,10 +78,10 @@ author information than git, an author mapping file can be given to
 hg-fast-export to fix up malformed author strings. The file is
 specified using the -A option. The file should contain lines of the
 form `"<key>"="<value>"`. Inside the key and value strings, all escape
-sequences understood by the python `string_escape` encoding are
+sequences understood by the python `unicode_escape` encoding are
-supported. (Versions of fast-export prior to v171002 had a different
+supported; strings are otherwise assumed to be UTF-8 encoded.
-syntax, the old syntax can be enabled by the flag
+(Versions of fast-export prior to v171002 had a different syntax, the
-`--mappings-are-raw`.)
+old syntax can be enabled by the flag `--mappings-are-raw`.)
 The example authors.map below will translate `User
 <garbage<tab><user@example.com>` to `User <user@example.com>`.
@@ -93,6 +92,9 @@ The example authors.map below will translate `User
 -- End of authors.map --
 ```
 If you have many Mercurial repositories, Chris J Billington's
 [hg-export-tool] allows you to batch convert them.
 Tag and Branch Naming
 ---------------------
@@ -129,10 +131,58 @@ is to convert line endings in text files from CRLF to git's preferred LF:
 # $2 = Mercurial's hash of the file
 # $3 = "1" if Mercurial reports the file as binary, otherwise "0"
-if [ "$3" == "1" ]; then cat; else dos2unix; fi
+if [ "$3" == "1" ]; then cat; else dos2unix -q; fi
 # The -q option keeps dos2unix from returning an error code when
 # it handles non-ASCII text files (such as UTF-16 encoded text
 # files)
 -- End of crlf-filter.sh --
 ```
 Mercurial Largefiles Extension
 ------------------------------
 ### Handling Mercurial Largefiles during Migration
 When migrating from Mercurial to Git, largefiles are exported as ordinary
 files by default. To ensure a successful migration and manage repository
 size, follow the requirements below.
 #### 1. Pre-Export: Ensure File Availability
 Before starting the export, you must have all largefiles from all
 Mercurial commits available locally. Use one of these methods:
 * **For a new clone:** `hg clone --all-largefiles <repo-url>`
 * **For an existing repo:** `hg lfpull --rev "all()"`
 #### 2. Choosing Your LFS Strategy
 If you want your files to be versioned in Git LFS rather than as standard
 Git blobs, you have two primary paths:
 * **[git_lfs_importer plugin](./plugins/git_lfs_importer/README.md)
  (During Conversion)**
  Recommended for large repos. This performs Just-In-Time (JIT) conversion
  by identifying large files during the export and writing LFS pointers
  immediately, skipping the need for a second pass. This also supports
  **incremental conversion**, making it much more efficient for ongoing
  migrations.
 * **[git lfs migrate import](https://github.com/git-lfs/git-lfs/blob/main/docs/man/git-lfs-migrate.adoc)
  (After Conversion)**
  A standard two-step process: first, export the full history from Mercurial
  to Git, then run a separate full history rewrite to move files into LFS.
 ### Why use the git_lfs_importer plugin?
 For "monorepos" or very large repositories (100GiB+), the traditional
 two-step process can take days. By integrating the LFS conversion
 directly into the history export, the plugin eliminates the massive
 time overhead of a secondary history rewrite and allows for incremental
 progress.
 For detailed setup, see the
 [git_lfs_importer](./plugins/git_lfs_importer/README.md)
 plugin documentation.
 Plugins
 -----------------
@@ -163,9 +213,18 @@ defined filter methods in the [dos2unix](./plugins/dos2unix) and
 [branch_name_in_commit](./plugins/branch_name_in_commit) plugins.
 ```
-commit_data = {'branch': branch, 'parents': parents, 'author': author, 'desc': desc}
+commit_data = {
  'author': author,
  'branch': branch,
  'committer': committer,
  'desc': desc,
  'extra': extra,
  'hg_hash': hg_hash,
  'parents': parents,
  'revision': revision,
 }
-def commit_message_filter(self,commit_data):
+def commit_message_filter(self, commit_data):
 ```
 The `commit_message_filter` method is called for each commit, after parsing
 from hg, but before outputting to git. The dictionary `commit_data` contains the
@@ -174,9 +233,14 @@ values in the dictionary after filters have been run are used to create the git
 commit.
 ```
-file_data = {'filename':filename,'file_ctx':file_ctx,'d':d}
+file_data = {
  'data': file_contents,
  'file_ctx': file_ctx,
  'filename': filename,
  'is_largefile': largefile_status,
 }
-def file_data_filter(self,file_data):
+def file_data_filter(self, file_data):
 ```
 The `file_data_filter` method is called for each file within each commit.
 The dictionary `file_data` contains the above attributes about the file, and
@@ -184,6 +248,17 @@ can be modified by any filter. `file_ctx` is the filecontext from the
 mercurial python library.  After all filters have been run, the values
 are used to add the file to the git commit.
 The `file_data_filter` method is also called when files are deleted,
 but in this case the `data` and `file_ctx` keys map to None. This is
 so that a filter which modifies file names can apply the same name
 transformations when files are deleted.
 The `is_largefile` entry within the `file_data` dictionary will contain
 `True` if the original file was a largefile and has been converted
 to a normal file before the plugins were invoked. In this case, the `file_ctx`
 will still point to the filecontext for the original, unconverted file, while
 `filename` and `data` will contain the already converted information.
 Submodules
 ----------
 See README-SUBMODULES.md for how to convert subrepositories into git
@@ -194,7 +269,15 @@ Notes/Limitations
 hg-fast-export supports multiple branches but only named branches with
 exactly one head each. Otherwise commits to the tip of these heads
-within the branch will get flattened into merge commits.
+within the branch will get flattened into merge commits. There are a
 few options to deal with this:
 1. Chris J Billington's [hg-export-tool] can help you to handle branches with
   duplicate heads.
 2. Use the [head2branch plugin](./plugins/head2branch) to create a new named
   branch from an unnamed head.
 3. You can ignore unnamed heads with the `--ignore-unnamed-heads` option, which
   is appropriate in situations such as the extra heads being close commits
   (abandoned, unmerged changes).
 hg-fast-export will ignore any files or directories tracked by mercurial
 called `.git`, and will print a warning if it encounters one. Git cannot
@@ -213,8 +296,8 @@ possible to use hg-fast-export on remote repositories
 Design
 ------
-hg-fast-export.py was designed in a way that doesn't require a 2-pass
+hg-fast-export was designed in a way that doesn't require a 2-pass
-mechanism or any prior repository analysis: if just feeds what it
+mechanism or any prior repository analysis: it just feeds what it
 finds into git-fast-import. This also implies that it heavily relies
 on strictly linear ordering of changesets from hg, i.e. its
 append-only storage model so that changesets hg-fast-export already
@@ -223,15 +306,37 @@ saw never get modified.
 Submitting Patches
 ------------------
-Please use the [issue-tracker](https://github.com/frej/fast-export) at
+Please create a pull request at
-github to report bugs and submit patches.
+[GitHub](https://github.com/frej/fast-export/pulls) to submit patches.
-Please read
+When submitting a patch make sure the commits in your pull request:
-[https://chris.beams.io/posts/git-commit/](https://chris.beams.io/posts/git-commit/)
+
-on how to write a good commit message before submitting a pull request
+* Have good commit messages
-for review. Although the article recommends at most 50 characters for
+
-the subject, up to 72 characters are frequently accepted for
+  Please read Chris Beams' blog post [How to Write a Git Commit
-fast-export.
+  Message](https://chris.beams.io/posts/git-commit/) on how to write a
  good commit message. Although the article recommends at most 50
  characters for the subject, up to 72 characters are frequently
  accepted for fast-export.
 * Adhere to good [commit
 hygiene](http://www.ericbmerritt.com/2011/09/21/commit-hygiene-and-git.html)
  When developing a pull request for hg-fast-export, base your work on
  the current `master` branch and rebase your work if it no longer can
  be merged into the current `master` without conflicts. Never merge
  `master` into your development branch, rebase if your work needs
  updates from `master`.
  When a pull request is modified due to review feedback, please
  incorporate the changes into the proper commit. A good reference on
  how to modify history is in the [Pro Git book, Section
  7.6](https://git-scm.com/book/en/v2/Git-Tools-Rewriting-History).
 Please do not submit a pull request if you are not willing to spend
 the time required to address review comments or revise the patch until
 it follows the guidelines above. A _take it or leave it_ approach to
 contributing wastes both your and the maintainer's time.
 Frequent Problems
 =================
@@ -274,3 +379,12 @@ Frequent Problems
  By design fast export does not touch your working directory, so to
  git it looks like you have deleted all files, when in fact they have
  never been checked out. Just do a checkout of the branch you want.
 * `Error: repository has at least one unnamed head: hg r<N>`
  By design, hg-fast-export cannot deal with extra heads on a branch.
  There are a few options depending on whether the extra heads are
  in-use/open or normally closed. See the
  [Notes/Limitations](#noteslimitations) section for more details.
 [hg-export-tool]: https://github.com/chrisjbillington/hg-export-tool
--- a/hg-fast-export.py
+++ b/hg-fast-export.py
@@ -1,28 +1,21 @@
-#!/usr/bin/env python2
+#!/usr/bin/env python3
 # Copyright (c) 2007, 2008 Rocco Rutte <pdmef@gmx.net> and others.
 # Copyright (c) 2025 Siemens
 # License: MIT <http://www.opensource.org/licenses/mit-license.php>
 from mercurial import node
 from mercurial.scmutil import revsymbol
 from hg2git import setup_repo,fixup_user,get_branch,get_changeset
 from hg2git import load_cache,save_cache,get_git_sha1,set_default_branch,set_origin_name
 from optparse import OptionParser
 import re
 import sys
 import os
 from binascii import hexlify
 import pluginloader
-
+from hgext.largefiles import lfutil
 if sys.platform == "win32":
  # On Windows, sys.stdout is initially opened in text mode, which means that
  # when a LF (\n) character is written to sys.stdout, it will be converted
  # into CRLF (\r\n).  That makes git blow up, so use this platform-specific
  # code to change the mode of sys.stdout to binary.
  import msvcrt
  msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
 # silly regex to catch Signed-off-by lines in log message
-sob_re=re.compile('^Signed-[Oo]ff-[Bb]y: (.+)$')
+sob_re=re.compile(b'^Signed-[Oo]ff-[Bb]y: (.+)$')
 # insert 'checkpoint' command after this many commits or none at all if 0
 cfg_checkpoint_count=0
 # write some progress message every this many file contents written
@@ -36,63 +29,43 @@ submodule_mappings=None
 auto_sanitize = None
 def gitmode(flags):
-  return 'l' in flags and '120000' or 'x' in flags and '100755' or '100644'
+  return b'l' in flags and b'120000' or b'x' in flags and b'100755' or b'100644'
-def wr_no_nl(msg=''):
+def wr_no_nl(msg=b''):
  assert isinstance(msg, bytes)
  if msg:
-    sys.stdout.write(msg)
+    sys.stdout.buffer.write(msg)
-def wr(msg=''):
+def wr(msg=b''):
-  wr_no_nl(msg)
+  wr_no_nl(msg + b'\n')
  sys.stdout.write('\n')
  #map(lambda x: sys.stderr.write('\t[%s]\n' % x),msg.split('\n'))
 def wr_data(data):
  """Write a `data <length>` header line and then `data`, both via wr()."""
  header = b'data %d' % len(data)
  wr(header)
  wr(data)
 def checkpoint(count):
  count=count+1
  if cfg_checkpoint_count>0 and count%cfg_checkpoint_count==0:
-    sys.stderr.write("Checkpoint after %d commits\n" % count)
+    sys.stderr.buffer.write(b"Checkpoint after %d commits\n" % count)
-    wr('checkpoint')
+    wr(b'checkpoint')
    wr()
  return count
 def revnum_to_revref(rev, old_marks):
  """Convert an hg revnum to a git-fast-import rev reference (an SHA1
  or a mark)"""
-  return old_marks.get(rev) or ':%d' % (rev+1)
+  return old_marks.get(rev) or b':%d' % (rev+1)
-def file_mismatch(f1,f2):
+def get_filechanges(repo,revision,parents,files):
  """See if two revisions of a file are not equal."""
  return node.hex(f1)!=node.hex(f2)
 def split_dict(dleft,dright,l=[],c=[],r=[],match=file_mismatch):
  """Loop over our repository and find all changed and missing files."""
  for left in dleft.keys():
    right=dright.get(left,None)
    if right==None:
      # we have the file but our parent hasn't: add to left set
      l.append(left)
    elif match(dleft[left],right) or gitmode(dleft.flags(left))!=gitmode(dright.flags(left)):
      # we have it but checksums mismatch: add to center set
      c.append(left)
  for right in dright.keys():
    left=dleft.get(right,None)
    if left==None:
      # if parent has file but we don't: add to right set
      r.append(right)
    # change is already handled when comparing child against parent
  return l,c,r
 def get_filechanges(repo,revision,parents,mleft):
  """Given some repository and revision, find all changed/deleted files."""
-  l,c,r=[],[],[]
+  if not parents:
-  for p in parents:
+    # first revision: feed in full manifest
-    if p<0: continue
+    return files,[]
-    mright=revsymbol(repo,str(p)).manifest()
+  else:
-    l,c,r=split_dict(mleft,mright,l,c,r)
+    # take the changes from the first parent
-  l.sort()
+    f=repo.status(parents[0],revision)
-  c.sort()
+    return f.modified+f.added,f.removed
  r.sort()
  return l,c,r
 def get_author(logmessage,committer,authors):
  """As git distincts between author and committer of a patch, try to
@@ -110,7 +83,7 @@ def get_author(logmessage,committer,authors):
  "Signed-off-by: foo" and thus matching our detection regex. Prevent
  that."""
-  loglines=logmessage.split('\n')
+  loglines=logmessage.split(b'\n')
  i=len(loglines)
  # from tail walk to top skipping empty lines
  while i>=0:
@@ -138,23 +111,23 @@ def remove_gitmodules(ctx):
  # be to only remove the submodules of the first parent.
  for parent_ctx in ctx.parents():
    for submodule in parent_ctx.substate.keys():
-      wr('D %s' % submodule)
+      wr(b'D %s' % submodule)
-  wr('D .gitmodules')
+  wr(b'D .gitmodules')
 def refresh_git_submodule(name,subrepo_info):
-  wr('M 160000 %s %s' % (subrepo_info[1],name))
+  wr(b'M 160000 %s %s' % (subrepo_info[1],name))
-  sys.stderr.write("Adding/updating submodule %s, revision %s\n"
+  sys.stderr.buffer.write(
-                   % (name,subrepo_info[1]))
+    b"Adding/updating submodule %s, revision %s\n" % (name, subrepo_info[1])
-  return '[submodule "%s"]\n\tpath = %s\n\turl = %s\n' % (name,name,
+  )
-    subrepo_info[0])
+  return b'[submodule "%s"]\n\tpath = %s\n\turl = %s\n' % (name, name, subrepo_info[0])
 def refresh_hg_submodule(name,subrepo_info):
-  gitRepoLocation=submodule_mappings[name] + "/.git"
+  gitRepoLocation=submodule_mappings[name] + b"/.git"
  # Populate the cache to map mercurial revision to git revision
  if not name in subrepo_cache:
-    subrepo_cache[name]=(load_cache(gitRepoLocation+"/hg2git-mapping"),
+    subrepo_cache[name]=(load_cache(gitRepoLocation+b"/hg2git-mapping"),
-                         load_cache(gitRepoLocation+"/hg2git-marks",
+                         load_cache(gitRepoLocation+b"/hg2git-marks",
                                    lambda s: int(s)-1))
  (mapping_cache,marks_cache)=subrepo_cache[name]
@@ -162,71 +135,110 @@ def refresh_hg_submodule(name,subrepo_info):
  if subrepo_hash in mapping_cache:
    revnum=mapping_cache[subrepo_hash]
    gitSha=marks_cache[int(revnum)]
-    wr('M 160000 %s %s' % (gitSha,name))
+    wr(b'M 160000 %s %s' % (gitSha,name))
-    sys.stderr.write("Adding/updating submodule %s, revision %s->%s\n"
+    sys.stderr.buffer.write(
-                     % (name,subrepo_hash,gitSha))
+      b"Adding/updating submodule %s, revision %s->%s\n"
-    return '[submodule "%s"]\n\tpath = %s\n\turl = %s\n' % (name,name,
+      % (name, subrepo_hash, gitSha)
    )
    return b'[submodule "%s"]\n\tpath = %s\n\turl = %s\n' % (name,name,
      submodule_mappings[name])
  else:
-    sys.stderr.write("Warning: Could not find hg revision %s for %s in git %s\n" %
+    sys.stderr.buffer.write(
-      (subrepo_hash,name,gitRepoLocation))
+      b"Warning: Could not find hg revision %s for %s in git %s\n"
-    return ''
+      % (subrepo_hash, name, gitRepoLocation,)
    )
    return b''
 def refresh_gitmodules(ctx):
  """Updates list of ctx submodules according to .hgsubstate file"""
  remove_gitmodules(ctx)
-  gitmodules=""
+  gitmodules=b""
  # Create the .gitmodules file and all submodules
  for name,subrepo_info in ctx.substate.items():
-    if subrepo_info[2]=='git':
+    if subrepo_info[2]==b'git':
      gitmodules+=refresh_git_submodule(name,subrepo_info)
    elif submodule_mappings and name in submodule_mappings:
      gitmodules+=refresh_hg_submodule(name,subrepo_info)
  if len(gitmodules):
-    wr('M 100644 inline .gitmodules')
+    wr(b'M 100644 inline .gitmodules')
-    wr('data %d' % (len(gitmodules)+1))
+    wr_data(gitmodules)
-    wr(gitmodules)
+
 def is_largefile(filename):
  """Return True when `filename` begins with the `.hglf/` prefix."""
  marker = b'.hglf/'
  return filename[:len(marker)] == marker
 def largefile_orig_name(filename):
  """Return `filename` without its first six bytes (the length of `.hglf/`)."""
  prefix_len = len(b'.hglf/')
  return filename[prefix_len:]
 def largefile_data(ctx, file, filename):
  """Return the contents of the large file that `file` refers to in `ctx`.

  The data stored for `file` in `ctx`, with surrounding newlines stripped,
  is treated as a hash and handed to lfutil.findfile() together with
  ctx.repo(). The path that call returns is opened in binary mode and its
  full contents are returned. If findfile() returns None, instructions are
  written to stderr and the process exits with status 3. The `filename`
  argument is not used by this function.
  """
  lf_file_ctx=ctx.filectx(file)
  # the tracked file's data is the hash used to look up the real content
  lf_hash=lf_file_ctx.data().strip(b'\n')
  sys.stderr.write("Detected large file hash %s\n" % lf_hash.decode())
  #should detect where the large files are located
  file_with_data = lfutil.findfile(ctx.repo(), lf_hash)
  if file_with_data is None:
    # Autodownloading from the mercurial repository would be an issue as there
    # is a good chance that we may need to input some username and password.
    # This will surely break fast-export as there will be some unexpected
    # output.
    sys.stderr.write("Large file wasn't found in local cache.\n")
    sys.stderr.write("Please clone with --all-largefiles\n")
    sys.stderr.write("or pull all large files with 'hg lfpull --rev "
            "\"all()\"'\n")
    # closing in the middle of import will revert everything to the last checkpoint
    sys.exit(3)
  # read as raw bytes; the caller receives the file content unmodified
  with open(os.path.normpath(file_with_data), 'rb') as file_with_data_handle:
    return file_with_data_handle.read()
 def export_file_contents(ctx,manifest,files,hgtags,encoding='',plugins={}):
  count=0
  max=len(files)
  is_submodules_refreshed=False
  for file in files:
-    if not is_submodules_refreshed and (file=='.hgsub' or file=='.hgsubstate'):
+    if not is_submodules_refreshed and (file==b'.hgsub' or file==b'.hgsubstate'):
      is_submodules_refreshed=True
      refresh_gitmodules(ctx)
    # Skip .hgtags files. They only get us in trouble.
-    if not hgtags and file == ".hgtags":
+    if not hgtags and file == b".hgtags":
-      sys.stderr.write('Skip %s\n' % (file))
+      sys.stderr.buffer.write(b'Skip %s\n' % file)
      continue
    if encoding:
      filename=file.decode(encoding).encode('utf8')
    else:
      filename=file
-    if '.git' in filename.split(os.path.sep):
+    if b'.git' in filename.split(b'/'): # Even on Windows, the path separator is / here.
-      sys.stderr.write('Ignoring file %s which cannot be tracked by git\n' % filename)
+      sys.stderr.buffer.write(
        b'Ignoring file %s which cannot be tracked by git\n' % filename
      )
      continue
    largefile = False
    file_ctx=ctx.filectx(file)
    if is_largefile(filename):
      largefile = True
      filename = largefile_orig_name(filename)
      d = largefile_data(ctx, file, filename)
    else:
      d=file_ctx.data()
    if plugins and plugins['file_data_filters']:
-      file_data = {'filename':filename,'file_ctx':file_ctx,'data':d}
+      file_data = {'filename':filename,'file_ctx':file_ctx,'data':d, 'is_largefile':largefile}
      for filter in plugins['file_data_filters']:
        filter(file_data)
      d=file_data['data']
      filename=file_data['filename']
      file_ctx=file_data['file_ctx']
-    wr('M %s inline %s' % (gitmode(manifest.flags(file)),
+    if d is not None:
      wr(b'M %s inline %s' % (gitmode(manifest.flags(file)),
                             strip_leading_slash(filename)))
-    wr('data %d' % len(d)) # had some trouble with size()
+      wr(b'data %d' % len(d)) # had some trouble with size()
      wr(d)
      count+=1
      if count%cfg_export_boundary==0:
-      sys.stderr.write('Exported %d/%d files\n' % (count,max))
+        sys.stderr.buffer.write(b'Exported %d/%d files\n' % (count,max))
  if max>cfg_export_boundary:
-    sys.stderr.write('Exported %d/%d files\n' % (count,max))
+    sys.stderr.buffer.write(b'Exported %d/%d files\n' % (count,max))
 def sanitize_name(name,what="branch", mapping={}):
  """Sanitize input roughly according to git-check-ref-format(1)"""
@@ -246,164 +258,172 @@ def sanitize_name(name,what="branch", mapping={}):
  def dot(name):
    if not name: return name
-    if name[0] == '.': return '_'+name[1:]
+    if name[0:1] == b'.': return b'_'+name[1:]
    return name
  if not auto_sanitize:
    return mapping.get(name,name)
  n=mapping.get(name,name)
-  p=re.compile('([[ ~^:?\\\\*]|\.\.)')
+  p=re.compile(b'([\\[ ~^:?\\\\*]|\\.\\.)')
-  n=p.sub('_', n)
+  n=p.sub(b'_', n)
-  if n[-1] in ('/', '.'): n=n[:-1]+'_'
+  if n[-1:] in (b'/', b'.'): n=n[:-1]+b'_'
-  n='/'.join(map(dot,n.split('/')))
+  n=b'/'.join([dot(s) for s in n.split(b'/')])
-  p=re.compile('_+')
+  p=re.compile(b'_+')
-  n=p.sub('_', n)
+  n=p.sub(b'_', n)
  if n!=name:
-    sys.stderr.write('Warning: sanitized %s [%s] to [%s]\n' % (what,name,n))
+    sys.stderr.buffer.write(
      b'Warning: sanitized %s [%s] to [%s]\n' % (what.encode(), name, n)
    )
  return n
 def strip_leading_slash(filename):
   """Return the bytes path *filename* without one leading ``/``.

   A one-element slice is compared instead of ``filename[0]``: on Python 3
   indexing bytes yields an int (which never equals ``b'/'``), and the slice
   also leaves an empty name untouched instead of raising IndexError.
   """
   if filename[0:1] == b'/':
     return filename[1:]
   return filename
 def export_commit(ui,repo,revision,old_marks,max,count,authors,
                  branchesmap,sob,brmap,hgtags,encoding='',fn_encoding='',
-                  plugins={}):
+                  first_commit_hash="",plugins={}):
  def get_branchname(name):
-    if brmap.has_key(name):
+    if name in brmap:
      return brmap[name]
    n=sanitize_name(name, "branch", branchesmap)
    brmap[name]=n
    return n
-  (revnode,_,user,(time,timezone),files,desc,branch,_)=get_changeset(ui,repo,revision,authors,encoding)
+  ctx=repo[revision]
-  if repo[revnode].hidden():
+
  if ctx.hidden():
    return count
  (_,user,(time,timezone),files,desc,branch,extra)=get_changeset(ui,repo,revision,authors,encoding)
  branch=get_branchname(branch)
  parents = [p for p in repo.changelog.parentrevs(revision) if p >= 0]
  author = get_author(desc,user,authors)
  hg_hash=ctx.hex()
  if plugins and plugins['commit_message_filters']:
-    commit_data = {'branch': branch, 'parents': parents, 'author': author, 'desc': desc}
+    commit_data = {'branch': branch, 'parents': parents,
                   'author': author, 'desc': desc,
                   'revision': revision, 'hg_hash': hg_hash,
                   'committer': user, 'extra': extra}
    for filter in plugins['commit_message_filters']:
      filter(commit_data)
    branch = commit_data['branch']
    parents = commit_data['parents']
    author = commit_data['author']
    user = commit_data['committer']
    desc = commit_data['desc']
  if len(parents)==0 and revision != 0:
-    wr('reset refs/heads/%s' % branch)
+    wr(b'reset refs/heads/%s' % branch)
-  wr('commit refs/heads/%s' % branch)
+  wr(b'commit refs/heads/%s' % branch)
-  wr('mark :%d' % (revision+1))
+  wr(b'mark :%d' % (revision+1))
  if sob:
-    wr('author %s %d %s' % (author,time,timezone))
+    wr(b'author %s %d %s' % (author,time,timezone))
-  wr('committer %s %d %s' % (user,time,timezone))
+  wr(b'committer %s %d %s' % (user,time,timezone))
-  wr('data %d' % (len(desc)+1)) # wtf?
+  wr_data(desc + b'\n')
  wr(desc)
  wr()
  ctx=revsymbol(repo,str(revision))
  man=ctx.manifest()
  added,changed,removed,type=[],[],[],''
-  if len(parents) == 0:
+  if not parents:
    # first revision: feed in full manifest
    added=man.keys()
    added.sort()
    type='full'
    if revision == 0 and first_commit_hash:
      wr(b'from %s' % first_commit_hash.encode())
      type='simple delta'
  else:
-    wr('from %s' % revnum_to_revref(parents[0], old_marks))
+    wr(b'from %s' % revnum_to_revref(parents[0], old_marks))
    if len(parents) == 1:
      # later non-merge revision: feed in changed manifest
      # if we have exactly one parent, just take the changes from the
      # manifest without expensively comparing checksums
      f=repo.status(parents[0],revnode)
      added,changed,removed=f.added,f.modified,f.removed
      type='simple delta'
    else: # a merge with two parents
-      wr('merge %s' % revnum_to_revref(parents[1], old_marks))
+      wr(b'merge %s' % revnum_to_revref(parents[1], old_marks))
      # later merge revision: feed in changed manifest
      # for many files comparing checksums is expensive so only do it for
      # merges where we really need it due to hg's revlog logic
      added,changed,removed=get_filechanges(repo,revision,parents,man)
      type='thorough delta'
-  sys.stderr.write('%s: Exporting %s revision %d/%d with %d/%d/%d added/changed/removed files\n' %
+  modified,removed=get_filechanges(repo,revision,parents,files)
      (branch,type,revision+1,max,len(added),len(changed),len(removed)))
-  for filename in removed:
+  sys.stderr.buffer.write(
    b'%s: Exporting %s revision %d/%d with %d/%d modified/removed files\n'
    % (branch, type.encode(), revision + 1, max, len(modified), len(removed))
  )
  for file in removed:
    if fn_encoding:
-      filename=filename.decode(fn_encoding).encode('utf8')
+      filename=file.decode(fn_encoding).encode('utf8')
-    filename=strip_leading_slash(filename)
+    else:
-    if filename=='.hgsub':
+      filename=file
      remove_gitmodules(ctx)
    wr('D %s' % filename)
-  export_file_contents(ctx,man,added,hgtags,fn_encoding,plugins)
+    if plugins and plugins['file_data_filters']:
-  export_file_contents(ctx,man,changed,hgtags,fn_encoding,plugins)
+      file_data = {'filename':filename, 'file_ctx':None, 'data':None}
      for filter in plugins['file_data_filters']:
        filter(file_data)
      filename=file_data['filename']
    filename=strip_leading_slash(filename)
    if filename==b'.hgsub':
      remove_gitmodules(ctx)
    if is_largefile(filename):
      filename=largefile_orig_name(filename)
    wr(b'D %s' % filename)
  export_file_contents(ctx,man,modified,hgtags,fn_encoding,plugins)
  wr()
  return checkpoint(count)
 def export_note(ui,repo,revision,count,authors,encoding,is_first):
-  (revnode,_,user,(time,timezone),_,_,_,_)=get_changeset(ui,repo,revision,authors,encoding)
+  ctx = repo[revision]
-  if repo[revnode].hidden():
+
  if ctx.hidden():
    return count
-  parents = [p for p in repo.changelog.parentrevs(revision) if p >= 0]
+  (_,user,(time,timezone),_,_,_,_)=get_changeset(ui,repo,revision,authors,encoding)
-  wr('commit refs/notes/hg')
+  wr(b'commit refs/notes/hg')
-  wr('committer %s %d %s' % (user,time,timezone))
+  wr(b'committer %s %d %s' % (user,time,timezone))
-  wr('data 0')
+  wr(b'data 0')
  if is_first:
-    wr('from refs/notes/hg^0')
+    wr(b'from refs/notes/hg^0')
-  wr('N inline :%d' % (revision+1))
+  wr(b'N inline :%d' % (revision+1))
-  hg_hash=revsymbol(repo,str(revision)).hex()
+  hg_hash=ctx.hex()
-  wr('data %d' % (len(hg_hash)))
+  wr_data(hg_hash)
  wr_no_nl(hg_hash)
  wr()
  return checkpoint(count)
  wr('data %d' % (len(desc)+1)) # wtf?
  wr(desc)
  wr()
 def export_tags(ui,repo,old_marks,mapping_cache,count,authors,tagsmap):
   """Write a fast-import ``reset refs/tags/<tag>`` for each Mercurial tag.

   Tag names are passed through sanitize_name() with *tagsmap*. The ``tip``
   pseudo-tag, tags whose node is not in *mapping_cache*, and tags for which
   revnum_to_revref() yields no reference are skipped (the latter two with a
   message on stderr). *count* is threaded through checkpoint() after every
   exported tag and the updated value is returned. *ui* and *authors* are
   accepted for signature compatibility and are not used here.
   """
   for tag,node in repo.tagslist():
     # Apply the user's tag mapping / sanitizing to the tag name
     tag=sanitize_name(tag,"tag",tagsmap)
     # ignore latest revision
     if tag==b'tip': continue
     # ignore tags to nodes that are missing (ie, 'in the future')
     hex_node=hexlify(node)
     if hex_node not in mapping_cache:
       sys.stderr.buffer.write(b'Tag %s refers to unseen node %s\n' % (tag, hex_node))
       continue

     rev=int(mapping_cache[hex_node])

     ref=revnum_to_revref(rev, old_marks)
     if ref is None:
       sys.stderr.buffer.write(
         b'Failed to find reference for creating tag %s at r%d\n' % (tag, rev)
       )
       continue
     sys.stderr.buffer.write(b'Exporting tag [%s] at [hg r%d] [git %s]\n' % (tag, rev, ref))
     wr(b'reset refs/tags/%s' % tag)
     wr(b'from %s' % ref)
     wr()
     count=checkpoint(count)
   return count
 def load_mapping(name, filename, mapping_is_raw):
-  raw_regexp=re.compile('^([^=]+)[ ]*=[ ]*(.+)$')
+  raw_regexp=re.compile(b'^([^=]+)[ ]*=[ ]*(.+)$')
-  string_regexp='"(((\\.)|(\\")|[^"])*)"'
+  string_regexp=b'"(((\\.)|(\\")|[^"])*)"'
-  quoted_regexp=re.compile('^'+string_regexp+'[ ]*=[ ]*'+string_regexp+'$')
+  quoted_regexp=re.compile(b'^'+string_regexp+b'[ ]*=[ ]*'+string_regexp+b'$')
  def parse_raw_line(line):
    m=raw_regexp.match(line)
@@ -411,26 +431,40 @@ def load_mapping(name, filename, mapping_is_raw):
      return None
    return (m.group(1).strip(), m.group(2).strip())
  def process_unicode_escape_sequences(s):
    """Expand backslash escape sequences inside the UTF-8 bytestring *s*.

    The 'unicode-escape' codec only gives the intended result on pure-ASCII
    input, so every non-ASCII character is first rewritten as its own escape
    sequence; the final result is re-encoded as UTF-8 bytes.
    """
    text = s.decode('utf8')
    # Non-ASCII characters become \xNN / \uNNNN / \UNNNNNNNN escapes here ...
    ascii_only = text.encode('ascii', 'backslashreplace')
    # ... and are restored, together with the escapes the user wrote, here.
    expanded = ascii_only.decode('unicode-escape')
    return expanded.encode('utf8')
  def parse_quoted_line(line):
    m=quoted_regexp.match(line)
    if m==None:
-      return None
+      return
-    return (m.group(1).decode('string_escape'),
+
-            m.group(5).decode('string_escape'))
+    return (process_unicode_escape_sequences(m.group(1)),
            process_unicode_escape_sequences(m.group(5)))
  cache={}
  if not os.path.exists(filename):
    sys.stderr.write('Could not open mapping file [%s]\n' % (filename))
    return cache
-  f=open(filename,'r')
+  f=open(filename,'rb')
  l=0
  a=0
  for line in f.readlines():
    l+=1
    line=line.strip()
-    if l==1 and line[0]=='#' and line=='# quoted-escaped-strings':
+    if l==1 and line[0:1]==b'#' and line==b'# quoted-escaped-strings':
      continue
-    elif line=='' or line[0]=='#':
+    elif line==b'' or line[0:1]==b'#':
      continue
    m=parse_raw_line(line) if mapping_is_raw else parse_quoted_line(line)
    if m==None:
@@ -452,9 +486,11 @@ def branchtip(repo, heads):
      break
  return tip
-def verify_heads(ui,repo,cache,force,branchesmap):
+def verify_heads(ui,repo,cache,force,ignore_unnamed_heads,branchesmap):
  branches={}
-  for bn, heads in repo.branchmap().iteritems():
+
  for bn in repo.branchmap():
    heads = repo.branchmap().branchheads(bn)
    branches[bn] = branchtip(repo, heads)
  l=[(-repo.changelog.rev(n), n, t) for t, n in branches.items()]
  l.sort()
@@ -465,26 +501,38 @@ def verify_heads(ui,repo,cache,force,branchesmap):
    sanitized_name=sanitize_name(b,"branch",branchesmap)
    sha1=get_git_sha1(sanitized_name)
    c=cache.get(sanitized_name)
-    if sha1!=c:
+    if not c and sha1:
-      sys.stderr.write('Error: Branch [%s] modified outside hg-fast-export:'
+      sys.stderr.buffer.write(
-        '\n%s (repo) != %s (cache)\n' % (b,sha1,c))
+        b'Error: Branch [%s] already exists and was not created by hg-fast-export, '
        b'export would overwrite unrelated branch\n' % b)
      if not force: return False
    elif sha1!=c:
      sys.stderr.buffer.write(
        b'Error: Branch [%s] modified outside hg-fast-export:'
        b'\n%s (repo) != %s (cache)\n' % (b, b'<None>' if sha1 is None else sha1, c)
      )
      if not force: return False
  # verify that branch has exactly one head
  t={}
-  for h in repo.filtered('visible').heads():
+  unnamed_heads=False
-    (_,_,_,_,_,_,branch,_)=get_changeset(ui,repo,h)
+  for h in repo.filtered(b'visible').heads():
    branch=get_branch(repo[h].branch())
    if t.get(branch,False):
-      sys.stderr.write('Error: repository has at least one unnamed head: hg r%s\n' %
+      sys.stderr.buffer.write(
-          repo.changelog.rev(h))
+        b'Error: repository has an unnamed head: hg r%d\n'
-      if not force: return False
+        % repo.changelog.rev(h)
      )
      unnamed_heads=True
      if not force and not ignore_unnamed_heads: return False
    t[branch]=True
-
+  if unnamed_heads and not force and not ignore_unnamed_heads: return False
  return True
 def hg2git(repourl,m,marksfile,mappingfile,headsfile,tipfile,
           authors={},branchesmap={},tagsmap={},
-           sob=False,force=False,hgtags=False,notes=False,encoding='',fn_encoding='',
+           sob=False,force=False,ignore_unnamed_heads=False,hgtags=False,
           notes=False,encoding='',fn_encoding='',first_commit_hash='',
           plugins={}):
  def check_cache(filename, contents):
    if len(contents) == 0:
@@ -500,12 +548,12 @@ def hg2git(repourl,m,marksfile,mappingfile,headsfile,tipfile,
  if len(state_cache) != 0:
    for (name, data) in [(marksfile, old_marks),
                         (mappingfile, mapping_cache),
-                         (headsfile, state_cache)]:
+                         (headsfile, heads_cache)]:
      check_cache(name, data)
  ui,repo=setup_repo(repourl)
-  if not verify_heads(ui,repo,heads_cache,force,branchesmap):
+  if not verify_heads(ui,repo,heads_cache,force,ignore_unnamed_heads,branchesmap):
    return 1
  try:
@@ -513,26 +561,26 @@ def hg2git(repourl,m,marksfile,mappingfile,headsfile,tipfile,
  except AttributeError:
    tip=len(repo)
-  min=int(state_cache.get('tip',0))
+  min=int(state_cache.get(b'tip',0))
  max=_max
  if _max<0 or max>tip:
    max=tip
  for rev in range(0,max):
-  	(revnode,_,_,_,_,_,_,_)=get_changeset(ui,repo,rev,authors)
+    ctx=repo[rev]
-  	if repo[revnode].hidden():
+    if ctx.hidden():
      continue
-  	mapping_cache[revnode.encode('hex_codec')] = str(rev)
+    mapping_cache[ctx.hex()] = b"%d" % rev
  if submodule_mappings:
-    # Make sure that all submodules are registered in the submodule-mappings file
+    # Make sure that all mercurial submodules are registered in the submodule-mappings file
    for rev in range(0,max):
-      ctx=revsymbol(repo,str(rev))
+      ctx=repo[rev]
      if ctx.hidden():
        continue
      if ctx.substate:
        for key in ctx.substate:
-          if key not in submodule_mappings:
+          if ctx.substate[key][2]=='hg' and key not in submodule_mappings:
            sys.stderr.write("Error: %s not found in submodule-mappings\n" % (key))
            return 1
@@ -540,14 +588,14 @@ def hg2git(repourl,m,marksfile,mappingfile,headsfile,tipfile,
  brmap={}
  for rev in range(min,max):
    c=export_commit(ui,repo,rev,old_marks,max,c,authors,branchesmap,
-                    sob,brmap,hgtags,encoding,fn_encoding,
+                    sob,brmap,hgtags,encoding,fn_encoding,first_commit_hash,
                    plugins)
  if notes:
    for rev in range(min,max):
      c=export_note(ui,repo,rev,c,authors, encoding, rev == min and min != 0)
-  state_cache['tip']=max
+  state_cache[b'tip']=max
-  state_cache['repo']=repourl
+  state_cache[b'repo']=repourl
  save_cache(tipfile,state_cache)
  save_cache(mappingfile,mapping_cache)
@@ -591,7 +639,9 @@ if __name__=='__main__':
  parser.add_option("-T","--tags",dest="tagsfile",
      help="Read tags map from TAGSFILE")
  parser.add_option("-f","--force",action="store_true",dest="force",
-      default=False,help="Ignore validation errors by force")
+      default=False,help="Ignore validation errors by force, implies --ignore-unnamed-heads")
  parser.add_option("--ignore-unnamed-heads",action="store_true",dest="ignore_unnamed_heads",
      default=False,help="Ignore unnamed head errors")
  parser.add_option("-M","--default-branch",dest="default_branch",
      help="Set the default branch")
  parser.add_option("-o","--origin",dest="origin_name",
@@ -612,6 +662,8 @@ if __name__=='__main__':
      help="Add a plugin with the given init string <name=init>")
  parser.add_option("--subrepo-map", type="string", dest="subrepo_map",
      help="Provide a mapping file between the subrepository name and the submodule name")
  parser.add_option("--first-commit-hash", type="string", dest="first_commit_hash",
      help="Allow importing into an existing git repository by specifying the hash of the first commit")
  (options,args)=parser.parse_args()
@@ -687,6 +739,9 @@ if __name__=='__main__':
  sys.exit(hg2git(options.repourl,m,options.marksfile,options.mappingfile,
                  options.headsfile, options.statusfile,
                  authors=a,branchesmap=b,tagsmap=t,
-                  sob=options.sob,force=options.force,hgtags=options.hgtags,
+                  sob=options.sob,force=options.force,
                  ignore_unnamed_heads=options.ignore_unnamed_heads,
                  hgtags=options.hgtags,
                  notes=options.notes,encoding=encoding,fn_encoding=fn_encoding,
                  first_commit_hash=options.first_commit_hash,
                  plugins=plugins_dict))
--- a/hg-fast-export.sh
+++ b/hg-fast-export.sh
@@ -28,29 +28,32 @@ SFX_STATE="state"
 GFI_OPTS=""
 if [ -z "${PYTHON}" ]; then
-    # $PYTHON is not set, so we try to find a working python 2.7 to
+    # $PYTHON is not set, so we try to find a working python with mercurial:
-    # use. PEP 394 tells us to use 'python2', otherwise try plain
+    for python_cmd in python3 python; do
-    # 'python'.
+        if command -v $python_cmd > /dev/null; then
-    if command -v python2 > /dev/null; then
+            $python_cmd -c 'from mercurial.scmutil import revsymbol' 2> /dev/null
-	PYTHON="python2"
+            if [ $? -eq 0 ]; then
-    elif command -v python > /dev/null; then
+                PYTHON=$python_cmd
-	PYTHON="python"
+                break
    else
        echo "Could not find any python interpreter, please use the 'PYTHON'" \
 	     "environment variable to specify the interpreter to use."
        exit 1
            fi
        fi
    done
 fi
-
+if [ -z "${PYTHON}" ]; then
-# Check that the python specified by the user or autodetected above is
+    echo "Could not find a python interpreter with the mercurial module >= 4.6 available. " \
-# >= 2.7 and < 3.
+        "Please use the 'PYTHON' environment variable to specify the interpreter to use."
 if ! ${PYTHON} -c 'import sys; v=sys.version_info; exit(0 if v.major == 2 and v.minor >= 7 else 1)' > /dev/null 2>&1 ; then
    echo "${PYTHON} is not a working python 2.7 interpreter, please use the" \
 	 "'PYTHON' environment variable to specify the interpreter to use."
    exit 1
 fi
-USAGE="[--quiet] [-r <repo>] [--force] [-m <max>] [-s] [--hgtags] [-A <file>] [-B <file>] [-T <file>] [-M <name>] [-o <name>] [--hg-hash] [-e <encoding>]"
+"${PYTHON}" -c 'import sys; exit(sys.version_info.major==3 and sys.version_info.minor >= 7)'
 if [ $? -eq 0 ]; then
    echo "Could not find an interpreter for a supported Python version (>= 3.7)" \
        "Please use the 'PYTHON' environment variable to specify the interpreter to use."
    exit 1
 fi
 USAGE="[--quiet] [-r <repo>] [--force] [--ignore-unnamed-heads] [-m <max>] [-s] [--hgtags] [-A <file>] [-B <file>] [-T <file>] [-M <name>] [-o <name>] [--hg-hash] [-e <encoding>]"
 LONG_USAGE="Import hg repository <repo> up to either tip or <max>
 If <repo> is omitted, use last hg repository as obtained from state file,
 GIT_DIR/$PFX-$SFX_STATE by default.
@@ -84,6 +87,8 @@ Options:
 	                         with <file-path> <hg-hash> <is-binary> as arguments
 	--plugin <plugin=init>  Add a plugin with the given init string (repeatable)
 	--plugin-path <plugin-path> Add an additional plugin lookup path
  --first-commit-hash <git-commit-hash>  Use the given git commit hash as the
                                          first commit's parent (for grafting)
 "
 case "$1" in
    -h|--help)
@@ -91,6 +96,14 @@ case "$1" in
      echo ""
      echo "$LONG_USAGE"
      exit 0
      ;;
    --debug)
      echo -n "Using Python: "
      "${PYTHON}" --version
      echo -n "Using Mercurial: "
      hg --version
      exit 0
 esac
 IS_BARE=$(git rev-parse --is-bare-repository) \
--- a/hg-reset.py
+++ b/hg-reset.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 # Copyright (c) 2007, 2008 Rocco Rutte <pdmef@gmx.net> and others.
 # License: GPLv2
@@ -7,6 +7,7 @@ from mercurial import node
 from hg2git import setup_repo,load_cache,get_changeset,get_git_sha1
 from optparse import OptionParser
 import sys
 from binascii import hexlify
 def heads(ui,repo,start=None,stop=None,max=None):
  # this is copied from mercurial/revlog.py and differs only in
@@ -24,7 +25,7 @@ def heads(ui,repo,start=None,stop=None,max=None):
  heads = {startrev: 1}
  parentrevs = repo.changelog.parentrevs
-  for r in xrange(startrev + 1, max):
+  for r in range(startrev + 1, max):
    for p in parentrevs(r):
      if p in reachable:
        if r not in stoprevs:
@@ -33,7 +34,7 @@ def heads(ui,repo,start=None,stop=None,max=None):
      if p in heads and p not in stoprevs:
        del heads[p]
-  return [(repo.changelog.node(r),str(r)) for r in heads]
+  return [(repo.changelog.node(r), b"%d" % r) for r in heads]
 def get_branches(ui,repo,heads_cache,marks_cache,mapping_cache,max):
  h=heads(ui,repo,max=max)
@@ -44,11 +45,11 @@ def get_branches(ui,repo,heads_cache,marks_cache,mapping_cache,max):
    _,_,user,(_,_),_,desc,branch,_=get_changeset(ui,repo,rev)
    del stale[branch]
    git_sha1=get_git_sha1(branch)
-    cache_sha1=marks_cache.get(str(int(rev)+1))
+    cache_sha1=marks_cache.get(b"%d" % (int(rev)+1))
    if git_sha1!=None and git_sha1==cache_sha1:
-      unchanged.append([branch,cache_sha1,rev,desc.split('\n')[0],user])
+      unchanged.append([branch,cache_sha1,rev,desc.split(b'\n')[0],user])
    else:
-      changed.append([branch,cache_sha1,rev,desc.split('\n')[0],user])
+      changed.append([branch,cache_sha1,rev,desc.split(b'\n')[0],user])
  changed.sort()
  unchanged.sort()
  return stale,changed,unchanged
@@ -57,20 +58,20 @@ def get_tags(ui,repo,marks_cache,mapping_cache,max):
  l=repo.tagslist()
  good,bad=[],[]
  for tag,node in l:
-    if tag=='tip': continue
+    if tag==b'tip': continue
-    rev=int(mapping_cache[node.encode('hex_codec')])
+    rev=int(mapping_cache[hexlify(node)])
-    cache_sha1=marks_cache.get(str(int(rev)+1))
+    cache_sha1=marks_cache.get(b"%d" % (int(rev)+1))
    _,_,user,(_,_),_,desc,branch,_=get_changeset(ui,repo,rev)
    if int(rev)>int(max):
-      bad.append([tag,branch,cache_sha1,rev,desc.split('\n')[0],user])
+      bad.append([tag,branch,cache_sha1,rev,desc.split(b'\n')[0],user])
    else:
-      good.append([tag,branch,cache_sha1,rev,desc.split('\n')[0],user])
+      good.append([tag,branch,cache_sha1,rev,desc.split(b'\n')[0],user])
  good.sort()
  bad.sort()
  return good,bad
 def mangle_mark(mark):
   """Return *mark* minus one, formatted as decimal ASCII bytes.

   The exporter writes marks as ``revision+1``, so this maps a mark back to
   the revision number. Bytes are returned so the result compares equal to
   keys and values read from the cache files, which are opened in binary
   mode.
   """
   return b"%d" % (int(mark)-1)
 if __name__=='__main__':
  def bail(parser,opt):
@@ -107,7 +108,7 @@ if __name__=='__main__':
  state_cache=load_cache(options.statusfile)
  mapping_cache = load_cache(options.mappingfile)
-  l=int(state_cache.get('tip',options.revision))
+  l=int(state_cache.get(b'tip',options.revision))
  if options.revision+1>l:
    sys.stderr.write('Revision is beyond last revision imported: %d>%d\n' % (options.revision,l))
    sys.exit(1)
@@ -117,19 +118,39 @@ if __name__=='__main__':
  stale,changed,unchanged=get_branches(ui,repo,heads_cache,marks_cache,mapping_cache,options.revision+1)
  good,bad=get_tags(ui,repo,marks_cache,mapping_cache,options.revision+1)
-  print "Possibly stale branches:"
+  print("Possibly stale branches:")
-  map(lambda b: sys.stdout.write('\t%s\n' % b),stale.keys())
+  for b in stale:
    sys.stdout.write('\t%s\n' % b.decode('utf8'))
-  print "Possibly stale tags:"
+  print("Possibly stale tags:")
-  map(lambda b: sys.stdout.write('\t%s on %s (r%s)\n' % (b[0],b[1],b[3])),bad)
+  for b in bad:
    sys.stdout.write(
      '\t%s on %s (r%s)\n'
      % (b[0].decode('utf8'), b[1].decode('utf8'), b[3].decode('utf8'))
    )
-  print "Unchanged branches:"
+  print("Unchanged branches:")
-  map(lambda b: sys.stdout.write('\t%s (r%s)\n' % (b[0],b[2])),unchanged)
+  for b in unchanged:
    sys.stdout.write('\t%s (r%s)\n' % (b[0].decode('utf8'),b[2].decode('utf8')))
-  print "Unchanged tags:"
+  print("Unchanged tags:")
-  map(lambda b: sys.stdout.write('\t%s on %s (r%s)\n' % (b[0],b[1],b[3])),good)
+  for b in good:
    sys.stdout.write(
      '\t%s on %s (r%s)\n'
      % (b[0].decode('utf8'), b[1].decode('utf8'), b[3].decode('utf8'))
    )
-  print "Reset branches in '%s' to:" % options.headsfile
+  print("Reset branches in '%s' to:" % options.headsfile)
-  map(lambda b: sys.stdout.write('\t:%s %s\n\t\t(r%s: %s: %s)\n' % (b[0],b[1],b[2],b[4],b[3])),changed)
+  for b in changed:
    sys.stdout.write(
      '\t:%s %s\n\t\t(r%s: %s: %s)\n'
      % (
        b[0].decode('utf8'),
        b[1].decode('utf8'),
        b[2].decode('utf8'),
        b[4].decode('utf8'),
        b[3].decode('utf8'),
      )
    )
-  print "Reset ':tip' in '%s' to '%d'" % (options.statusfile,options.revision)
+  print("Reset ':tip' in '%s' to '%d'" % (options.statusfile,options.revision))
--- a/hg-reset.sh
+++ b/hg-reset.sh
@@ -11,7 +11,24 @@ SFX_MAPPING="mapping"
 SFX_HEADS="heads"
 SFX_STATE="state"
 QUIET=""
-PYTHON=${PYTHON:-python}
+
 if [ -z "${PYTHON}" ]; then
    # $PYTHON is not set, so we try to find a working python with mercurial.
    # Only Python 3 interpreters are probed: hg-reset.py and hg2git.py no
    # longer run under Python 2, so picking python2 would only defer the
    # failure. The order matches hg-fast-export.sh.
    for python_cmd in python3 python; do
        if command -v "$python_cmd" > /dev/null; then
            if "$python_cmd" -c 'import mercurial' 2> /dev/null; then
                PYTHON=$python_cmd
                break
            fi
        fi
    done
 fi
 if [ -z "${PYTHON}" ]; then
    echo "Could not find a python interpreter with the mercurial module available." \
        "Please use the 'PYTHON' environment variable to specify the interpreter to use."
    exit 1
 fi
 USAGE="[-r <repo>] -R <rev>"
 LONG_USAGE="Print SHA1s of latest changes per branch up to <rev> useful
--- a/hg2git.py
+++ b/hg2git.py
@@ -1,11 +1,11 @@
-#!/usr/bin/env python2
+#!/usr/bin/env python3
 # Copyright (c) 2007, 2008 Rocco Rutte <pdmef@gmx.net> and others.
 # License: MIT <http://www.opensource.org/licenses/mit-license.php>
 from mercurial import hg,util,ui,templatefilters
 from mercurial import error as hgerror
-from mercurial.scmutil import revsymbol,binnode
+from mercurial.scmutil import binnode
 import re
 import os
@@ -13,47 +13,55 @@ import sys
 import subprocess
 # default git branch name
-cfg_master='master'
+cfg_master=b'master'
 # default origin name
-origin_name=''
+origin_name=b''
 # silly regex to see if user field has email address
-user_re=re.compile('([^<]+) (<[^>]*>)$')
+user_re=re.compile(b'([^<]+) (<[^>]*>)$')
 # silly regex to clean out user names
-user_clean_re=re.compile('^["]([^"]+)["]$')
+user_clean_re=re.compile(b'^["]([^"]+)["]$')
 def set_default_branch(name):
   """Set the default git branch name (module global ``cfg_master``).

   *name* is a str (it comes from the command line); it is stored UTF-8
   encoded because branch names are handled as bytes throughout.
   """
   global cfg_master
   cfg_master = name.encode('utf8')
 def set_origin_name(name):
   """Set the origin prefix (module global ``origin_name``).

   *name* is a str; it is stored UTF-8 encoded so that it can be joined with
   bytes branch names.
   """
   global origin_name
   origin_name = name.encode('utf8')
 def setup_repo(url):
   """Open the Mercurial repository at *url*.

   Returns a ``(ui, repo)`` tuple where *repo* is the unfiltered view of the
   repository. *url* is a str and is converted with os.fsencode() because the
   Mercurial API takes bytes paths.
   """
   try:
     # Mercurial >= 7.2 requires explicit initialization for largefile
     # support to work.
     from mercurial import initialization
     initialization.init()
   except ImportError:
     # Mercurial versions without the initialization module: nothing to do
     pass

   try:
     myui=ui.ui(interactive=False)
   except TypeError:
     # ui.ui() does not accept the keyword here; configure it afterwards
     myui=ui.ui()
     myui.setconfig(b'ui', b'interactive', b'off')
     # Avoids a warning when the repository has obsolete markers
     myui.setconfig(b'experimental', b'evolution.createmarkers', True)
   return myui,hg.repository(myui, os.fsencode(url)).unfiltered()
 def fixup_user(user,authors):
-  user=user.strip("\"")
+  user=user.strip(b"\"")
  if authors!=None:
    # if we have an authors table, try to get mapping
    # by defaulting to the current value of 'user'
    user=authors.get(user,user)
-  name,mail,m='','',user_re.match(user)
+  name,mail,m=b'',b'',user_re.match(user)
  if m==None:
    # if we don't have 'Name <mail>' syntax, extract name
    # and mail from hg helpers. this seems to work pretty well.
    # if email doesn't contain @, replace it with devnull@localhost
    name=templatefilters.person(user)
-    mail='<%s>' % templatefilters.email(user)
+    mail=b'<%s>' % templatefilters.email(user)
-    if '@' not in mail:
+    if b'@' not in mail:
-      mail = '<devnull@localhost>'
+      mail = b'<devnull@localhost>'
  else:
    # if we have 'Name <mail>' syntax, everything is fine :)
    name,mail=m.group(1),m.group(2)
@@ -62,34 +70,25 @@ def fixup_user(user,authors):
  m2=user_clean_re.match(name)
  if m2!=None:
    name=m2.group(1)
-  return '%s %s' % (name,mail)
+  return b'%s %s' % (name,mail)
 def get_branch(name):
   """Map the hg branch name *name* (bytes) to the git branch name (bytes).

   ``HEAD``, ``default`` and the empty name become the configured default
   branch ``cfg_master``; when ``origin_name`` is non-empty the result is
   prefixed with ``<origin_name>/``.
   """
   # 'HEAD' is the result of a bug in mutt's cvs->hg conversion,
   # other CVS imports may need it, too
   if name in (b'HEAD', b'default', b''):
     name=cfg_master
   if origin_name:
     return origin_name + b'/' + name
   return name
 def _git_timezone(timezone):
   """Format an hg timezone offset as git's ``+HHMM`` / ``-HHMM`` bytes.

   *timezone* is negated before formatting, as Mercurial's offset has the
   opposite sign to the one git expects. Hours and minutes are derived from
   the absolute value: floor division on a negative offset would round the
   hour away from zero and turn e.g. UTC-03:30 into ``-0430``.
   """
   offset = -timezone
   sign = b'-' if offset < 0 else b'+'
   hours, minutes = divmod(abs(offset) // 60, 60)
   return b'%s%02d%02d' % (sign, hours, minutes)

 def get_changeset(ui,repo,revision,authors={},encoding=''):
   """Read changeset *revision* from *repo* and prepare it for export.

   Returns ``(manifest, user, (time, tz), files, desc, branch, extra)`` where
   *user* has been passed through fixup_user() with *authors*, *tz* is the
   ``+HHMM`` bytes form of the changeset's timezone and *branch* is the
   result of get_branch() on the ``branch`` entry of *extra* (empty when
   absent). If *encoding* is set, user and description are transcoded from
   that encoding to UTF-8. *ui* is unused.
   """
   (manifest,user,(time,timezone),files,desc,extra)=repo.changelog.read(revision)
   if encoding:
     user=user.decode(encoding).encode('utf8')
     desc=desc.decode(encoding).encode('utf8')
   tz=_git_timezone(timezone)
   branch=get_branch(extra.get(b'branch', b''))
   return (manifest,fixup_user(user,authors),(time,tz),files,desc,branch,extra)
 def mangle_key(key):
  return key
@@ -98,29 +97,35 @@ def load_cache(filename,get_key=mangle_key):
  cache={}
  if not os.path.exists(filename):
    return cache
-  f=open(filename,'r')
+  f=open(filename,'rb')
  l=0
  for line in f.readlines():
    l+=1
-    fields=line.split(' ')
+    fields=line.split(b' ')
-    if fields==None or not len(fields)==2 or fields[0][0]!=':':
+    if fields==None or not len(fields)==2 or fields[0][0:1]!=b':':
      sys.stderr.write('Invalid file format in [%s], line %d\n' % (filename,l))
      continue
    # put key:value in cache, key without ^:
-    cache[get_key(fields[0][1:])]=fields[1].split('\n')[0]
+    cache[get_key(fields[0][1:])]=fields[1].split(b'\n')[0]
  f.close()
  return cache
 def save_cache(filename,cache):
   """Write *cache* to *filename* as ``:<key> <value>`` lines.

   The file is written in binary mode. Keys and values that are not already
   bytes (e.g. an int revision number or a str repository path) are
   converted with str() and encoded as UTF-8. The file is closed even if a
   write fails.
   """
   with open(filename,'wb') as f:
     for key, value in cache.items():
       if not isinstance(key, bytes):
         key = str(key).encode('utf8')
       if not isinstance(value, bytes):
         value = str(value).encode('utf8')
       f.write(b':%s %s\n' % (key, value))
 def get_git_sha1(name,type='heads'):
  try:
    # use git-rev-parse to support packed refs
-    ref="refs/%s/%s" % (type,name)
+    ref="refs/%s/%s" % (type,name.decode('utf8'))
-    l=subprocess.check_output(["git", "rev-parse", "--verify", "--quiet", ref])
+    l=subprocess.check_output(["git", "rev-parse", "--verify",
                               "--quiet", ref.encode('utf8')])
    if l == None or len(l) == 0:
      return None
    return l[0:40]
--- a/pluginloader/init.py
+++ b/pluginloader/init.py
@@ -1,19 +1,23 @@
 import os
-import imp
+import importlib.machinery
 import importlib.util
 PluginFolder = os.path.join(os.path.dirname(os.path.realpath(__file__)),"..","plugins")
 MainModule = "__init__"
 def get_plugin(name, plugin_path):
-    search_dirs = [PluginFolder]
+    search_dirs = [PluginFolder, '.']
    if plugin_path:
        search_dirs = [plugin_path] + search_dirs
    for dir in search_dirs:
        location = os.path.join(dir, name)
        if not os.path.isdir(location) or not MainModule + ".py" in os.listdir(location):
            continue
-        info = imp.find_module(MainModule, [location])
+        spec = importlib.machinery.PathFinder.find_spec(MainModule, [location])
-        return {"name": name, "info": info, "path": location}
+        return {"name": name, "spec": spec, "path": location}
    raise Exception("Could not find plugin with name " + name)
 def load_plugin(plugin):
-    return imp.load_module(MainModule, *plugin["info"])
+    spec = plugin["spec"]
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)
    return module
--- a/plugins/branch_name_in_commit/init.py
+++ b/plugins/branch_name_in_commit/init.py
@@ -15,9 +15,11 @@ class Filter:
            raise ValueError("Unknown args: " + ','.join(args))
    def commit_message_filter(self, commit_data):
-        if not (self.skip_master and commit_data['branch'] == 'master'):
+        if not (self.skip_master and commit_data['branch'] == b'master'):
            if self.start:
-                sep = ': ' if self.sameline else '\n' 
+                sep = b': ' if self.sameline else b'\n'
                commit_data['desc'] = commit_data['branch'] + sep + commit_data['desc']
            if self.end:
-                commit_data['desc'] = commit_data['desc'] + '\n' + commit_data['branch']
+                commit_data['desc'] = (
                    commit_data['desc'] + b'\n' + commit_data['branch']
                )
--- a/plugins/dos2unix/init.py
+++ b/plugins/dos2unix/init.py
@@ -6,6 +6,8 @@ class Filter():
        pass
    def file_data_filter(self,file_data):
        if file_data['file_ctx'] == None:
            return
        file_ctx = file_data['file_ctx']
        if not file_ctx.isbinary():
-            file_data['data'] = file_data['data'].replace('\r\n', '\n')
+            file_data['data'] = file_data['data'].replace(b'\r\n', b'\n')
--- a/plugins/drop/README.md
+++ b/plugins/drop/README.md
@@ -0,0 +1,12 @@
 ## Drop commits from output
 To use the plugin, add the command line flag `--plugin drop=<spec>`.
 The flag can be given multiple times to drop more than one commit.
 The <spec> value can be either
 - a comma-separated list of hg hashes in the full form (40
   hexadecimal characters) to drop the corresponding changesets, or
 - a regular expression pattern to drop all changesets with matching
   descriptions.
--- a/plugins/drop/init.py
+++ b/plugins/drop/init.py
@@ -0,0 +1,61 @@
 from __future__ import print_function
 import sys, re
 def build_filter(args):
    """Create the drop filter that fits the form of the plugin argument.
    A comma-separated list of 40-digit hexadecimal hashes is handed to
    RevisionIdFilter; any other argument is handed to DescriptionFilter.
    """
    # Every hash has to be followed by a comma or by the end of the string.
    is_hash_list = re.match(r'([A-Fa-f0-9]{40}(,|$))+$', args)
    if not is_hash_list:
        return DescriptionFilter(args)
    return RevisionIdFilter(args.split(','))
 def log(fmt, *args):
    """Print the %-formatted message on its own line to stderr and flush it."""
    message = fmt % args
    print(message, file=sys.stderr)
    sys.stderr.flush()
 class FilterBase(object):
    """Shared machinery for filters that drop changesets.
    Subclasses override should_drop_commit(); this class rewires the
    parents of later commits around every revision that was dropped.
    """
    def __init__(self):
        # Dropped revision -> the (already remapped) parents it had.
        self.remapped_parents = {}
    def commit_message_filter(self, commit_data):
        """Remap the parents of the commit and detach it if it is dropped."""
        revision = commit_data['revision']
        # A parent that was dropped earlier is replaced by its own parents.
        new_parents = []
        for parent in commit_data['parents']:
            new_parents.extend(self.remapped_parents.get(parent, [parent]))
        commit_data['parents'] = new_parents
        if not self.should_drop_commit(commit_data):
            return
        log('Dropping revision %i.', revision)
        self.remapped_parents[revision] = new_parents
        # Head commits cannot be dropped because they have no
        # children, so detach them to a separate branch.
        commit_data['branch'] = b'dropped-hg-head'
        commit_data['parents'] = []
    def should_drop_commit(self, commit_data):
        """Tell whether the commit is to be dropped; the base class never drops."""
        return False
 class RevisionIdFilter(FilterBase):
    """Drop the changesets whose full hg hash was listed by the user."""
    def __init__(self, revision_hash_list):
        """Remember the hashes to drop.
        revision_hash_list: iterable of 40-digit hexadecimal strings; empty
        entries (e.g. from a trailing comma) are ignored.
        """
        super(RevisionIdFilter, self).__init__()
        # build_filter accepts upper-case hex digits, but the hashes are
        # compared verbatim with commit_data['hg_hash'], which is presumably
        # lower-case hex as reported by Mercurial (confirm against the
        # caller). Normalise the case so that such input can match.
        self.unwanted_hg_hashes = {h.lower().encode('ascii', 'strict')
                                   for h in revision_hash_list if h}
    def should_drop_commit(self, commit_data):
        """Return True if the hg hash of the commit is one of the unwanted ones."""
        return commit_data['hg_hash'] in self.unwanted_hg_hashes
 class DescriptionFilter(FilterBase):
    """Drop the changesets whose description matches a regular expression."""
    def __init__(self, pattern):
        """Compile the pattern given on the command line.
        pattern: regular expression as text. It is encoded as UTF-8, like the
        arguments of the other plugins, so a pattern with non-ASCII
        characters no longer fails with UnicodeEncodeError.
        """
        super(DescriptionFilter, self).__init__()
        self.pattern = re.compile(pattern.encode('utf8'))
    def should_drop_commit(self, commit_data):
        """Return True if the pattern matches at the start of the description."""
        # re.match anchors at the beginning of the (bytes) description.
        return self.pattern.match(commit_data['desc']) is not None
--- a/plugins/git_lfs_importer/README.md
+++ b/plugins/git_lfs_importer/README.md
@@ -0,0 +1,218 @@
 # git_lfs_importer Plugin
 This plugin automatically converts matching files to use Git LFS
 (Large File Storage) during the Mercurial to Git conversion process.
 ## Overview
 The git_lfs_importer plugin intercepts file data during the hg-fast-export
 process and converts files matching specified patterns into Git LFS pointers.
 This allows you to seamlessly migrate a Mercurial repository to Git while
 simultaneously adopting LFS for large files.
 Why use git_lfs_importer?
 For large repositories, traditional migration requires two sequential,
 long-running steps:
 1. Full history conversion from Mercurial to Git.
 2. Full history rewrite using git lfs import.
 This two-step process can take hours or even days for massive
 monorepos (e.g., 100GiB+).
 This plugin eliminates the second, time-consuming history rewrite. It performs
 the LFS conversion incrementally (Just-In-Time). During the initial export, the
 plugin identifies large files and immediately writes LFS pointers into the Git
 history. This results in significantly faster conversions and allows for
 efficient incremental imports of new changesets.
 ## Prerequisites
 ### Dependencies
 This plugin requires the `pathspec` package:
 ```bash
 pip install pathspec
 ```
 ### Git Repository Setup
 The destination Git repository must be pre-initialized with:
 1. A `.gitattributes` file configured for LFS tracking
 2. Git LFS properly installed and initialized
 Example `.gitattributes`:
 ```
 *.bin filter=lfs diff=lfs merge=lfs -text
 *.iso filter=lfs diff=lfs merge=lfs -text
 large_files/** filter=lfs diff=lfs merge=lfs -text
 ```
 ## Usage
 ### Step 1: Create the Destination Git Repository
 ```bash
 # Create a new git repository
 git init my-repo
 cd my-repo
 # Initialize Git LFS
 git lfs install
 # Create and commit a .gitattributes file
 cat > .gitattributes << EOF
 *.bin filter=lfs diff=lfs merge=lfs -text
 *.iso filter=lfs diff=lfs merge=lfs -text
 EOF
 git add .gitattributes
 git commit -m "Initialize Git LFS configuration"
 # Get the commit hash (needed for --first-commit-hash)
 git rev-parse HEAD
 ```
 ### Step 2: Create an LFS Specification File
 Create a file (e.g., `lfs-spec.txt`) listing the patterns of files to convert
 to LFS. This uses gitignore-style glob patterns:
 ```
 *.bin
 *.iso
 *.tar.gz
 large_files/**
 *.mp4
 ```
 ### Step 3: Run hg-fast-export with the Plugin
 ```bash
 hg-fast-export.sh \
  -r <mercurial-repo-path> \
  --plugin git_lfs_importer=lfs-spec.txt \
  --first-commit-hash <git-commit-hash> \
  --force
 ```
 Replace `<git-commit-hash>` with the hash obtained from Step 1.
 ## How It Works
 1. **Pattern Matching**: Files are matched against patterns in the
   LFS specification file using gitignore-style matching
 2. **File Processing**: For each matching file:
   - Calculates SHA256 hash of the file content
   - Stores the actual file content in `.git/lfs/objects/<hash-prefix>/<hash>`
   - Replaces the file data with an LFS pointer containing:
     - LFS version specification
     - SHA256 hash of the original content
     - Original file size
 3. **Git Fast-Import**: The LFS pointer is committed instead of the actual
   file content
 ## Important Notes
 ### First Commit Hash Requirement
 The `--first-commit-hash` option must be provided with the Git commit hash that
 contains your `.gitattributes` file. This allows the plugin to chain from the
 existing Git history rather than creating a completely new history.
 ### Deletions
 The plugin safely handles file deletions (data=None) and does not process them.
 ### Large Files and Largefiles
 If the Mercurial repository uses Mercurial's largefiles extension, those files
 are already converted to their original content before reaching this plugin,
 allowing the plugin to apply LFS conversion if they match the patterns.
 ## Example Workflow
 ```bash
 # Configuration variables
 HG_REPO=/path/to/mercurial/repo
 GIT_DIR_NAME=my-project-git
 LFS_PATTERN_FILE=../lfs-patterns.txt
 # 1. Prepare destination git repo
 mkdir "$GIT_DIR_NAME"
 cd "$GIT_DIR_NAME"
 git init
 git lfs install
 # Create .gitattributes
 cat > .gitattributes << EOF
 *.bin filter=lfs diff=lfs merge=lfs -text
 *.iso filter=lfs diff=lfs merge=lfs -text
 EOF
 git add .gitattributes
 git commit -m "Add LFS configuration"
 FIRST_HASH=$(git rev-parse HEAD)
 # 2. Create LFS patterns file
 cat > "$LFS_PATTERN_FILE" << EOF
 *.bin
 *.iso
 build/artifacts/**
 EOF
 # 3. Run conversion
 /path/to/hg-fast-export.sh \
  -r "$HG_REPO" \
  --plugin "git_lfs_importer=$LFS_PATTERN_FILE" \
  --first-commit-hash $FIRST_HASH \
  --force
 # 4. Verify
 git log --oneline
 git lfs ls-files
 ```
 ## Troubleshooting
 ### LFS Files Not Tracked
 Verify that:
 - The `.gitattributes` file exists in the destination repository
 - Patterns in `.gitattributes` match the files being converted
 - `git lfs install` was run in the repository
 ### "pathspec" Module Not Found
 Install the required dependency:
 ```bash
 pip install pathspec
 ```
 ### Conversion Fails at Import
 Ensure the `--first-commit-hash` value is:
 - A valid commit hash in the destination repository
 - From a commit that exists before the conversion starts
 - The hash of the commit containing `.gitattributes`
 ### Force Requirement
 You only need to pass the `--force` option when converting the *first*
 Mercurial commit into a non-empty Git repository. By default, `hg-fast-export`
 prevents importing Mercurial commits onto a non-empty Git repo to avoid
 creating conflicting histories. Passing `--force` overrides that safety check
 and allows the exporter to write the LFS pointer objects and integrate the
 converted data with the existing Git history.
 If you are doing an incremental conversion (i.e., running the script a second
 time to import new changesets into an already converted repository),
 the `--force` flag is not required.
 Omitting `--force` when attempting to import the first Mercurial commit into a
 non-empty repository will cause the importer to refuse the operation.
 ## See Also
 - [Git LFS Documentation](https://git-lfs.github.com/)
 - [gitignore Pattern Format](https://git-scm.com/docs/gitignore)
 - [hg-fast-export Documentation](../README.md)
--- a/plugins/git_lfs_importer/init.py
+++ b/plugins/git_lfs_importer/init.py
@@ -0,0 +1,49 @@
 import pathlib
 import hashlib
 import pathspec
 def build_filter(args):
    """Build the LFS filter from a file of gitignore-style patterns.
    args: path of the pattern file given on the command line.
    """
    # File names are decoded as UTF-8 before matching, so read the patterns
    # as UTF-8 too instead of depending on the locale's default encoding.
    with open(args, encoding="utf-8") as f:
        lfs_spec = pathspec.PathSpec.from_lines(pathspec.patterns.GitWildMatchPattern, f)
    return Filter(lfs_spec)
 class Filter:
    """Replace the content of files matching the LFS spec with LFS pointers."""
    def __init__(self, lfs_spec):
        # Object whose match_file(path) tells if a path is to be stored in LFS.
        self.lfs_spec = lfs_spec
    def file_data_filter(self, file_data):
        """
        Store matching file content as an LFS object and replace
        file_data['data'] with the corresponding LFS pointer.
        file_data: {
            'filename': <bytes>,
            'file_ctx': <mercurial.filectx or None>,
            'data': <bytes or None>,
            'is_largefile': <bool>
        }
        May be called for deletions (data=None, file_ctx=None).
        """
        filename = file_data.get('filename')
        data = file_data.get('data')
        # Deletions carry no content, so there is nothing to convert.
        if data is None:
            return
        # The file name arrives as bytes and need not be valid UTF-8;
        # surrogateescape keeps such a name from aborting the conversion.
        path = filename.decode("utf-8", "surrogateescape")
        if not self.lfs_spec.match_file(path):
            return
        # LFS objects are stored under two directory levels taken from the hash.
        sha256hash = hashlib.sha256(data).hexdigest()
        lfs_dir = pathlib.Path(f".git/lfs/objects/{sha256hash[0:2]}/{sha256hash[2:4]}")
        lfs_dir.mkdir(parents=True, exist_ok=True)
        lfs_file_path = lfs_dir / sha256hash
        # The object is named after its content hash, so an existing file
        # already holds this data and does not need to be written again.
        if not lfs_file_path.is_file():
            # Write under a temporary name and rename, so an interrupted run
            # never leaves a truncated object that a later run would trust.
            tmp_path = lfs_dir / (sha256hash + ".tmp")
            tmp_path.write_bytes(data)
            tmp_path.replace(lfs_file_path)
        # Write the LFS pointer
        file_data['data'] = (
            f"version https://git-lfs.github.com/spec/v1\n"
            f"oid sha256:{sha256hash}\n"
            f"size {len(data)}\n"
        ).encode("utf-8")
--- a/plugins/head2branch/README.md
+++ b/plugins/head2branch/README.md
@@ -0,0 +1,13 @@
 ## Convert Head to Branch
 `fast-export` can only handle one head per branch. This plugin makes it possible
 to create a new branch from a head by specifying the new branch name and
 the first divergent commit for that head.
 Note: the hg hash must be in the full form, 40 hexadecimal characters.
 Note: you must run `fast-export` with the `--ignore-unnamed-heads` option;
 otherwise the conversion will fail.
 To use the plugin, add the command line flag `--plugin head2branch=name,<hg_hash>`.
 The flag can be given multiple times to name more than one head.
--- a/plugins/head2branch/init.py
+++ b/plugins/head2branch/init.py
@@ -0,0 +1,24 @@
 import sys
 def build_filter(args):
    """Create the head2branch filter from the raw plugin argument string."""
    head_filter = Filter(args)
    return head_filter
 class Filter:
    """Move a commit and all of its descendants to a named branch."""
    def __init__(self, args):
        """Parse the plugin argument of the form "name,<hg_hash>".
        Raises ValueError when the branch name or the hash is missing, so a
        malformed argument is reported clearly instead of as an IndexError.
        """
        parts = args.split(',')
        if len(parts) < 2 or not parts[0] or not parts[1]:
            raise ValueError(
                "head2branch expects 'name,<hg_hash>', got: %r" % (args,))
        self.branch_name = parts[0].encode('ascii', 'replace')
        # The hash is compared verbatim with commit_data['hg_hash'], which is
        # presumably lower-case hex as reported by Mercurial (confirm against
        # the caller), so accept upper-case input by normalising it.
        self.starting_commit_hash = parts[1].lower().encode('ascii', 'strict')
        # Revisions already moved to the new branch.
        self.branch_parents = set()
    def commit_message_filter(self, commit_data):
        """Rename the branch of the starting commit and of its descendants."""
        hg_hash = commit_data['hg_hash']
        rev = commit_data['revision']
        rev_parents = commit_data['parents']
        # A commit belongs to the new branch if it is the starting commit or
        # if any of its parents has already been moved there.
        if (hg_hash == self.starting_commit_hash
            or any(rp in self.branch_parents for rp in rev_parents)
            ):
            self.branch_parents.add(rev)
            commit_data['branch'] = self.branch_name
            sys.stderr.write('\nchanging r%s to branch %r\n' % (rev, self.branch_name))
            sys.stderr.flush()
--- a/plugins/issue_prefix/init.py
+++ b/plugins/issue_prefix/init.py
@@ -7,9 +7,11 @@ def build_filter(args):
 class Filter:
    def __init__(self, args):
        if not isinstance(args, bytes):
            args = args.encode('utf8') 
        self.prefix = args
    def commit_message_filter(self, commit_data):
-        for match in re.findall('#[1-9][0-9]+', commit_data['desc']):
+        for match in re.findall(b'#[1-9][0-9]+', commit_data['desc']):
            commit_data['desc'] = commit_data['desc'].replace(
-                match, '#%s%s' % (self.prefix, match[1:]))
+                match, b'#%s%s' % (self.prefix, match[1:]))
--- a/plugins/overwrite_null_messages/init.py
+++ b/plugins/overwrite_null_messages/init.py
@@ -4,13 +4,13 @@ def build_filter(args):
 class Filter:
    def __init__(self, args):
        if args == '':
-            message = '<empty commit message>'
+            message = b'<empty commit message>'
        else:
-            message = args
+            message = args.encode('utf8')
        self.message = message
    def commit_message_filter(self,commit_data):
        # Only write the commit message if the recorded commit
        # message is null.
-        if commit_data['desc'] == '\x00':
+        if commit_data['desc'] == b'\x00':
            commit_data['desc'] = self.message
--- a/plugins/shell_filter_file_contents/init.py
+++ b/plugins/shell_filter_file_contents/init.py
@@ -15,6 +15,8 @@ class Filter:
        d = file_data['data']
        file_ctx = file_data['file_ctx']
        filename = file_data['filename']
        if file_ctx == None:
            return
        filter_cmd = self.filter_contents + [filename, node.hex(file_ctx.filenode()), '1' if file_ctx.isbinary() else '0']
        try:
            filter_proc = subprocess.Popen(filter_cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
--- a/t/.gitignore
+++ b/t/.gitignore
@@ -0,0 +1 @@
 /test-results/
--- a/t/Makefile
+++ b/t/Makefile
@@ -0,0 +1,12 @@
 T = $(wildcard *.t)
 test: $(T)
 	@$(MAKE) --silent clean
 $(T): clean
 	./$@ $(TEST_OPTS)
 clean:
 	@rm -fr test-results
 .PHONY: test $(T) clean
--- a/t/file_data_filter-removefiles.expected
+++ b/t/file_data_filter-removefiles.expected
@@ -0,0 +1,30 @@
 blob
 mark :1
 data 7
 good_a
 reset refs/heads/master
 commit refs/heads/master
 mark :2
 author Grevious Bodily Harmsworth <gbh@example.com> 1679014800 +0000
 committer Grevious Bodily Harmsworth <gbh@example.com> 1679014800 +0000
 data 3
 r0
 M 100644 :1 good_a.txt
 commit refs/heads/master
 mark :3
 author Grevious Bodily Harmsworth <gbh@example.com> 1679018400 +0000
 committer Grevious Bodily Harmsworth <gbh@example.com> 1679018400 +0000
 data 3
 r1
 from :2
 commit refs/heads/master
 mark :4
 author Grevious Bodily Harmsworth <gbh@example.com> 1679022000 +0000
 committer Grevious Bodily Harmsworth <gbh@example.com> 1679022000 +0000
 data 3
 r2
 from :3
--- a/t/file_data_filter-removefiles.t
+++ b/t/file_data_filter-removefiles.t
@@ -0,0 +1,91 @@
 #!/bin/bash
 #
 # Copyright (c) 2023 Felipe Contreras
 # Copyright (c) 2023 Frej Drejhammar
 # Copyright (c) 2024 Stephan Hohe
 #
 # Check that files that file_data_filter sets to None are removed from repository
 #
 test_description='Remove files from file_data_filter plugin test'
 . "${SHARNESS_TEST_SRCDIR-$(dirname "$0")/sharness}"/sharness.sh || exit 1
 check() {
 	echo "$3" > expected &&
 	git -C "$1" show -q --format='%s' "$2" > actual &&
 	test_cmp expected actual
 }
 git_create() {
 	git init -q "$1" &&
 	git -C "$1" config core.ignoreCase false
 }
 git_convert() {
 	(
 	cd "$2" &&
 	hg-fast-export.sh --repo "../$1" \
 			  -s --hgtags -n \
 			  --plugin ../../plugins/removefiles_test_plugin
 	)
 }
 setup() {
 	cat > "$HOME"/.hgrc <<-EOF
 	[ui]
 	username = Grevious Bodily Harmsworth <gbh@example.com>
 	EOF
 }
 commit0() {
 	(
 	# Test initial revision with suppressed file
 	cd hgrepo &&
 	echo "good_a" > good_a.txt &&
 	echo "bad_a" > bad_a.txt &&
 	hg add good_a.txt bad_a.txt &&
 	hg commit -d "2023-03-17 01:00Z" -m "r0"
 	)
 }
 commit1() {
 	(
 	# Test modifying suppressed file
 	# Test adding suppressed file
 	cd hgrepo &&
 	echo "bad_a_modif" > bad_a.txt &&
 	echo "bad_b" > bad_b.txt &&
 	hg add bad_b.txt &&
 	hg commit -d "2023-03-17 02:00Z" -m "r1"
 	)
 }
 commit2() {
 	(
 	# Test removing suppressed file
 	cd hgrepo &&
 	hg rm bad_a.txt &&
 	hg commit -d "2023-03-17 03:00Z" -m "r2"
 	)
 }
 setup
 test_expect_success 'all in one' '
 	test_when_finished "rm -rf hgrepo gitrepo" &&
 	(
 	hg init hgrepo &&
 	commit0 &&
 	commit1 &&
 	commit2
 	) &&
 	git_create gitrepo &&
 	git_convert hgrepo gitrepo &&
 	git -C gitrepo fast-export --all > actual &&
 	test_cmp "$SHARNESS_TEST_DIRECTORY"/file_data_filter-removefiles.expected actual
 '
 test_done
--- a/t/file_data_filter.expected
+++ b/t/file_data_filter.expected
@@ -0,0 +1,29 @@
 blob
 mark :1
 data 7
 a_file
 blob
 mark :2
 data 17
 a_file_to_rename
 reset refs/heads/master
 commit refs/heads/master
 mark :3
 author Grevious Bodily Harmsworth <gbh@example.com> 1679014800 +0000
 committer Grevious Bodily Harmsworth <gbh@example.com> 1679014800 +0000
 data 3
 r0
 M 100644 :1 a.txt
 M 100644 :2 c.txt
 commit refs/heads/master
 mark :4
 author Grevious Bodily Harmsworth <gbh@example.com> 1679018400 +0000
 committer Grevious Bodily Harmsworth <gbh@example.com> 1679018400 +0000
 data 3
 r1
 from :3
 D c.txt
--- a/t/file_data_filter.t
+++ b/t/file_data_filter.t
@@ -0,0 +1,84 @@
 #!/bin/bash
 #
 # Copyright (c) 2023 Felipe Contreras
 # Copyright (c) 2023 Frej Drejhammar
 #
 # Check that the file_data_filter is called for removed files.
 #
 test_description='Smoke test'
 . "${SHARNESS_TEST_SRCDIR-$(dirname "$0")/sharness}"/sharness.sh || exit 1
 check() {
 	echo "$3" > expected &&
 	git -C "$1" show -q --format='%s' "$2" > actual &&
 	test_cmp expected actual
 }
 git_create() {
 	git init -q "$1" &&
 	git -C "$1" config core.ignoreCase false
 }
 git_convert() {
 	(
 	cd "$2" &&
 	hg-fast-export.sh --repo "../$1" \
 			  -s --hgtags -n \
 			  --plugin ../../plugins/rename_file_test_plugin \
 			  --plugin dos2unix \
 			  --plugin shell_filter_file_contents=../../plugins/id
 	)
 }
 setup() {
 	cat > "$HOME"/.hgrc <<-EOF
 	[ui]
 	username = Grevious Bodily Harmsworth <gbh@example.com>
 	EOF
 }
 commit0() {
 	(
 	cd hgrepo &&
 	echo "a_file" > a.txt &&
 	echo "a_file_to_rename" > b.txt &&
 	hg add a.txt b.txt &&
 	hg commit -d "2023-03-17 01:00Z" -m "r0"
 	)
 }
 commit1() {
 	(
 	cd hgrepo &&
 	hg remove b.txt &&
 	hg commit -d "2023-03-17 02:00Z" -m "r1"
 	)
 }
 make-branch() {
    hg branch "$1"
    FILE=$(echo "$1" | sha1sum | cut -d " " -f 1)
    echo "$1" > $FILE
    hg add $FILE
    hg commit -d "2023-03-17 $2:00Z" -m "Added file in branch $1"
 }
 setup
 test_expect_success 'all in one' '
 	test_when_finished "rm -rf hgrepo gitrepo" &&
 	(
 	hg init hgrepo &&
 	commit0 &&
 	commit1
 	) &&
 	git_create gitrepo &&
 	git_convert hgrepo gitrepo &&
 	git -C gitrepo fast-export --all > actual &&
 	test_cmp "$SHARNESS_TEST_DIRECTORY"/file_data_filter.expected actual
 '
 test_done
--- a/t/first_commit_hash_option.t
+++ b/t/first_commit_hash_option.t
@@ -0,0 +1,117 @@
 #!/bin/bash
 #
 # Copyright (c) 2025
 #
 test_description='git_lfs_importer plugin integration tests'
 . "${SHARNESS_TEST_SRCDIR-$(dirname "$0")/sharness}"/sharness.sh || exit 1
 setup() {
 	cat > "$HOME"/.hgrc <<-EOF
 	[ui]
 	username = Test User <test@example.com>
 	EOF
    # Git config for the destination repo commits
    git config --global user.email "test@example.com"
    git config --global user.name "Test User"
 }
 setup
 test_expect_success 'Mercurial history is imported over the provided commit' '
 	test_when_finished "rm -rf hgrepo gitrepo lfs-patterns.txt" &&
 	# 1. Create source Mercurial repository with a text file
 	(
 	hg init hgrepo &&
 	cd hgrepo &&
 	echo "regular text file" > readme.txt &&
 	hg add readme.txt &&
 	hg commit -m "initial commit"
 	) &&
 	# 2. Prepare destination git repo with LFS setup
 	mkdir gitrepo &&
 	(
 	cd gitrepo &&
 	git init -q &&
 	git config core.ignoreCase false &&
 	git lfs install --local &&
 	git switch --create master &&
 	cat > .gitattributes <<-EOF &&
 	* -text
 	EOF
 	git add .gitattributes &&
 	git commit -q -m "Initialize Git configuration"
 	) &&
 	FIRST_HASH=$(git -C gitrepo rev-parse HEAD) &&
 	# 3. Run hg-fast-export
 	(
 	cd gitrepo &&
 	hg-fast-export.sh \
 		-r "../hgrepo" \
 		--first-commit-hash "$FIRST_HASH" --force \
 		-M master
 	) &&
 	# 4. Verify git file is still present
 	git -C gitrepo show HEAD:.gitattributes > gitattributes_check.txt &&
 	test "$(cat gitattributes_check.txt)" = "* -text" &&
 	# 5. Verify hg file is imported
 	git -C gitrepo show HEAD:readme.txt > readme_check.txt &&
 	test "$(cat readme_check.txt)" = "regular text file"
 '
 test_expect_success 'Mercurial history has priority over git' '
 	test_when_finished "rm -rf hgrepo gitrepo lfs-patterns.txt" &&
 	# 1. Create source Mercurial repository with a readme file
 	(
 	hg init hgrepo &&
 	cd hgrepo &&
 	echo "hg readme file" > readme.txt &&
 	hg add readme.txt &&
 	hg commit -m "initial commit"
 	) &&
 	# 2. Prepare destination git repo with LFS setup
 	mkdir gitrepo &&
 	(
 	cd gitrepo &&
 	git init -q &&
 	git config core.ignoreCase false &&
 	git lfs install --local &&
 	git switch --create master &&
 	cat > readme.txt <<-EOF &&
 	git readme file
 	EOF
 	git add readme.txt &&
 	git commit -q -m "Initialize Git readme file"
 	) &&
 	FIRST_HASH=$(git -C gitrepo rev-parse HEAD) &&
 	# 3. Run hg-fast-export
 	(
 	cd gitrepo &&
 	hg-fast-export.sh \
 		-r "../hgrepo" \
 		--first-commit-hash "$FIRST_HASH" --force \
 		-M master
 	) &&
 	# 4. Verify the hg version of the file replaced the git one
 	git -C gitrepo show HEAD:readme.txt > readme_check.txt &&
 	test "$(cat readme_check.txt)" = "hg readme file"
 '
 test_done
--- a/t/git_lfs_importer_plugin.t
+++ b/t/git_lfs_importer_plugin.t
@@ -0,0 +1,189 @@
 #!/bin/bash
 #
 # Copyright (c) 2025
 #
 test_description='git_lfs_importer plugin integration tests'
 . "${SHARNESS_TEST_SRCDIR-$(dirname "$0")/sharness}"/sharness.sh || exit 1
 setup() {
 	cat > "$HOME"/.hgrc <<-EOF
 	[ui]
 	username = Test User <test@example.com>
 	EOF
    # Git config for the destination repo commits
    git config --global user.email "test@example.com"
    git config --global user.name "Test User"
 }
 setup
 test_expect_success 'git_lfs_importer converts matched binary files to LFS pointers and pointers are properly smudged when checkouting' '
 	test_when_finished "rm -rf hgrepo gitrepo lfs-patterns.txt" &&
 	# 1. Create source Mercurial repository with binary files
 	(
 	hg init hgrepo &&
 	cd hgrepo &&
 	echo "regular text file" > readme.txt &&
 	echo "binary payload" > payload.bin &&
 	hg add readme.txt payload.bin &&
 	hg commit -m "initial commit with binary"
 	) &&
 	# 2. Prepare destination git repo with LFS setup
 	mkdir gitrepo &&
 	(
 	cd gitrepo &&
 	git init -q &&
 	git config core.ignoreCase false &&
 	git lfs install --local &&
 	cat > .gitattributes <<-EOF &&
 	*.bin filter=lfs diff=lfs merge=lfs -text
 	EOF
 	git add .gitattributes &&
 	git commit -q -m "Initialize Git LFS configuration"
 	) &&
 	FIRST_HASH=$(git -C gitrepo rev-parse HEAD) &&
 	# 3. Create LFS patterns file
 	cat > lfs-patterns.txt <<-EOF &&
 	*.bin
 	EOF
 	# 4. Run hg-fast-export with git_lfs_importer plugin
 	(
 	cd gitrepo &&
 	hg-fast-export.sh \
 		-r "../hgrepo" \
 		--plugin "git_lfs_importer=../lfs-patterns.txt" \
 		--first-commit-hash "$FIRST_HASH" --force
 	) &&
 	# 5. Verify conversion: payload.bin should be an LFS pointer
 	git -C gitrepo show HEAD:payload.bin > lfs_pointer.txt &&
 	grep -q "version https://git-lfs.github.com/spec/v1" lfs_pointer.txt &&
 	grep -q "oid sha256:" lfs_pointer.txt &&
 	grep -q "size" lfs_pointer.txt &&
 	# 6. Verify non-matched file is unchanged
 	git -C gitrepo show HEAD:readme.txt > readme_check.txt &&
 	test "$(cat readme_check.txt)" = "regular text file" &&
 	# 7. Make sure the LFS pointer file is smudged when checked out
 	git -C gitrepo reset --hard HEAD &&
 	ls gitrepo &&
 	test "$(cat gitrepo/payload.bin)" = "binary payload"
 '
 test_expect_success 'git_lfs_importer skips files not matching patterns' '
 	test_when_finished "rm -rf hgrepo gitrepo lfs-patterns.txt" &&
 	# 1. Create source with various files
 	(
 	hg init hgrepo &&
 	cd hgrepo &&
 	echo "text" > file.txt &&
 	echo "data" > file.dat &&
 	echo "iso content" > image.iso &&
 	hg add . &&
 	hg commit -m "multiple files"
 	) &&
 	# 2. Prepare git repo with LFS
 	mkdir gitrepo &&
 	(
 	cd gitrepo &&
 	git init -q &&
 	git config core.ignoreCase false &&
 	git lfs install --local &&
 	cat > .gitattributes <<-EOF &&
 	*.iso filter=lfs diff=lfs merge=lfs -text
 	EOF
 	git add .gitattributes &&
 	git commit -q -m "Initialize Git LFS configuration"
 	) &&
 	FIRST_HASH=$(git -C gitrepo rev-parse HEAD) &&
 	# 3. Only .iso files should be converted
 	cat > lfs-patterns.txt <<-EOF &&
 	*.iso
 	EOF
 	(
 	cd gitrepo &&
 	hg-fast-export.sh \
 		-r "../hgrepo" \
 		--plugin "git_lfs_importer=../lfs-patterns.txt" \
 		--first-commit-hash "$FIRST_HASH" --force
 	) &&
 	# 4. Verify .iso is LFS pointer
 	git -C gitrepo show HEAD:image.iso | grep -q "oid sha256:" &&
 	# 5. Verify .txt and .dat are unchanged
 	test "$(git -C gitrepo show HEAD:file.txt)" = "text" &&
 	test "$(git -C gitrepo show HEAD:file.dat)" = "data"
 '
 test_expect_success 'git_lfs_importer handles directory patterns' '
 	test_when_finished "rm -rf hgrepo gitrepo lfs-patterns.txt" &&
 	# 1. Create repo with files in directory
 	(
 	hg init hgrepo &&
 	cd hgrepo &&
 	mkdir -p assets/images &&
 	echo "logo data" > assets/images/logo.bin &&
 	echo "regular" > readme.txt &&
 	hg add . &&
 	hg commit -m "files in directories"
 	) &&
 	# 2. Prepare git repo
 	mkdir gitrepo &&
 	(
 	cd gitrepo &&
 	git init -q &&
 	git config core.ignoreCase false &&
 	git lfs install --local &&
 	cat > .gitattributes <<-EOF &&
 	assets/** filter=lfs diff=lfs merge=lfs -text
 	EOF
 	git add .gitattributes &&
 	git commit -q -m "Initialize Git LFS configuration"
 	) &&
 	FIRST_HASH=$(git -C gitrepo rev-parse HEAD) &&
 	# 3. Match directory pattern
 	cat > lfs-patterns.txt <<-EOF &&
 	assets/**
 	EOF
 	(
 	cd gitrepo &&
 	hg-fast-export.sh \
 		-r "../hgrepo" \
 		--plugin "git_lfs_importer=../lfs-patterns.txt" \
 		--first-commit-hash "$FIRST_HASH" --force
 	) &&
 	# 4. Verify directory file is converted
 	git -C gitrepo show HEAD:assets/images/logo.bin | grep -q "oid sha256:" &&
 	# 5. Verify file outside directory is unchanged
 	test "$(git -C gitrepo show HEAD:readme.txt)" = "regular"
 '
 test_done
--- a/t/largefile_plugin.expected
+++ b/t/largefile_plugin.expected
@@ -0,0 +1,20 @@
 blob
 mark :1
 data 7
 a_file
 blob
 mark :2
 data 6
 large
 reset refs/heads/master
 commit refs/heads/master
 mark :3
 author Grevious Bodily Harmsworth <gbh@example.com> 1679014800 +0000
 committer Grevious Bodily Harmsworth <gbh@example.com> 1679014800 +0000
 data 3
 r0
 M 100644 :1 a.txt
 M 100644 :2 b.txt
--- a/t/largefile_plugin.t
+++ b/t/largefile_plugin.t
@@ -0,0 +1,69 @@
 #!/bin/bash
 #
 # Copyright (c) 2023 Felipe Contreras
 # Copyright (c) 2023 Frej Drejhammar
 # Copyright (c) 2025 Günther Nußmüller
 #
 # Check that plugin invocation works with largefiles.
 # This test uses the echo_file_data_test_plugin to verify that the
 # file data is passed correctly, including the largefile status.
 #
 test_description='Largefiles and plugin test'
 . "${SHARNESS_TEST_SRCDIR-$(dirname "$0")/sharness}"/sharness.sh || exit 1
 git_create() {
 	git init -q "$1" &&
 	git -C "$1" config core.ignoreCase false
 }
 git_convert() {
 	(
 	cd "$2" &&
 	hg-fast-export.sh --repo "../$1" \
 			  -s --hgtags -n \
 			  --plugin ../../plugins/echo_file_data_test_plugin
 	)
 }
 setup() {
 	cat > "$HOME"/.hgrc <<-EOF
 	[ui]
 	username = Grevious Bodily Harmsworth <gbh@example.com>
 	[extensions]
 	largefiles =
 	EOF
 }
 # Create revision r0 in hgrepo with one normal file and one largefile.
 commit0() {
 	(
 	cd hgrepo &&
 	echo "a_file" > a.txt &&
 	echo "large" > b.txt &&
 	hg add a.txt &&
 	hg add --large b.txt &&
 	hg commit -d "2023-03-17 01:00Z" -m "r0"
 	)
 }
 setup
 test_expect_success 'largefile and plugin' '
 	test_when_finished "rm -rf hgrepo gitrepo" &&
 	(
 	hg init hgrepo &&
 	commit0
 	) &&
 	git_create gitrepo &&
 	git_convert hgrepo gitrepo &&
 	git -C gitrepo fast-export --all > actual &&
 	test_cmp "$SHARNESS_TEST_DIRECTORY"/largefile_plugin.expected actual &&
 	test_cmp "$SHARNESS_TEST_DIRECTORY"/largefile_plugin_file_info.expected gitrepo/largefile_info.txt
 '
 test_done
--- a/t/largefile_plugin_file_info.expected
+++ b/t/largefile_plugin_file_info.expected
@@ -0,0 +1,12 @@
 filename: b'b.txt'
 data size: 6 bytes
 ctx rev: 0
 ctx binary: False
 is largefile: True
 filename: b'a.txt'
 data size: 7 bytes
 ctx rev: 0
 ctx binary: False
 is largefile: False
--- a/t/main.t
+++ b/t/main.t
@@ -0,0 +1,144 @@
 #!/bin/bash
 #
 # Copyright (c) 2023 Felipe Contreras
 #
 test_description='Main tests'
 . "${SHARNESS_TEST_SRCDIR-$(dirname "$0")/sharness}"/sharness.sh || exit 1
 # Compare the subject line of commit $2 in git repository $1 with the
 # expected text $3 (via the files "expected" and "actual").
 check() {
 	echo "$3" > expected || return
 	git -C "$1" show -q --format='%s' "$2" > actual || return
 	test_cmp expected actual
 }
 # Create git repository $2, set core.ignoreCase to false in it and
 # run hg-fast-export.sh there against the Mercurial repository ../$1.
 # Runs in a subshell; the status is that of the first failing step.
 git_clone() {
 	(
 	git init -q "$2" || exit
 	cd "$2" || exit
 	git config core.ignoreCase false || exit
 	hg-fast-export.sh --repo "../$1"
 	)
 }
 # Write $HOME/.hgrc containing the user name used for test commits.
 setup() {
 	printf '%s\n' \
 		'[ui]' \
 		'username = H G Wells <wells@example.com>' \
 		> "$HOME"/.hgrc
 }
 # Write the hgrc used by every hg invocation below.
 setup
 # Smallest possible conversion: one Mercurial commit with subject
 # "zero" must show up as the git commit that @ resolves to.
 test_expect_success 'basic' '
 	test_when_finished "rm -rf hgrepo gitrepo" &&
 	(
 	hg init hgrepo &&
 	cd hgrepo &&
 	echo zero > content &&
 	hg add content &&
 	hg commit -m zero
 	) &&
 	git_clone hgrepo gitrepo &&
 	check gitrepo @ zero
 '
 # Convert a history with two diverging commits ("right" and "left")
 # joined by a merge, then check three things on the git side: the
 # subject of the first parent of @, the contents of "content" at @,
 # and the tree listing of @.
 # Every command inside the subshells is chained with && so that a
 # failing step (including the cd) fails the test instead of being
 # silently ignored. The expected listing contains the empty line that
 # "git show" prints between the "tree @:" header and the entry names.
 test_expect_success 'merge' '
 	test_when_finished "rm -rf hgrepo gitrepo" &&
 	(
 	hg init hgrepo &&
 	cd hgrepo &&
 	echo a > content &&
 	echo a > file1 &&
 	hg add content file1 &&
 	hg commit -m "origin" &&
 	echo b > content &&
 	echo b > file2 &&
 	hg add file2 &&
 	hg rm file1 &&
 	hg commit -m "right" &&
 	hg update -r0 &&
 	echo c > content &&
 	hg commit -m "left" &&
 	HGMERGE=true hg merge -r1 &&
 	hg commit -m "merge"
 	) &&
 	git_clone hgrepo gitrepo &&
 	cat > expected <<-EOF &&
 	left
 	c
 	tree @:
 
 	content
 	file2
 	EOF
 	(
 	cd gitrepo &&
 	git show -q --format='%s' @^ &&
 	git show @:content &&
 	git show @:
 	) > actual &&
 	test_cmp expected actual
 '
 # Same merge scenario as the plain merge test, but the repository
 # enables the largefiles extension in its own .hg/hgrc and file1 and
 # file2 are added with --large. The converted git history must look
 # the same as for regular files.
 # Every command inside the subshells is chained with && so that a
 # failing step (writing the hgrc, the cd) fails the test instead of
 # being silently ignored. The expected listing contains the empty
 # line that "git show" prints between the "tree @:" header and the
 # entry names.
 test_expect_success 'hg large file' '
 	test_when_finished "rm -rf hgrepo gitrepo" &&
 	(
 	hg init hgrepo &&
 	cd hgrepo &&
 	echo "[extensions]" >> .hg/hgrc &&
 	echo "largefiles =" >> .hg/hgrc &&
 	echo a > content &&
 	echo a > file1 &&
 	hg add content &&
 	hg add --large file1 &&
 	hg commit -m "origin" &&
 	echo b > content &&
 	echo b > file2 &&
 	hg add --large file2 &&
 	hg rm file1 &&
 	hg commit -m "right" &&
 	hg update -r0 &&
 	echo c > content &&
 	hg commit -m "left" &&
 	HGMERGE=true hg merge -r1 &&
 	hg commit -m "merge"
 	) &&
 	git_clone hgrepo gitrepo &&
 	cat > expected <<-EOF &&
 	left
 	c
 	tree @:
 
 	content
 	file2
 	EOF
 	(
 	cd gitrepo &&
 	git show -q --format='%s' @^ &&
 	git show @:content &&
 	git show @:
 	) > actual &&
 	test_cmp expected actual
 '
 # Finish the sharness test script.
 test_done
--- a/t/plugins/echo_file_data_test_plugin/init.py
+++ b/t/plugins/echo_file_data_test_plugin/init.py
@@ -0,0 +1,18 @@
 import sys
 from mercurial import node
 def build_filter(args):
    """Create the plugin's filter object, handing ``args`` to ``Filter``."""
    plugin_filter = Filter(args)
    return plugin_filter
 class Filter:
    """Test plugin that records what it is told about each file.

    Every call to ``file_data_filter`` appends one record to
    ``largefile_info.txt`` in the current working directory.
    """
    def __init__(self, _):
        """Accept the plugin argument and ignore it."""
    def file_data_filter(self, file_data):
        """Append a five-line record plus an empty line for ``file_data``.

        Reads the ``filename``, ``data`` and ``file_ctx`` entries and the
        optional ``is_largefile`` entry (reported as False when absent).
        ``file_data`` itself is not modified.
        """
        with open('largefile_info.txt', 'a') as out:
            name = file_data['filename']
            out.write(f"filename: {name}\n")
            size = len(file_data['data'])
            out.write(f"data size: {size} bytes\n")
            revision = file_data['file_ctx'].rev()
            out.write(f"ctx rev: {revision}\n")
            binary = file_data['file_ctx'].isbinary()
            out.write(f"ctx binary: {binary}\n")
            largefile = file_data.get('is_largefile', False)
            out.write(f"is largefile: {largefile}\n")
            out.write("\n")
--- a/t/plugins/id
+++ b/t/plugins/id
@@ -0,0 +1,2 @@
 #!/bin/bash
 # Identity filter: copy standard input to standard output unchanged.
 cat
--- a/t/plugins/removefiles_test_plugin/init.py
+++ b/t/plugins/removefiles_test_plugin/init.py
@@ -0,0 +1,15 @@
 import subprocess
 import shlex
 import sys
 from mercurial import node
 def build_filter(args):
    """Create the plugin's filter object, handing ``args`` to ``Filter``."""
    plugin_filter = Filter(args)
    return plugin_filter
 class Filter:
    """Test plugin that blanks out every file whose name starts with ``bad``."""
    def __init__(self, args):
        """Split the plugin argument string shell-style and keep the words."""
        words = shlex.split(args)
        self.filter_contents = words
    def file_data_filter(self, file_data):
        """Set ``file_data['data']`` to None when the filename starts with b'bad'.

        Other entries are left as they are.
        """
        name = file_data['filename']
        if not name.startswith(b'bad'):
            return
        file_data['data'] = None
--- a/t/plugins/rename_file_test_plugin/init.py
+++ b/t/plugins/rename_file_test_plugin/init.py
@@ -0,0 +1,15 @@
 import subprocess
 import shlex
 import sys
 from mercurial import node
 def build_filter(args):
    """Create the plugin's filter object, handing ``args`` to ``Filter``."""
    plugin_filter = Filter(args)
    return plugin_filter
 class Filter:
    """Test plugin that renames ``b.txt`` to ``c.txt``."""
    def __init__(self, args):
        """Split the plugin argument string shell-style and keep the words."""
        words = shlex.split(args)
        self.filter_contents = words
    def file_data_filter(self, file_data):
        """Replace the filename b'b.txt' with b'c.txt'; leave other names alone."""
        if file_data['filename'] != b'b.txt':
            return
        file_data['filename'] = b'c.txt'
--- a/t/set_origin.expected
+++ b/t/set_origin.expected
@@ -0,0 +1,42 @@
 blob
 mark :1
 data 5
 zero
 reset refs/heads/prefix/master
 commit refs/heads/prefix/master
 mark :2
 author H G Wells <wells@example.com> 1679014800 +0000
 committer H G Wells <wells@example.com> 1679014800 +0000
 data 5
 zero
 M 100644 :1 content
 blob
 mark :3
 data 8
 branch1
 commit refs/heads/prefix/branch1
 mark :4
 author H G Wells <wells@example.com> 1679018400 +0000
 committer H G Wells <wells@example.com> 1679018400 +0000
 data 29
 Added file in branch branch1
 from :2
 M 100644 :3 b8486c4feca589a4237a1ee428322d7109ede12e
 blob
 mark :5
 data 8
 branch2
 commit refs/heads/prefix/branch2
 mark :6
 author H G Wells <wells@example.com> 1679022000 +0000
 committer H G Wells <wells@example.com> 1679022000 +0000
 data 29
 Added file in branch branch2
 from :4
 M 100644 :5 fe786baee0d76603092c25609f2967b9c28a2cf2
--- a/t/set_origin.t
+++ b/t/set_origin.t
@@ -0,0 +1,59 @@
 #!/bin/bash
 #
 # Copyright (c) 2023 Felipe Contreras
 # Copyright (c) 2025 Günther Nußmüller
 #
 test_description='Set origin tests'
 . "${SHARNESS_TEST_SRCDIR-$(dirname "$0")/sharness}"/sharness.sh || exit 1
 # Export every ref of git repository $1 with "git fast-export" and
 # compare the stream with set_origin.expected. The two steps are
 # chained with && so that a failing export is reported as a failure
 # rather than being masked by the comparison.
 check() {
 	git -C "$1" fast-export --all > actual &&
 	test_cmp "$SHARNESS_TEST_DIRECTORY"/set_origin.expected actual
 }
 # Create git repository $2, set core.ignoreCase to false in it and
 # run hg-fast-export.sh there against the Mercurial repository ../$1,
 # passing $3 as the --origin value. Runs in a subshell; the status is
 # that of the first failing step.
 git_clone() {
 	(
 	git init -q "$2" || exit
 	cd "$2" || exit
 	git config core.ignoreCase false || exit
 	hg-fast-export.sh --repo "../$1" --origin "$3"
 	)
 }
 # Write $HOME/.hgrc containing the user name used for test commits.
 setup() {
 	printf '%s\n' \
 		'[ui]' \
 		'username = H G Wells <wells@example.com>' \
 		> "$HOME"/.hgrc
 }
 # Switch the current Mercurial working directory to branch $1 and
 # commit one new file on it at 2023-03-17 $2:00Z.
 # The file is named after the SHA-1 of the branch name so that branch
 # names which are awkward as file names can still be used; the file
 # contains the branch name.
 # All steps are chained with && so the function fails as soon as any
 # of them fails, and the file name is quoted wherever it is used.
 make-branch() {
 	hg branch "$1" &&
 	FILE=$(echo "$1" | sha1sum | cut -d " " -f 1) &&
 	echo "$1" > "$FILE" &&
 	hg add "$FILE" &&
 	hg commit -d "2023-03-17 $2:00Z" -m "Added file in branch $1"
 }
 # Write the hgrc used by every hg invocation below.
 setup
 # Build a repository with a default-branch commit followed by one
 # commit on each of branch1 and branch2, convert it with the origin
 # argument "prefix", and compare the fast-export stream with
 # set_origin.expected.
 test_expect_success 'basic' '
 	test_when_finished "rm -rf hgrepo gitrepo" &&
 	(
 	hg init hgrepo &&
 	cd hgrepo &&
 	echo zero > content &&
 	hg add content &&
 	hg commit -m zero -d "2023-03-17 01:00Z" &&
 	make-branch branch1 02 &&
 	make-branch branch2 03
 	) &&
 	git_clone hgrepo gitrepo prefix &&
 	check gitrepo
 '
 # Finish the sharness test script.
 test_done
--- a/t/sharness
+++ b/t/sharness
--- a/t/smoke-test.branchmap
+++ b/t/smoke-test.branchmap
@@ -0,0 +1,15 @@
 "feature"="renamed-feature"
 "a?"="valid-0"
 "a/"="valid-1"
 "a/b"="valid-2"
 "a/?"="valid-3"
 "?a"="valid-4"
 "a."="valid-5"
 "a.b"="valid-6"
 ".a"="valid-7"
 "/"="valid-8"
 "___3"="___a"
 "__2"="__b"
 "_1"="_c"
 "åäö"="abc"
 "Feature- 12V Vac \"Venom\""="venom"
--- a/t/smoke-test.expected
+++ b/t/smoke-test.expected
@@ -0,0 +1,300 @@
 blob
 mark :1
 data 5
 r0-a
 blob
 mark :2
 data 5
 r0-b
 reset refs/heads/master
 commit refs/heads/master
 mark :3
 author Grevious Bodily Harmsworth <gbh@example.com> 1679014800 +0000
 committer Grevious Bodily Harmsworth <gbh@example.com> 1679014800 +0000
 data 3
 r0
 M 100644 :1 a.txt
 M 100644 :2 b.txt
 blob
 mark :4
 data 5
 r1-c
 blob
 mark :5
 data 5
 r1-d
 commit refs/tags/2019_Spring_R2
 mark :6
 author Grevious Bodily Harmsworth <gbh@example.com> 1679018400 +0000
 committer Grevious Bodily Harmsworth <gbh@example.com> 1679018400 +0000
 data 3
 r1
 from :3
 M 100644 :4 c.txt
 M 100644 :5 d.txt
 blob
 mark :7
 data 56
 e92e41dde44f9dbbac08bbb83351a65b6728f128 2019 Spring R2
 commit refs/heads/mainline
 mark :8
 author Grevious Bodily Harmsworth <gbh@example.com> 1679019000 +0000
 committer Grevious Bodily Harmsworth <gbh@example.com> 1679019000 +0000
 data 52
 Added tag 2019 Spring R2 for changeset e92e41dde44f
 from :6
 M 100644 :7 .hgtags
 blob
 mark :9
 data 5
 r2-e
 blob
 mark :10
 data 5
 r2-f
 commit refs/heads/mainline
 mark :11
 author Grevious Bodily Harmsworth <gbh@example.com> 1679022000 +0000
 committer Grevious Bodily Harmsworth <gbh@example.com> 1679022000 +0000
 data 3
 r2
 from :8
 M 100644 :9 e.txt
 M 100644 :10 f.txt
 commit refs/heads/mainline
 mark :12
 author badly-formed-user <devnull@localhost> 1679025600 +0000
 committer badly-formed-user <devnull@localhost> 1679025600 +0000
 data 3
 r3
 from :11
 M 100644 :9 g.txt
 M 100644 :10 h.txt
 blob
 mark :13
 data 10
 feature-a
 blob
 mark :14
 data 10
 feature-b
 commit refs/heads/renamed-feature
 mark :15
 author Grevious Bodily Harmsworth <gbh@example.com> 1679029200 +0000
 committer Grevious Bodily Harmsworth <gbh@example.com> 1679029200 +0000
 data 8
 feature
 from :12
 M 100644 :13 feature-a.txt
 M 100644 :14 feature-b.txt
 blob
 mark :16
 data 3
 a?
 commit refs/heads/valid-0
 mark :17
 author Grevious Bodily Harmsworth <gbh@example.com> 1679032800 +0000
 committer Grevious Bodily Harmsworth <gbh@example.com> 1679032800 +0000
 data 24
 Added file in branch a?
 from :15
 M 100644 :16 c1086ce03e4f52aadd1c93b1d097da510138522a
 blob
 mark :18
 data 3
 a/
 commit refs/heads/valid-1
 mark :19
 author Grevious Bodily Harmsworth <gbh@example.com> 1679036400 +0000
 committer Grevious Bodily Harmsworth <gbh@example.com> 1679036400 +0000
 data 24
 Added file in branch a/
 from :17
 M 100644 :18 85ed6fbb96d655df9f194bc9107f2d86210b9263
 blob
 mark :20
 data 4
 a/b
 commit refs/heads/valid-2
 mark :21
 author Grevious Bodily Harmsworth <gbh@example.com> 1679040000 +0000
 committer Grevious Bodily Harmsworth <gbh@example.com> 1679040000 +0000
 data 25
 Added file in branch a/b
 from :19
 M 100644 :20 aae42d317509399fdda80c4d8e46774d152dbd04
 blob
 mark :22
 data 4
 a/?
 commit refs/heads/valid-3
 mark :23
 author Grevious Bodily Harmsworth <gbh@example.com> 1679043600 +0000
 committer Grevious Bodily Harmsworth <gbh@example.com> 1679043600 +0000
 data 25
 Added file in branch a/?
 from :21
 M 100644 :22 ba54a8de7fe91c5e6e0a2dd1b9b37de0976ff5a7
 blob
 mark :24
 data 3
 ?a
 commit refs/heads/valid-4
 mark :25
 author Grevious Bodily Harmsworth <gbh@example.com> 1679047200 +0000
 committer Grevious Bodily Harmsworth <gbh@example.com> 1679047200 +0000
 data 24
 Added file in branch ?a
 from :23
 M 100644 :24 d4cde16119b586025976741e87775762a2598984
 blob
 mark :26
 data 3
 a.
 commit refs/heads/valid-5
 mark :27
 author Grevious Bodily Harmsworth <gbh@example.com> 1679050800 +0000
 committer Grevious Bodily Harmsworth <gbh@example.com> 1679050800 +0000
 data 24
 Added file in branch a.
 from :25
 M 100644 :26 b4ce96ddcee0706a8c51130917f910b2b29faf77
 blob
 mark :28
 data 4
 a.b
 commit refs/heads/valid-6
 mark :29
 author Grevious Bodily Harmsworth <gbh@example.com> 1679054400 +0000
 committer Grevious Bodily Harmsworth <gbh@example.com> 1679054400 +0000
 data 25
 Added file in branch a.b
 from :27
 M 100644 :28 97051191e1a92daa11165ef10770bf964268c58b
 blob
 mark :30
 data 3
 .a
 commit refs/heads/valid-7
 mark :31
 author Grevious Bodily Harmsworth <gbh@example.com> 1679058000 +0000
 committer Grevious Bodily Harmsworth <gbh@example.com> 1679058000 +0000
 data 24
 Added file in branch .a
 from :29
 M 100644 :30 a667f8feec02fdfa6649772f844a24cf1ad5ebec
 blob
 mark :32
 data 2
 /
 commit refs/heads/valid-8
 mark :33
 author Grevious Bodily Harmsworth <gbh@example.com> 1679061600 +0000
 committer Grevious Bodily Harmsworth <gbh@example.com> 1679061600 +0000
 data 23
 Added file in branch /
 from :31
 M 100644 :32 8f27084b6294ddbe28dbcbf98f798730e8a79289
 blob
 mark :34
 data 5
 ___3
 commit refs/heads/___a
 mark :35
 author Grevious Bodily Harmsworth <gbh@example.com> 1679065200 +0000
 committer Grevious Bodily Harmsworth <gbh@example.com> 1679065200 +0000
 data 26
 Added file in branch ___3
 from :33
 M 100644 :34 9b171494eb6e5ce325934b1656e286ca0510a697
 blob
 mark :36
 data 4
 __2
 commit refs/heads/__b
 mark :37
 author Grevious Bodily Harmsworth <gbh@example.com> 1679068800 +0000
 committer Grevious Bodily Harmsworth <gbh@example.com> 1679068800 +0000
 data 25
 Added file in branch __2
 from :35
 M 100644 :36 5dca703b71d2613c6bb3262b9b1741d6165e4a2f
 blob
 mark :38
 data 3
 _1
 commit refs/heads/_c
 mark :39
 author Grevious Bodily Harmsworth <gbh@example.com> 1679072400 +0000
 committer Grevious Bodily Harmsworth <gbh@example.com> 1679072400 +0000
 data 24
 Added file in branch _1
 from :37
 M 100644 :38 2fee90e148a2afbd911b67ced9b6240151f904ec
 blob
 mark :40
 data 25
 Feature- 12V Vac "Venom"
 commit refs/heads/venom
 mark :41
 author Grevious Bodily Harmsworth <gbh@example.com> 1679076000 +0000
 committer Grevious Bodily Harmsworth <gbh@example.com> 1679076000 +0000
 data 46
 Added file in branch Feature- 12V Vac "Venom"
 from :39
 M 100644 :40 b01def8779aed4be2f4b7325a89992a9aa566fec
 blob
 mark :42
 data 7
 åäö
 commit refs/heads/abc
 mark :43
 author Grevious Bodily Harmsworth <gbh@example.com> 1679079600 +0000
 committer Grevious Bodily Harmsworth <gbh@example.com> 1679079600 +0000
 data 28
 Added file in branch åäö
 from :41
 M 100644 :42 a0d01fcbff5d86327d542687dcfd8b299d054147
--- a/t/smoke-test.t
+++ b/t/smoke-test.t
@@ -0,0 +1,163 @@
 #!/bin/bash
 #
 # Copyright (c) 2023 Felipe Contreras
 # Copyright (c) 2023 Frej Drejhammar
 #
 # Smoke test used to sanity test changes to fast-export.
 #
 test_description='Smoke test'
 . "${SHARNESS_TEST_SRCDIR-$(dirname "$0")/sharness}"/sharness.sh || exit 1
 # Compare the subject line of commit $2 in git repository $1 with the
 # expected text $3 (via the files "expected" and "actual").
 check() {
 	echo "$3" > expected || return
 	git -C "$1" show -q --format='%s' "$2" > actual || return
 	test_cmp expected actual
 }
 # Create a git repository at path $1 (quietly) and set its
 # core.ignoreCase option to false.
 git_create() {
 	git init -q "$1" || return
 	git -C "$1" config core.ignoreCase false
 }
 # Run hg-fast-export.sh from inside git repository $2 against the
 # Mercurial repository ../$1, using the smoke-test branch and tag
 # mapping files from the test directory. Everything happens in a
 # subshell, so the caller's directory is kept.
 git_convert() {
 	(
 	cd "$2" || exit
 	hg-fast-export.sh --repo "../$1" -s --hgtags -n \
 		-B "$SHARNESS_TEST_DIRECTORY"/smoke-test.branchmap \
 		-T "$SHARNESS_TEST_DIRECTORY"/smoke-test.tagsmap
 	)
 }
 # Write $HOME/.hgrc containing the user name used for test commits.
 setup() {
 	printf '%s\n' \
 		'[ui]' \
 		'username = Grevious Bodily Harmsworth <gbh@example.com>' \
 		> "$HOME"/.hgrc
 }
 # Revision r0: add a.txt and b.txt, commit, and set bookmark bm0.
 # Runs in a subshell and stops at the first failing command.
 commit0() {
 	(
 	cd hgrepo || exit
 	echo "r0-a" > a.txt || exit
 	echo "r0-b" > b.txt || exit
 	hg add a.txt b.txt || exit
 	hg commit -d "2023-03-17 01:00Z" -m "r0" || exit
 	hg bookmark bm0
 	)
 }
 # Revision r1: start branch "mainline", add c.txt and d.txt, commit,
 # and tag the result "2019 Spring R2".
 # Runs in a subshell and stops at the first failing command.
 commit1() {
 	(
 	cd hgrepo || exit
 	echo "r1-c" > c.txt || exit
 	echo "r1-d" > d.txt || exit
 	hg branch mainline || exit
 	hg add c.txt d.txt || exit
 	hg commit -d "2023-03-17 02:00Z" -m "r1" || exit
 	hg tag -d "2023-03-17 02:10Z" "2019 Spring R2"
 	)
 }
 # Revision r2: add e.txt and f.txt, commit, and set bookmark bm1.
 # Runs in a subshell and stops at the first failing command.
 commit2() {
 	(
 	cd hgrepo || exit
 	echo "r2-e" > e.txt || exit
 	echo "r2-f" > f.txt || exit
 	hg add e.txt f.txt || exit
 	hg commit -d "2023-03-17 03:00Z" -m "r2" || exit
 	hg bookmark bm1
 	)
 }
 # Revision r3: add g.txt and h.txt (same contents as e.txt and f.txt)
 # and commit as the user "badly-formed-user", which has no e-mail part.
 # Runs in a subshell and stops at the first failing command.
 commit3() {
 	(
 	cd hgrepo || exit
 	echo "r2-e" > g.txt || exit
 	echo "r2-f" > h.txt || exit
 	hg add g.txt h.txt || exit
 	hg commit -d "2023-03-17 04:00Z" -u "badly-formed-user" -m "r3"
 	)
 }
 # Create the remaining history: a commit on branch "feature" with
 # bookmark bm2, then one commit on each of a series of unusually named
 # branches (one per hour from 06:00Z to 19:00Z), and finally the
 # bookmark bm-for-the-rest. Runs in a subshell inside hgrepo.
 commit_rest() {
 	(
 	cd hgrepo &&
 	hg branch feature &&
 	echo "feature-a" > feature-a.txt &&
 	echo "feature-b" > feature-b.txt &&
 	hg add feature-a.txt feature-b.txt &&
 	hg commit -d "2023-03-17 05:00Z" -m "feature" &&
 	hg bookmark bm2 &&
 	# Now create strangely named branches
 	make-branch "a?" 06 &&
 	make-branch "a/" 07 &&
 	make-branch "a/b" 08 &&
 	make-branch "a/?" 09 &&
 	make-branch "?a" 10 &&
 	make-branch "a." 11 &&
 	make-branch "a.b" 12 &&
 	make-branch ".a" 13 &&
 	make-branch "/" 14 &&
 	make-branch "___3" 15 &&
 	make-branch "__2" 16 &&
 	make-branch "_1" 17 &&
 	make-branch "Feature- 12V Vac \"Venom\"" 18 &&
 	make-branch "åäö" 19 &&
 	hg bookmark bm-for-the-rest
 	)
 }
 # Switch the current Mercurial working directory to branch $1 and
 # commit one new file on it at 2023-03-17 $2:00Z.
 # The file is named after the SHA-1 of the branch name so that branch
 # names which are awkward as file names can still be used; the file
 # contains the branch name.
 # All steps are chained with && so the function fails as soon as any
 # of them fails, and the file name is quoted wherever it is used.
 make-branch() {
 	hg branch "$1" &&
 	FILE=$(echo "$1" | sha1sum | cut -d " " -f 1) &&
 	echo "$1" > "$FILE" &&
 	hg add "$FILE" &&
 	hg commit -d "2023-03-17 $2:00Z" -m "Added file in branch $1"
 }
 # Write the hgrc used by every hg invocation below.
 setup
 # Build the complete Mercurial history first, convert it in a single
 # run, and compare the fast-export stream with smoke-test.expected.
 test_expect_success 'all in one' '
 	test_when_finished "rm -rf hgrepo gitrepo" &&
 	(
 	hg init hgrepo &&
 	commit0 &&
 	commit1 &&
 	commit2 &&
 	commit3 &&
 	commit_rest
 	) &&
 	git_create gitrepo &&
 	git_convert hgrepo gitrepo &&
 	git -C gitrepo fast-export --all > actual &&
 	test_cmp "$SHARNESS_TEST_DIRECTORY"/smoke-test.expected actual
 '
 # Build the same history step by step, running the conversion into
 # the same git repository after several of the steps. The final
 # fast-export stream is compared with the same smoke-test.expected
 # file that the single-run conversion is checked against.
 test_expect_success 'incremental' '
 	test_when_finished "rm -rf hgrepo gitrepo" &&
 	hg init hgrepo &&
 	commit0 &&
 	git_create gitrepo &&
 	git_convert hgrepo gitrepo &&
 	commit1 &&
 	git_convert hgrepo gitrepo &&
 	commit2 &&
 	commit3 &&
 	git_convert hgrepo gitrepo &&
 	commit_rest &&
 	git_convert hgrepo gitrepo &&
 	git -C gitrepo fast-export --all > actual &&
 	test_cmp "$SHARNESS_TEST_DIRECTORY"/smoke-test.expected actual
 '
 # Finish the sharness test script.
 test_done
--- a/t/smoke-test.tagsmap
+++ b/t/smoke-test.tagsmap
@@ -0,0 +1 @@
 "2019 Spring R2"="2019_Spring_R2"
--- a/tests/init.py
+++ b/tests/init.py
--- a/tests/test_drop_plugin.py
+++ b/tests/test_drop_plugin.py
@@ -0,0 +1,223 @@
 import sys, os, subprocess
 from tempfile import TemporaryDirectory
 from unittest import TestCase
 from pathlib import Path
 class CommitDropTest(TestCase):
    """End-to-end tests for the ``drop`` plugin.

    Each test creates commits in a scratch Mercurial repository, exports
    them with one or more ``drop=...`` plugin arguments and then checks
    the commit subjects found in the resulting git repository.
    """
    def setUp(self):
        """Create empty hg and git repositories in a new temporary directory."""
        self.tempdir = TemporaryDirectory()
        root = Path(self.tempdir.name)
        self.hg = HgDriver(root / 'hgrepo')
        self.hg.init()
        self.git = GitDriver(root / 'gitrepo')
        self.git.init()
        self.export = ExportDriver(self.hg.repodir, self.git.repodir)
    def tearDown(self):
        """Delete the temporary directory and everything in it."""
        self.tempdir.cleanup()
    def create_commit(self, message):
        """Rewrite the test file with text derived from ``message`` and commit.

        Returns whatever ``HgDriver.commit`` returns for the new commit.
        """
        self.write_file_data('Data for %r.' % message)
        return self.hg.commit(message)
    def write_file_data(self, data, filename='test_file.txt'):
        """Overwrite ``filename`` in the hg repository with ``data`` and a newline."""
        target = self.hg.repodir / filename
        with target.open('w') as stream:
            print(data, file=stream)
    def drop(self, *spec):
        """Run the export with one ``drop`` plugin instance per item in ``spec``."""
        self.export.run_with_drop(*spec)
    def git_parents(self, message):
        """Return the subjects of the parents of the git commit matching ``message``.

        Raises ``Exception`` unless exactly one commit matches.
        """
        found = self.git.grep_log(message)
        if len(found) != 1:
            raise Exception('No unique commit with message %r.' % message)
        _, parent_hashes = self.git.details(found[0])
        return [self.git.details(parent)[0] for parent in parent_hashes]
    def test_drop_single_commit_by_hash(self):
        first = self.create_commit('commit 1')
        self.create_commit('commit 2')
        self.drop(first)
        self.assertEqual(['commit 2'], self.git.log())
    def test_drop_commits_by_desc(self):
        for subject in ('commit 1 is good', 'commit 2 is bad',
                        'commit 3 is good', 'commit 4 is bad'):
            self.create_commit(subject)
        self.drop('.*bad')
        self.assertEqual(['commit 1 is good', 'commit 3 is good'],
                         self.git.log())
    def test_drop_sequential_commits_in_single_plugin_instance(self):
        self.create_commit('commit 1')
        doomed = [self.create_commit(subject)
                  for subject in ('commit 2', 'commit 3', 'commit 4')]
        self.create_commit('commit 5')
        # One plugin instance receives all three hashes, comma-separated.
        self.drop(','.join(doomed))
        self.assertEqual(['commit 1', 'commit 5'], self.git.log())
    def test_drop_sequential_commits_in_multiple_plugin_instances(self):
        self.create_commit('commit 1')
        doomed = [self.create_commit(subject)
                  for subject in ('commit 2', 'commit 3', 'commit 4')]
        self.create_commit('commit 5')
        # Each hash goes to its own plugin instance.
        self.drop(*doomed)
        self.assertEqual(['commit 1', 'commit 5'], self.git.log())
    def test_drop_nonsequential_commits(self):
        self.create_commit('commit 1')
        second = self.create_commit('commit 2')
        self.create_commit('commit 3')
        fourth = self.create_commit('commit 4')
        self.drop(','.join([second, fourth]))
        self.assertEqual(['commit 1', 'commit 3'], self.git.log())
    def test_drop_head(self):
        self.create_commit('first')
        self.create_commit('middle')
        head = self.create_commit('last')
        self.drop(head)
        self.assertEqual(['first', 'middle'], self.git.log())
    def test_drop_merge_commit(self):
        root = self.create_commit('initial')
        self.create_commit('branch A')
        self.hg.checkout(root)
        self.create_commit('branch B')
        self.hg.merge()
        merge = self.create_commit('merge to drop')
        self.create_commit('last')
        self.drop(merge)
        self.assertEqual(['initial', 'branch A', 'branch B', 'last'],
                         self.git.log())
        # With the merge gone, 'last' is expected to have both branch
        # tips as its parents.
        self.assertEqual(['branch B', 'branch A'], self.git_parents('last'))
    def test_drop_different_commits_in_multiple_plugin_instances(self):
        self.create_commit('good commit')
        bad = self.create_commit('bad commit')
        self.create_commit('awful commit')
        self.create_commit('another good commit')
        self.drop('^awful.*', bad)
        self.assertEqual(['good commit', 'another good commit'],
                         self.git.log())
    def test_drop_same_commit_in_multiple_plugin_instances(self):
        self.create_commit('good commit')
        bad = self.create_commit('bad commit')
        self.create_commit('another good commit')
        self.drop('^bad.*', bad)
        self.assertEqual(['good commit', 'another good commit'],
                         self.git.log())
 class ExportDriver:
    """Runs ``hg-fast-export.sh`` with ``drop`` plugin arguments.

    ``sourcedir`` is passed to the script with ``-r``; the script is run
    with ``targetdir`` as its working directory. With ``quiet`` set (the
    default) the script's stdout and stderr are discarded.
    """
    def __init__(self, sourcedir, targetdir, *, quiet=True):
        self.sourcedir = Path(sourcedir)
        self.targetdir = Path(targetdir)
        self.quiet = quiet
        # Anchored to the current directory because the script runs with
        # a different working directory; an absolute value is unchanged.
        self.python_executable = str(
            Path.cwd() / os.environ.get('PYTHON', sys.executable))
        self.script = Path(__file__).parent / '../hg-fast-export.sh'
    def _child_env(self):
        """Return the current environment with ``PYTHON`` overridden.

        The inherited variables (PATH, HOME, PYTHONPATH, ...) are kept
        so the script can locate its tools the same way the other
        drivers' ``hg`` and ``git`` calls do.
        """
        env = dict(os.environ)
        env['PYTHON'] = self.python_executable
        return env
    def run_with_drop(self, *plugin_args):
        """Run the export with one ``--plugin drop=<arg>`` per argument.

        Raises ``subprocess.CalledProcessError`` if the script exits
        with a non-zero status.
        """
        cmd = [str(self.script), '-r', str(self.sourcedir)]
        for arg in plugin_args:
            cmd.extend(['--plugin', 'drop=' + arg])
        output = subprocess.DEVNULL if self.quiet else None
        subprocess.run(cmd, check=True, cwd=str(self.targetdir),
                       env=self._child_env(),
                       stdout=output, stderr=output)
 class HgDriver:
    """Runs ``hg`` commands inside a single repository directory."""
    def __init__(self, repodir):
        self.repodir = Path(repodir)
    def run_command(self, *args):
        """Run ``hg -yq <args>`` in the repository and return its stdout text.

        Raises ``subprocess.CalledProcessError`` on a non-zero exit status.
        """
        argv = ('hg', '-yq', *args)
        completed = subprocess.run(argv,
                                   cwd=str(self.repodir),
                                   check=True,
                                   text=True,
                                   capture_output=True)
        return completed.stdout
    def init(self):
        """Create the repository directory and initialise it."""
        self.repodir.mkdir()
        self.run_command('init')
    def commit(self, message):
        """Commit with ``-A`` and return the stripped output of ``hg id --id --debug``."""
        self.run_command('commit', '-A', '-m', message)
        identifier = self.run_command('id', '--id', '--debug')
        return identifier.strip()
    def log(self):
        """Return the commit descriptions in the reverse of ``hg log`` output order."""
        listing = self.run_command('log', '-T', '{desc}\n')
        return listing.strip().splitlines()[::-1]
    def checkout(self, rev):
        """Check out revision ``rev``."""
        self.run_command('checkout', '-r', rev)
    def merge(self):
        """Merge using the ``:local`` merge tool."""
        self.run_command('merge', '--tool', ':local')
 class GitDriver:
    """Runs ``git`` commands inside a single repository directory."""
    def __init__(self, repodir):
        self.repodir = Path(repodir)
    def run_command(self, *args):
        """Run ``git --no-pager <args>`` in the repository and return its stdout text.

        Raises ``subprocess.CalledProcessError`` on a non-zero exit status.
        """
        argv = ('git', '--no-pager', *args)
        completed = subprocess.run(argv,
                                   cwd=str(self.repodir),
                                   check=True,
                                   text=True,
                                   capture_output=True)
        return completed.stdout
    def init(self):
        """Create the repository directory and initialise it."""
        self.repodir.mkdir()
        self.run_command('init')
    def log(self):
        """Return the commit subjects as printed by ``git log --reverse``."""
        listing = self.run_command('log', '--format=%s', '--reverse')
        return listing.strip().splitlines()
    def grep_log(self, pattern):
        """Return the hashes of commits whose message contains the fixed string ``pattern``."""
        listing = self.run_command('log', '--format=%H',
                                   '-F', '--grep', pattern)
        return listing.strip().splitlines()
    def details(self, commit_hash):
        """Return ``(subject, [parent hashes])`` for ``commit_hash``."""
        shown = self.run_command('show', '-s', '--format=%s%n%P',
                                 commit_hash)
        # Exactly two lines are expected: the subject, then the
        # space-separated parent hashes.
        subject, parent_line = shown.splitlines()
        return subject, parent_line.split()
--- a/tests/test_git_lfs_importer_plugin.py
+++ b/tests/test_git_lfs_importer_plugin.py
@@ -0,0 +1,156 @@
 import sys
 sys.path.append("./plugins")
 import hashlib
 import pathlib
 import time
 import unittest
 import tempfile
 import os
 import pathspec
 from git_lfs_importer import Filter, build_filter
 class TestGitLfsImporterPlugin(unittest.TestCase):
    def setUp(self):
        """Remember the current directory, then enter a fresh temporary one."""
        self._orig_cwd = os.getcwd()
        scratch = tempfile.TemporaryDirectory()
        self._tmpdir = scratch
        self.tmp_path = pathlib.Path(scratch.name)
        os.chdir(self.tmp_path)
    def tearDown(self):
        """Go back to the directory saved by setUp and delete the temporary one."""
        original_dir, scratch = self._orig_cwd, self._tmpdir
        os.chdir(original_dir)
        scratch.cleanup()
    def empty_spec(self):
        """Return a PathSpec built from an empty list of git wildmatch patterns."""
        no_patterns = []
        pattern_type = pathspec.patterns.GitWildMatchPattern
        return pathspec.PathSpec.from_lines(pattern_type, no_patterns)
    # --------------------------------------------------------
    # GIVEN-WHEN-THEN TESTS for Filter.file_data_filter
    # --------------------------------------------------------
    def test_skips_deletions(self):
        """An entry whose data is None stays None and no .git directory appears."""
        deleted = {"filename": b"file.txt", "data": None}
        Filter(self.empty_spec()).file_data_filter(deleted)
        self.assertIsNone(deleted["data"])
        git_dir = self.tmp_path / ".git"
        self.assertFalse(git_dir.exists())
    def test_skips_files_that_do_not_match_spec(self):
        """A file not matching ``*.bin`` keeps its data and no .git directory appears."""
        bin_only = pathspec.PathSpec.from_lines(pathspec.patterns.GitWildMatchPattern, ["*.bin"])
        lfs_filter = Filter(bin_only)
        untouched = b"not matched"
        entry = {"filename": b"file.txt", "data": untouched}
        lfs_filter.file_data_filter(entry)
        self.assertEqual(entry["data"], untouched)
        git_dir = self.tmp_path / ".git"
        self.assertFalse(git_dir.exists())
    def test_converts_only_matched_files_to_lfs_pointer(self):
        """A file matching ``*.bin`` becomes an LFS pointer and its bytes are
        stored under ``.git/lfs/objects``."""
        bin_only = pathspec.PathSpec.from_lines(pathspec.patterns.GitWildMatchPattern, ["*.bin"])
        lfs_filter = Filter(bin_only)
        payload = b"hello world"
        digest = hashlib.sha256(payload).hexdigest()
        pointer_text = (
            f"version https://git-lfs.github.com/spec/v1\n"
            f"oid sha256:{digest}\n"
            f"size {len(payload)}\n"
        )
        entry = {"filename": b"payload.bin", "data": payload}
        lfs_filter.file_data_filter(entry)
        self.assertEqual(entry["data"], pointer_text.encode("utf-8"))
        # The object path is built from the first two and next two hex
        # digits of the hash, followed by the full hash.
        stored = pathlib.Path(".git/lfs/objects") / digest[:2] / digest[2:4] / digest
        self.assertTrue(stored.is_file())
        self.assertEqual(stored.read_bytes(), payload)
    def test_does_not_convert_unmatched_directory(self):
        """A file outside ``assets/**`` keeps its data and no .git directory appears."""
        assets_only = pathspec.PathSpec.from_lines(pathspec.patterns.GitWildMatchPattern, ["assets/**"])
        lfs_filter = Filter(assets_only)
        payload = b"outside directory"
        entry = {"filename": b"src/images/logo.png", "data": payload}
        lfs_filter.file_data_filter(entry)
        self.assertEqual(entry["data"], payload)
        git_dir = self.tmp_path / ".git"
        self.assertFalse(git_dir.exists())
    def test_converts_matched_directory(self):
        """A file below ``assets/`` becomes an LFS pointer and its bytes are stored."""
        assets_only = pathspec.PathSpec.from_lines(pathspec.patterns.GitWildMatchPattern, ["assets/**"])
        lfs_filter = Filter(assets_only)
        payload = b"inside directory"
        digest = hashlib.sha256(payload).hexdigest()
        entry = {"filename": b"assets/images/logo.png", "data": payload}
        lfs_filter.file_data_filter(entry)
        self.assertIn(b"version https://git-lfs.github.com/spec/v1", entry["data"])
        stored = pathlib.Path(".git/lfs/objects") / digest[:2] / digest[2:4] / digest
        self.assertTrue(stored.is_file())
        self.assertEqual(stored.read_bytes(), payload)
    def test_does_not_overwrite_existing_blob(self):
        """An object file that already exists keeps its modification time."""
        bin_only = pathspec.PathSpec.from_lines(pathspec.patterns.GitWildMatchPattern, ["*.bin"])
        lfs_filter = Filter(bin_only)
        payload = b"abc"
        digest = hashlib.sha256(payload).hexdigest()
        # Pre-create the object file where the filter would store it.
        object_dir = pathlib.Path(".git/lfs/objects") / digest[:2] / digest[2:4]
        object_dir.mkdir(parents=True, exist_ok=True)
        stored = object_dir / digest
        stored.write_bytes(payload)
        mtime_before = stored.stat().st_mtime_ns
        time.sleep(0.01)  # Ensure timestamp difference
        entry = {"filename": b"abc.bin", "data": payload}
        lfs_filter.file_data_filter(entry)
        pointer_prefix = b"version https://git-lfs.github.com/spec/v1"
        self.assertTrue(entry["data"].startswith(pointer_prefix))
        mtime_after = stored.stat().st_mtime_ns
        self.assertEqual(mtime_after, mtime_before)
    def test_empty_file_converted_when_matched(self):
        """A matching file with no content still becomes a pointer with ``size 0``."""
        bin_only = pathspec.PathSpec.from_lines(pathspec.patterns.GitWildMatchPattern, ["*.bin"])
        lfs_filter = Filter(bin_only)
        payload = b""
        digest = hashlib.sha256(payload).hexdigest()
        entry = {"filename": b"empty.bin", "data": payload}
        lfs_filter.file_data_filter(entry)
        self.assertIn(b"size 0", entry["data"])
        stored = pathlib.Path(".git/lfs/objects") / digest[:2] / digest[2:4] / digest
        self.assertTrue(stored.is_file())
        self.assertEqual(stored.read_bytes(), payload)
    # --------------------------------------------------------
    # Optional: GIVEN-WHEN-THEN for build_filter
    # --------------------------------------------------------
    def test_build_filter_reads_patterns_file(self):
        patterns_file = self.tmp_path / "lfs_patterns.txt"
        patterns_file.write_text("*.bin\nassets/**\n", encoding="utf-8")
        flt = build_filter(str(patterns_file))
        data_match = b"match me"
        sha_match = hashlib.sha256(data_match).hexdigest()
        fd_match = {"filename": b"assets/payload.bin", "data": data_match}
        flt.file_data_filter(fd_match)
        self.assertIn(b"oid sha256:", fd_match["data"])
        lfs_file = pathlib.Path(".git/lfs/objects") / sha_match[:2] / sha_match[2:4] / sha_match
        self.assertTrue(lfs_file.is_file())
        data_skip = b"skip me"
        fd_skip = {"filename": b"docs/readme.md", "data": data_skip}
        flt.file_data_filter(fd_skip)
        self.assertEqual(fd_skip["data"], data_skip)