9 Commits

Author SHA1 Message Date
Frej Drejhammar
e200cec39f Adapt to changes in Mercurial 4.6
Starting with Mercurial 4.6 repo.lookup() no longer accepts raw hashes
for lookups.
2018-06-10 15:51:09 +02:00
Gabriel
51d5f893db Add a section about system requirements to the README
Add @rinu's suggestion on how to run fast-export on Windows to the
README, this fixes #121.
2018-06-10 15:44:46 +02:00
ceqi
19aa906308 Update usage section example commands
Change <repo> to <local-repo> so that it's clear that we invoke from a local repository;
Add 'git checkout HEAD' command as we need to run it as the final step.

Thanks
2018-02-13 13:37:58 +00:00
Frej Drejhammar
50dc10770b Warn contributors against doing work that will not be merged
From time to time contributors spend time doing work that will not be
accepted as it duplicates functionality that is already provided with
the mapping files. Try to dissuade them from doing that by explaining
the reasons in the comment.
2018-02-01 07:03:03 +01:00
Martin Freund
90483e02e5 Quote $PYTHON variable to support paths with spaces 2018-01-24 11:43:22 +01:00
Frej Drejhammar
cc8fefe008 Change syntax of mapping files
This is done to allow escape sequences in the key and value strings.
2017-10-02 13:05:14 +02:00
Frej Drejhammar
e174c2a0b7 Refactor load_mapping() to move line parsing to inner function
This is done in preparation to allowing mappings to contain quoted
characters.
2017-09-29 18:50:41 +02:00
Frej Drejhammar
2536f87544 Avoid nuisance error printout from readlink test
2>&1 > /dev/null does not do what I expected, > /dev/null 2>&1 does.
2017-08-25 11:28:52 +02:00
Frej Drejhammar
17c8a22066 Don't break if the destination directory name contains a space 2017-08-18 16:19:27 +02:00
4 changed files with 77 additions and 23 deletions

View File

@@ -24,6 +24,16 @@ you want to report a security bug. That way the next person having the
same problem can benefit from the time spent solving the problem the
first time.
System Requirements
-------------------
This project depends on Python 2.7 and the Mercurial 4.6 package. If
Python is not installed, install it before proceeding. The Mercurial
package can be installed with `pip install mercurial`.
If you're on Windows, run the following commands in git bash (Git for
Windows).
Usage
-----
@@ -33,7 +43,8 @@ Using hg-fast-export is quite simple for a mercurial repository <repo>:
mkdir repo-git # or whatever
cd repo-git
git init
hg-fast-export.sh -r <repo>
hg-fast-export.sh -r <local-repo>
git checkout HEAD
```
Please note that hg-fast-export does not automatically check out the
@@ -65,12 +76,18 @@ As mercurial appears to be much less picky about the syntax of the
author information than git, an author mapping file can be given to
hg-fast-export to fix up malformed author strings. The file is
specified using the -A option. The file should contain lines of the
form `FromAuthor=ToAuthor`. The example authors.map below will
translate `User <garbage<user@example.com>` to `User <user@example.com>`.
form `"<key>"="<value>"`. Inside the key and value strings, all escape
sequences understood by the python `string_escape` encoding are
supported. (Versions of fast-export prior to v171002 had a different
syntax; the old syntax can be enabled with the flag
`--mappings-are-raw`.)
The example authors.map below will translate `User
<garbage<tab><user@example.com>` to `User <user@example.com>`.
```
-- Start of authors.map --
User <garbage<user@example.com>=User <user@example.com>
"User <garbage\t<user@example.com>"="User <user@example.com>"
-- End of authors.map --
```

View File

@@ -4,6 +4,7 @@
# License: MIT <http://www.opensource.org/licenses/mit-license.php>
from mercurial import node
from mercurial.scmutil import revsymbol
from hg2git import setup_repo,fixup_user,get_branch,get_changeset
from hg2git import load_cache,save_cache,get_git_sha1,set_default_branch,set_origin_name
from optparse import OptionParser
@@ -78,7 +79,7 @@ def get_filechanges(repo,revision,parents,mleft):
l,c,r=[],[],[]
for p in parents:
if p<0: continue
mright=repo.changectx(p).manifest()
mright=revsymbol(repo,str(p)).manifest()
l,c,r=split_dict(mleft,mright,l,c,r)
l.sort()
c.sort()
@@ -153,9 +154,13 @@ def sanitize_name(name,what="branch", mapping={}):
# modifying names which previously were not touched it will break
# preexisting setups which are doing incremental imports.
#
# Use the -B and -T options to mangle branch and tag names
# instead. If you have a source repository where this is too much
# work to do manually, write a tool that does it for you.
# Fast-export tries not to inflict an arbitrary naming policy on the
# user; instead it aims to provide mechanisms allowing the user to
# apply their own policy. Therefore do not add a transform which can
# already be implemented with the -B and -T options to mangle branch
# and tag names. If you have a source repository where this is too
# much work to do manually, write a tool that does it for you.
#
def dot(name):
if not name: return name
@@ -206,7 +211,7 @@ def export_commit(ui,repo,revision,old_marks,max,count,authors,
wr(desc)
wr()
ctx=repo.changectx(str(revision))
ctx=revsymbol(repo,str(revision))
man=ctx.manifest()
added,changed,removed,type=[],[],[],''
@@ -221,7 +226,7 @@ def export_commit(ui,repo,revision,old_marks,max,count,authors,
# later non-merge revision: feed in changed manifest
# if we have exactly one parent, just take the changes from the
# manifest without expensively comparing checksums
f=repo.status(repo.lookup(parents[0]),revnode)[:3]
f=repo.status(parents[0],revnode)[:3]
added,changed,removed=f[1],f[0],f[2]
type='simple delta'
else: # a merge with two parents
@@ -258,7 +263,7 @@ def export_note(ui,repo,revision,count,authors,encoding,is_first):
if is_first:
wr('from refs/notes/hg^0')
wr('N inline :%d' % (revision+1))
hg_hash=repo.changectx(str(revision)).hex()
hg_hash=revsymbol(repo,str(revision)).hex()
wr('data %d' % (len(hg_hash)))
wr_no_nl(hg_hash)
wr()
@@ -294,7 +299,24 @@ def export_tags(ui,repo,old_marks,mapping_cache,count,authors,tagsmap):
count=checkpoint(count)
return count
def load_mapping(name, filename):
def load_mapping(name, filename, mapping_is_raw):
raw_regexp=re.compile('^([^=]+)[ ]*=[ ]*(.+)$')
string_regexp='"(((\\.)|(\\")|[^"])*)"'
quoted_regexp=re.compile('^'+string_regexp+'[ ]*=[ ]*'+string_regexp+'$')
def parse_raw_line(line):
m=raw_regexp.match(line)
if m==None:
return None
return (m.group(1).strip(), m.group(2).strip())
def parse_quoted_line(line):
m=quoted_regexp.match(line)
if m==None:
return None
return (m.group(1).decode('string_escape'),
m.group(5).decode('string_escape'))
cache={}
if not os.path.exists(filename):
sys.stderr.write('Could not open mapping file [%s]\n' % (filename))
@@ -302,18 +324,19 @@ def load_mapping(name, filename):
f=open(filename,'r')
l=0
a=0
lre=re.compile('^([^=]+)[ ]*=[ ]*(.+)$')
for line in f.readlines():
l+=1
line=line.strip()
if line=='' or line[0]=='#':
if l==1 and line[0]=='#' and line=='# quoted-escaped-strings':
continue
m=lre.match(line)
elif line=='' or line[0]=='#':
continue
m=parse_raw_line(line) if mapping_is_raw else parse_quoted_line(line)
if m==None:
sys.stderr.write('Invalid file format in [%s], line %d\n' % (filename,l))
continue
# put key:value in cache, key without ^:
cache[m.group(1).strip()]=m.group(2).strip()
cache[m[0]]=m[1]
a+=1
f.close()
sys.stderr.write('Loaded %d %s\n' % (a, name))
@@ -460,6 +483,8 @@ if __name__=='__main__':
help="Assume commit and author strings retrieved from Mercurial are encoded in <encoding>")
parser.add_option("--fe",dest="fn_encoding",
help="Assume file names from Mercurial are encoded in <filename_encoding>")
parser.add_option("--mappings-are-raw",dest="raw_mappings", default=False,
help="Assume mappings are raw <key>=<value> lines")
(options,args)=parser.parse_args()
@@ -474,15 +499,15 @@ if __name__=='__main__':
a={}
if options.authorfile!=None:
a=load_mapping('authors', options.authorfile)
a=load_mapping('authors', options.authorfile, options.raw_mappings)
b={}
if options.branchesfile!=None:
b=load_mapping('branches', options.branchesfile)
b=load_mapping('branches', options.branchesfile, options.raw_mappings)
t={}
if options.tagsfile!=None:
t=load_mapping('tags', options.tagsfile)
t=load_mapping('tags', options.tagsfile, True)
if options.default_branch!=None:
set_default_branch(options.default_branch)

View File

@@ -8,7 +8,7 @@ if command -v greadlink > /dev/null; then
READLINK="greadlink" # Prefer greadlink over readlink
fi
if ! $READLINK -f "$(which "$0")" 2>&1 > /dev/null; then
if ! $READLINK -f "$(which "$0")" > /dev/null 2>&1 ; then
ROOT="$(dirname "$(which "$0")")"
if [ ! -f "$ROOT/hg-fast-export.py" ] ; then
echo "hg-fast-exports requires a readlink implementation which knows" \
@@ -55,6 +55,7 @@ Options:
Mercurial are encoded in <encoding>
--fe <filename_encoding> Assume filenames from Mercurial are encoded
in <filename_encoding>
--mappings-are-raw Assume mappings are raw <key>=<value> lines
"
case "$1" in
-h|--help)
@@ -70,7 +71,7 @@ if test "z$IS_BARE" != ztrue; then
# This is not a bare repo, cd to the toplevel
TOPLEVEL=$(git rev-parse --show-toplevel) \
|| (echo "Could not find git repo toplevel" ; exit 1)
cd $TOPLEVEL || exit 1
cd "$TOPLEVEL" || exit 1
fi
GIT_DIR=$(git rev-parse --git-dir) || (echo "Could not find git repo" ; exit 1)
@@ -152,7 +153,7 @@ $(
exec 4>&3 3>&1 1>&4 4>&-
{
_e1=0
GIT_DIR="$GIT_DIR" $PYTHON "$ROOT/hg-fast-export.py" \
GIT_DIR="$GIT_DIR" "$PYTHON" "$ROOT/hg-fast-export.py" \
--repo "$REPO" \
--marks "$GIT_DIR/$PFX-$SFX_MARKS" \
--mapping "$GIT_DIR/$PFX-$SFX_MAPPING" \

View File

@@ -4,6 +4,9 @@
# License: MIT <http://www.opensource.org/licenses/mit-license.php>
from mercurial import hg,util,ui,templatefilters
from mercurial import error as hgerror
from mercurial.scmutil import revsymbol,binnode
import re
import os
import sys
@@ -69,7 +72,15 @@ def get_branch(name):
return name
def get_changeset(ui,repo,revision,authors={},encoding=''):
node=repo.lookup(revision)
# Starting with Mercurial 4.6 lookup no longer accepts raw hashes
# for lookups. Work around it by changing our behaviour depending on
# how it fails
try:
node=repo.lookup(revision)
except hgerror.ProgrammingError:
node=binnode(revsymbol(repo,str(revision))) # We were given a numeric rev
except hgerror.RepoLookupError:
node=revision # We got a raw hash
(manifest,user,(time,timezone),files,desc,extra)=repo.changelog.read(node)
if encoding:
user=user.decode(encoding).encode('utf8')