Support Python 3

Port hg-fast-export to Python 2/3 polyglot code.

Since Mercurial accepts and returns bytestrings for all repository data,
the approach I've taken here is to use bytestrings throughout the
hg-fast-export code. All strings pertaining to repository data are
bytestrings. This means the code is using the same string datatype for
this data on Python 3 as it did (and still does) on Python 2.

Repository data coming from subprocess calls to git, or read from files,
is also left as the bytestrings either returned from
subprocess.check_output or as read from the file in 'rb' mode.

Regexes and string literals that are used with repository data have
all had a b'' prefix added.

When repository data is used in error/warning messages, it is decoded
with the UTF8 codec for printing.

With this patch, hg-fast-export.py writes binary output to
sys.stdout.buffer on Python 3 - on Python 2 this doesn't exist and it
still uses sys.stdout.

The only strings that are left as "native" strings and not coerced to
bytestrings are filepaths passed in on the command line, and dictionary
keys for internal data structures used by hg-fast-export.py that do
not originate in repository data.

Mapping files are read in 'rb' mode, and thus bytestrings are read from
them. When an encoding is given, their contents are decoded with that
encoding and then immediately re-encoded with UTF8; the resulting
bytestrings are what is returned.

Other necessary changes were:

 - indexing bytestrings with a single index returns an integer on Python 3.
   These indexing operations have been replaced with a one-element
   slice: x[0] -> x[0:1] or x[-1] -> x[-1:] so as to return a bytestring.

 - raw_hash.encode('hex_codec') replaced with binascii.hexlify(raw_hash)

 - str(integer) -> b'%d' % integer

 - 'string_escape' codec replaced with 'unicode_escape' (which was
    backported to python 2.7). Strings decoded with this codec were then
    immediately re-encoded with UTF8.

 - Calls to map() intended to execute their contents immediately were
   unwrapped or converted to list comprehensions, since on Python 3 map()
   returns a lazy iterator and does not execute until iterated over.

hg-fast-export.sh has been modified to not require Python 2. Instead, if
PYTHON has not been defined, it checks python2, python, then python3,
and uses the first one that exists and can import the mercurial module.
This commit is contained in:
chrisjbillington
2020-02-10 21:39:13 -05:00
parent 595587b245
commit b961f146df
10 changed files with 252 additions and 176 deletions

View File

@@ -29,9 +29,10 @@ first time.
System Requirements System Requirements
------------------- -------------------
This project depends on Python 2.7 and the Mercurial >= 4.6 This project depends on Python 2.7 or 3.5+, and the Mercurial >= 4.6
package. If Python is not installed, install it before proceeding. The package (>= 5.2, if Python 3.5+). If Python is not installed, install
Mercurial package can be installed with `pip install mercurial`. it before proceeding. The Mercurial package can be installed with
`pip install mercurial`.
On windows the bash that comes with "Git for Windows" is known to work On windows the bash that comes with "Git for Windows" is known to work
well. well.

View File

@@ -11,9 +11,13 @@ from optparse import OptionParser
import re import re
import sys import sys
import os import os
from binascii import hexlify
import pluginloader import pluginloader
PY2 = sys.version_info.major == 2
if PY2:
str = unicode
if sys.platform == "win32": if PY2 and sys.platform == "win32":
# On Windows, sys.stdout is initially opened in text mode, which means that # On Windows, sys.stdout is initially opened in text mode, which means that
# when a LF (\n) character is written to sys.stdout, it will be converted # when a LF (\n) character is written to sys.stdout, it will be converted
# into CRLF (\r\n). That makes git blow up, so use this platform-specific # into CRLF (\r\n). That makes git blow up, so use this platform-specific
@@ -22,7 +26,7 @@ if sys.platform == "win32":
msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY) msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
# silly regex to catch Signed-off-by lines in log message # silly regex to catch Signed-off-by lines in log message
sob_re=re.compile('^Signed-[Oo]ff-[Bb]y: (.+)$') sob_re=re.compile(b'^Signed-[Oo]ff-[Bb]y: (.+)$')
# insert 'checkpoint' command after this many commits or none at all if 0 # insert 'checkpoint' command after this many commits or none at all if 0
cfg_checkpoint_count=0 cfg_checkpoint_count=0
# write some progress message every this many file contents written # write some progress message every this many file contents written
@@ -35,30 +39,33 @@ submodule_mappings=None
# author/branch/tag names. # author/branch/tag names.
auto_sanitize = None auto_sanitize = None
stdout_buffer = sys.stdout if PY2 else sys.stdout.buffer
def gitmode(flags): def gitmode(flags):
return 'l' in flags and '120000' or 'x' in flags and '100755' or '100644' return b'l' in flags and b'120000' or b'x' in flags and b'100755' or b'100644'
def wr_no_nl(msg=''): def wr_no_nl(msg=b''):
assert isinstance(msg, bytes)
if msg: if msg:
sys.stdout.write(msg) stdout_buffer.write(msg)
def wr(msg=''): def wr(msg=b''):
wr_no_nl(msg) wr_no_nl(msg)
sys.stdout.write('\n') stdout_buffer.write(b'\n')
#map(lambda x: sys.stderr.write('\t[%s]\n' % x),msg.split('\n')) #map(lambda x: sys.stderr.write('\t[%s]\n' % x),msg.split('\n'))
def checkpoint(count): def checkpoint(count):
count=count+1 count=count+1
if cfg_checkpoint_count>0 and count%cfg_checkpoint_count==0: if cfg_checkpoint_count>0 and count%cfg_checkpoint_count==0:
sys.stderr.write("Checkpoint after %d commits\n" % count) sys.stderr.write("Checkpoint after %d commits\n" % count)
wr('checkpoint') wr(b'checkpoint')
wr() wr()
return count return count
def revnum_to_revref(rev, old_marks): def revnum_to_revref(rev, old_marks):
"""Convert an hg revnum to a git-fast-import rev reference (an SHA1 """Convert an hg revnum to a git-fast-import rev reference (an SHA1
or a mark)""" or a mark)"""
return old_marks.get(rev) or ':%d' % (rev+1) return old_marks.get(rev) or b':%d' % (rev+1)
def file_mismatch(f1,f2): def file_mismatch(f1,f2):
"""See if two revisions of a file are not equal.""" """See if two revisions of a file are not equal."""
@@ -87,7 +94,7 @@ def get_filechanges(repo,revision,parents,mleft):
l,c,r=[],[],[] l,c,r=[],[],[]
for p in parents: for p in parents:
if p<0: continue if p<0: continue
mright=revsymbol(repo,str(p)).manifest() mright=revsymbol(repo,b"%d" %p).manifest()
l,c,r=split_dict(mleft,mright,l,c,r) l,c,r=split_dict(mleft,mright,l,c,r)
l.sort() l.sort()
c.sort() c.sort()
@@ -110,7 +117,7 @@ def get_author(logmessage,committer,authors):
"Signed-off-by: foo" and thus matching our detection regex. Prevent "Signed-off-by: foo" and thus matching our detection regex. Prevent
that.""" that."""
loglines=logmessage.split('\n') loglines=logmessage.split(b'\n')
i=len(loglines) i=len(loglines)
# from tail walk to top skipping empty lines # from tail walk to top skipping empty lines
while i>=0: while i>=0:
@@ -138,23 +145,24 @@ def remove_gitmodules(ctx):
# be to only remove the submodules of the first parent. # be to only remove the submodules of the first parent.
for parent_ctx in ctx.parents(): for parent_ctx in ctx.parents():
for submodule in parent_ctx.substate.keys(): for submodule in parent_ctx.substate.keys():
wr('D %s' % submodule) wr(b'D %s' % submodule)
wr('D .gitmodules') wr(b'D .gitmodules')
def refresh_git_submodule(name,subrepo_info): def refresh_git_submodule(name,subrepo_info):
wr('M 160000 %s %s' % (subrepo_info[1],name)) wr(b'M 160000 %s %s' % (subrepo_info[1],name))
sys.stderr.write("Adding/updating submodule %s, revision %s\n" sys.stderr.write(
% (name,subrepo_info[1])) "Adding/updating submodule %s, revision %s\n"
return '[submodule "%s"]\n\tpath = %s\n\turl = %s\n' % (name,name, % (name.decode('utf8'), subrepo_info[1].decode('utf8'))
subrepo_info[0]) )
return b'[submodule "%s"]\n\tpath = %s\n\turl = %s\n' % (name, name, subrepo_info[0])
def refresh_hg_submodule(name,subrepo_info): def refresh_hg_submodule(name,subrepo_info):
gitRepoLocation=submodule_mappings[name] + "/.git" gitRepoLocation=submodule_mappings[name] + b"/.git"
# Populate the cache to map mercurial revision to git revision # Populate the cache to map mercurial revision to git revision
if not name in subrepo_cache: if not name in subrepo_cache:
subrepo_cache[name]=(load_cache(gitRepoLocation+"/hg2git-mapping"), subrepo_cache[name]=(load_cache(gitRepoLocation+b"/hg2git-mapping"),
load_cache(gitRepoLocation+"/hg2git-marks", load_cache(gitRepoLocation+b"/hg2git-marks",
lambda s: int(s)-1)) lambda s: int(s)-1))
(mapping_cache,marks_cache)=subrepo_cache[name] (mapping_cache,marks_cache)=subrepo_cache[name]
@@ -162,30 +170,38 @@ def refresh_hg_submodule(name,subrepo_info):
if subrepo_hash in mapping_cache: if subrepo_hash in mapping_cache:
revnum=mapping_cache[subrepo_hash] revnum=mapping_cache[subrepo_hash]
gitSha=marks_cache[int(revnum)] gitSha=marks_cache[int(revnum)]
wr('M 160000 %s %s' % (gitSha,name)) wr(b'M 160000 %s %s' % (gitSha,name))
sys.stderr.write("Adding/updating submodule %s, revision %s->%s\n" sys.stderr.write(
% (name,subrepo_hash,gitSha)) "Adding/updating submodule %s, revision %s->%s\n"
return '[submodule "%s"]\n\tpath = %s\n\turl = %s\n' % (name,name, % (name.decode('utf8'), subrepo_hash.decode('utf8'), gitSha.decode('utf8'))
)
return b'[submodule "%s"]\n\tpath = %s\n\turl = %s\n' % (name,name,
submodule_mappings[name]) submodule_mappings[name])
else: else:
sys.stderr.write("Warning: Could not find hg revision %s for %s in git %s\n" % sys.stderr.write(
(subrepo_hash,name,gitRepoLocation)) "Warning: Could not find hg revision %s for %s in git %s\n"
return '' % (
subrepo_hash.decode('utf8'),
name.decode('utf8'),
gitRepoLocation.decode('utf8'),
)
)
return b''
def refresh_gitmodules(ctx): def refresh_gitmodules(ctx):
"""Updates list of ctx submodules according to .hgsubstate file""" """Updates list of ctx submodules according to .hgsubstate file"""
remove_gitmodules(ctx) remove_gitmodules(ctx)
gitmodules="" gitmodules=b""
# Create the .gitmodules file and all submodules # Create the .gitmodules file and all submodules
for name,subrepo_info in ctx.substate.items(): for name,subrepo_info in ctx.substate.items():
if subrepo_info[2]=='git': if subrepo_info[2]==b'git':
gitmodules+=refresh_git_submodule(name,subrepo_info) gitmodules+=refresh_git_submodule(name,subrepo_info)
elif submodule_mappings and name in submodule_mappings: elif submodule_mappings and name in submodule_mappings:
gitmodules+=refresh_hg_submodule(name,subrepo_info) gitmodules+=refresh_hg_submodule(name,subrepo_info)
if len(gitmodules): if len(gitmodules):
wr('M 100644 inline .gitmodules') wr(b'M 100644 inline .gitmodules')
wr('data %d' % (len(gitmodules)+1)) wr(b'data %d' % (len(gitmodules)+1))
wr(gitmodules) wr(gitmodules)
def export_file_contents(ctx,manifest,files,hgtags,encoding='',plugins={}): def export_file_contents(ctx,manifest,files,hgtags,encoding='',plugins={}):
@@ -193,19 +209,21 @@ def export_file_contents(ctx,manifest,files,hgtags,encoding='',plugins={}):
max=len(files) max=len(files)
is_submodules_refreshed=False is_submodules_refreshed=False
for file in files: for file in files:
if not is_submodules_refreshed and (file=='.hgsub' or file=='.hgsubstate'): if not is_submodules_refreshed and (file==b'.hgsub' or file==b'.hgsubstate'):
is_submodules_refreshed=True is_submodules_refreshed=True
refresh_gitmodules(ctx) refresh_gitmodules(ctx)
# Skip .hgtags files. They only get us in trouble. # Skip .hgtags files. They only get us in trouble.
if not hgtags and file == ".hgtags": if not hgtags and file == b".hgtags":
sys.stderr.write('Skip %s\n' % (file)) sys.stderr.write('Skip %s\n' % file.decode('utf8'))
continue continue
if encoding: if encoding:
filename=file.decode(encoding).encode('utf8') filename=file.decode(encoding).encode('utf8')
else: else:
filename=file filename=file
if '.git' in filename.split(os.path.sep): if b'.git' in filename.split(os.path.sep.encode()):
sys.stderr.write('Ignoring file %s which cannot be tracked by git\n' % filename) sys.stderr.write(
'Ignoring file %s which cannot be tracked by git\n' % filename.decode('utf8')
)
continue continue
file_ctx=ctx.filectx(file) file_ctx=ctx.filectx(file)
d=file_ctx.data() d=file_ctx.data()
@@ -218,9 +236,9 @@ def export_file_contents(ctx,manifest,files,hgtags,encoding='',plugins={}):
filename=file_data['filename'] filename=file_data['filename']
file_ctx=file_data['file_ctx'] file_ctx=file_data['file_ctx']
wr('M %s inline %s' % (gitmode(manifest.flags(file)), wr(b'M %s inline %s' % (gitmode(manifest.flags(file)),
strip_leading_slash(filename))) strip_leading_slash(filename)))
wr('data %d' % len(d)) # had some trouble with size() wr(b'data %d' % len(d)) # had some trouble with size()
wr(d) wr(d)
count+=1 count+=1
if count%cfg_export_boundary==0: if count%cfg_export_boundary==0:
@@ -246,25 +264,28 @@ def sanitize_name(name,what="branch", mapping={}):
def dot(name): def dot(name):
if not name: return name if not name: return name
if name[0] == '.': return '_'+name[1:] if name[0:1] == b'.': return b'_'+name[1:]
return name return name
if not auto_sanitize: if not auto_sanitize:
return mapping.get(name,name) return mapping.get(name,name)
n=mapping.get(name,name) n=mapping.get(name,name)
p=re.compile('([[ ~^:?\\\\*]|\.\.)') p=re.compile(b'([[ ~^:?\\\\*]|\.\.)')
n=p.sub('_', n) n=p.sub(b'_', n)
if n[-1] in ('/', '.'): n=n[:-1]+'_' if n[-1:] in (b'/', b'.'): n=n[:-1]+b'_'
n='/'.join(map(dot,n.split('/'))) n=b'/'.join([dot(s) for s in n.split(b'/')])
p=re.compile('_+') p=re.compile(b'_+')
n=p.sub('_', n) n=p.sub(b'_', n)
if n!=name: if n!=name:
sys.stderr.write('Warning: sanitized %s [%s] to [%s]\n' % (what,name,n)) sys.stderr.write(
'Warning: sanitized %s [%s] to [%s]\n'
% (what, name.decode('utf8'), n.decode('utf8'))
)
return n return n
def strip_leading_slash(filename): def strip_leading_slash(filename):
if filename[0] == '/': if filename[0:1] == b'/':
return filename[1:] return filename[1:]
return filename return filename
@@ -272,7 +293,7 @@ def export_commit(ui,repo,revision,old_marks,max,count,authors,
branchesmap,sob,brmap,hgtags,encoding='',fn_encoding='', branchesmap,sob,brmap,hgtags,encoding='',fn_encoding='',
plugins={}): plugins={}):
def get_branchname(name): def get_branchname(name):
if brmap.has_key(name): if name in brmap:
return brmap[name] return brmap[name]
n=sanitize_name(name, "branch", branchesmap) n=sanitize_name(name, "branch", branchesmap)
brmap[name]=n brmap[name]=n
@@ -297,18 +318,18 @@ def export_commit(ui,repo,revision,old_marks,max,count,authors,
desc = commit_data['desc'] desc = commit_data['desc']
if len(parents)==0 and revision != 0: if len(parents)==0 and revision != 0:
wr('reset refs/heads/%s' % branch) wr(b'reset refs/heads/%s' % branch)
wr('commit refs/heads/%s' % branch) wr(b'commit refs/heads/%s' % branch)
wr('mark :%d' % (revision+1)) wr(b'mark :%d' % (revision+1))
if sob: if sob:
wr('author %s %d %s' % (author,time,timezone)) wr(b'author %s %d %s' % (author,time,timezone))
wr('committer %s %d %s' % (user,time,timezone)) wr(b'committer %s %d %s' % (user,time,timezone))
wr('data %d' % (len(desc)+1)) # wtf? wr(b'data %d' % (len(desc)+1)) # wtf?
wr(desc) wr(desc)
wr() wr()
ctx=revsymbol(repo,str(revision)) ctx=revsymbol(repo, b"%d" % revision)
man=ctx.manifest() man=ctx.manifest()
added,changed,removed,type=[],[],[],'' added,changed,removed,type=[],[],[],''
@@ -318,7 +339,7 @@ def export_commit(ui,repo,revision,old_marks,max,count,authors,
added.sort() added.sort()
type='full' type='full'
else: else:
wr('from %s' % revnum_to_revref(parents[0], old_marks)) wr(b'from %s' % revnum_to_revref(parents[0], old_marks))
if len(parents) == 1: if len(parents) == 1:
# later non-merge revision: feed in changed manifest # later non-merge revision: feed in changed manifest
# if we have exactly one parent, just take the changes from the # if we have exactly one parent, just take the changes from the
@@ -327,7 +348,7 @@ def export_commit(ui,repo,revision,old_marks,max,count,authors,
added,changed,removed=f.added,f.modified,f.removed added,changed,removed=f.added,f.modified,f.removed
type='simple delta' type='simple delta'
else: # a merge with two parents else: # a merge with two parents
wr('merge %s' % revnum_to_revref(parents[1], old_marks)) wr(b'merge %s' % revnum_to_revref(parents[1], old_marks))
# later merge revision: feed in changed manifest # later merge revision: feed in changed manifest
# for many files comparing checksums is expensive so only do it for # for many files comparing checksums is expensive so only do it for
# merges where we really need it due to hg's revlog logic # merges where we really need it due to hg's revlog logic
@@ -335,15 +356,15 @@ def export_commit(ui,repo,revision,old_marks,max,count,authors,
type='thorough delta' type='thorough delta'
sys.stderr.write('%s: Exporting %s revision %d/%d with %d/%d/%d added/changed/removed files\n' % sys.stderr.write('%s: Exporting %s revision %d/%d with %d/%d/%d added/changed/removed files\n' %
(branch,type,revision+1,max,len(added),len(changed),len(removed))) (branch.decode('utf8'),type,revision+1,max,len(added),len(changed),len(removed)))
for filename in removed: for filename in removed:
if fn_encoding: if fn_encoding:
filename=filename.decode(fn_encoding).encode('utf8') filename=filename.decode(fn_encoding).encode('utf8')
filename=strip_leading_slash(filename) filename=strip_leading_slash(filename)
if filename=='.hgsub': if filename==b'.hgsub':
remove_gitmodules(ctx) remove_gitmodules(ctx)
wr('D %s' % filename) wr(b'D %s' % filename)
export_file_contents(ctx,man,added,hgtags,fn_encoding,plugins) export_file_contents(ctx,man,added,hgtags,fn_encoding,plugins)
export_file_contents(ctx,man,changed,hgtags,fn_encoding,plugins) export_file_contents(ctx,man,changed,hgtags,fn_encoding,plugins)
@@ -358,52 +379,54 @@ def export_note(ui,repo,revision,count,authors,encoding,is_first):
parents = [p for p in repo.changelog.parentrevs(revision) if p >= 0] parents = [p for p in repo.changelog.parentrevs(revision) if p >= 0]
wr('commit refs/notes/hg') wr(b'commit refs/notes/hg')
wr('committer %s %d %s' % (user,time,timezone)) wr(b'committer %s %d %s' % (user,time,timezone))
wr('data 0') wr(b'data 0')
if is_first: if is_first:
wr('from refs/notes/hg^0') wr(b'from refs/notes/hg^0')
wr('N inline :%d' % (revision+1)) wr(b'N inline :%d' % (revision+1))
hg_hash=revsymbol(repo,str(revision)).hex() hg_hash=revsymbol(repo,b"%d" % revision).hex()
wr('data %d' % (len(hg_hash))) wr(b'data %d' % (len(hg_hash)))
wr_no_nl(hg_hash) wr_no_nl(hg_hash)
wr() wr()
return checkpoint(count) return checkpoint(count)
wr('data %d' % (len(desc)+1)) # wtf?
wr(desc)
wr()
def export_tags(ui,repo,old_marks,mapping_cache,count,authors,tagsmap): def export_tags(ui,repo,old_marks,mapping_cache,count,authors,tagsmap):
l=repo.tagslist() l=repo.tagslist()
for tag,node in l: for tag,node in l:
# Remap the branch name # Remap the branch name
tag=sanitize_name(tag,"tag",tagsmap) tag=sanitize_name(tag,"tag",tagsmap)
# ignore latest revision # ignore latest revision
if tag=='tip': continue if tag==b'tip': continue
# ignore tags to nodes that are missing (ie, 'in the future') # ignore tags to nodes that are missing (ie, 'in the future')
if node.encode('hex_codec') not in mapping_cache: if hexlify(node) not in mapping_cache:
sys.stderr.write('Tag %s refers to unseen node %s\n' % (tag, node.encode('hex_codec'))) sys.stderr.write(
'Tag %s refers to unseen node %s\n'
% (tag.decode('utf8'), hexlify(node).decode('utf8'))
)
continue continue
rev=int(mapping_cache[node.encode('hex_codec')]) rev=int(mapping_cache[hexlify(node)])
ref=revnum_to_revref(rev, old_marks) ref=revnum_to_revref(rev, old_marks)
if ref==None: if ref==None:
sys.stderr.write('Failed to find reference for creating tag' sys.stderr.write('Failed to find reference for creating tag'
' %s at r%d\n' % (tag,rev)) ' %s at r%d\n' % (tag,rev))
continue continue
sys.stderr.write('Exporting tag [%s] at [hg r%d] [git %s]\n' % (tag,rev,ref)) sys.stderr.write(
wr('reset refs/tags/%s' % tag) 'Exporting tag [%s] at [hg r%d] [git %s]\n'
wr('from %s' % ref) % (tag.decode('utf8'), rev, ref.decode('utf8'))
)
wr(b'reset refs/tags/%s' % tag)
wr(b'from %s' % ref)
wr() wr()
count=checkpoint(count) count=checkpoint(count)
return count return count
def load_mapping(name, filename, mapping_is_raw): def load_mapping(name, filename, mapping_is_raw):
raw_regexp=re.compile('^([^=]+)[ ]*=[ ]*(.+)$') raw_regexp=re.compile(b'^([^=]+)[ ]*=[ ]*(.+)$')
string_regexp='"(((\\.)|(\\")|[^"])*)"' string_regexp=b'"(((\\.)|(\\")|[^"])*)"'
quoted_regexp=re.compile('^'+string_regexp+'[ ]*=[ ]*'+string_regexp+'$') quoted_regexp=re.compile(b'^'+string_regexp+b'[ ]*=[ ]*'+string_regexp+b'$')
def parse_raw_line(line): def parse_raw_line(line):
m=raw_regexp.match(line) m=raw_regexp.match(line)
@@ -415,22 +438,22 @@ def load_mapping(name, filename, mapping_is_raw):
m=quoted_regexp.match(line) m=quoted_regexp.match(line)
if m==None: if m==None:
return None return None
return (m.group(1).decode('string_escape'), return (m.group(1).decode('unicode_escape').encode('utf8'),
m.group(5).decode('string_escape')) m.group(5).decode('unicode_escape').encode('utf8'))
cache={} cache={}
if not os.path.exists(filename): if not os.path.exists(filename):
sys.stderr.write('Could not open mapping file [%s]\n' % (filename)) sys.stderr.write('Could not open mapping file [%s]\n' % (filename))
return cache return cache
f=open(filename,'r') f=open(filename,'rb')
l=0 l=0
a=0 a=0
for line in f.readlines(): for line in f.readlines():
l+=1 l+=1
line=line.strip() line=line.strip()
if l==1 and line[0]=='#' and line=='# quoted-escaped-strings': if l==1 and line[0:1]==b'#' and line==b'# quoted-escaped-strings':
continue continue
elif line=='' or line[0]=='#': elif line==b'' or line[0:1]==b'#':
continue continue
m=parse_raw_line(line) if mapping_is_raw else parse_quoted_line(line) m=parse_raw_line(line) if mapping_is_raw else parse_quoted_line(line)
if m==None: if m==None:
@@ -454,7 +477,7 @@ def branchtip(repo, heads):
def verify_heads(ui,repo,cache,force,branchesmap): def verify_heads(ui,repo,cache,force,branchesmap):
branches={} branches={}
for bn, heads in repo.branchmap().iteritems(): for bn, heads in repo.branchmap().items():
branches[bn] = branchtip(repo, heads) branches[bn] = branchtip(repo, heads)
l=[(-repo.changelog.rev(n), n, t) for t, n in branches.items()] l=[(-repo.changelog.rev(n), n, t) for t, n in branches.items()]
l.sort() l.sort()
@@ -466,13 +489,16 @@ def verify_heads(ui,repo,cache,force,branchesmap):
sha1=get_git_sha1(sanitized_name) sha1=get_git_sha1(sanitized_name)
c=cache.get(sanitized_name) c=cache.get(sanitized_name)
if sha1!=c: if sha1!=c:
sys.stderr.write('Error: Branch [%s] modified outside hg-fast-export:' sys.stderr.write(
'\n%s (repo) != %s (cache)\n' % (b,sha1,c)) 'Error: Branch [%s] modified outside hg-fast-export:'
'\n%s (repo) != %s (cache)\n'
% (b.decode('utf8'), sha1.decode('utf8'), c.decode('utf8'))
)
if not force: return False if not force: return False
# verify that branch has exactly one head # verify that branch has exactly one head
t={} t={}
for h in repo.filtered('visible').heads(): for h in repo.filtered(b'visible').heads():
(_,_,_,_,_,_,branch,_)=get_changeset(ui,repo,h) (_,_,_,_,_,_,branch,_)=get_changeset(ui,repo,h)
if t.get(branch,False): if t.get(branch,False):
sys.stderr.write('Error: repository has at least one unnamed head: hg r%s\n' % sys.stderr.write('Error: repository has at least one unnamed head: hg r%s\n' %
@@ -519,15 +545,15 @@ def hg2git(repourl,m,marksfile,mappingfile,headsfile,tipfile,
max=tip max=tip
for rev in range(0,max): for rev in range(0,max):
(revnode,_,_,_,_,_,_,_)=get_changeset(ui,repo,rev,authors) (revnode,_,_,_,_,_,_,_)=get_changeset(ui,repo,rev,authors)
if repo[revnode].hidden(): if repo[revnode].hidden():
continue continue
mapping_cache[revnode.encode('hex_codec')] = str(rev) mapping_cache[hexlify(revnode)] = b"%d" % rev
if submodule_mappings: if submodule_mappings:
# Make sure that all submodules are registered in the submodule-mappings file # Make sure that all submodules are registered in the submodule-mappings file
for rev in range(0,max): for rev in range(0,max):
ctx=revsymbol(repo,str(rev)) ctx=revsymbol(repo,b"%d" % rev)
if ctx.hidden(): if ctx.hidden():
continue continue
if ctx.substate: if ctx.substate:

View File

@@ -28,25 +28,20 @@ SFX_STATE="state"
GFI_OPTS="" GFI_OPTS=""
if [ -z "${PYTHON}" ]; then if [ -z "${PYTHON}" ]; then
# $PYTHON is not set, so we try to find a working python 2.7 to # $PYTHON is not set, so we try to find a working python with mercurial:
# use. PEP 394 tells us to use 'python2', otherwise try plain for python_cmd in python2 python python3; do
# 'python'. if command -v $python_cmd > /dev/null; then
if command -v python2 > /dev/null; then $python_cmd -c 'import mercurial' 2> /dev/null
PYTHON="python2" if [ $? -eq 0 ]; then
elif command -v python > /dev/null; then PYTHON=$python_cmd
PYTHON="python" break
else fi
echo "Could not find any python interpreter, please use the 'PYTHON'" \ fi
"environment variable to specify the interpreter to use." done
exit 1
fi
fi fi
if [ -z "${PYTHON}" ]; then
# Check that the python specified by the user or autodetected above is echo "Could not find a python interpreter with the mercurial module available. " \
# >= 2.7 and < 3. "Please use the 'PYTHON' environment variable to specify the interpreter to use."
if ! ${PYTHON} -c 'import sys; v=sys.version_info; exit(0 if v.major == 2 and v.minor >= 7 else 1)' > /dev/null 2>&1 ; then
echo "${PYTHON} is not a working python 2.7 interpreter, please use the" \
"'PYTHON' environment variable to specify the interpreter to use."
exit 1 exit 1
fi fi

View File

@@ -7,6 +7,7 @@ from mercurial import node
from hg2git import setup_repo,load_cache,get_changeset,get_git_sha1 from hg2git import setup_repo,load_cache,get_changeset,get_git_sha1
from optparse import OptionParser from optparse import OptionParser
import sys import sys
from binascii import hexlify
def heads(ui,repo,start=None,stop=None,max=None): def heads(ui,repo,start=None,stop=None,max=None):
# this is copied from mercurial/revlog.py and differs only in # this is copied from mercurial/revlog.py and differs only in
@@ -24,7 +25,7 @@ def heads(ui,repo,start=None,stop=None,max=None):
heads = {startrev: 1} heads = {startrev: 1}
parentrevs = repo.changelog.parentrevs parentrevs = repo.changelog.parentrevs
for r in xrange(startrev + 1, max): for r in range(startrev + 1, max):
for p in parentrevs(r): for p in parentrevs(r):
if p in reachable: if p in reachable:
if r not in stoprevs: if r not in stoprevs:
@@ -33,7 +34,7 @@ def heads(ui,repo,start=None,stop=None,max=None):
if p in heads and p not in stoprevs: if p in heads and p not in stoprevs:
del heads[p] del heads[p]
return [(repo.changelog.node(r),str(r)) for r in heads] return [(repo.changelog.node(r), b"%d" % r) for r in heads]
def get_branches(ui,repo,heads_cache,marks_cache,mapping_cache,max): def get_branches(ui,repo,heads_cache,marks_cache,mapping_cache,max):
h=heads(ui,repo,max=max) h=heads(ui,repo,max=max)
@@ -44,11 +45,11 @@ def get_branches(ui,repo,heads_cache,marks_cache,mapping_cache,max):
_,_,user,(_,_),_,desc,branch,_=get_changeset(ui,repo,rev) _,_,user,(_,_),_,desc,branch,_=get_changeset(ui,repo,rev)
del stale[branch] del stale[branch]
git_sha1=get_git_sha1(branch) git_sha1=get_git_sha1(branch)
cache_sha1=marks_cache.get(str(int(rev)+1)) cache_sha1=marks_cache.get(b"%d" % (int(rev)+1))
if git_sha1!=None and git_sha1==cache_sha1: if git_sha1!=None and git_sha1==cache_sha1:
unchanged.append([branch,cache_sha1,rev,desc.split('\n')[0],user]) unchanged.append([branch,cache_sha1,rev,desc.split(b'\n')[0],user])
else: else:
changed.append([branch,cache_sha1,rev,desc.split('\n')[0],user]) changed.append([branch,cache_sha1,rev,desc.split(b'\n')[0],user])
changed.sort() changed.sort()
unchanged.sort() unchanged.sort()
return stale,changed,unchanged return stale,changed,unchanged
@@ -57,20 +58,20 @@ def get_tags(ui,repo,marks_cache,mapping_cache,max):
l=repo.tagslist() l=repo.tagslist()
good,bad=[],[] good,bad=[],[]
for tag,node in l: for tag,node in l:
if tag=='tip': continue if tag==b'tip': continue
rev=int(mapping_cache[node.encode('hex_codec')]) rev=int(mapping_cache[hexlify(node)])
cache_sha1=marks_cache.get(str(int(rev)+1)) cache_sha1=marks_cache.get(b"%d" % (int(rev)+1))
_,_,user,(_,_),_,desc,branch,_=get_changeset(ui,repo,rev) _,_,user,(_,_),_,desc,branch,_=get_changeset(ui,repo,rev)
if int(rev)>int(max): if int(rev)>int(max):
bad.append([tag,branch,cache_sha1,rev,desc.split('\n')[0],user]) bad.append([tag,branch,cache_sha1,rev,desc.split(b'\n')[0],user])
else: else:
good.append([tag,branch,cache_sha1,rev,desc.split('\n')[0],user]) good.append([tag,branch,cache_sha1,rev,desc.split(b'\n')[0],user])
good.sort() good.sort()
bad.sort() bad.sort()
return good,bad return good,bad
def mangle_mark(mark): def mangle_mark(mark):
return str(int(mark)-1) return b"%d" % (int(mark)-1)
if __name__=='__main__': if __name__=='__main__':
def bail(parser,opt): def bail(parser,opt):
@@ -107,7 +108,7 @@ if __name__=='__main__':
state_cache=load_cache(options.statusfile) state_cache=load_cache(options.statusfile)
mapping_cache = load_cache(options.mappingfile) mapping_cache = load_cache(options.mappingfile)
l=int(state_cache.get('tip',options.revision)) l=int(state_cache.get(b'tip',options.revision))
if options.revision+1>l: if options.revision+1>l:
sys.stderr.write('Revision is beyond last revision imported: %d>%d\n' % (options.revision,l)) sys.stderr.write('Revision is beyond last revision imported: %d>%d\n' % (options.revision,l))
sys.exit(1) sys.exit(1)
@@ -117,19 +118,39 @@ if __name__=='__main__':
stale,changed,unchanged=get_branches(ui,repo,heads_cache,marks_cache,mapping_cache,options.revision+1) stale,changed,unchanged=get_branches(ui,repo,heads_cache,marks_cache,mapping_cache,options.revision+1)
good,bad=get_tags(ui,repo,marks_cache,mapping_cache,options.revision+1) good,bad=get_tags(ui,repo,marks_cache,mapping_cache,options.revision+1)
print "Possibly stale branches:" print("Possibly stale branches:")
map(lambda b: sys.stdout.write('\t%s\n' % b),stale.keys()) for b in stale:
sys.stdout.write('\t%s\n' % b.decode('utf8'))
print "Possibly stale tags:" print("Possibly stale tags:")
map(lambda b: sys.stdout.write('\t%s on %s (r%s)\n' % (b[0],b[1],b[3])),bad) for b in bad:
sys.stdout.write(
'\t%s on %s (r%s)\n'
% (b[0].decode('utf8'), b[1].decode('utf8'), b[3].decode('utf8'))
)
print "Unchanged branches:" print("Unchanged branches:")
map(lambda b: sys.stdout.write('\t%s (r%s)\n' % (b[0],b[2])),unchanged) for b in unchanged:
sys.stdout.write('\t%s (r%s)\n' % (b[0].decode('utf8'),b[2].decode('utf8')))
print "Unchanged tags:" print("Unchanged tags:")
map(lambda b: sys.stdout.write('\t%s on %s (r%s)\n' % (b[0],b[1],b[3])),good) for b in good:
sys.stdout.write(
'\t%s on %s (r%s)\n'
% (b[0].decode('utf8'), b[1].decode('utf8'), b[3].decode('utf8'))
)
print "Reset branches in '%s' to:" % options.headsfile print("Reset branches in '%s' to:" % options.headsfile)
map(lambda b: sys.stdout.write('\t:%s %s\n\t\t(r%s: %s: %s)\n' % (b[0],b[1],b[2],b[4],b[3])),changed) for b in changed:
sys.stdout.write(
'\t:%s %s\n\t\t(r%s: %s: %s)\n'
% (
b[0].decode('utf8'),
b[1].decode('utf8'),
b[2].decode('utf8'),
b[4].decode('utf8'),
b[3].decode('utf8'),
)
)
print "Reset ':tip' in '%s' to '%d'" % (options.statusfile,options.revision) print("Reset ':tip' in '%s' to '%d'" % (options.statusfile,options.revision))

View File

@@ -11,7 +11,24 @@ SFX_MAPPING="mapping"
SFX_HEADS="heads" SFX_HEADS="heads"
SFX_STATE="state" SFX_STATE="state"
QUIET="" QUIET=""
PYTHON=${PYTHON:-python}
if [ -z "${PYTHON}" ]; then
# $PYTHON is not set, so we try to find a working python with mercurial:
for python_cmd in python2 python python3; do
if command -v $python_cmd > /dev/null; then
$python_cmd -c 'import mercurial' 2> /dev/null
if [ $? -eq 0 ]; then
PYTHON=$python_cmd
break
fi
fi
done
fi
if [ -z "${PYTHON}" ]; then
echo "Could not find a python interpreter with the mercurial module available. " \
"Please use the 'PYTHON'environment variable to specify the interpreter to use."
exit 1
fi
USAGE="[-r <repo>] -R <rev>" USAGE="[-r <repo>] -R <rev>"
LONG_USAGE="Print SHA1s of latest changes per branch up to <rev> useful LONG_USAGE="Print SHA1s of latest changes per branch up to <rev> useful

View File

@@ -12,14 +12,21 @@ import os
import sys import sys
import subprocess import subprocess
PY2 = sys.version_info.major < 3
if PY2:
str = unicode
fsencode = lambda s: s.encode(sys.getfilesystemencoding())
else:
from os import fsencode
# default git branch name # default git branch name
cfg_master='master' cfg_master=b'master'
# default origin name # default origin name
origin_name='' origin_name=b''
# silly regex to see if user field has email address # silly regex to see if user field has email address
user_re=re.compile('([^<]+) (<[^>]*>)$') user_re=re.compile(b'([^<]+) (<[^>]*>)$')
# silly regex to clean out user names # silly regex to clean out user names
user_clean_re=re.compile('^["]([^"]+)["]$') user_clean_re=re.compile(b'^["]([^"]+)["]$')
def set_default_branch(name): def set_default_branch(name):
global cfg_master global cfg_master
@@ -34,26 +41,26 @@ def setup_repo(url):
myui=ui.ui(interactive=False) myui=ui.ui(interactive=False)
except TypeError: except TypeError:
myui=ui.ui() myui=ui.ui()
myui.setconfig('ui', 'interactive', 'off') myui.setconfig(b'ui', b'interactive', b'off')
# Avoids a warning when the repository has obsolete markers # Avoids a warning when the repository has obsolete markers
myui.setconfig('experimental', 'evolution.createmarkers', True) myui.setconfig(b'experimental', b'evolution.createmarkers', True)
return myui,hg.repository(myui,url).unfiltered() return myui,hg.repository(myui, fsencode(url)).unfiltered()
def fixup_user(user,authors): def fixup_user(user,authors):
user=user.strip("\"") user=user.strip(b"\"")
if authors!=None: if authors!=None:
# if we have an authors table, try to get mapping # if we have an authors table, try to get mapping
# by defaulting to the current value of 'user' # by defaulting to the current value of 'user'
user=authors.get(user,user) user=authors.get(user,user)
name,mail,m='','',user_re.match(user) name,mail,m=b'',b'',user_re.match(user)
if m==None: if m==None:
# if we don't have 'Name <mail>' syntax, extract name # if we don't have 'Name <mail>' syntax, extract name
# and mail from hg helpers. this seems to work pretty well. # and mail from hg helpers. this seems to work pretty well.
# if email doesn't contain @, replace it with devnull@localhost # if email doesn't contain @, replace it with devnull@localhost
name=templatefilters.person(user) name=templatefilters.person(user)
mail='<%s>' % templatefilters.email(user) mail=b'<%s>' % templatefilters.email(user)
if '@' not in mail: if b'@' not in mail:
mail = '<devnull@localhost>' mail = b'<devnull@localhost>'
else: else:
# if we have 'Name <mail>' syntax, everything is fine :) # if we have 'Name <mail>' syntax, everything is fine :)
name,mail=m.group(1),m.group(2) name,mail=m.group(1),m.group(2)
@@ -62,15 +69,15 @@ def fixup_user(user,authors):
m2=user_clean_re.match(name) m2=user_clean_re.match(name)
if m2!=None: if m2!=None:
name=m2.group(1) name=m2.group(1)
return '%s %s' % (name,mail) return b'%s %s' % (name,mail)
def get_branch(name): def get_branch(name):
# 'HEAD' is the result of a bug in mutt's cvs->hg conversion, # 'HEAD' is the result of a bug in mutt's cvs->hg conversion,
# other CVS imports may need it, too # other CVS imports may need it, too
if name=='HEAD' or name=='default' or name=='': if name==b'HEAD' or name==b'default' or name==b'':
name=cfg_master name=cfg_master
if origin_name: if origin_name:
return origin_name + '/' + name return origin_name + b'/' + name
return name return name
def get_changeset(ui,repo,revision,authors={},encoding=''): def get_changeset(ui,repo,revision,authors={},encoding=''):
@@ -79,16 +86,16 @@ def get_changeset(ui,repo,revision,authors={},encoding=''):
# how it fails # how it fails
try: try:
node=repo.lookup(revision) node=repo.lookup(revision)
except hgerror.ProgrammingError: except (TypeError, hgerror.ProgrammingError):
node=binnode(revsymbol(repo,str(revision))) # We were given a numeric rev node=binnode(revsymbol(repo, b"%d" % revision)) # We were given a numeric rev
except hgerror.RepoLookupError: except hgerror.RepoLookupError:
node=revision # We got a raw hash node=revision # We got a raw hash
(manifest,user,(time,timezone),files,desc,extra)=repo.changelog.read(node) (manifest,user,(time,timezone),files,desc,extra)=repo.changelog.read(node)
if encoding: if encoding:
user=user.decode(encoding).encode('utf8') user=user.decode(encoding).encode('utf8')
desc=desc.decode(encoding).encode('utf8') desc=desc.decode(encoding).encode('utf8')
tz="%+03d%02d" % (-timezone / 3600, ((-timezone % 3600) / 60)) tz=b"%+03d%02d" % (-timezone // 3600, ((-timezone % 3600) // 60))
branch=get_branch(extra.get('branch','master')) branch=get_branch(extra.get(b'branch', b'master'))
return (node,manifest,fixup_user(user,authors),(time,tz),files,desc,branch,extra) return (node,manifest,fixup_user(user,authors),(time,tz),files,desc,branch,extra)
def mangle_key(key): def mangle_key(key):
@@ -98,28 +105,33 @@ def load_cache(filename,get_key=mangle_key):
cache={} cache={}
if not os.path.exists(filename): if not os.path.exists(filename):
return cache return cache
f=open(filename,'r') f=open(filename,'rb')
l=0 l=0
for line in f.readlines(): for line in f.readlines():
l+=1 l+=1
fields=line.split(' ') fields=line.split(b' ')
if fields==None or not len(fields)==2 or fields[0][0]!=':': if fields==None or not len(fields)==2 or fields[0][0:1]!=b':':
sys.stderr.write('Invalid file format in [%s], line %d\n' % (filename,l)) sys.stderr.write('Invalid file format in [%s], line %d\n' % (filename,l))
continue continue
# put key:value in cache, key without ^: # put key:value in cache, key without ^:
cache[get_key(fields[0][1:])]=fields[1].split('\n')[0] cache[get_key(fields[0][1:])]=fields[1].split(b'\n')[0]
f.close() f.close()
return cache return cache
def save_cache(filename,cache): def save_cache(filename,cache):
f=open(filename,'w+') f=open(filename,'wb')
map(lambda x: f.write(':%s %s\n' % (str(x),str(cache.get(x)))),cache.keys()) for key, value in cache.items():
if not isinstance(key, bytes):
key = str(key).encode('utf8')
if not isinstance(value, bytes):
value = str(value).encode('utf8')
f.write(b':%s %s\n' % (key, value))
f.close() f.close()
def get_git_sha1(name,type='heads'): def get_git_sha1(name,type='heads'):
try: try:
# use git-rev-parse to support packed refs # use git-rev-parse to support packed refs
ref="refs/%s/%s" % (type,name) ref="refs/%s/%s" % (type,name.decode('utf8'))
l=subprocess.check_output(["git", "rev-parse", "--verify", "--quiet", ref]) l=subprocess.check_output(["git", "rev-parse", "--verify", "--quiet", ref])
if l == None or len(l) == 0: if l == None or len(l) == 0:
return None return None

View File

@@ -15,9 +15,11 @@ class Filter:
raise ValueError("Unknown args: " + ','.join(args)) raise ValueError("Unknown args: " + ','.join(args))
def commit_message_filter(self, commit_data): def commit_message_filter(self, commit_data):
if not (self.skip_master and commit_data['branch'] == 'master'): if not (self.skip_master and commit_data['branch'] == b'master'):
if self.start: if self.start:
sep = ': ' if self.sameline else '\n' sep = b': ' if self.sameline else b'\n'
commit_data['desc'] = commit_data['branch'] + sep + commit_data['desc'] commit_data['desc'] = commit_data['branch'] + sep + commit_data['desc']
if self.end: if self.end:
commit_data['desc'] = commit_data['desc'] + '\n' + commit_data['branch'] commit_data['desc'] = (
commit_data['desc'] + b'\n' + commit_data['branch']
)

View File

@@ -8,4 +8,4 @@ class Filter():
def file_data_filter(self,file_data): def file_data_filter(self,file_data):
file_ctx = file_data['file_ctx'] file_ctx = file_data['file_ctx']
if not file_ctx.isbinary(): if not file_ctx.isbinary():
file_data['data'] = file_data['data'].replace('\r\n', '\n') file_data['data'] = file_data['data'].replace(b'\r\n', b'\n')

View File

@@ -7,9 +7,11 @@ def build_filter(args):
class Filter: class Filter:
def __init__(self, args): def __init__(self, args):
if not isinstance(args, bytes):
args = args.encode('utf8')
self.prefix = args self.prefix = args
def commit_message_filter(self, commit_data): def commit_message_filter(self, commit_data):
for match in re.findall('#[1-9][0-9]+', commit_data['desc']): for match in re.findall(b'#[1-9][0-9]+', commit_data['desc']):
commit_data['desc'] = commit_data['desc'].replace( commit_data['desc'] = commit_data['desc'].replace(
match, '#%s%s' % (self.prefix, match[1:])) match, b'#%s%s' % (self.prefix, match[1:]))

View File

@@ -4,13 +4,13 @@ def build_filter(args):
class Filter: class Filter:
def __init__(self, args): def __init__(self, args):
if args == '': if args == '':
message = '<empty commit message>' message = b'<empty commit message>'
else: else:
message = args message = args.encode('utf8')
self.message = message self.message = message
def commit_message_filter(self,commit_data): def commit_message_filter(self,commit_data):
# Only write the commit message if the recorded commit # Only write the commit message if the recorded commit
# message is null. # message is null.
if commit_data['desc'] == '\x00': if commit_data['desc'] == b'\x00':
commit_data['desc'] = self.message commit_data['desc'] = self.message