From b961f146dfdad3dcbe36ba16d8dc9c05f10a9d65 Mon Sep 17 00:00:00 2001 From: chrisjbillington Date: Mon, 10 Feb 2020 21:39:13 -0500 Subject: [PATCH] Support Python 3 Port hg-fast-export to Python 2/3 polyglot code. Since mercurial accepts and returns bytestrings for all repository data, the approach I've taken here is to use bytestrings throughout the hg-fast-export code. All strings pertaining to repository data are bytestrings. This means the code is using the same string datatype for this data on Python 3 as it did (and still does) on Python 2. Repository data coming from subprocess calls to git, or read from files, is also left as the bytestrings either returned from subprocess.check_output or as read from the file in 'rb' mode. Regexes and string literals that are used with repository data have all had a b'' prefix added. When repository data is used in error/warning messages, it is decoded with the UTF8 codec for printing. With this patch, hg-fast-export.py writes binary output to sys.stdout.buffer on Python 3 - on Python 2 this doesn't exist and it still uses sys.stdout. The only strings that are left as "native" strings and not coerced to bytestrings are filepaths passed in on the command line, and dictionary keys for internal data structures used by hg-fast-export.py, that do not originate in repository data. Mapping files are read in 'rb' mode, and thus bytestrings are read from them. When an encoding is given, their contents are decoded with that encoding, but then immediately encoded again with UTF8 and they are returned as the resulting bytestrings. Other necessary changes were: - indexing bytestrings with a single index returns an integer on Python 3. These indexing operations have been replaced with a one-element slice: x[0] -> x[0:1] or x[-1] -> x[-1:] so as to return a bytestring. 
- raw_hash.encode('hex_codec') replaced with binascii.hexlify(raw_hash) - str(integer) -> b'%d' % integer - 'string_escape' codec replaced with 'unicode_escape' (which was backported to Python 2.7). Strings decoded with this codec were then immediately re-encoded with UTF8. - Calls to map() intended to execute their contents immediately were unwrapped into loops or converted to list comprehensions, since on Python 3 map() returns a lazy iterator and does not execute until iterated over. hg-fast-export.sh has been modified to not require Python 2. Instead, if PYTHON has not been defined, it checks python2, python, then python3, and uses the first one that exists and can import the mercurial module. --- README.md | 7 +- hg-fast-export.py | 218 +++++++++++--------- hg-fast-export.sh | 31 ++- hg-reset.py | 67 +++--- hg-reset.sh | 19 +- hg2git.py | 64 +++--- plugins/branch_name_in_commit/__init__.py | 8 +- plugins/dos2unix/__init__.py | 2 +- plugins/issue_prefix/__init__.py | 6 +- plugins/overwrite_null_messages/__init__.py | 6 +- 10 files changed, 252 insertions(+), 176 deletions(-) diff --git a/README.md b/README.md index 04e3774..1d73f59 100644 --- a/README.md +++ b/README.md @@ -29,9 +29,10 @@ first time. System Requirements ------------------- -This project depends on Python 2.7 and the Mercurial >= 4.6 -package. If Python is not installed, install it before proceeding. The -Mercurial package can be installed with `pip install mercurial`. +This project depends on Python 2.7 or 3.5+, and the Mercurial >= 4.6 +package (>= 5.2, if Python 3.5+). If Python is not installed, install +it before proceeding. The Mercurial package can be installed with +`pip install mercurial`. On windows the bash that comes with "Git for Windows" is known to work well. 
diff --git a/hg-fast-export.py b/hg-fast-export.py index c005836..76d4679 100755 --- a/hg-fast-export.py +++ b/hg-fast-export.py @@ -11,9 +11,13 @@ from optparse import OptionParser import re import sys import os +from binascii import hexlify import pluginloader +PY2 = sys.version_info.major == 2 +if PY2: + str = unicode -if sys.platform == "win32": +if PY2 and sys.platform == "win32": # On Windows, sys.stdout is initially opened in text mode, which means that # when a LF (\n) character is written to sys.stdout, it will be converted # into CRLF (\r\n). That makes git blow up, so use this platform-specific @@ -22,7 +26,7 @@ if sys.platform == "win32": msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY) # silly regex to catch Signed-off-by lines in log message -sob_re=re.compile('^Signed-[Oo]ff-[Bb]y: (.+)$') +sob_re=re.compile(b'^Signed-[Oo]ff-[Bb]y: (.+)$') # insert 'checkpoint' command after this many commits or none at all if 0 cfg_checkpoint_count=0 # write some progress message every this many file contents written @@ -35,30 +39,33 @@ submodule_mappings=None # author/branch/tag names. 
auto_sanitize = None +stdout_buffer = sys.stdout if PY2 else sys.stdout.buffer + def gitmode(flags): - return 'l' in flags and '120000' or 'x' in flags and '100755' or '100644' + return b'l' in flags and b'120000' or b'x' in flags and b'100755' or b'100644' -def wr_no_nl(msg=''): +def wr_no_nl(msg=b''): + assert isinstance(msg, bytes) if msg: - sys.stdout.write(msg) + stdout_buffer.write(msg) -def wr(msg=''): +def wr(msg=b''): wr_no_nl(msg) - sys.stdout.write('\n') + stdout_buffer.write(b'\n') #map(lambda x: sys.stderr.write('\t[%s]\n' % x),msg.split('\n')) def checkpoint(count): count=count+1 if cfg_checkpoint_count>0 and count%cfg_checkpoint_count==0: sys.stderr.write("Checkpoint after %d commits\n" % count) - wr('checkpoint') + wr(b'checkpoint') wr() return count def revnum_to_revref(rev, old_marks): """Convert an hg revnum to a git-fast-import rev reference (an SHA1 or a mark)""" - return old_marks.get(rev) or ':%d' % (rev+1) + return old_marks.get(rev) or b':%d' % (rev+1) def file_mismatch(f1,f2): """See if two revisions of a file are not equal.""" @@ -87,7 +94,7 @@ def get_filechanges(repo,revision,parents,mleft): l,c,r=[],[],[] for p in parents: if p<0: continue - mright=revsymbol(repo,str(p)).manifest() + mright=revsymbol(repo,b"%d" %p).manifest() l,c,r=split_dict(mleft,mright,l,c,r) l.sort() c.sort() @@ -110,7 +117,7 @@ def get_author(logmessage,committer,authors): "Signed-off-by: foo" and thus matching our detection regex. Prevent that.""" - loglines=logmessage.split('\n') + loglines=logmessage.split(b'\n') i=len(loglines) # from tail walk to top skipping empty lines while i>=0: @@ -138,23 +145,24 @@ def remove_gitmodules(ctx): # be to only remove the submodules of the first parent. 
for parent_ctx in ctx.parents(): for submodule in parent_ctx.substate.keys(): - wr('D %s' % submodule) - wr('D .gitmodules') + wr(b'D %s' % submodule) + wr(b'D .gitmodules') def refresh_git_submodule(name,subrepo_info): - wr('M 160000 %s %s' % (subrepo_info[1],name)) - sys.stderr.write("Adding/updating submodule %s, revision %s\n" - % (name,subrepo_info[1])) - return '[submodule "%s"]\n\tpath = %s\n\turl = %s\n' % (name,name, - subrepo_info[0]) + wr(b'M 160000 %s %s' % (subrepo_info[1],name)) + sys.stderr.write( + "Adding/updating submodule %s, revision %s\n" + % (name.decode('utf8'), subrepo_info[1].decode('utf8')) + ) + return b'[submodule "%s"]\n\tpath = %s\n\turl = %s\n' % (name, name, subrepo_info[0]) def refresh_hg_submodule(name,subrepo_info): - gitRepoLocation=submodule_mappings[name] + "/.git" + gitRepoLocation=submodule_mappings[name] + b"/.git" # Populate the cache to map mercurial revision to git revision if not name in subrepo_cache: - subrepo_cache[name]=(load_cache(gitRepoLocation+"/hg2git-mapping"), - load_cache(gitRepoLocation+"/hg2git-marks", + subrepo_cache[name]=(load_cache(gitRepoLocation+b"/hg2git-mapping"), + load_cache(gitRepoLocation+b"/hg2git-marks", lambda s: int(s)-1)) (mapping_cache,marks_cache)=subrepo_cache[name] @@ -162,30 +170,38 @@ def refresh_hg_submodule(name,subrepo_info): if subrepo_hash in mapping_cache: revnum=mapping_cache[subrepo_hash] gitSha=marks_cache[int(revnum)] - wr('M 160000 %s %s' % (gitSha,name)) - sys.stderr.write("Adding/updating submodule %s, revision %s->%s\n" - % (name,subrepo_hash,gitSha)) - return '[submodule "%s"]\n\tpath = %s\n\turl = %s\n' % (name,name, + wr(b'M 160000 %s %s' % (gitSha,name)) + sys.stderr.write( + "Adding/updating submodule %s, revision %s->%s\n" + % (name.decode('utf8'), subrepo_hash.decode('utf8'), gitSha.decode('utf8')) + ) + return b'[submodule "%s"]\n\tpath = %s\n\turl = %s\n' % (name,name, submodule_mappings[name]) else: - sys.stderr.write("Warning: Could not find hg revision %s for 
%s in git %s\n" % - (subrepo_hash,name,gitRepoLocation)) - return '' + sys.stderr.write( + "Warning: Could not find hg revision %s for %s in git %s\n" + % ( + subrepo_hash.decode('utf8'), + name.decode('utf8'), + gitRepoLocation.decode('utf8'), + ) + ) + return b'' def refresh_gitmodules(ctx): """Updates list of ctx submodules according to .hgsubstate file""" remove_gitmodules(ctx) - gitmodules="" + gitmodules=b"" # Create the .gitmodules file and all submodules for name,subrepo_info in ctx.substate.items(): - if subrepo_info[2]=='git': + if subrepo_info[2]==b'git': gitmodules+=refresh_git_submodule(name,subrepo_info) elif submodule_mappings and name in submodule_mappings: gitmodules+=refresh_hg_submodule(name,subrepo_info) if len(gitmodules): - wr('M 100644 inline .gitmodules') - wr('data %d' % (len(gitmodules)+1)) + wr(b'M 100644 inline .gitmodules') + wr(b'data %d' % (len(gitmodules)+1)) wr(gitmodules) def export_file_contents(ctx,manifest,files,hgtags,encoding='',plugins={}): @@ -193,19 +209,21 @@ def export_file_contents(ctx,manifest,files,hgtags,encoding='',plugins={}): max=len(files) is_submodules_refreshed=False for file in files: - if not is_submodules_refreshed and (file=='.hgsub' or file=='.hgsubstate'): + if not is_submodules_refreshed and (file==b'.hgsub' or file==b'.hgsubstate'): is_submodules_refreshed=True refresh_gitmodules(ctx) # Skip .hgtags files. They only get us in trouble. 
- if not hgtags and file == ".hgtags": - sys.stderr.write('Skip %s\n' % (file)) + if not hgtags and file == b".hgtags": + sys.stderr.write('Skip %s\n' % file.decode('utf8')) continue if encoding: filename=file.decode(encoding).encode('utf8') else: filename=file - if '.git' in filename.split(os.path.sep): - sys.stderr.write('Ignoring file %s which cannot be tracked by git\n' % filename) + if b'.git' in filename.split(os.path.sep.encode()): + sys.stderr.write( + 'Ignoring file %s which cannot be tracked by git\n' % filename.decode('utf8') + ) continue file_ctx=ctx.filectx(file) d=file_ctx.data() @@ -218,9 +236,9 @@ def export_file_contents(ctx,manifest,files,hgtags,encoding='',plugins={}): filename=file_data['filename'] file_ctx=file_data['file_ctx'] - wr('M %s inline %s' % (gitmode(manifest.flags(file)), + wr(b'M %s inline %s' % (gitmode(manifest.flags(file)), strip_leading_slash(filename))) - wr('data %d' % len(d)) # had some trouble with size() + wr(b'data %d' % len(d)) # had some trouble with size() wr(d) count+=1 if count%cfg_export_boundary==0: @@ -246,25 +264,28 @@ def sanitize_name(name,what="branch", mapping={}): def dot(name): if not name: return name - if name[0] == '.': return '_'+name[1:] + if name[0:1] == b'.': return b'_'+name[1:] return name if not auto_sanitize: return mapping.get(name,name) n=mapping.get(name,name) - p=re.compile('([[ ~^:?\\\\*]|\.\.)') - n=p.sub('_', n) - if n[-1] in ('/', '.'): n=n[:-1]+'_' - n='/'.join(map(dot,n.split('/'))) - p=re.compile('_+') - n=p.sub('_', n) + p=re.compile(b'([[ ~^:?\\\\*]|\.\.)') + n=p.sub(b'_', n) + if n[-1:] in (b'/', b'.'): n=n[:-1]+b'_' + n=b'/'.join([dot(s) for s in n.split(b'/')]) + p=re.compile(b'_+') + n=p.sub(b'_', n) if n!=name: - sys.stderr.write('Warning: sanitized %s [%s] to [%s]\n' % (what,name,n)) + sys.stderr.write( + 'Warning: sanitized %s [%s] to [%s]\n' + % (what, name.decode('utf8'), n.decode('utf8')) + ) return n def strip_leading_slash(filename): - if filename[0] == '/': + if 
filename[0:1] == b'/': return filename[1:] return filename @@ -272,7 +293,7 @@ def export_commit(ui,repo,revision,old_marks,max,count,authors, branchesmap,sob,brmap,hgtags,encoding='',fn_encoding='', plugins={}): def get_branchname(name): - if brmap.has_key(name): + if name in brmap: return brmap[name] n=sanitize_name(name, "branch", branchesmap) brmap[name]=n @@ -297,18 +318,18 @@ def export_commit(ui,repo,revision,old_marks,max,count,authors, desc = commit_data['desc'] if len(parents)==0 and revision != 0: - wr('reset refs/heads/%s' % branch) + wr(b'reset refs/heads/%s' % branch) - wr('commit refs/heads/%s' % branch) - wr('mark :%d' % (revision+1)) + wr(b'commit refs/heads/%s' % branch) + wr(b'mark :%d' % (revision+1)) if sob: - wr('author %s %d %s' % (author,time,timezone)) - wr('committer %s %d %s' % (user,time,timezone)) - wr('data %d' % (len(desc)+1)) # wtf? + wr(b'author %s %d %s' % (author,time,timezone)) + wr(b'committer %s %d %s' % (user,time,timezone)) + wr(b'data %d' % (len(desc)+1)) # wtf? 
wr(desc) wr() - ctx=revsymbol(repo,str(revision)) + ctx=revsymbol(repo, b"%d" % revision) man=ctx.manifest() added,changed,removed,type=[],[],[],'' @@ -318,7 +339,7 @@ def export_commit(ui,repo,revision,old_marks,max,count,authors, added.sort() type='full' else: - wr('from %s' % revnum_to_revref(parents[0], old_marks)) + wr(b'from %s' % revnum_to_revref(parents[0], old_marks)) if len(parents) == 1: # later non-merge revision: feed in changed manifest # if we have exactly one parent, just take the changes from the @@ -327,7 +348,7 @@ def export_commit(ui,repo,revision,old_marks,max,count,authors, added,changed,removed=f.added,f.modified,f.removed type='simple delta' else: # a merge with two parents - wr('merge %s' % revnum_to_revref(parents[1], old_marks)) + wr(b'merge %s' % revnum_to_revref(parents[1], old_marks)) # later merge revision: feed in changed manifest # for many files comparing checksums is expensive so only do it for # merges where we really need it due to hg's revlog logic @@ -335,15 +356,15 @@ def export_commit(ui,repo,revision,old_marks,max,count,authors, type='thorough delta' sys.stderr.write('%s: Exporting %s revision %d/%d with %d/%d/%d added/changed/removed files\n' % - (branch,type,revision+1,max,len(added),len(changed),len(removed))) + (branch.decode('utf8'),type,revision+1,max,len(added),len(changed),len(removed))) for filename in removed: if fn_encoding: filename=filename.decode(fn_encoding).encode('utf8') filename=strip_leading_slash(filename) - if filename=='.hgsub': + if filename==b'.hgsub': remove_gitmodules(ctx) - wr('D %s' % filename) + wr(b'D %s' % filename) export_file_contents(ctx,man,added,hgtags,fn_encoding,plugins) export_file_contents(ctx,man,changed,hgtags,fn_encoding,plugins) @@ -358,52 +379,54 @@ def export_note(ui,repo,revision,count,authors,encoding,is_first): parents = [p for p in repo.changelog.parentrevs(revision) if p >= 0] - wr('commit refs/notes/hg') - wr('committer %s %d %s' % (user,time,timezone)) - wr('data 0') + 
wr(b'commit refs/notes/hg') + wr(b'committer %s %d %s' % (user,time,timezone)) + wr(b'data 0') if is_first: - wr('from refs/notes/hg^0') - wr('N inline :%d' % (revision+1)) - hg_hash=revsymbol(repo,str(revision)).hex() - wr('data %d' % (len(hg_hash))) + wr(b'from refs/notes/hg^0') + wr(b'N inline :%d' % (revision+1)) + hg_hash=revsymbol(repo,b"%d" % revision).hex() + wr(b'data %d' % (len(hg_hash))) wr_no_nl(hg_hash) wr() return checkpoint(count) - wr('data %d' % (len(desc)+1)) # wtf? - wr(desc) - wr() - def export_tags(ui,repo,old_marks,mapping_cache,count,authors,tagsmap): l=repo.tagslist() for tag,node in l: # Remap the branch name tag=sanitize_name(tag,"tag",tagsmap) # ignore latest revision - if tag=='tip': continue + if tag==b'tip': continue # ignore tags to nodes that are missing (ie, 'in the future') - if node.encode('hex_codec') not in mapping_cache: - sys.stderr.write('Tag %s refers to unseen node %s\n' % (tag, node.encode('hex_codec'))) + if hexlify(node) not in mapping_cache: + sys.stderr.write( + 'Tag %s refers to unseen node %s\n' + % (tag.decode('utf8'), hexlify(node).decode('utf8')) + ) continue - rev=int(mapping_cache[node.encode('hex_codec')]) + rev=int(mapping_cache[hexlify(node)]) ref=revnum_to_revref(rev, old_marks) if ref==None: sys.stderr.write('Failed to find reference for creating tag' ' %s at r%d\n' % (tag,rev)) continue - sys.stderr.write('Exporting tag [%s] at [hg r%d] [git %s]\n' % (tag,rev,ref)) - wr('reset refs/tags/%s' % tag) - wr('from %s' % ref) + sys.stderr.write( + 'Exporting tag [%s] at [hg r%d] [git %s]\n' + % (tag.decode('utf8'), rev, ref.decode('utf8')) + ) + wr(b'reset refs/tags/%s' % tag) + wr(b'from %s' % ref) wr() count=checkpoint(count) return count def load_mapping(name, filename, mapping_is_raw): - raw_regexp=re.compile('^([^=]+)[ ]*=[ ]*(.+)$') - string_regexp='"(((\\.)|(\\")|[^"])*)"' - quoted_regexp=re.compile('^'+string_regexp+'[ ]*=[ ]*'+string_regexp+'$') + raw_regexp=re.compile(b'^([^=]+)[ ]*=[ ]*(.+)$') + 
string_regexp=b'"(((\\.)|(\\")|[^"])*)"' + quoted_regexp=re.compile(b'^'+string_regexp+b'[ ]*=[ ]*'+string_regexp+b'$') def parse_raw_line(line): m=raw_regexp.match(line) @@ -415,22 +438,22 @@ def load_mapping(name, filename, mapping_is_raw): m=quoted_regexp.match(line) if m==None: return None - return (m.group(1).decode('string_escape'), - m.group(5).decode('string_escape')) + return (m.group(1).decode('unicode_escape').encode('utf8'), + m.group(5).decode('unicode_escape').encode('utf8')) cache={} if not os.path.exists(filename): sys.stderr.write('Could not open mapping file [%s]\n' % (filename)) return cache - f=open(filename,'r') + f=open(filename,'rb') l=0 a=0 for line in f.readlines(): l+=1 line=line.strip() - if l==1 and line[0]=='#' and line=='# quoted-escaped-strings': + if l==1 and line[0:1]==b'#' and line==b'# quoted-escaped-strings': continue - elif line=='' or line[0]=='#': + elif line==b'' or line[0:1]==b'#': continue m=parse_raw_line(line) if mapping_is_raw else parse_quoted_line(line) if m==None: @@ -454,7 +477,7 @@ def branchtip(repo, heads): def verify_heads(ui,repo,cache,force,branchesmap): branches={} - for bn, heads in repo.branchmap().iteritems(): + for bn, heads in repo.branchmap().items(): branches[bn] = branchtip(repo, heads) l=[(-repo.changelog.rev(n), n, t) for t, n in branches.items()] l.sort() @@ -466,13 +489,16 @@ def verify_heads(ui,repo,cache,force,branchesmap): sha1=get_git_sha1(sanitized_name) c=cache.get(sanitized_name) if sha1!=c: - sys.stderr.write('Error: Branch [%s] modified outside hg-fast-export:' - '\n%s (repo) != %s (cache)\n' % (b,sha1,c)) + sys.stderr.write( + 'Error: Branch [%s] modified outside hg-fast-export:' + '\n%s (repo) != %s (cache)\n' + % (b.decode('utf8'), sha1.decode('utf8'), c.decode('utf8')) + ) if not force: return False # verify that branch has exactly one head t={} - for h in repo.filtered('visible').heads(): + for h in repo.filtered(b'visible').heads(): (_,_,_,_,_,_,branch,_)=get_changeset(ui,repo,h) if 
t.get(branch,False): sys.stderr.write('Error: repository has at least one unnamed head: hg r%s\n' % @@ -519,15 +545,15 @@ def hg2git(repourl,m,marksfile,mappingfile,headsfile,tipfile, max=tip for rev in range(0,max): - (revnode,_,_,_,_,_,_,_)=get_changeset(ui,repo,rev,authors) - if repo[revnode].hidden(): - continue - mapping_cache[revnode.encode('hex_codec')] = str(rev) + (revnode,_,_,_,_,_,_,_)=get_changeset(ui,repo,rev,authors) + if repo[revnode].hidden(): + continue + mapping_cache[hexlify(revnode)] = b"%d" % rev if submodule_mappings: # Make sure that all submodules are registered in the submodule-mappings file for rev in range(0,max): - ctx=revsymbol(repo,str(rev)) + ctx=revsymbol(repo,b"%d" % rev) if ctx.hidden(): continue if ctx.substate: diff --git a/hg-fast-export.sh b/hg-fast-export.sh index e1f2f50..06d791c 100755 --- a/hg-fast-export.sh +++ b/hg-fast-export.sh @@ -28,25 +28,20 @@ SFX_STATE="state" GFI_OPTS="" if [ -z "${PYTHON}" ]; then - # $PYTHON is not set, so we try to find a working python 2.7 to - # use. PEP 394 tells us to use 'python2', otherwise try plain - # 'python'. - if command -v python2 > /dev/null; then - PYTHON="python2" - elif command -v python > /dev/null; then - PYTHON="python" - else - echo "Could not find any python interpreter, please use the 'PYTHON'" \ - "environment variable to specify the interpreter to use." - exit 1 - fi + # $PYTHON is not set, so we try to find a working python with mercurial: + for python_cmd in python2 python python3; do + if command -v $python_cmd > /dev/null; then + $python_cmd -c 'import mercurial' 2> /dev/null + if [ $? -eq 0 ]; then + PYTHON=$python_cmd + break + fi + fi + done fi - -# Check that the python specified by the user or autodetected above is -# >= 2.7 and < 3. -if ! 
${PYTHON} -c 'import sys; v=sys.version_info; exit(0 if v.major == 2 and v.minor >= 7 else 1)' > /dev/null 2>&1 ; then - echo "${PYTHON} is not a working python 2.7 interpreter, please use the" \ - "'PYTHON' environment variable to specify the interpreter to use." +if [ -z "${PYTHON}" ]; then + echo "Could not find a python interpreter with the mercurial module available. " \ + "Please use the 'PYTHON' environment variable to specify the interpreter to use." exit 1 fi diff --git a/hg-reset.py b/hg-reset.py index 2a36b1d..d91738b 100755 --- a/hg-reset.py +++ b/hg-reset.py @@ -7,6 +7,7 @@ from mercurial import node from hg2git import setup_repo,load_cache,get_changeset,get_git_sha1 from optparse import OptionParser import sys +from binascii import hexlify def heads(ui,repo,start=None,stop=None,max=None): # this is copied from mercurial/revlog.py and differs only in @@ -24,7 +25,7 @@ def heads(ui,repo,start=None,stop=None,max=None): heads = {startrev: 1} parentrevs = repo.changelog.parentrevs - for r in xrange(startrev + 1, max): + for r in range(startrev + 1, max): for p in parentrevs(r): if p in reachable: if r not in stoprevs: @@ -33,7 +34,7 @@ def heads(ui,repo,start=None,stop=None,max=None): if p in heads and p not in stoprevs: del heads[p] - return [(repo.changelog.node(r),str(r)) for r in heads] + return [(repo.changelog.node(r), b"%d" % r) for r in heads] def get_branches(ui,repo,heads_cache,marks_cache,mapping_cache,max): h=heads(ui,repo,max=max) @@ -44,11 +45,11 @@ def get_branches(ui,repo,heads_cache,marks_cache,mapping_cache,max): _,_,user,(_,_),_,desc,branch,_=get_changeset(ui,repo,rev) del stale[branch] git_sha1=get_git_sha1(branch) - cache_sha1=marks_cache.get(str(int(rev)+1)) + cache_sha1=marks_cache.get(b"%d" % (int(rev)+1)) if git_sha1!=None and git_sha1==cache_sha1: - unchanged.append([branch,cache_sha1,rev,desc.split('\n')[0],user]) + unchanged.append([branch,cache_sha1,rev,desc.split(b'\n')[0],user]) else: - 
changed.append([branch,cache_sha1,rev,desc.split('\n')[0],user]) + changed.append([branch,cache_sha1,rev,desc.split(b'\n')[0],user]) changed.sort() unchanged.sort() return stale,changed,unchanged @@ -57,20 +58,20 @@ def get_tags(ui,repo,marks_cache,mapping_cache,max): l=repo.tagslist() good,bad=[],[] for tag,node in l: - if tag=='tip': continue - rev=int(mapping_cache[node.encode('hex_codec')]) - cache_sha1=marks_cache.get(str(int(rev)+1)) + if tag==b'tip': continue + rev=int(mapping_cache[hexlify(node)]) + cache_sha1=marks_cache.get(b"%d" % (int(rev)+1)) _,_,user,(_,_),_,desc,branch,_=get_changeset(ui,repo,rev) if int(rev)>int(max): - bad.append([tag,branch,cache_sha1,rev,desc.split('\n')[0],user]) + bad.append([tag,branch,cache_sha1,rev,desc.split(b'\n')[0],user]) else: - good.append([tag,branch,cache_sha1,rev,desc.split('\n')[0],user]) + good.append([tag,branch,cache_sha1,rev,desc.split(b'\n')[0],user]) good.sort() bad.sort() return good,bad def mangle_mark(mark): - return str(int(mark)-1) + return b"%d" % (int(mark)-1) if __name__=='__main__': def bail(parser,opt): @@ -107,7 +108,7 @@ if __name__=='__main__': state_cache=load_cache(options.statusfile) mapping_cache = load_cache(options.mappingfile) - l=int(state_cache.get('tip',options.revision)) + l=int(state_cache.get(b'tip',options.revision)) if options.revision+1>l: sys.stderr.write('Revision is beyond last revision imported: %d>%d\n' % (options.revision,l)) sys.exit(1) @@ -117,19 +118,39 @@ if __name__=='__main__': stale,changed,unchanged=get_branches(ui,repo,heads_cache,marks_cache,mapping_cache,options.revision+1) good,bad=get_tags(ui,repo,marks_cache,mapping_cache,options.revision+1) - print "Possibly stale branches:" - map(lambda b: sys.stdout.write('\t%s\n' % b),stale.keys()) + print("Possibly stale branches:") + for b in stale: + sys.stdout.write('\t%s\n' % b.decode('utf8')) - print "Possibly stale tags:" - map(lambda b: sys.stdout.write('\t%s on %s (r%s)\n' % (b[0],b[1],b[3])),bad) + print("Possibly 
stale tags:") + for b in bad: + sys.stdout.write( + '\t%s on %s (r%s)\n' + % (b[0].decode('utf8'), b[1].decode('utf8'), b[3].decode('utf8')) + ) - print "Unchanged branches:" - map(lambda b: sys.stdout.write('\t%s (r%s)\n' % (b[0],b[2])),unchanged) + print("Unchanged branches:") + for b in unchanged: + sys.stdout.write('\t%s (r%s)\n' % (b[0].decode('utf8'),b[2].decode('utf8'))) - print "Unchanged tags:" - map(lambda b: sys.stdout.write('\t%s on %s (r%s)\n' % (b[0],b[1],b[3])),good) + print("Unchanged tags:") + for b in good: + sys.stdout.write( + '\t%s on %s (r%s)\n' + % (b[0].decode('utf8'), b[1].decode('utf8'), b[3].decode('utf8')) + ) - print "Reset branches in '%s' to:" % options.headsfile - map(lambda b: sys.stdout.write('\t:%s %s\n\t\t(r%s: %s: %s)\n' % (b[0],b[1],b[2],b[4],b[3])),changed) + print("Reset branches in '%s' to:" % options.headsfile) + for b in changed: + sys.stdout.write( + '\t:%s %s\n\t\t(r%s: %s: %s)\n' + % ( + b[0].decode('utf8'), + b[1].decode('utf8'), + b[2].decode('utf8'), + b[4].decode('utf8'), + b[3].decode('utf8'), + ) + ) - print "Reset ':tip' in '%s' to '%d'" % (options.statusfile,options.revision) + print("Reset ':tip' in '%s' to '%d'" % (options.statusfile,options.revision)) diff --git a/hg-reset.sh b/hg-reset.sh index 453dbab..7370e34 100755 --- a/hg-reset.sh +++ b/hg-reset.sh @@ -11,7 +11,24 @@ SFX_MAPPING="mapping" SFX_HEADS="heads" SFX_STATE="state" QUIET="" -PYTHON=${PYTHON:-python} + +if [ -z "${PYTHON}" ]; then + # $PYTHON is not set, so we try to find a working python with mercurial: + for python_cmd in python2 python python3; do + if command -v $python_cmd > /dev/null; then + $python_cmd -c 'import mercurial' 2> /dev/null + if [ $? -eq 0 ]; then + PYTHON=$python_cmd + break + fi + fi + done +fi +if [ -z "${PYTHON}" ]; then + echo "Could not find a python interpreter with the mercurial module available. " \ + "Please use the 'PYTHON'environment variable to specify the interpreter to use." 
+ exit 1 +fi USAGE="[-r ] -R " LONG_USAGE="Print SHA1s of latest changes per branch up to useful diff --git a/hg2git.py b/hg2git.py index 1e740f1..991ca44 100755 --- a/hg2git.py +++ b/hg2git.py @@ -12,14 +12,21 @@ import os import sys import subprocess +PY2 = sys.version_info.major < 3 +if PY2: + str = unicode + fsencode = lambda s: s.encode(sys.getfilesystemencoding()) +else: + from os import fsencode + # default git branch name -cfg_master='master' +cfg_master=b'master' # default origin name -origin_name='' +origin_name=b'' # silly regex to see if user field has email address -user_re=re.compile('([^<]+) (<[^>]*>)$') +user_re=re.compile(b'([^<]+) (<[^>]*>)$') # silly regex to clean out user names -user_clean_re=re.compile('^["]([^"]+)["]$') +user_clean_re=re.compile(b'^["]([^"]+)["]$') def set_default_branch(name): global cfg_master @@ -34,26 +41,26 @@ def setup_repo(url): myui=ui.ui(interactive=False) except TypeError: myui=ui.ui() - myui.setconfig('ui', 'interactive', 'off') + myui.setconfig(b'ui', b'interactive', b'off') # Avoids a warning when the repository has obsolete markers - myui.setconfig('experimental', 'evolution.createmarkers', True) - return myui,hg.repository(myui,url).unfiltered() + myui.setconfig(b'experimental', b'evolution.createmarkers', True) + return myui,hg.repository(myui, fsencode(url)).unfiltered() def fixup_user(user,authors): - user=user.strip("\"") + user=user.strip(b"\"") if authors!=None: # if we have an authors table, try to get mapping # by defaulting to the current value of 'user' user=authors.get(user,user) - name,mail,m='','',user_re.match(user) + name,mail,m=b'',b'',user_re.match(user) if m==None: # if we don't have 'Name ' syntax, extract name # and mail from hg helpers. this seems to work pretty well. 
# if email doesn't contain @, replace it with devnull@localhost name=templatefilters.person(user) - mail='<%s>' % templatefilters.email(user) - if '@' not in mail: - mail = '' + mail=b'<%s>' % templatefilters.email(user) + if b'@' not in mail: + mail = b'' else: # if we have 'Name ' syntax, everything is fine :) name,mail=m.group(1),m.group(2) @@ -62,15 +69,15 @@ def fixup_user(user,authors): m2=user_clean_re.match(name) if m2!=None: name=m2.group(1) - return '%s %s' % (name,mail) + return b'%s %s' % (name,mail) def get_branch(name): # 'HEAD' is the result of a bug in mutt's cvs->hg conversion, # other CVS imports may need it, too - if name=='HEAD' or name=='default' or name=='': + if name==b'HEAD' or name==b'default' or name==b'': name=cfg_master if origin_name: - return origin_name + '/' + name + return origin_name + b'/' + name return name def get_changeset(ui,repo,revision,authors={},encoding=''): @@ -79,16 +86,16 @@ def get_changeset(ui,repo,revision,authors={},encoding=''): # how it fails try: node=repo.lookup(revision) - except hgerror.ProgrammingError: - node=binnode(revsymbol(repo,str(revision))) # We were given a numeric rev + except (TypeError, hgerror.ProgrammingError): + node=binnode(revsymbol(repo, b"%d" % revision)) # We were given a numeric rev except hgerror.RepoLookupError: node=revision # We got a raw hash (manifest,user,(time,timezone),files,desc,extra)=repo.changelog.read(node) if encoding: user=user.decode(encoding).encode('utf8') desc=desc.decode(encoding).encode('utf8') - tz="%+03d%02d" % (-timezone / 3600, ((-timezone % 3600) / 60)) - branch=get_branch(extra.get('branch','master')) + tz=b"%+03d%02d" % (-timezone // 3600, ((-timezone % 3600) // 60)) + branch=get_branch(extra.get(b'branch', b'master')) return (node,manifest,fixup_user(user,authors),(time,tz),files,desc,branch,extra) def mangle_key(key): @@ -98,28 +105,33 @@ def load_cache(filename,get_key=mangle_key): cache={} if not os.path.exists(filename): return cache - 
f=open(filename,'r') + f=open(filename,'rb') l=0 for line in f.readlines(): l+=1 - fields=line.split(' ') - if fields==None or not len(fields)==2 or fields[0][0]!=':': + fields=line.split(b' ') + if fields==None or not len(fields)==2 or fields[0][0:1]!=b':': sys.stderr.write('Invalid file format in [%s], line %d\n' % (filename,l)) continue # put key:value in cache, key without ^: - cache[get_key(fields[0][1:])]=fields[1].split('\n')[0] + cache[get_key(fields[0][1:])]=fields[1].split(b'\n')[0] f.close() return cache def save_cache(filename,cache): - f=open(filename,'w+') - map(lambda x: f.write(':%s %s\n' % (str(x),str(cache.get(x)))),cache.keys()) + f=open(filename,'wb') + for key, value in cache.items(): + if not isinstance(key, bytes): + key = str(key).encode('utf8') + if not isinstance(value, bytes): + value = str(value).encode('utf8') + f.write(b':%s %s\n' % (key, value)) f.close() def get_git_sha1(name,type='heads'): try: # use git-rev-parse to support packed refs - ref="refs/%s/%s" % (type,name) + ref="refs/%s/%s" % (type,name.decode('utf8')) l=subprocess.check_output(["git", "rev-parse", "--verify", "--quiet", ref]) if l == None or len(l) == 0: return None diff --git a/plugins/branch_name_in_commit/__init__.py b/plugins/branch_name_in_commit/__init__.py index 910a446..311a84c 100644 --- a/plugins/branch_name_in_commit/__init__.py +++ b/plugins/branch_name_in_commit/__init__.py @@ -15,9 +15,11 @@ class Filter: raise ValueError("Unknown args: " + ','.join(args)) def commit_message_filter(self, commit_data): - if not (self.skip_master and commit_data['branch'] == 'master'): + if not (self.skip_master and commit_data['branch'] == b'master'): if self.start: - sep = ': ' if self.sameline else '\n' + sep = b': ' if self.sameline else b'\n' commit_data['desc'] = commit_data['branch'] + sep + commit_data['desc'] if self.end: - commit_data['desc'] = commit_data['desc'] + '\n' + commit_data['branch'] + commit_data['desc'] = ( + commit_data['desc'] + b'\n' + 
commit_data['branch'] + ) diff --git a/plugins/dos2unix/__init__.py b/plugins/dos2unix/__init__.py index bf676a0..bae9358 100644 --- a/plugins/dos2unix/__init__.py +++ b/plugins/dos2unix/__init__.py @@ -8,4 +8,4 @@ class Filter(): def file_data_filter(self,file_data): file_ctx = file_data['file_ctx'] if not file_ctx.isbinary(): - file_data['data'] = file_data['data'].replace('\r\n', '\n') + file_data['data'] = file_data['data'].replace(b'\r\n', b'\n') diff --git a/plugins/issue_prefix/__init__.py b/plugins/issue_prefix/__init__.py index b5a0fc6..5dd30b5 100644 --- a/plugins/issue_prefix/__init__.py +++ b/plugins/issue_prefix/__init__.py @@ -7,9 +7,11 @@ def build_filter(args): class Filter: def __init__(self, args): + if not isinstance(args, bytes): + args = args.encode('utf8') self.prefix = args def commit_message_filter(self, commit_data): - for match in re.findall('#[1-9][0-9]+', commit_data['desc']): + for match in re.findall(b'#[1-9][0-9]+', commit_data['desc']): commit_data['desc'] = commit_data['desc'].replace( - match, '#%s%s' % (self.prefix, match[1:])) + match, b'#%s%s' % (self.prefix, match[1:])) diff --git a/plugins/overwrite_null_messages/__init__.py b/plugins/overwrite_null_messages/__init__.py index 3be7e53..b9ce512 100644 --- a/plugins/overwrite_null_messages/__init__.py +++ b/plugins/overwrite_null_messages/__init__.py @@ -4,13 +4,13 @@ def build_filter(args): class Filter: def __init__(self, args): if args == '': - message = '' + message = b'' else: - message = args + message = args.encode('utf8') self.message = message def commit_message_filter(self,commit_data): # Only write the commit message if the recorded commit # message is null. - if commit_data['desc'] == '\x00': + if commit_data['desc'] == b'\x00': commit_data['desc'] = self.message