From 8779cb5e951e1b29c9b91bbcf25e2ac4350937db Mon Sep 17 00:00:00 2001 From: MokhamedDakhraui Date: Fri, 16 Aug 2019 02:40:44 +0300 Subject: [PATCH 1/4] Extract operations with submodules to separated methods --- hg-fast-export.py | 88 +++++++++++++++++++++++++---------------------- 1 file changed, 47 insertions(+), 41 deletions(-) diff --git a/hg-fast-export.py b/hg-fast-export.py index ef87ded..7f8302b 100755 --- a/hg-fast-export.py +++ b/hg-fast-export.py @@ -127,52 +127,58 @@ def get_author(logmessage,committer,authors): return r return committer +def remove_gitmodules(ctx): + """Removes all submodules""" + # Remove all submodules as we don't detect deleted submodules properly + # in any other way. We will add the ones not deleted back again below. + for module in submodule_mappings.keys(): + wr('D %s' % module) + +def refresh_gitmodules(ctx): + """Updates list of ctx submodules according to .hgsubstate file""" + remove_gitmodules(ctx) + # Read .hgsubstate file in order to find the revision of each subrepo + data=ctx.filectx(".hgsubstate").data() + subHashes={} + for line in data.split('\n'): + if line.strip()=="": + continue + cols=line.split(' ') + subHashes[cols[1]]=cols[0] + + gitmodules="" + # Create the .gitmodules file and all submodules + for name in ctx.substate: + gitRepoLocation=submodule_mappings[name] + "/.git" + + # Populate the cache to map mercurial revision to git revision + if not name in subrepo_cache: + subrepo_cache[name]=(load_cache(gitRepoLocation+"/hg2git-mapping"), + load_cache(gitRepoLocation+"/hg2git-marks", + lambda s: int(s)-1)) + + (mapping_cache, marks_cache)=subrepo_cache[name] + if subHashes[name] in mapping_cache: + revnum=mapping_cache[subHashes[name]] + gitSha=marks_cache[int(revnum)] + wr('M 160000 %s %s' % (gitSha, name)) + sys.stderr.write("Adding submodule %s, revision %s->%s\n" + % (name,subHashes[name],gitSha)) + gitmodules+='[submodule "%s"]\n\tpath = %s\n\turl = %s\n' % (name, name, submodule_mappings[name]) + else: + 
sys.stderr.write("Warning: Could not find hg revision %s for %s in git %s\n" % (subHashes[name],name,gitRepoLocation)) + + if len(gitmodules): + wr('M 100644 inline .gitmodules') + wr('data %d' % (len(gitmodules)+1)) + wr(gitmodules) + def export_file_contents(ctx,manifest,files,hgtags,encoding='',plugins={}): count=0 max=len(files) for file in files: if submodule_mappings and ctx.substate and file==".hgsubstate": - # Remove all submodules as we don't detect deleted submodules properly - # in any other way. We will add the ones not deleted back again below. - for module in submodule_mappings.keys(): - wr('D %s' % module) - - # Read .hgsubstate file in order to find the revision of each subrepo - data=ctx.filectx(file).data() - subHashes={} - for line in data.split('\n'): - if line.strip()=="": - continue - cols=line.split(' ') - subHashes[cols[1]]=cols[0] - - gitmodules="" - # Create the .gitmodules file and all submodules - for name in ctx.substate: - gitRepoLocation=submodule_mappings[name] + "/.git" - - # Populate the cache to map mercurial revision to git revision - if not name in subrepo_cache: - subrepo_cache[name]=(load_cache(gitRepoLocation+"/hg2git-mapping"), - load_cache(gitRepoLocation+"/hg2git-marks", - lambda s: int(s)-1)) - - (mapping_cache, marks_cache)=subrepo_cache[name] - if subHashes[name] in mapping_cache: - revnum=mapping_cache[subHashes[name]] - gitSha=marks_cache[int(revnum)] - wr('M 160000 %s %s' % (gitSha, name)) - sys.stderr.write("Adding submodule %s, revision %s->%s\n" - % (name,subHashes[name],gitSha)) - gitmodules+='[submodule "%s"]\n\tpath = %s\n\turl = %s\n' % (name, name, submodule_mappings[name]) - else: - sys.stderr.write("Warning: Could not find hg revision %s for %s in git %s\n" % (subHashes[name],name,gitRepoLocation)) - - if len(gitmodules): - wr('M 100644 inline .gitmodules') - wr('data %d' % (len(gitmodules)+1)) - wr(gitmodules) - + refresh_gitmodules(ctx) # Skip .hgtags files. They only get us in trouble. 
if not hgtags and file == ".hgtags": sys.stderr.write('Skip %s\n' % (file)) From 914f5a0dbe839bead20f8bf69468f08b1766ec72 Mon Sep 17 00:00:00 2001 From: MokhamedDakhraui Date: Fri, 16 Aug 2019 02:41:54 +0300 Subject: [PATCH 2/4] Replaced several lambdas by one loop --- hg-fast-export.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/hg-fast-export.py b/hg-fast-export.py index 7f8302b..7421b91 100755 --- a/hg-fast-export.py +++ b/hg-fast-export.py @@ -313,12 +313,12 @@ def export_commit(ui,repo,revision,old_marks,max,count,authors, sys.stderr.write('%s: Exporting %s revision %d/%d with %d/%d/%d added/changed/removed files\n' % (branch,type,revision+1,max,len(added),len(changed),len(removed))) - if fn_encoding: - removed=[r.decode(fn_encoding).encode('utf8') for r in removed] + for filename in removed: + if fn_encoding: + filename=filename.decode(fn_encoding).encode('utf8') + filename=strip_leading_slash(filename) + wr('D %s' % filename) - removed=[strip_leading_slash(x) for x in removed] - - map(lambda r: wr('D %s' % r),removed) export_file_contents(ctx,man,added,hgtags,fn_encoding,plugins) export_file_contents(ctx,man,changed,hgtags,fn_encoding,plugins) wr() From 7df01ac323c3be50fb0ba276b455c52464ce1f10 Mon Sep 17 00:00:00 2001 From: MokhamedDakhraui Date: Thu, 15 Aug 2019 03:05:21 +0300 Subject: [PATCH 3/4] Refactor refresh_gitmodules() Use the change context substate field instead of manually parsing the `.hgsubstate` file. 
--- hg-fast-export.py | 30 ++++++++++++------------------ 1 file changed, 12 insertions(+), 18 deletions(-) diff --git a/hg-fast-export.py b/hg-fast-export.py index 7421b91..897c2f0 100755 --- a/hg-fast-export.py +++ b/hg-fast-export.py @@ -137,18 +137,9 @@ def remove_gitmodules(ctx): def refresh_gitmodules(ctx): """Updates list of ctx submodules according to .hgsubstate file""" remove_gitmodules(ctx) - # Read .hgsubstate file in order to find the revision of each subrepo - data=ctx.filectx(".hgsubstate").data() - subHashes={} - for line in data.split('\n'): - if line.strip()=="": - continue - cols=line.split(' ') - subHashes[cols[1]]=cols[0] - gitmodules="" # Create the .gitmodules file and all submodules - for name in ctx.substate: + for name,subrepo_info in ctx.substate.items(): gitRepoLocation=submodule_mappings[name] + "/.git" # Populate the cache to map mercurial revision to git revision @@ -157,16 +148,19 @@ def refresh_gitmodules(ctx): load_cache(gitRepoLocation+"/hg2git-marks", lambda s: int(s)-1)) - (mapping_cache, marks_cache)=subrepo_cache[name] - if subHashes[name] in mapping_cache: - revnum=mapping_cache[subHashes[name]] + (mapping_cache,marks_cache)=subrepo_cache[name] + subrepo_hash=subrepo_info[1] + if subrepo_hash in mapping_cache: + revnum=mapping_cache[subrepo_hash] gitSha=marks_cache[int(revnum)] - wr('M 160000 %s %s' % (gitSha, name)) - sys.stderr.write("Adding submodule %s, revision %s->%s\n" - % (name,subHashes[name],gitSha)) - gitmodules+='[submodule "%s"]\n\tpath = %s\n\turl = %s\n' % (name, name, submodule_mappings[name]) + wr('M 160000 %s %s' % (gitSha,name)) + sys.stderr.write("Adding/updating submodule %s, revision %s->%s\n" + % (name,subrepo_hash,gitSha)) + gitmodules+='[submodule "%s"]\n\tpath = %s\n\turl = %s\n' % (name,name, + submodule_mappings[name]) else: - sys.stderr.write("Warning: Could not find hg revision %s for %s in git %s\n" % (subHashes[name],name,gitRepoLocation)) + sys.stderr.write("Warning: Could not find hg revision 
%s for %s in git %s\n" % + (subrepo_hash,name,gitRepoLocation)) if len(gitmodules): wr('M 100644 inline .gitmodules') From 581b1b3d1703357d619bc124527b00e585f1978e Mon Sep 17 00:00:00 2001 From: MokhamedDakhraui Date: Thu, 15 Aug 2019 03:13:52 +0300 Subject: [PATCH 4/4] Remove git submodules if .hgsubstate file was removed or emptied --- hg-fast-export.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/hg-fast-export.py b/hg-fast-export.py index 897c2f0..72d9c7f 100755 --- a/hg-fast-export.py +++ b/hg-fast-export.py @@ -128,11 +128,14 @@ def get_author(logmessage,committer,authors): return committer def remove_gitmodules(ctx): - """Removes all submodules""" - # Remove all submodules as we don't detect deleted submodules properly - # in any other way. We will add the ones not deleted back again below. - for module in submodule_mappings.keys(): - wr('D %s' % module) + """Removes all submodules of ctx parents""" + # Removing all submodules coming from all parents is safe, as the submodules + # of the current commit will be re-added below. A possible optimization would + # be to only remove the submodules of the first parent. + for parent_ctx in ctx.parents(): + for submodule in parent_ctx.substate.keys(): + wr('D %s' % submodule) + wr('D .gitmodules') def refresh_gitmodules(ctx): """Updates list of ctx submodules according to .hgsubstate file""" @@ -171,7 +174,7 @@ def export_file_contents(ctx,manifest,files,hgtags,encoding='',plugins={}): count=0 max=len(files) for file in files: - if submodule_mappings and ctx.substate and file==".hgsubstate": + if submodule_mappings and file==".hgsubstate": refresh_gitmodules(ctx) # Skip .hgtags files. They only get us in trouble. 
if not hgtags and file == ".hgtags": @@ -311,6 +314,8 @@ def export_commit(ui,repo,revision,old_marks,max,count,authors, if fn_encoding: filename=filename.decode(fn_encoding).encode('utf8') filename=strip_leading_slash(filename) + if filename=='.hgsubstate': + remove_gitmodules(ctx) wr('D %s' % filename) export_file_contents(ctx,man,added,hgtags,fn_encoding,plugins)