From 21fa443b4ade59a3bf00a3ea163b693aa33b4b73 Mon Sep 17 00:00:00 2001 From: Felipe Contreras Date: Thu, 9 Mar 2023 15:35:50 -0600 Subject: [PATCH 01/10] Simplify list of files for the first commit We already have the files. Signed-off-by: Felipe Contreras --- hg-fast-export.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/hg-fast-export.py b/hg-fast-export.py index 0f1726a..3b1f267 100755 --- a/hg-fast-export.py +++ b/hg-fast-export.py @@ -336,8 +336,7 @@ def export_commit(ui,repo,revision,old_marks,max,count,authors, if len(parents) == 0: # first revision: feed in full manifest - added=man.keys() - added.sort() + added=files type='full' else: wr(b'from %s' % revnum_to_revref(parents[0], old_marks)) From c666fd9c95baf797deedd4ba27900cc39b61118e Mon Sep 17 00:00:00 2001 From: Felipe Contreras Date: Tue, 14 Mar 2023 00:12:19 -0600 Subject: [PATCH 02/10] Trivial style cleanup Checking the array directly is more idiomatic. Signed-off-by: Felipe Contreras --- hg-fast-export.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hg-fast-export.py b/hg-fast-export.py index 3b1f267..9586b7c 100755 --- a/hg-fast-export.py +++ b/hg-fast-export.py @@ -334,7 +334,7 @@ def export_commit(ui,repo,revision,old_marks,max,count,authors, man=ctx.manifest() added,changed,removed,type=[],[],[],'' - if len(parents) == 0: + if not parents: # first revision: feed in full manifest added=files type='full' From a229b39d667813bd29e4041dec6da99724f44a7b Mon Sep 17 00:00:00 2001 From: Felipe Contreras Date: Thu, 9 Mar 2023 15:32:54 -0600 Subject: [PATCH 03/10] Coalesce modified files Git doesn't care if they are added or changed: they are modified. 
Signed-off-by: Felipe Contreras --- hg-fast-export.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/hg-fast-export.py b/hg-fast-export.py index 9586b7c..40bc52d 100755 --- a/hg-fast-export.py +++ b/hg-fast-export.py @@ -102,7 +102,7 @@ def get_filechanges(repo,revision,parents,mleft): l.sort() c.sort() r.sort() - return l,c,r + return c+l,r def get_author(logmessage,committer,authors): """As git distincts between author and committer of a patch, try to @@ -332,11 +332,11 @@ def export_commit(ui,repo,revision,old_marks,max,count,authors, wr_data(desc) man=ctx.manifest() - added,changed,removed,type=[],[],[],'' + modified,removed,type=[],[],'' if not parents: # first revision: feed in full manifest - added=files + modified=files type='full' else: wr(b'from %s' % revnum_to_revref(parents[0], old_marks)) @@ -345,19 +345,20 @@ def export_commit(ui,repo,revision,old_marks,max,count,authors, # if we have exactly one parent, just take the changes from the # manifest without expensively comparing checksums f=repo.status(parents[0],revision) - added,changed,removed=f.added,f.modified,f.removed + modified=f.modified + f.added + removed=f.removed type='simple delta' else: # a merge with two parents wr(b'merge %s' % revnum_to_revref(parents[1], old_marks)) # later merge revision: feed in changed manifest # for many files comparing checksums is expensive so only do it for # merges where we really need it due to hg's revlog logic - added,changed,removed=get_filechanges(repo,revision,parents,man) + modified,removed=get_filechanges(repo,revision,parents,man) type='thorough delta' stderr_buffer.write( - b'%s: Exporting %s revision %d/%d with %d/%d/%d added/changed/removed files\n' - % (branch, type.encode(), revision + 1, max, len(added), len(changed), len(removed)) + b'%s: Exporting %s revision %d/%d with %d/%d modified/removed files\n' + % (branch, type.encode(), revision + 1, max, len(modified), len(removed)) ) for filename in removed: @@ 
-368,8 +369,7 @@ def export_commit(ui,repo,revision,old_marks,max,count,authors, remove_gitmodules(ctx) wr(b'D %s' % filename) - export_file_contents(ctx,man,added,hgtags,fn_encoding,plugins) - export_file_contents(ctx,man,changed,hgtags,fn_encoding,plugins) + export_file_contents(ctx,man,modified,hgtags,fn_encoding,plugins) wr() return checkpoint(count) From 531fa9b3a2ac4ed302cea66058a26c4ce615129f Mon Sep 17 00:00:00 2001 From: Felipe Contreras Date: Thu, 9 Mar 2023 15:48:54 -0600 Subject: [PATCH 04/10] Simplify split_dict There's no need to keep track of the left side: if it's modified it's modified. Signed-off-by: Felipe Contreras --- hg-fast-export.py | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/hg-fast-export.py b/hg-fast-export.py index 40bc52d..b72fc10 100755 --- a/hg-fast-export.py +++ b/hg-fast-export.py @@ -74,35 +74,34 @@ def file_mismatch(f1,f2): """See if two revisions of a file are not equal.""" return node.hex(f1)!=node.hex(f2) -def split_dict(dleft,dright,l=[],c=[],r=[],match=file_mismatch): +def split_dict(dleft,dright,c=[],r=[],match=file_mismatch): """Loop over our repository and find all changed and missing files.""" for left in dleft.keys(): right=dright.get(left,None) if right==None: - # we have the file but our parent hasn't: add to left set - l.append(left) + # we have the file but our parent hasn't: add + c.append(left) elif match(dleft[left],right) or gitmode(dleft.flags(left))!=gitmode(dright.flags(left)): - # we have it but checksums mismatch: add to center set + # we have it but checksums mismatch: add c.append(left) for right in dright.keys(): left=dleft.get(right,None) if left==None: - # if parent has file but we don't: add to right set + # if parent has file but we don't: remove r.append(right) # change is already handled when comparing child against parent - return l,c,r + return c,r def get_filechanges(repo,revision,parents,mleft): """Given some repository and revision, find all 
changed/deleted files.""" - l,c,r=[],[],[] + c,r=[],[] for p in parents: if p<0: continue mright=repo[p].manifest() - l,c,r=split_dict(mleft,mright,l,c,r) - l.sort() + c,r=split_dict(mleft,mright,c,r) c.sort() r.sort() - return c+l,r + return c,r def get_author(logmessage,committer,authors): """As git distincts between author and committer of a patch, try to From 9df2f97f6cc69c497fd10561e6b91fb9396292b5 Mon Sep 17 00:00:00 2001 From: Felipe Contreras Date: Mon, 13 Mar 2023 20:32:10 -0600 Subject: [PATCH 05/10] Rename variables in get_filechanges It's easier to understand this way. No functional changes. Signed-off-by: Felipe Contreras --- hg-fast-export.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/hg-fast-export.py b/hg-fast-export.py index b72fc10..923f30e 100755 --- a/hg-fast-export.py +++ b/hg-fast-export.py @@ -94,14 +94,14 @@ def split_dict(dleft,dright,c=[],r=[],match=file_mismatch): def get_filechanges(repo,revision,parents,mleft): """Given some repository and revision, find all changed/deleted files.""" - c,r=[],[] + modified,removed=[],[] for p in parents: if p<0: continue mright=repo[p].manifest() - c,r=split_dict(mleft,mright,c,r) - c.sort() - r.sort() - return c,r + modified,removed=split_dict(mleft,mright,modified,removed) + modified.sort() + removed.sort() + return modified,removed def get_author(logmessage,committer,authors): """As git distincts between author and committer of a patch, try to From e09a14a26675411496f4a8cc0d6188ca5f20f07f Mon Sep 17 00:00:00 2001 From: Felipe Contreras Date: Mon, 13 Mar 2023 20:37:19 -0600 Subject: [PATCH 06/10] Move parents logic inside get_filechanges This way export_commit is much simpler (already quite complex), and it's easier to modify the logic. No functional changes. 
Signed-off-by: Felipe Contreras --- hg-fast-export.py | 47 ++++++++++++++++++++++++++--------------------- 1 file changed, 26 insertions(+), 21 deletions(-) diff --git a/hg-fast-export.py b/hg-fast-export.py index 923f30e..10ec63c 100755 --- a/hg-fast-export.py +++ b/hg-fast-export.py @@ -92,15 +92,31 @@ def split_dict(dleft,dright,c=[],r=[],match=file_mismatch): # change is already handled when comparing child against parent return c,r -def get_filechanges(repo,revision,parents,mleft): +def get_filechanges(repo,revision,parents,files,mleft): """Given some repository and revision, find all changed/deleted files.""" - modified,removed=[],[] - for p in parents: - if p<0: continue - mright=repo[p].manifest() - modified,removed=split_dict(mleft,mright,modified,removed) - modified.sort() - removed.sort() + if not parents: + # first revision: feed in full manifest + modified=files + removed=[] + else: + if len(parents) == 1: + # later non-merge revision: feed in changed manifest + # if we have exactly one parent, just take the changes from the + # manifest without expensively comparing checksums + f=repo.status(parents[0],revision) + modified=f.modified + f.added + removed=f.removed + else: # a merge with two parents + # later merge revision: feed in changed manifest + # for many files comparing checksums is expensive so only do it for + # merges where we really need it due to hg's revlog logic + modified,removed=[],[] + for p in parents: + if p<0: continue + mright=repo[p].manifest() + modified,removed=split_dict(mleft,mright,modified,removed) + modified.sort() + removed.sort() return modified,removed def get_author(logmessage,committer,authors): @@ -331,30 +347,19 @@ def export_commit(ui,repo,revision,old_marks,max,count,authors, wr_data(desc) man=ctx.manifest() - modified,removed,type=[],[],'' if not parents: - # first revision: feed in full manifest - modified=files type='full' else: wr(b'from %s' % revnum_to_revref(parents[0], old_marks)) if len(parents) == 1: - # 
later non-merge revision: feed in changed manifest - # if we have exactly one parent, just take the changes from the - # manifest without expensively comparing checksums - f=repo.status(parents[0],revision) - modified=f.modified + f.added - removed=f.removed type='simple delta' else: # a merge with two parents wr(b'merge %s' % revnum_to_revref(parents[1], old_marks)) - # later merge revision: feed in changed manifest - # for many files comparing checksums is expensive so only do it for - # merges where we really need it due to hg's revlog logic - modified,removed=get_filechanges(repo,revision,parents,man) type='thorough delta' + modified,removed=get_filechanges(repo,revision,parents,files,man) + stderr_buffer.write( b'%s: Exporting %s revision %d/%d with %d/%d modified/removed files\n' % (branch, type.encode(), revision + 1, max, len(modified), len(removed)) From 0ae0d20496200307360b59c3521fa805479a50a7 Mon Sep 17 00:00:00 2001 From: Felipe Contreras Date: Mon, 13 Mar 2023 22:34:34 -0600 Subject: [PATCH 07/10] Remove no-op check This code is only executed when there are two parents. Signed-off-by: Felipe Contreras --- hg-fast-export.py | 1 - 1 file changed, 1 deletion(-) diff --git a/hg-fast-export.py b/hg-fast-export.py index 10ec63c..dd94d3a 100755 --- a/hg-fast-export.py +++ b/hg-fast-export.py @@ -112,7 +112,6 @@ def get_filechanges(repo,revision,parents,files,mleft): # merges where we really need it due to hg's revlog logic modified,removed=[],[] for p in parents: - if p<0: continue mright=repo[p].manifest() modified,removed=split_dict(mleft,mright,modified,removed) modified.sort() From 3582221efda99b65b7c42ba99c7ec20bc8389bf8 Mon Sep 17 00:00:00 2001 From: Felipe Contreras Date: Mon, 13 Mar 2023 22:51:05 -0600 Subject: [PATCH 08/10] Compare changes only with the first parent It's not necessary to check both parents. 
Signed-off-by: Felipe Contreras --- hg-fast-export.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/hg-fast-export.py b/hg-fast-export.py index dd94d3a..fe72846 100755 --- a/hg-fast-export.py +++ b/hg-fast-export.py @@ -111,9 +111,8 @@ def get_filechanges(repo,revision,parents,files,mleft): # for many files comparing checksums is expensive so only do it for # merges where we really need it due to hg's revlog logic modified,removed=[],[] - for p in parents: - mright=repo[p].manifest() - modified,removed=split_dict(mleft,mright,modified,removed) + mright=repo[parents[0]].manifest() + modified,removed=split_dict(mleft,mright,modified,removed) modified.sort() removed.sort() return modified,removed From d2f11bd619c40bad008657cbe4384c97ae1d0250 Mon Sep 17 00:00:00 2001 From: Felipe Contreras Date: Mon, 13 Mar 2023 21:38:26 -0600 Subject: [PATCH 09/10] Remove multiple parent logic for file changes This is already what repo.status does. Signed-off-by: Felipe Contreras --- hg-fast-export.py | 47 ++++++----------------------------------------- 1 file changed, 6 insertions(+), 41 deletions(-) diff --git a/hg-fast-export.py b/hg-fast-export.py index fe72846..b33cc4b 100755 --- a/hg-fast-export.py +++ b/hg-fast-export.py @@ -3,7 +3,6 @@ # Copyright (c) 2007, 2008 Rocco Rutte and others. 
# License: MIT -from mercurial import node from hg2git import setup_repo,fixup_user,get_branch,get_changeset from hg2git import load_cache,save_cache,get_git_sha1,set_default_branch,set_origin_name from optparse import OptionParser @@ -70,51 +69,17 @@ def revnum_to_revref(rev, old_marks): or a mark)""" return old_marks.get(rev) or b':%d' % (rev+1) -def file_mismatch(f1,f2): - """See if two revisions of a file are not equal.""" - return node.hex(f1)!=node.hex(f2) - -def split_dict(dleft,dright,c=[],r=[],match=file_mismatch): - """Loop over our repository and find all changed and missing files.""" - for left in dleft.keys(): - right=dright.get(left,None) - if right==None: - # we have the file but our parent hasn't: add - c.append(left) - elif match(dleft[left],right) or gitmode(dleft.flags(left))!=gitmode(dright.flags(left)): - # we have it but checksums mismatch: add - c.append(left) - for right in dright.keys(): - left=dleft.get(right,None) - if left==None: - # if parent has file but we don't: remove - r.append(right) - # change is already handled when comparing child against parent - return c,r - -def get_filechanges(repo,revision,parents,files,mleft): +def get_filechanges(repo,revision,parents,files): """Given some repository and revision, find all changed/deleted files.""" if not parents: # first revision: feed in full manifest modified=files removed=[] else: - if len(parents) == 1: - # later non-merge revision: feed in changed manifest - # if we have exactly one parent, just take the changes from the - # manifest without expensively comparing checksums - f=repo.status(parents[0],revision) - modified=f.modified + f.added - removed=f.removed - else: # a merge with two parents - # later merge revision: feed in changed manifest - # for many files comparing checksums is expensive so only do it for - # merges where we really need it due to hg's revlog logic - modified,removed=[],[] - mright=repo[parents[0]].manifest() - 
modified,removed=split_dict(mleft,mright,modified,removed) - modified.sort() - removed.sort() + # take the changes from the first parent + f=repo.status(parents[0],revision) + modified=f.modified + f.added + removed=f.removed return modified,removed def get_author(logmessage,committer,authors): @@ -356,7 +321,7 @@ def export_commit(ui,repo,revision,old_marks,max,count,authors, wr(b'merge %s' % revnum_to_revref(parents[1], old_marks)) type='thorough delta' - modified,removed=get_filechanges(repo,revision,parents,files,man) + modified,removed=get_filechanges(repo,revision,parents,files) stderr_buffer.write( b'%s: Exporting %s revision %d/%d with %d/%d modified/removed files\n' From 9754a9f3f6de64b239e134ac5f6d6907675822b1 Mon Sep 17 00:00:00 2001 From: Felipe Contreras Date: Tue, 14 Mar 2023 00:47:53 -0600 Subject: [PATCH 10/10] Trivial simplification Just return the values directly, no need to store them into variables. Signed-off-by: Felipe Contreras --- hg-fast-export.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/hg-fast-export.py b/hg-fast-export.py index b33cc4b..71b518d 100755 --- a/hg-fast-export.py +++ b/hg-fast-export.py @@ -73,14 +73,11 @@ def get_filechanges(repo,revision,parents,files): """Given some repository and revision, find all changed/deleted files.""" if not parents: # first revision: feed in full manifest - modified=files - removed=[] + return files,[] else: # take the changes from the first parent f=repo.status(parents[0],revision) - modified=f.modified + f.added - removed=f.removed - return modified,removed + return f.modified+f.added,f.removed def get_author(logmessage,committer,authors): """As git distincts between author and committer of a patch, try to