From 89db1d93cf61a6347b0c3f362d842d58a0e42867 Mon Sep 17 00:00:00 2001 From: Anton Tykhyy Date: Sun, 17 Jun 2018 21:09:59 +0300 Subject: [PATCH] Add --filter-contents --- README.md | 21 +++++++++++++++++++++ hg-fast-export.py | 36 ++++++++++++++++++++++++++++-------- hg-fast-export.sh | 2 ++ 3 files changed, 51 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 702c7f6..3219240 100644 --- a/README.md +++ b/README.md @@ -99,6 +99,27 @@ name the -B and -T options allow a mapping file to be specified to rename branches and tags (respectively). The syntax of the mapping file is the same as for the author mapping. +Content filtering +----------------- + +hg-fast-export supports filtering the content of exported files. +The filter is supplied to the --filter-contents option. hg-fast-export +runs the filter for each exported file, pipes its content to the filter's +standard input, and uses the filter's standard output in place +of the file's original content. The prototypical use of this feature +is to convert line endings in text files from CRLF to git's preferred LF: + +``` +-- Start of crlf-filter.sh -- +#!/bin/sh +# $1 = pathname of exported file relative to the root of the repo +# $2 = Mercurial's hash of the file +# $3 = "1" if Mercurial reports the file as binary, otherwise "0" + +if [ "$3" = "1" ]; then cat; else dos2unix; fi +-- End of crlf-filter.sh -- +``` + Notes/Limitations ----------------- diff --git a/hg-fast-export.py b/hg-fast-export.py index 0714b30..2394b2e 100755 --- a/hg-fast-export.py +++ b/hg-fast-export.py @@ -123,7 +123,7 @@ def get_author(logmessage,committer,authors): return r return committer -def export_file_contents(ctx,manifest,files,hgtags,encoding=''): +def export_file_contents(ctx,manifest,files,hgtags,encoding='',filter_contents=None): count=0 max=len(files) for file in files: @@ -131,11 +131,24 @@ def export_file_contents(ctx,manifest,files,hgtags,encoding=''): if not hgtags and file == ".hgtags": 
sys.stderr.write('Skip %s\n' % (file)) continue - d=ctx.filectx(file).data() if encoding: filename=file.decode(encoding).encode('utf8') else: filename=file + file_ctx=ctx.filectx(file) + d=file_ctx.data() + if filter_contents: + import subprocess + filter_cmd=filter_contents + [filename,node.hex(file_ctx.filenode()),'1' if file_ctx.isbinary() else '0'] + try: + filter_proc=subprocess.Popen(filter_cmd,stdin=subprocess.PIPE,stdout=subprocess.PIPE) + d,_=filter_proc.communicate(d) + except: + sys.stderr.write('Running filter-contents %s:\n' % filter_cmd) + raise + filter_ret=filter_proc.poll() + if filter_ret: + raise subprocess.CalledProcessError(filter_ret,filter_cmd) wr('M %s inline %s' % (gitmode(manifest.flags(file)), strip_leading_slash(filename))) wr('data %d' % len(d)) # had some trouble with size() @@ -185,7 +198,7 @@ def strip_leading_slash(filename): return filename def export_commit(ui,repo,revision,old_marks,max,count,authors, - branchesmap,sob,brmap,hgtags,encoding='',fn_encoding=''): + branchesmap,sob,brmap,hgtags,encoding='',fn_encoding='',filter_contents=None): def get_branchname(name): if brmap.has_key(name): return brmap[name] @@ -246,8 +259,8 @@ def export_commit(ui,repo,revision,old_marks,max,count,authors, removed=[strip_leading_slash(x) for x in removed] map(lambda r: wr('D %s' % r),removed) - export_file_contents(ctx,man,added,hgtags,fn_encoding) - export_file_contents(ctx,man,changed,hgtags,fn_encoding) + export_file_contents(ctx,man,added,hgtags,fn_encoding,filter_contents) + export_file_contents(ctx,man,changed,hgtags,fn_encoding,filter_contents) wr() return checkpoint(count) @@ -383,7 +396,7 @@ def verify_heads(ui,repo,cache,force,branchesmap): def hg2git(repourl,m,marksfile,mappingfile,headsfile,tipfile, authors={},branchesmap={},tagsmap={}, - sob=False,force=False,hgtags=False,notes=False,encoding='',fn_encoding=''): + sob=False,force=False,hgtags=False,notes=False,encoding='',fn_encoding='',filter_contents=None): def 
check_cache(filename, contents): if len(contents) == 0: sys.stderr.write('Warning: %s does not contain any data, this will probably make an incremental import fail\n' % filename) @@ -425,7 +438,7 @@ def hg2git(repourl,m,marksfile,mappingfile,headsfile,tipfile, brmap={} for rev in range(min,max): c=export_commit(ui,repo,rev,old_marks,max,c,authors,branchesmap, - sob,brmap,hgtags,encoding,fn_encoding) + sob,brmap,hgtags,encoding,fn_encoding,filter_contents) if notes: for rev in range(min,max): c=export_note(ui,repo,rev,c,authors, encoding, rev == min and min != 0) @@ -485,6 +498,8 @@ if __name__=='__main__': help="Assume file names from Mercurial are encoded in <filename_encoding>") parser.add_option("--mappings-are-raw",dest="raw_mappings", default=False, help="Assume mappings are raw <key>=<value> lines") + parser.add_option("--filter-contents",dest="filter_contents", + help="Pipe contents of each exported file through FILTER_CONTENTS <file-path> <hg-hash> <is-binary>") (options,args)=parser.parse_args() @@ -523,8 +538,13 @@ if __name__=='__main__': if options.fn_encoding!=None: fn_encoding=options.fn_encoding + filter_contents=None + if options.filter_contents!=None: + import shlex + filter_contents=shlex.split(options.filter_contents) + sys.exit(hg2git(options.repourl,m,options.marksfile,options.mappingfile, options.headsfile, options.statusfile, authors=a,branchesmap=b,tagsmap=t, sob=options.sob,force=options.force,hgtags=options.hgtags, - notes=options.notes,encoding=encoding,fn_encoding=fn_encoding)) + notes=options.notes,encoding=encoding,fn_encoding=fn_encoding,filter_contents=filter_contents)) diff --git a/hg-fast-export.sh b/hg-fast-export.sh index 1e9be36..5f4e4da 100755 --- a/hg-fast-export.sh +++ b/hg-fast-export.sh @@ -56,6 +56,8 @@ Options: --fe <filename_encoding> Assume filenames from Mercurial are encoded in <filename_encoding> --mappings-are-raw Assume mappings are raw <key>=<value> lines + --filter-contents <cmd> Pipe contents of each exported file through <cmd> + with <file-path> <hg-hash> <is-binary> as arguments " case "$1" in -h|--help)