Change syntax of mapping files

This is done to allow escape sequences in the key and value strings.
This commit is contained in:
Frej Drejhammar
2017-09-30 14:51:24 +02:00
parent e174c2a0b7
commit cc8fefe008
3 changed files with 26 additions and 8 deletions

View File

@@ -65,12 +65,18 @@ As mercurial appears to be much less picky about the syntax of the
author information than git, an author mapping file can be given to
hg-fast-export to fix up malformed author strings. The file is
specified using the -A option. The file should contain lines of the
form `FromAuthor=ToAuthor`. The example authors.map below will
translate `User <garbage<user@example.com>` to `User <user@example.com>`.
form `"<key>"="<value>"`. Inside the key and value strings, all escape
sequences understood by the Python `string_escape` encoding are
supported. (Versions of fast-export prior to v171002 used a different
syntax; the old syntax can be enabled with the flag
`--mappings-are-raw`.)
The example authors.map below will translate `User
<garbage<tab><user@example.com>` (where `<tab>` stands for a literal
tab character) to `User <user@example.com>`.
```
-- Start of authors.map --
User <garbage<user@example.com>=User <user@example.com>
"User <garbage\t<user@example.com>"="User <user@example.com>"
-- End of authors.map --
```

View File

@@ -294,8 +294,10 @@ def export_tags(ui,repo,old_marks,mapping_cache,count,authors,tagsmap):
count=checkpoint(count)
return count
def load_mapping(name, filename):
def load_mapping(name, filename, mapping_is_raw):
raw_regexp=re.compile('^([^=]+)[ ]*=[ ]*(.+)$')
string_regexp='"(((\\.)|(\\")|[^"])*)"'
quoted_regexp=re.compile('^'+string_regexp+'[ ]*=[ ]*'+string_regexp+'$')
def parse_raw_line(line):
m=raw_regexp.match(line)
@@ -303,6 +305,13 @@ def load_mapping(name, filename):
return None
return (m.group(1).strip(), m.group(2).strip())
def parse_quoted_line(line):
m=quoted_regexp.match(line)
if m==None:
return None
return (m.group(1).decode('string_escape'),
m.group(5).decode('string_escape'))
cache={}
if not os.path.exists(filename):
sys.stderr.write('Could not open mapping file [%s]\n' % (filename))
@@ -317,7 +326,7 @@ def load_mapping(name, filename):
continue
elif line=='' or line[0]=='#':
continue
m=parse_raw_line(line)
m=parse_raw_line(line) if mapping_is_raw else parse_quoted_line(line)
if m==None:
sys.stderr.write('Invalid file format in [%s], line %d\n' % (filename,l))
continue
@@ -469,6 +478,8 @@ if __name__=='__main__':
help="Assume commit and author strings retrieved from Mercurial are encoded in <encoding>")
parser.add_option("--fe",dest="fn_encoding",
help="Assume file names from Mercurial are encoded in <filename_encoding>")
# --mappings-are-raw is a boolean switch. Without action="store_true",
# optparse falls back to its default "store" action, which makes the flag
# consume the next command-line argument as its value instead of simply
# setting raw_mappings to True.
parser.add_option("--mappings-are-raw",action="store_true",dest="raw_mappings",
    default=False,
    help="Assume mappings are raw <key>=<value> lines")
(options,args)=parser.parse_args()
@@ -483,15 +494,15 @@ if __name__=='__main__':
a={}
if options.authorfile!=None:
a=load_mapping('authors', options.authorfile)
a=load_mapping('authors', options.authorfile, options.raw_mappings)
b={}
if options.branchesfile!=None:
b=load_mapping('branches', options.branchesfile)
b=load_mapping('branches', options.branchesfile, options.raw_mappings)
t={}
if options.tagsfile!=None:
t=load_mapping('tags', options.tagsfile)
t=load_mapping('tags', options.tagsfile, True)
if options.default_branch!=None:
set_default_branch(options.default_branch)

View File

@@ -55,6 +55,7 @@ Options:
Mercurial are encoded in <encoding>
--fe <filename_encoding> Assume filenames from Mercurial are encoded
in <filename_encoding>
--mappings-are-raw Assume mappings are raw <key>=<value> lines
"
case "$1" in
-h|--help)