3 Commits

Author SHA1 Message Date
Martin Freund
90483e02e5 Quote $PYTHON variable to support paths with spaces 2018-01-24 11:43:22 +01:00
Frej Drejhammar
cc8fefe008 Change syntax of mapping files
This is done to allow escape sequences in the key and value strings.
2017-10-02 13:05:14 +02:00
Frej Drejhammar
e174c2a0b7 Refactor load_mapping() to move line parsing to inner function
This is done in preparation for allowing mappings to contain quoted
characters.
2017-09-29 18:50:41 +02:00
3 changed files with 39 additions and 12 deletions

View File

@@ -65,12 +65,18 @@ As mercurial appears to be much less picky about the syntax of the
author information than git, an author mapping file can be given to author information than git, an author mapping file can be given to
hg-fast-export to fix up malformed author strings. The file is hg-fast-export to fix up malformed author strings. The file is
specified using the -A option. The file should contain lines of the specified using the -A option. The file should contain lines of the
form `FromAuthor=ToAuthor`. The example authors.map below will form `"<key>"="<value>"`. Inside the key and value strings, all escape
translate `User <garbage<user@example.com>` to `User <user@example.com>`. sequences understood by the python `string_escape` encoding are
supported. (Versions of fast-export prior to v171002 used a different
syntax; the old syntax can be enabled with the flag
`--mappings-are-raw`.)
The example authors.map below will translate `User
<garbage<tab><user@example.com>` to `User <user@example.com>`.
``` ```
-- Start of authors.map -- -- Start of authors.map --
User <garbage<user@example.com>=User <user@example.com> "User <garbage\t<user@example.com>"="User <user@example.com>"
-- End of authors.map -- -- End of authors.map --
``` ```

View File

@@ -294,7 +294,24 @@ def export_tags(ui,repo,old_marks,mapping_cache,count,authors,tagsmap):
count=checkpoint(count) count=checkpoint(count)
return count return count
def load_mapping(name, filename): def load_mapping(name, filename, mapping_is_raw):
raw_regexp=re.compile('^([^=]+)[ ]*=[ ]*(.+)$')
string_regexp='"(((\\.)|(\\")|[^"])*)"'
quoted_regexp=re.compile('^'+string_regexp+'[ ]*=[ ]*'+string_regexp+'$')
def parse_raw_line(line):
m=raw_regexp.match(line)
if m==None:
return None
return (m.group(1).strip(), m.group(2).strip())
def parse_quoted_line(line):
m=quoted_regexp.match(line)
if m==None:
return None
return (m.group(1).decode('string_escape'),
m.group(5).decode('string_escape'))
cache={} cache={}
if not os.path.exists(filename): if not os.path.exists(filename):
sys.stderr.write('Could not open mapping file [%s]\n' % (filename)) sys.stderr.write('Could not open mapping file [%s]\n' % (filename))
@@ -302,18 +319,19 @@ def load_mapping(name, filename):
f=open(filename,'r') f=open(filename,'r')
l=0 l=0
a=0 a=0
lre=re.compile('^([^=]+)[ ]*=[ ]*(.+)$')
for line in f.readlines(): for line in f.readlines():
l+=1 l+=1
line=line.strip() line=line.strip()
if line=='' or line[0]=='#': if l==1 and line[0]=='#' and line=='# quoted-escaped-strings':
continue continue
m=lre.match(line) elif line=='' or line[0]=='#':
continue
m=parse_raw_line(line) if mapping_is_raw else parse_quoted_line(line)
if m==None: if m==None:
sys.stderr.write('Invalid file format in [%s], line %d\n' % (filename,l)) sys.stderr.write('Invalid file format in [%s], line %d\n' % (filename,l))
continue continue
# put key:value in cache, key without ^: # put key:value in cache, key without ^:
cache[m.group(1).strip()]=m.group(2).strip() cache[m[0]]=m[1]
a+=1 a+=1
f.close() f.close()
sys.stderr.write('Loaded %d %s\n' % (a, name)) sys.stderr.write('Loaded %d %s\n' % (a, name))
@@ -460,6 +478,8 @@ if __name__=='__main__':
help="Assume commit and author strings retrieved from Mercurial are encoded in <encoding>") help="Assume commit and author strings retrieved from Mercurial are encoded in <encoding>")
parser.add_option("--fe",dest="fn_encoding", parser.add_option("--fe",dest="fn_encoding",
help="Assume file names from Mercurial are encoded in <filename_encoding>") help="Assume file names from Mercurial are encoded in <filename_encoding>")
parser.add_option("--mappings-are-raw",dest="raw_mappings", default=False,
help="Assume mappings are raw <key>=<value> lines")
(options,args)=parser.parse_args() (options,args)=parser.parse_args()
@@ -474,15 +494,15 @@ if __name__=='__main__':
a={} a={}
if options.authorfile!=None: if options.authorfile!=None:
a=load_mapping('authors', options.authorfile) a=load_mapping('authors', options.authorfile, options.raw_mappings)
b={} b={}
if options.branchesfile!=None: if options.branchesfile!=None:
b=load_mapping('branches', options.branchesfile) b=load_mapping('branches', options.branchesfile, options.raw_mappings)
t={} t={}
if options.tagsfile!=None: if options.tagsfile!=None:
t=load_mapping('tags', options.tagsfile) t=load_mapping('tags', options.tagsfile, True)
if options.default_branch!=None: if options.default_branch!=None:
set_default_branch(options.default_branch) set_default_branch(options.default_branch)

View File

@@ -55,6 +55,7 @@ Options:
Mercurial are encoded in <encoding> Mercurial are encoded in <encoding>
--fe <filename_encoding> Assume filenames from Mercurial are encoded --fe <filename_encoding> Assume filenames from Mercurial are encoded
in <filename_encoding> in <filename_encoding>
--mappings-are-raw Assume mappings are raw <key>=<value> lines
" "
case "$1" in case "$1" in
-h|--help) -h|--help)
@@ -152,7 +153,7 @@ $(
exec 4>&3 3>&1 1>&4 4>&- exec 4>&3 3>&1 1>&4 4>&-
{ {
_e1=0 _e1=0
GIT_DIR="$GIT_DIR" $PYTHON "$ROOT/hg-fast-export.py" \ GIT_DIR="$GIT_DIR" "$PYTHON" "$ROOT/hg-fast-export.py" \
--repo "$REPO" \ --repo "$REPO" \
--marks "$GIT_DIR/$PFX-$SFX_MARKS" \ --marks "$GIT_DIR/$PFX-$SFX_MARKS" \
--mapping "$GIT_DIR/$PFX-$SFX_MAPPING" \ --mapping "$GIT_DIR/$PFX-$SFX_MAPPING" \