-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathshutil-make-optionhandler.py
executable file
·585 lines (523 loc) · 24.6 KB
/
shutil-make-optionhandler.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
#! /usr/bin/env python3
# -*- coding: utf-8 -*-
# This script has been converted from Python 2 to Python 3.
# TODO:
# - Convert to use argparse.
# - Specify value separator for options that can be specified many
# times (syntax maybe *"sep", *(sep) or *[sep]).
# - Support specifying a hard line break in the option description.
# - Optionally strip quotes from values specified in a configuration
# file. The quote type should determine the quote type for the
# command-line arguments, overriding other specifications. The
# stripping option could be enabled by a ^ in the option
# specification after the quote type.
# - Obligatory options: error message if missing. Would it be better
# to output the error from the Python code or generate shell script
# code for checking if the option is specified?
# - Pass-through options: allow unrecognized options to be collected,
# probably to a separate output section, so that this script can be
# used to handle options in wrapper scripts that process only some
# of the options while passing the rest to a called script. That
# requires either converting to use argparse (parse_known_args) or
# subclassing OptionParser (https://stackoverflow.com/a/9307174).
# - Options without a value in the configuration file (by using
# allow_no_value=True).
# - A marker in the option specification to allow non-option arguments
# interspersed with options. This cannot be a script option, since
# interspersed arguments would need to be enabled before parsing
# script arguments.
# - More generally, a method of passing script options via the option
# specification input. That would make sense in particular for
# specifying the option name for configuration file.
"""
shutil-make-optionhandler.py
Usage: shutil-make-optionhandler.py [options] [script_options]
[script_arguments] < option_specifications
Generate option processing code for Bourne-like shell scripts (should
also work in other shells than Bash) based on option specifications
read from the standard input. script_options and script_arguments are
passed to the target (calling) script, the options possibly modified
by values read from a configuration file (INI-syntax).
An option specification is of the form
optname1|...|optnamen[=ARG] ["default"] ['|"] [*] [[!]target] [{code}]
description
...
The first line may not have leading whitespace, whereas the
description lines must have. The components are as follows:
- optnameN: Option names, separated by vertical bars. Single-letter
option names correspond to short options, others to long ones.
- =ARG: If specified, the option takes an argument. ARG is used in the
usage message for the option.
- "default": The default (initial) value for the variable
corresponding to the option, enclosed in double quotes. References
to shell variables are (typically) expanded in the shell script. A
double quote (or a backslash) in the value itself must be escaped by
a backslash.
- '|": The option values read from a configuration file should be
enclosed in this kind of quotes in the generated command-line
arguments instead of the default ones (double quotes unless
--_config-values-single-quoted is specified).
- *: The option may be specified multiple times. If option handler
code is specified ({code}), it will be called separately for each
value. Otherwise, the target value will contain all the argument
values separated by a newline. The asterisk must be preceded and may
be followed by whitespace.
- target: The shell variable corresponding to the option value. If
not specified, the variable is the first long option name with
dashes converted to underscores. If the target is immediately
preceded by !, the default value (for an argumentless option) is 1
and the option resets it to the empty string.
- {code}: Shell script code (for example, a function call) to be
executed when encountering the option, instead of directly setting a
variable value. In the code, the option argument value can be
referred to as $1 and the option itself as $optname. A target may
be specified in addition to code; in that case, the target variable
is used for setting the default value. (Note that the specified code
is *not* executed at the initialization phase.) If no target nor
default value is specified, no variable is initialized.
- description: A description of the option for the usage message; may
span several lines, each beginning with whitespace; is subject to
reformatting (word wrapping). If the option has a default value, the
string "%(default)s" is replaced with it; if not present and if the
description does not contain the string "(default: " or "(Default:
", then "(default: DEFAULT)" is appended to the description (where
DEFAULT is the default value).
Empty lines and lines beginning with a # are ignored. A line may be
continued on the next line by ending in a backslash. This is useful in
particular for the first line. A continuation line may but need not
have leading whitespace, regardless of whether it continues the first
line or a description line. Any whitespace surrounding the
continuation backslash is replaced with a single space.
Options can be grouped with lines of the form
@ label
where label is the label for the group to be shown above the options
following in the usage message.
Options to this script are distinguished from the target script
options by having their names prefixed with an underscore. The
currently recognized options are:
--_output-section-format=FORMAT: Format each output section according
to the format string FORMAT, which may contain the keys {name} for
section name and {content} for section content. Literal \n is
replaced with a newline. Default: "----- {name}\n{content}\n-----\n"
--_config-file=FILE: Read FILE as a configuration file
--_config-file-option-name=OPTION: Use the argument of the (target
script) option OPTION as the name of the configuration file to read
--_config-section=SECT: Read options from configuration file section
SECT; default: "Default", which may be at the beginning of the
config file without an explicit section heading.
--_config-values-single-quoted: Generate single-quoted strings from
option values specified in the configuration file, without expanding
shell (environment) variables or backquotes. (Single quotes are
allowed.) By default, the script generates double-quoted strings,
subject to shell variable and backquote expansion, so literal $, `
and \ must be protected by a backslash.
--_option-group-label-format=FORMAT: Format each option group label
according to the format string FORMAT, which must contain the key
{label} for the group label. Literal \n is replaced with a newline.
Default: "\n{label}:"
Note that option values specified on the command line are treated as
single-quoted strings when they are processed by this script; the
possible expansions have already been performed by the shell.
The syntax of configuration files has two extensions to that natively
supported by Python's ConfigParser module:
- Options may be specified at the beginning of the configuration file
without a section heading.
- An option may be specified multiple times in the configuration file
with different values, corresponding to a command-line option that
can be specified multiple times. For a single-value option, the last
specified value takes effect, as usual with ConfigParser.
The script generates the following sections:
- cmdline_args: target script options and arguments (appropriately
quoted for 'eval set -- "$args"'), taking into account values from a
configuration file;
- getopt_opts: getopt option specifications (arguments for -o and -l)
as shell variable assignments (for 'eval "..."');
- set_defaults: setting default values as shell variable assignments
(for 'eval "..."');
- opt_usage: option descriptions for a usage message; and
- opt_handler: the actual option handler (a case statement, for 'eval
"..."').
"""
import re
import textwrap
import configparser
from collections import defaultdict
# dict could be used for Python 3.8+, as it preserves insertion order
from collections import OrderedDict as ConfigBaseDict
from optparse import OptionParser
import korpimport3.util
class ShellOptionHandlerGenerator(korpimport3.util.BasicInputProcessor):
def __init__(self, args=None):
    """Initialize empty option-specification state and usage layout.

    The `args` parameter is accepted but not used by this initializer.
    """
    super().__init__()
    # Parsed option specifications in input order, and a lookup from
    # each option name (without leading dashes) to its specification.
    self._optspecs = []
    self._optspec_map = {}
    # Parsed command-line options and remaining arguments; both are
    # filled in by _parse_opts().
    self._opts = None
    self._args = None
    # Usage message layout: indentation width by text type, the
    # corresponding indentation strings and the total line width.
    self._help_indent = {'opt': 2, 'text': 18, 'grouplabel': 0}
    self._help_indent_text = {
        text_type: width * ' '
        for text_type, width in self._help_indent.items()}
    self._help_width = 78
    # Option groups as (label, list of optspecs) pairs, and the option
    # list of the group to which new options are currently added.
    self._optgroups = []
    self._curr_optgroup = None
    # Pattern for the first line of an option specification; it is
    # compiled in verbose mode, so the whitespace is not significant.
    optspec_pattern = '\n'.join([
        r'''(?P<optnames> [^\s=:]+)''',
        r'''(?: [=:] (?P<optargname> \S+) )?''',
        r'''(?: \s+ (?P<default> "([^\"\\]|\\.)*") )?''',
        r'''(?: \s+''',
        r'''(?: (?P<quotetype> [\'\"]) \s* )?''',
        r'''(?: (?P<targetmulti> \*) \s* )?''',
        r'''(?: (?P<targetneg> ! \s*)? (?P<target> [a-zA-Z0-9_]+) \s* )?''',
        r'''(?: \{ \s* (?P<targetcode> .*) \s* \} )?''',
        r''')?''',
    ])
    self._optspec_re = re.compile(optspec_pattern, re.VERBOSE)
def process_input_stream(self, stream, filename=None):
    """Read option specifications from `stream` and write the output.

    The steps are: read the specifications, add a help option unless
    an option named "help" was specified, parse the command-line
    options, read the configuration file if one was given and finally
    write the output sections. `filename` is not used here.
    """
    self._read_optspecs(stream)
    help_specified = 'help' in self._optspec_map
    if not help_specified:
        self._add_help_opt()
    self._parse_opts()
    config_file = self._opts._config_file
    if config_file:
        self._read_config_file()
    self._write_output()
def _read_optspecs(self, stream):
    """Split the lines of `stream` into option specifications.

    Empty lines and lines whose first non-whitespace character is #
    are skipped. A line not beginning with a space or a tab starts a
    new specification, unless it continues a line that ended in a
    backslash. The parts of a continued line are joined with a single
    space. Each complete group of lines is passed to _add_optspec().
    """
    spec_lines = []
    # Parts of a backslash-continued line collected so far
    continued_parts = []
    for raw_line in stream:
        content = raw_line.strip()
        if content == '' or content.startswith('#'):
            continue
        begins_new_spec = (not continued_parts
                           and not raw_line.startswith((' ', '\t')))
        if begins_new_spec:
            self._add_optspec(spec_lines)
            spec_lines = []
        # FIXME: This does not allow a literal backslash at the end of
        # a line. Should we use a double backslash for that?
        if content.endswith('\\'):
            continued_parts.append(content[:-1].strip())
            continue
        continued_parts.append(content)
        spec_lines.append(' '.join(continued_parts))
        continued_parts = []
    # The input ended in a continuation backslash
    if continued_parts:
        spec_lines.append(' '.join(continued_parts))
    self._add_optspec(spec_lines)
    # Make sure that at least one (unlabelled) option group exists
    if not self._optgroups:
        self._add_optgroup([])
def _add_optspec(self, optspec_lines, optgroup=None, prepend=False):
    """Parse one option specification and register it.

    `optspec_lines` is a list of lines whose first item is the
    specification proper and the rest are description lines. A first
    line beginning with @ starts a new option group instead of
    specifying an option. An empty list is ignored.

    The parsed specification (a dict) is appended to self._optspecs,
    mapped from each of its option names in self._optspec_map and
    added to `optgroup` (by default, the current option group), at the
    beginning if `prepend` is true, otherwise at the end.
    """
    if not optspec_lines:
        return
    first_line = optspec_lines[0]
    # startswith() also copes with an empty first line
    if first_line.startswith('@'):
        self._add_optgroup(optspec_lines)
        return
    if not self._optgroups:
        self._add_optgroup([])
    mo = self._optspec_re.match(first_line)
    if not mo:
        self.error('Invalid option specification line: ' + first_line)
        # NOTE(review): error() presumably does not return; if it does,
        # skip the unparseable specification instead of failing on mo.
        return
    optspec = dict(mo.groupdict())
    # Single-character names are short options, others long ones
    optspec['names'] = [('-' + name if len(name) == 1 else '--' + name)
                        for name in optspec['optnames'].split('|')]
    for name in optspec['names']:
        try:
            name.encode('ascii')
        except UnicodeEncodeError:
            self.error('Invalid non-ASCII option name: ' + name)
    for name in optspec['names']:
        self._optspec_map[name.strip('-')] = optspec
    # The pattern allows whitespace between "!" and the target, so the
    # group may contain more than the "!" alone: test for its presence
    # instead of comparing it with '!'.
    optspec['defaulttrue'] = bool(optspec['targetneg'])
    if not optspec['target']:
        # Default target: the first long option name (or the first
        # name if there are no long ones), dashes as underscores
        long_opts = [name for name in optspec['names'] if len(name) > 2]
        target_name = long_opts[0] if long_opts else optspec['names'][0]
        optspec['pytarget'] = target_name.strip('-').replace('-', '_')
        optspec['target'] = optspec['pytarget']
    else:
        optspec['pytarget'] = optspec['target']
        optspec['explicit_target'] = True
    optspec['descr'] = ' '.join(optspec_lines[1:])
    self._optspecs.append(optspec)
    if optgroup is None:
        optgroup = self._curr_optgroup
    if prepend:
        optgroup.insert(0, optspec)
    else:
        optgroup.append(optspec)
def _add_optgroup(self, optspec_lines):
    """Start a new option group and make it the current one.

    The group label is taken from the first item of `optspec_lines`
    with the surrounding @ characters and whitespace removed; an empty
    `optspec_lines` produces a group with an empty label.
    """
    label = optspec_lines[0].strip('@').strip() if optspec_lines else ''
    group_opts = []
    self._optgroups.append((label, group_opts))
    # Options added from now on go to this group
    self._curr_optgroup = group_opts
def _add_help_opt(self):
    """Add a help option calling "usage" as the first option.

    If the first option group has a label, a new group labelled "Help"
    is added before it to contain the help option. The option is
    normally -h/--help, but if another option is already named "h",
    only --help is added, so that the short option neither conflicts
    with the existing one in the option parser nor replaces it in the
    option name lookup.
    """
    if self._optgroups[0][0] != '':
        self._optgroups[0:0] = [('Help', [])]
    optnames = 'help' if 'h' in self._optspec_map else 'h|help'
    self._add_optspec([optnames + ' {usage}', 'show this help'],
                      self._optgroups[0][1], prepend=True)
def _parse_opts(self):
    """Parse the command line into self._opts and self._args.

    The option parser recognizes both the options of this script
    (names prefixed with "--_") and the target script options from the
    option specifications. Parsing stops at the first non-option
    argument. After parsing, the configuration file name is taken from
    the option named by --_config-file-option-name if that option has
    a value, literal "\\n" in the format options is replaced with a
    newline and the parsed value of each target script option is
    stored in its specification under the key 'value'.
    """
    optparser = OptionParser(usage='', add_help_option=False)
    optparser.disable_interspersed_args()
    # Options to this script: (names without the "--_" prefix, keyword
    # arguments to add_option)
    script_opts = [
        (['config-file'], {}),
        (['config-file-option-name'], {}),
        (['config-section'], {'default': 'Default'}),
        (['config-values-single-quoted'], {'action': 'store_true'}),
        (['output-section-format'],
         {'default': '----- {name}\n{content}\n-----\n'}),
        (['option-group-label-format'], {'default': '\n{label}:'}),
    ]
    for optnames, optopts in script_opts:
        optparser.add_option(*['--_' + name for name in optnames],
                             **optopts)
    for optspec in self._optspecs:
        optopts = {'dest': optspec['pytarget']}
        if optspec['optargname'] is None:
            optopts['action'] = 'store_true'
        elif optspec['targetmulti'] is not None:
            optopts['action'] = 'append'
        optparser.add_option(*optspec['names'], **optopts)
    try:
        self._opts, self._args = optparser.parse_args()
    except UnicodeDecodeError as e:
        self.error('Probably a non-ASCII character in an option name on'
                   ' the command-line: ' + str(e))
    config_file_opt = self._opts._config_file_option_name
    if config_file_opt:
        bare_name = config_file_opt.lstrip('-')
        # The parsed value is stored under the target of the option,
        # which differs from the option name if the specification has
        # an explicit target or if another alias of the option was
        # given, so find the target via the specification. For a name
        # without a specification, fall back to the name itself.
        named_spec = self._optspec_map.get(bare_name)
        if named_spec is not None:
            dest = named_spec['pytarget']
        else:
            dest = bare_name.replace('-', '_')
        optval = getattr(self._opts, dest, None)
        if optval:
            self._opts._config_file = optval
    self._opts._output_section_format = (
        self._opts._output_section_format.replace('\\n', '\n'))
    self._opts._option_group_label_format = (
        self._opts._option_group_label_format.replace('\\n', '\n'))
    for optspec in self._optspecs:
        optspec['value'] = getattr(self._opts, optspec['pytarget'], None)
class ListExtendDict(ConfigBaseDict):

    """Dictionary combining list values assigned to the same key.

    If a key already has a list value and the new value assigned to it
    is also a list, the existing list is extended in place with an
    empty string followed by the items of the new list, instead of
    being replaced. In all other cases, assignment works as usual.

    This makes ConfigParser handle options specified multiple times in
    a configuration file: the different values of an option end up
    separated by two consecutive newlines, whereas the lines of a
    single multi-line value are separated by a single newline.

    NOTE: This relies on ConfigParser._read() in Python 3 (and 2.7)
    internally collecting multi-line values to lists. If that changes,
    this probably will not work.
    """

    def __setitem__(self, key, val):
        if isinstance(val, list) and key in self:
            existing = self[key]
            if isinstance(existing, list):
                # The empty string marks the boundary between the
                # values
                existing += ['', *val]
                return
        super().__setitem__(key, val)
def _read_config_file(self):
    """Read option values from the configuration file.

    The items of the section self._opts._config_section of the file
    self._opts._config_file are passed to _set_config_options().
    Configuration parsing errors are reported with self.error().
    """

    def with_default_section(lines):
        """Yield a [Default] section heading followed by `lines`."""
        yield '[Default]\n'
        yield from lines

    config_file = self._opts._config_file
    try:
        # strict=False to allow multiple values for the same option
        confparser = configparser.ConfigParser(
            dict_type=self.ListExtendDict, strict=False)
        # Do not lowercase, as camel case is converted to hyphenated
        # command-line option names later
        confparser.optionxform = str
        with open(config_file, 'r',
                  encoding=self._input_encoding) as conff:
            confparser.read_file(with_default_section(conff), config_file)
        # raw=True: Do not expand %(...) variable references in option
        # values
        config_items = confparser.items(self._opts._config_section,
                                        raw=True)
    except configparser.Error as e:
        self.error('Parsing configuration file: ' + str(e),
                   filename=config_file)
    self._set_config_options(config_items)
def _set_config_options(self, config_items):
    """Set option values from configuration file items.

    `config_items` is an iterable of (name, value) pairs. An option
    name is converted to the command-line form by lowercasing its
    first character, replacing underscores with dashes and converting
    camel case to dash-separated lowercase. A value is set only if
    the option does not already have one. The different values of an
    option specified multiple times are separated by two newlines in
    the value; an option that cannot be specified multiple times gets
    the last one. Unrecognized option names are reported with
    self.warn().
    """
    # An uppercase letter between two lowercase ones
    camel_hump = re.compile(r'(?<=[a-z])([A-Z])(?=[a-z])')
    for name, val in config_items:
        optname = (name[0].lower() + name[1:]).replace('_', '-')
        optname = camel_hump.sub(
            lambda mo: '-' + mo.group(1).lower(), optname)
        optspec = self._optspec_map.get(optname)
        if optspec is None:
            self.warn('Unrecognized configuration option: ' + name)
            continue
        if optspec.get('value') is not None:
            # The option already has a value
            continue
        vals = val.split('\n\n')
        multi_valued = optspec['targetmulti'] is not None
        optspec['value'] = vals if multi_valued else vals[-1]
        optspec['valuefromconfig'] = True
def _write_output(self):
    """Output all the sections formatted with the section format.

    The content of each section is produced by the method
    _make_output_SECTION, and the formatted section is passed to
    self.output().
    """
    section_format = self._opts._output_section_format
    for sectname in ('cmdline_args', 'getopt_opts', 'set_defaults',
                     'opt_usage', 'opt_handler'):
        make_content = getattr(self, '_make_output_' + sectname)
        self.output(
            section_format.format(name=sectname, content=make_content()))
def _shell_quote(self, text, type_='double'):
    """Return `text` enclosed in shell quotes.

    The result is enclosed in double quotes if `type_` is 'double' and
    in single quotes otherwise. Each enclosing quote character within
    `text` is replaced with a sequence that closes the quoted string,
    adds the quote character enclosed in quotes of the other type and
    reopens the quoted string.
    """
    if type_ == 'double':
        quote, other_quote = '"', '\''
    else:
        quote, other_quote = '\'', '"'
    quoted_quote = ''.join([quote, other_quote, quote, other_quote, quote])
    return ''.join([quote, text.replace(quote, quoted_quote), quote])
def _make_output_cmdline_args(self):
    """Return the target script options and arguments as a string.

    The result contains the options that have a value, followed by
    the non-option arguments, separated by spaces and quoted for the
    shell. For an option that may be specified multiple times, each
    value is output as a separate option if the option has handler
    code; otherwise, the values are joined with newlines into a single
    option argument.
    """
    opts = []
    for optspec in self._optspecs:
        optval = optspec['value']
        if optval is None:
            continue
        # An argumentless option has a non-list value even if it is
        # marked as possibly specified multiple times, so treat it as
        # an ordinary option.
        if optspec.get('targetmulti') and isinstance(optval, (list, tuple)):
            if optspec.get('targetcode'):
                for value in optval:
                    opts.extend(self._make_cmdline_opt(optspec, value))
            else:
                opts.extend(
                    self._make_cmdline_opt(optspec, '\n'.join(optval)))
        else:
            opts.extend(self._make_cmdline_opt(optspec, optval))
    # The arguments come from the command line, where the shell has
    # already performed its expansions, so single-quote them (as the
    # option values from the command line) to keep the shell from
    # expanding $, backquotes and backslashes again when evaluating the
    # result.
    args = [self._shell_quote(arg, 'single') for arg in self._args]
    return ' '.join(opts + args)
def _make_cmdline_opt(self, optspec, optval):
    """Return a list of command-line items for an option and its value.

    The list contains the first name of the option and, if the option
    takes an argument, `optval` quoted for the shell. A value that
    does not come from the configuration file is always single-quoted.
    A value from the configuration file is double-quoted if the
    quote type of the option specification is a double quote or, with
    no single quote specified there, if the script option for
    single-quoted configuration values is not in effect.
    """
    opts = [optspec['names'][0]]
    if not optspec.get('optargname'):
        return opts
    quote_type = 'single'
    if optspec.get('valuefromconfig'):
        spec_quote = optspec['quotetype']
        if self._opts._config_values_single_quoted:
            # Single quotes unless overridden in the specification
            use_double = (spec_quote == '"')
        else:
            # Double quotes unless overridden in the specification
            use_double = (spec_quote != '\'')
        if use_double:
            quote_type = 'double'
    opts.append(self._shell_quote(optval, quote_type))
    return opts
def _make_output_getopt_opts(self):
    """Return getopt option specifications as shell variable assignments.

    The variable shortopts contains the short option characters and
    longopts the long option names separated by commas; the options
    taking an argument are followed by a colon.
    """
    shortopts = []
    longopts = []
    for optspec in self._optspecs:
        argmarker = ':' if optspec.get('optargname') else ''
        for name in optspec['names']:
            # The names contain the leading dashes, so a short option
            # has a length of two
            if len(name) == 2:
                shortopts.append(name.strip('-') + argmarker)
            elif len(name) > 2:
                longopts.append(name.strip('-') + argmarker)
    # Prepending a + to the short option string makes GNU getopt stop
    # parsing options as soon as it encounters the first non-option
    # argument.
    shortopts_line = 'shortopts="+' + ''.join(shortopts) + '"'
    longopts_line = 'longopts="' + ','.join(longopts) + '"'
    return shortopts_line + '\n' + longopts_line
def _make_output_set_defaults(self):
    """Return shell variable assignments setting option default values.

    No assignment is generated for an option with handler code unless
    it also has a default value or an explicitly specified target.
    Without a specified default value, the value is 1 for an option
    whose default is true and empty otherwise.
    """
    defaults = []
    for optspec in self._optspecs:
        default = optspec.get('default')
        handled_by_code_only = (
            optspec.get('targetcode')
            and not default
            and not optspec.get('explicit_target'))
        if handled_by_code_only:
            continue
        if default:
            defaultval = default
        elif optspec.get('defaulttrue'):
            defaultval = '1'
        else:
            defaultval = ''
        defaults.append(optspec.get('target') + '=' + defaultval)
    return '\n'.join(defaults)
def _make_output_opt_usage(self):
    """Return the option descriptions for a usage message.

    The options are listed by option group. The options of a group
    with a non-empty label are preceded by the label, wrapped and
    formatted according to the option group label format.
    """
    usage = []
    label_format = self._opts._option_group_label_format
    for label, optspecs in self._optgroups:
        if label != '':
            wrapped_label = self._wrap_usage_text(
                label, 'grouplabel', break_on_hyphens=False)
            usage.append(label_format.format(label=wrapped_label))
        for optspec in optspecs:
            usage += self._make_opt_usage_single(optspec)
    return '\n'.join(usage)
def _make_opt_usage_single(self, optspec):
    """Return a list of usage message lines for a single option.

    The option names and the possible argument name are followed by
    the wrapped description, beginning on the same line if the names
    are short enough to leave at least two spaces before the text
    column. A default value replaces "%(default)s" in the description;
    if the description has neither that nor "(default: " or
    "(Default: ", "(default: DEFAULT)" is appended to it, before a
    possible final full stop.
    """
    names_text = ', '.join(optspec['names'])
    optarg = optspec.get('optargname')
    if optarg:
        names_text = names_text + ' ' + optarg
    names_text = self._wrap_usage_text(names_text, 'opt',
                                       break_on_hyphens=False)
    descr = optspec.get('descr') or ''
    default = optspec.get('default')
    if default:
        default_note = '(default: ' + default + ')'
        if '%(default)s' in descr:
            descr = descr.replace('%(default)s', default)
        elif re.search(r'\([Dd]efault: ', descr):
            # FIXME: Testing for "(default: " is potentially a bit
            # fragile; would it be better to have some kind of a
            # directive in the description to suppress the default
            # value?
            pass
        elif descr.endswith('.'):
            descr = descr[:-1] + ' ' + default_note + '.'
        elif descr:
            descr = descr + ' ' + default_note
        else:
            descr = default_note
    if descr:
        descr = self._wrap_usage_text(descr, 'text')
    if len(names_text) <= self._help_indent['text'] - 2:
        # Replace the beginning of the indentation of the first line
        # of the description with the option names
        return [names_text + descr[len(names_text):]]
    if descr:
        return [names_text, descr]
    return [names_text]
def _wrap_usage_text(self, text, text_type, break_on_hyphens=True):
    """Return `text` wrapped to the help width and indented.

    All the lines are indented according to `text_type`, a key of
    self._help_indent_text. `break_on_hyphens` is passed on to the
    text wrapper.
    """
    indent = self._help_indent_text[text_type]
    wrapper = textwrap.TextWrapper(
        width=self._help_width,
        initial_indent=indent,
        subsequent_indent=indent,
        break_on_hyphens=break_on_hyphens)
    return wrapper.fill(text)
def _make_output_opt_handler(self):
    """Return the shell script code of the option handler.

    The code is a while loop over the positional parameters with a
    case statement containing a branch for each option, followed by
    branches for "--" (end of options), unrecognized options beginning
    with "--" (a warning) and anything else (end of options).
    """
    loop_begin = ('while [ "x$1" != "x" ]; do\n'
                  'optname=$1\n'
                  'case "$optname" in')
    # Begins with a newline, resulting in an empty line between the
    # option branches and the rest
    loop_end = ('\n'
                '-- )\n'
                'shift\n'
                'break\n'
                ';;\n'
                '--* )\n'
                'warn "Unrecognized option: $1"\n'
                ';;\n'
                '* )\n'
                'break\n'
                ';;\n'
                'esac\n'
                'shift\n'
                'done')
    option_branches = [
        line
        for optspec in self._optspecs
        for line in self._make_opt_handler_single(optspec)]
    return '\n'.join([loop_begin] + option_branches + [loop_end])
def _make_opt_handler_single(self, optspec):
    """Return the lines of the case branch handling a single option.

    The branch pattern consists of all the names of the option. For an
    option taking an argument, the argument is first shifted to $1.
    The branch then contains either the handler code of the option or
    an assignment to the target variable: of the argument, or for an
    argumentless option, of 1 or (if the default is true) of the empty
    string.
    """
    pattern_indent = ' ' * 8
    body_indent = ' ' * 12
    takes_arg = bool(optspec.get('optargname'))
    action = optspec.get('targetcode')
    if not action:
        if takes_arg:
            value = '$1'
        elif optspec.get('defaulttrue'):
            value = ''
        else:
            value = '1'
        action = optspec['target'] + '=' + value
    code = [pattern_indent + ' | '.join(optspec['names']) + ' )']
    if takes_arg:
        code.append(body_indent + 'shift')
    code += [body_indent + action, body_indent + ';;']
    return code
# Generate the option handling code only when run as a script
if __name__ == "__main__":
    generator = ShellOptionHandlerGenerator()
    generator.run()