1 from __future__ import absolute_import
5 from lit.ShCommands import Command, Pipeline, Seq
8 def __init__(self, data, win32Escapes = False):
12 self.win32Escapes = win32Escapes
15 c = self.data[self.pos]
20 return self.data[self.pos]
22 def maybe_eat(self, c):
24 maybe_eat(c) - Consume the character c if it is the next character,
25 returning True if a character was consumed. """
26 if self.data[self.pos] == c:
31 def lex_arg_fast(self, c):
32 # Get the leading whitespace free section.
33 chunk = self.data[self.pos - 1:].split(None, 1)[0]
35 # If it has special characters, the fast path failed.
36 if ('|' in chunk or '&' in chunk or
37 '<' in chunk or '>' in chunk or
38 "'" in chunk or '"' in chunk or
39 ';' in chunk or '\\' in chunk):
42 self.pos = self.pos - 1 + len(chunk)
45 def lex_arg_slow(self, c):
47 str = self.lex_arg_quoted(c)
50 while self.pos != self.end:
52 if c.isspace() or c in "|&;":
55 # This is an annoying case; we treat '2>' as a single token so
56 # we don't have to track whitespace tokens.
58 # If the parse string isn't an integer, do the usual thing.
62 # Otherwise, lex the operator and convert to a redirection
65 tok = self.lex_one_token()
66 assert isinstance(tok, tuple) and len(tok) == 1
70 str += self.lex_arg_quoted('"')
73 str += self.lex_arg_quoted("'")
74 elif not self.win32Escapes and c == '\\':
75 # Outside of a string, '\\' escapes everything.
77 if self.pos == self.end:
79 "escape at end of quoted argument in: %r" % self.data)
86 def lex_arg_quoted(self, delim):
88 while self.pos != self.end:
92 elif c == '\\' and delim == '"':
93 # Inside a '"' quoted string, '\\' only escapes the quote
94 # character and backslash, otherwise it is preserved.
95 if self.pos == self.end:
97 "escape at end of quoted argument in: %r" % self.data)
108 lit.util.warning("missing quote character in %r" % self.data)
111 def lex_arg_checked(self, c):
113 res = self.lex_arg_fast(c)
117 reference = self.lex_arg_slow(c)
120 raise ValueError("Fast path failure: %r != %r" % (
123 raise ValueError("Fast path failure: %r != %r" % (
def lex_arg(self, c):
    """Lex a single argument, preferring the fast path.

    ``c`` is forwarded unchanged to both helpers. The result of
    ``lex_arg_fast`` is returned when it is truthy; otherwise the
    argument is lexed again by ``lex_arg_slow`` and that result is
    returned instead.
    """
    fast_result = self.lex_arg_fast(c)
    if fast_result:
        return fast_result
    # The fast path declined (falsy result), so fall back to the full lexer.
    return self.lex_arg_slow(c)
130 def lex_one_token(self):
132 lex_one_token - Lex a single 'sh' token. """
138 if self.maybe_eat('|'):
142 if self.maybe_eat('&'):
144 if self.maybe_eat('>'):
148 if self.maybe_eat('&'):
150 if self.maybe_eat('>'):
154 if self.maybe_eat('&'):
156 if self.maybe_eat('>'):
160 return self.lex_arg(c)
163 while self.pos != self.end:
164 if self.look().isspace():
167 yield self.lex_one_token()
172 def __init__(self, data, win32Escapes = False, pipefail = False):
174 self.pipefail = pipefail
175 self.tokens = ShLexer(data, win32Escapes = win32Escapes).lex()
178 for item in self.tokens:
184 if token is not None:
185 self.tokens = itertools.chain([token], self.tokens)
188 def parse_command(self):
191 raise ValueError("empty command!")
192 if isinstance(tok, tuple):
193 raise ValueError("syntax error near unexpected token %r" % tok[0])
204 # If this is an argument, just add it to the current command.
205 if isinstance(tok, str):
206 args.append(self.lex())
209 # Otherwise see if it is a terminator.
210 assert isinstance(tok, tuple)
211 if tok[0] in ('|',';','&','||','&&'):
214 # Otherwise it must be a redirection.
218 raise ValueError("syntax error near token %r" % op[0])
219 redirects.append((op, arg))
221 return Command(args, redirects)
223 def parse_pipeline(self):
226 commands = [self.parse_command()]
227 while self.look() == ('|',):
229 commands.append(self.parse_command())
230 return Pipeline(commands, negate, self.pipefail)
233 lhs = self.parse_pipeline()
236 operator = self.lex()
237 assert isinstance(operator, tuple) and len(operator) == 1
241 "missing argument to operator %r" % operator[0])
243 # FIXME: Operator precedence!!
244 lhs = Seq(lhs, operator[0], self.parse_pipeline())
252 class TestShLexer(unittest.TestCase):
def lex(self, str, *args, **kwargs):
    """Lex ``str`` with ShLexer and return all produced tokens as a list.

    Extra positional and keyword arguments are passed straight through
    to the ShLexer constructor.
    """
    lexer = ShLexer(str, *args, **kwargs)
    return list(lexer.lex())
256 def test_basic(self):
257 self.assertEqual(self.lex('a|b>c&d<e;f'),
258 ['a', ('|',), 'b', ('>',), 'c', ('&',), 'd',
259 ('<',), 'e', (';',), 'f'])
261 def test_redirection_tokens(self):
262 self.assertEqual(self.lex('a2>c'),
264 self.assertEqual(self.lex('a 2>c'),
267 def test_quoting(self):
268 self.assertEqual(self.lex(""" 'a' """),
270 self.assertEqual(self.lex(""" "hello\\"world" """),
272 self.assertEqual(self.lex(""" "hello\\'world" """),
274 self.assertEqual(self.lex(""" "hello\\\\world" """),
276 self.assertEqual(self.lex(""" he"llo wo"rld """),
278 self.assertEqual(self.lex(""" a\\ b a\\\\b """),
280 self.assertEqual(self.lex(""" "" "" """),
282 self.assertEqual(self.lex(""" a\\ b """, win32Escapes = True),
285 class TestShParse(unittest.TestCase):
def parse(self, str):
    """Parse ``str`` with a ShParser built from it alone and return the result."""
    parser = ShParser(str)
    return parser.parse()
def test_basic(self):
    """Plain and quoted arguments parse into a one-command pipeline."""
    # Each case pairs a script with the argument list expected for its
    # single command; no case carries redirections.
    cases = [
        ('echo hello', ['echo', 'hello']),
        ('echo ""', ['echo', '']),
        ("""echo -DFOO='a'""", ['echo', '-DFOO=a']),
        ('echo -DFOO="a"', ['echo', '-DFOO=a']),
    ]
    for script, expected_args in cases:
        self.assertEqual(self.parse(script),
                         Pipeline([Command(expected_args, [])], False))
def test_redirection(self):
    """Redirection operators are attached to the command as (op, target) pairs."""
    # Each case: (script, expected argument list, expected redirect list).
    cases = [
        ('echo hello > c',
         ['echo', 'hello'],
         [(('>',), 'c')]),
        ('echo hello > c >> d',
         ['echo', 'hello'],
         [(('>',), 'c'), (('>>',), 'd')]),
        ('a 2>&1',
         ['a'],
         [(('>&', 2), '1')]),
    ]
    for script, expected_args, expected_redirects in cases:
        self.assertEqual(
            self.parse(script),
            Pipeline([Command(expected_args, expected_redirects)], False))
309 def test_pipeline(self):
310 self.assertEqual(self.parse('a | b'),
311 Pipeline([Command(['a'], []),
315 self.assertEqual(self.parse('a | b | c'),
316 Pipeline([Command(['a'], []),
322 self.assertEqual(self.parse('a ; b'),
323 Seq(Pipeline([Command(['a'], [])], False),
325 Pipeline([Command(['b'], [])], False)))
327 self.assertEqual(self.parse('a & b'),
328 Seq(Pipeline([Command(['a'], [])], False),
330 Pipeline([Command(['b'], [])], False)))
332 self.assertEqual(self.parse('a && b'),
333 Seq(Pipeline([Command(['a'], [])], False),
335 Pipeline([Command(['b'], [])], False)))
337 self.assertEqual(self.parse('a || b'),
338 Seq(Pipeline([Command(['a'], [])], False),
340 Pipeline([Command(['b'], [])], False)))
342 self.assertEqual(self.parse('a && b || c'),
343 Seq(Seq(Pipeline([Command(['a'], [])], False),
345 Pipeline([Command(['b'], [])], False)),
347 Pipeline([Command(['c'], [])], False)))
349 self.assertEqual(self.parse('a; b'),
350 Seq(Pipeline([Command(['a'], [])], False),
352 Pipeline([Command(['b'], [])], False)))
354 if __name__ == '__main__':