This patch lacks updated documentation and tests.
This change adds the ability to group entries in a string list with single
quotes. So the string:
"'foo bar'"
becomes 1 token rather than 2. Consequently, single quotes must now be escaped
with a backslash:
"\'"
resolves to a literal single quote.
Incidentally, this change also alters another, probably unintentional, behaviour
of the previous implementation: tokens are now separated by any amount of
whitespace rather than by exactly one whitespace character. I.e. the following
string (note the two consecutive spaces):
"a  b"
resolves to:
'a' 'b'
rather than:
'a' '' 'b'
Whitespace is now also defined to include tabs.
parse_string_list can also now fail: it returns NULL if its input contains an
unmatched opening quote.
---
fish/fish.c | 129 +++++++++++++++++++++++++++++++++++++++++++++--------
src/generator.ml | 3 +-
2 files changed, 111 insertions(+), 21 deletions(-)
diff --git a/fish/fish.c b/fish/fish.c
index a4069d6..62ec3a3 100644
--- a/fish/fish.c
+++ b/fish/fish.c
@@ -1082,30 +1082,119 @@ is_true (const char *str)
strcasecmp (str, "no") != 0;
}
-/* XXX We could improve list parsing. */
+/* Split STR into a NULL-terminated list of tokens.
+ *
+ * Tokens are separated by runs of spaces and/or tabs.  Single quotes group
+ * characters (including whitespace) into a single token and are themselves
+ * removed from the result.  The sequence \' produces a literal single quote,
+ * both inside and outside a quoted section.
+ *
+ * Returns a newly allocated array of newly allocated strings; the caller
+ * owns (and must free) both the strings and the array.  Returns NULL, after
+ * printing a message to stderr, if STR contains an unterminated quote.
+ * Allocation failure is fatal: the error is reported and the program exits.
+ */
char **
parse_string_list (const char *str)
{
- char **argv;
- const char *p, *pend;
- int argc, i;
-
- argc = 1;
- for (i = 0; str[i]; ++i)
- if (str[i] == ' ') argc++;
-
- argv = malloc (sizeof (char *) * (argc+1));
- if (argv == NULL) { perror ("malloc"); exit (1); }
-
- p = str;
- i = 0;
- while (*p) {
- pend = strchrnul (p, ' ');
- argv[i] = strndup (p, pend-p);
- i++;
- p = *pend == ' ' ? pend+1 : pend;
+  char **argv = NULL;
+  char **new_argv;
+  size_t argv_i = 0;
+
+  /* Current position pointer */
+  const char *p = str;
+
+  /* Token might be simple:
+   *  Token
+   * or be quoted:
+   *  'This is a single token'
+   * or contain embedded single-quoted sections:
+   *  This' is a sing'l'e to'ken
+   *
+   * The latter may seem over-complicated, but it's what a normal shell does.
+   * Not doing it risks surprising somebody.
+   *
+   * This outer loop is over complete tokens.
+   */
+  while (*p) {
+    char *tok = NULL;
+    size_t tok_len = 0;
+
+    /* Skip leading whitespace */
+    p += strspn (p, " \t");
+
+    char in_quote = 0;
+
+    /* This loop is over token 'fragments'. A token can be in multiple bits if
+     * it contains single quotes. We also treat both sides of an escaped quote
+     * as separate fragments because we can't just copy it: we have to remove
+     * the \.
+     *
+     * Whitespace is tested against the same fixed set (space, tab) that the
+     * strspn/strcspn calls use.  Using the locale-dependent isblank() here
+     * could disagree with them and leave the outer loop unable to advance.
+     */
+    while (*p && (in_quote || (*p != ' ' && *p != '\t'))) {
+      const char *end = p;
+
+      /* Check if the fragment starts with a quote */
+      if ('\'' == *p) {
+        /* Toggle in_quote */
+        in_quote = !in_quote;
+
+        /* Skip the quote */
+        p++; end++;
+      }
+
+      /* If we're in a quote, look for an end quote */
+      if (in_quote) {
+        end += strcspn (end, "'");
+      }
+
+      /* Otherwise, look for whitespace or a quote */
+      else {
+        end += strcspn (end, " \t'");
+      }
+
+      /* Grow the token to accommodate the fragment.  The write position is
+       * kept as an offset, not a pointer, because realloc may move the
+       * buffer. */
+      size_t old_len = tok_len;
+      size_t frag_len = end - p;
+      char *new_tok = realloc (tok, old_len + frag_len + 1);
+      if (NULL == new_tok) { perror ("realloc"); exit (1); }
+      tok = new_tok;
+      tok_len = old_len + frag_len;
+
+      /* Check if we stopped on an escaped quote */
+      if ('\'' == *end && end != p && *(end-1) == '\\') {
+        /* Add everything before \' to the token */
+        memcpy (tok + old_len, p, frag_len - 1);
+
+        /* Add the ' (it takes the slot the \ would have occupied) */
+        tok[tok_len-1] = '\'';
+
+        /* Already processed the quote */
+        p = end + 1;
+      }
+
+      else {
+        /* Add the whole fragment */
+        memcpy (tok + old_len, p, frag_len);
+
+        p = end;
+      }
+    }
+
+    /* We've reached the end of a token. We shouldn't still be in quotes. */
+    if (in_quote) {
+      fprintf (stderr, _("Runaway quote in string \"%s\"\n"), str);
+
+      /* Release the partially built token as well as the finished ones */
+      free (tok);
+
+      size_t i;
+      for (i = 0; i < argv_i; i++) {
+        free (argv[i]);
+      }
+      free (argv);
+
+      return NULL;
+    }
+
+    /* Add this token if there is one. There might not be if there was
+     * whitespace at the end of the input string */
+    if (tok) {
+      /* Add the NULL terminator */
+      tok[tok_len] = '\0';
+
+      /* Add the argument to the argument list */
+      new_argv = realloc (argv, sizeof (*argv) * (argv_i + 1));
+      if (NULL == new_argv) { perror ("realloc"); exit (1); }
+      argv = new_argv;
+      argv[argv_i] = tok;
+      argv_i++;
+    }
  }
- argv[i] = NULL;
+
+  /* NULL terminate the argument list */
+  new_argv = realloc (argv, sizeof (*argv) * (argv_i + 1));
+  if (NULL == new_argv) { perror ("realloc"); exit (1); }
+  argv = new_argv;
+  argv[argv_i] = NULL;
return argv;
}
diff --git a/src/generator.ml b/src/generator.ml
index 5cf6a94..7571f95 100755
--- a/src/generator.ml
+++ b/src/generator.ml
@@ -6348,7 +6348,8 @@ and generate_fish_cmds () =
pr " %s = strcmp (argv[%d], \"-\") != 0 ? argv[%d] :
\"/dev/stdout\";\n"
name i i
| StringList name | DeviceList name ->
- pr " %s = parse_string_list (argv[%d]);\n" name i
+ pr " %s = parse_string_list (argv[%d]);\n" name i;
+ pr " if (%s == NULL) return -1;\n" name;
| Bool name ->
pr " %s = is_true (argv[%d]) ? 1 : 0;\n" name i
| Int name ->
--
1.6.2.5