# HG changeset patch # User Ben Schmidt # Date 1330434226 -39600 # Node ID efd01230d20a8727e6a76374b473bf822e3db718 # Parent 3168aed4b01a92b508ebeb6450123b52e66cde6d Fix backslash escaping mechanism. Ensuring: - double backslash can't effectively recurse and form part of another escape sequence (because backslash is treated both in unistr.c and prepstdreply.c) - other non-unicode escapes aren't ignored (because they are turned into question marks in unistr.c) - first lines of included files don't 'escape' escaping because they are retrieved in begin_new_*() rather than get_processed_text_line(). - files intended to be included transparently aren't escaped, and upcoming lines aren't doubly escaped diff -r 3168aed4b01a -r efd01230d20a ChangeLog --- a/ChangeLog Wed Feb 22 00:11:07 2012 +1100 +++ b/ChangeLog Wed Feb 29 00:03:46 2012 +1100 @@ -1,3 +1,6 @@ + o Fix backslash escaping mechanism so double backslash can't effectively + recurse and form part of another escape sequence, other non-unicode escapes + aren't ignored, and first lines of included files don't 'escape' escaping. o Add ability to subscribe to both (normal and digest). o Fix access logic so subonlypost doesn't override a send access rule. o Make +unsubscribe remove the requester from all versions of the list. 
diff -r 3168aed4b01a -r efd01230d20a include/unistr.h --- a/include/unistr.h Wed Feb 22 00:11:07 2012 +1100 +++ b/include/unistr.h Wed Feb 29 00:03:46 2012 +1100 @@ -34,16 +34,16 @@ unistr *unistr_new(void); void unistr_free(unistr *str); -int unistr_cmp(unistr *str1, unistr *str2); -unistr *unistr_dup(unistr *str); +int unistr_cmp(const unistr *str1, const unistr *str2); +unistr *unistr_dup(const unistr *str); void unistr_append_char(unistr *str, unistr_char uc); -void unistr_append_usascii(unistr *str, char *binary, size_t bin_len); -void unistr_append_utf8(unistr *str, char *binary, size_t bin_len); -void unistr_append_iso88591(unistr *str, char *binary, size_t bin_len); -void unistr_dump(unistr *str); -char *unistr_to_utf8(unistr *str); -char *unistr_header_to_utf8(char *str); -char *unistr_utf8_to_header(char *str); -char *unistr_escaped_to_utf8(char *str); +void unistr_append_usascii(unistr *str, const char *binary, size_t bin_len); +void unistr_append_utf8(unistr *str, const char *binary, size_t bin_len); +void unistr_append_iso88591(unistr *str, const char *binary, size_t bin_len); +void unistr_dump(const unistr *str); +char *unistr_to_utf8(const unistr *str); +char *unistr_header_to_utf8(const char *str); +char *unistr_utf8_to_header(const char *str); +char *unistr_escaped_to_utf8(const char *str); #endif diff -r 3168aed4b01a -r efd01230d20a src/prepstdreply.c --- a/src/prepstdreply.c Wed Feb 22 00:11:07 2012 +1100 +++ b/src/prepstdreply.c Wed Feb 29 00:03:46 2012 +1100 @@ -562,10 +562,10 @@ static void begin_new_source_file(text *txt, char **line_p, char **pos_p, - const char *filename) { + const char *filename, int transparent) { char *line = *line_p; char *pos = *pos_p; - char *tmp; + char *tmp, *esc; source *src; int fd; size_t len; @@ -593,7 +593,7 @@ src->suffix = NULL; src->fd = fd; src->fmt = NULL; - src->transparent = 0; + src->transparent = transparent; src->limit = -1; txt->src = src; tmp = mygetline(fd); @@ -602,6 +602,11 @@ **pos_p = '\0'; 
return; } + if (!transparent) { + esc = unistr_escaped_to_utf8(tmp); + myfree(tmp); + tmp = esc; + } line = concatstr(2, line, tmp); *pos_p = line + (*pos_p - *line_p); myfree(*line_p); @@ -611,7 +616,7 @@ static void begin_new_formatted_source(text *txt, char **line_p, char **pos_p, - char *suffix, formatted *fmt) { + char *suffix, formatted *fmt, int transparent) { char *line = *line_p; char *pos = *pos_p; const char *str; @@ -640,7 +645,7 @@ } src->fd = -1; src->fmt = fmt; - src->transparent = 0; + src->transparent = transparent; src->limit = -1; txt->src = src; str = (*fmt->get)(fmt->state); @@ -650,6 +655,7 @@ *pos_p = *line_p; return; } + if (!transparent) str = unistr_escaped_to_utf8(str); line = concatstr(2, line, str); /* The suffix will be added back in get_processed_text_line() */ *pos_p = line + strlen(line); @@ -911,7 +917,7 @@ token = filename_token(token + 8); if (token != NULL) { filename = concatstr(3, listdir, "/control/", token); - begin_new_source_file(txt, line_p, pos_p, filename); + begin_new_source_file(txt, line_p, pos_p, filename, 0); myfree(filename); return 0; } @@ -919,7 +925,7 @@ token = filename_token(token + 5); if (token != NULL) { filename = concatstr(3, listdir, "/text/", token); - begin_new_source_file(txt, line_p, pos_p, filename); + begin_new_source_file(txt, line_p, pos_p, filename, 0); myfree(filename); return 0; } @@ -938,8 +944,7 @@ } if (limit != 0) { begin_new_source_file(txt, line_p, pos_p, - txt->mailname); - txt->src->transparent = 1; + txt->mailname, 1); if (limit == -1) txt->src->limit = -1; else txt->src->limit = limit - 1; return 0; @@ -958,7 +963,7 @@ while (fmt != NULL) { if (strcmp(token, fmt->token) == 0) { begin_new_formatted_source(txt, line_p, pos_p, - endpos + 1, fmt); + endpos + 1, fmt, 0); return 0; } fmt = fmt->next; @@ -1004,17 +1009,25 @@ } if (txt->src->limit != 0) { if (txt->src->fd != -1) { - txt->src->upcoming = - mygetline(txt->src->fd); + tmp = mygetline(txt->src->fd); } else if (txt->src->fmt != 
NULL) { item = (*txt->src->fmt->get)( txt->src->fmt->state); - if (item==NULL) txt->src->upcoming=NULL; - else txt->src->upcoming=mystrdup(item); + if (item==NULL) tmp = NULL; + else tmp = mystrdup(item); } else { - txt->src->upcoming = NULL; + tmp = NULL; } if (txt->src->limit > 0) txt->src->limit--; + if (tmp == NULL) { + txt->src->upcoming = NULL; + } else if (txt->src->transparent) { + txt->src->upcoming = tmp; + } else { + txt->src->upcoming = + unistr_escaped_to_utf8(tmp); + myfree(tmp); + } } else { txt->src->upcoming = NULL; } @@ -1027,10 +1040,6 @@ return NULL; } - tmp = unistr_escaped_to_utf8(line); - myfree(line); - line = tmp; - if (prev != NULL) { /* Wrapping */ len = strlen(prev); @@ -1120,23 +1129,28 @@ spc = pos - line; spcnext = spc + 1; } - } else if (*pos == '\\' && *(pos + 1) == ' ') { - if (txt->skip == NULL) { - spc = pos - line - 1; - spcnext = spc + 1; - } - *pos = '\0'; - tmp = concatstr(2, line, pos + 2); - pos = tmp + (pos - line); - myfree(line); - line = tmp; - continue; } else if (*pos == '\t') { /* Avoid breaking due to peeking */ } else if (txt->src->transparent) { /* Do nothing if the file is to be included * transparently */ if (peeking && txt->skip == NULL) break; + } else if (*pos == '\\' && txt->skip == NULL) { + if (peeking) break; + if (*(pos + 1) == ' ') { + spc = len - 1; + tmp = pos + 2; + } else { + /* Includes backslash */ + tmp = pos + 1; + } + *pos = '\0'; + tmp = concatstr(2, line, tmp); + pos = tmp + len; + myfree(line); + line = tmp; + skipwhite = 0; + continue; } else if (*pos == '$' && txt->skip == NULL) { if (peeking) break; substitute_one(&line, &pos, listaddr, diff -r 3168aed4b01a -r efd01230d20a src/unistr.c --- a/src/unistr.c Wed Feb 22 00:11:07 2012 +1100 +++ b/src/unistr.c Wed Feb 29 00:03:46 2012 +1100 @@ -59,7 +59,7 @@ } -int unistr_cmp(unistr *str1, unistr *str2) +int unistr_cmp(const unistr *str1, const unistr *str2) { unsigned int i; @@ -77,7 +77,7 @@ } -unistr *unistr_dup(unistr *str) +unistr 
*unistr_dup(const unistr *str) { unistr *ret; unsigned int i; @@ -101,7 +101,7 @@ } -void unistr_append_usascii(unistr *str, char *binary, size_t bin_len) +void unistr_append_usascii(unistr *str, const char *binary, size_t bin_len) { unsigned int i; @@ -115,7 +115,7 @@ } -void unistr_append_utf8(unistr *str, char *binary, size_t bin_len) +void unistr_append_utf8(unistr *str, const char *binary, size_t bin_len) { unsigned int i, j; unistr_char ch; @@ -166,7 +166,7 @@ } -void unistr_append_iso88591(unistr *str, char *binary, size_t bin_len) +void unistr_append_iso88591(unistr *str, const char *binary, size_t bin_len) { unsigned int i; @@ -180,7 +180,7 @@ } -void unistr_dump(unistr *str) +void unistr_dump(const unistr *str) { unsigned int i; @@ -199,7 +199,7 @@ } -char *unistr_to_utf8(unistr *str) +char *unistr_to_utf8(const unistr *str) { unsigned int i; size_t len = 0; @@ -433,7 +433,7 @@ /* IN: "=?iso-8859-1?Q?hyggem=F8de?= torsdag" * OUT: "hyggem\xC3\xB8de torsdag" */ -char *unistr_header_to_utf8(char *str) +char *unistr_header_to_utf8(const char *str) { char *my_str; char *word; @@ -479,11 +479,11 @@ /* IN: "hyggem\xC3\xB8de torsdag" * OUT: "=?utf-8?Q?hyggem=C3=B8de_torsdag?=" */ -char *unistr_utf8_to_header(char *str) +char *unistr_utf8_to_header(const char *str) { unistr *us; char *ret; - char *p; + const char *p; int clean; char buf[4]; @@ -524,24 +524,21 @@ /* IN: "hyggem\\u00F8de torsdag" * OUT: "hyggem\xC3\xB8de torsdag" */ -char *unistr_escaped_to_utf8(char *str) +char *unistr_escaped_to_utf8(const char *str) { unistr_char ch; unistr *us; char *ret; char u[5]; int len; + int skip = 0; us = unistr_new(); while (*str) { if (*str == '\\') { str++; - if (*str == '\\') { - str++; - unistr_append_char(us, '\\'); - continue; - } else if (*str == 'u') { + if (*str == 'u' && !skip) { str++; if (!isxdigit(str[0]) || !isxdigit(str[1]) || @@ -559,7 +556,11 @@ unistr_append_char(us, ch); continue; } else { - unistr_append_char(us, '?'); + unistr_append_char(us, '\\'); 
+ /* Avoid processing the second backslash of a + * double-backslash; but if this was such a + * one, go back to normal */ + skip = !skip; continue; } } else {