changeset 850:efd01230d20a

Fix backslash escaping mechanism, ensuring that: (1) a double backslash can't effectively recurse and form part of another escape sequence (because backslash is treated both in unistr.c and prepstdreply.c); (2) other non-unicode escapes aren't ignored (because they are turned into question marks in unistr.c); (3) first lines of included files don't 'escape' escaping because they are retrieved in begin_new_*() rather than get_processed_text_line(); (4) files intended to be included transparently aren't escaped, and upcoming lines aren't doubly escaped.
author Ben Schmidt
date Wed, 29 Feb 2012 00:03:46 +1100
parents 3168aed4b01a
children 31ac95b2d625
files ChangeLog include/unistr.h src/prepstdreply.c src/unistr.c
diffstat 4 files changed, 76 insertions(+), 58 deletions(-) [+]
line wrap: on
line diff
--- a/ChangeLog	Wed Feb 22 00:11:07 2012 +1100
+++ b/ChangeLog	Wed Feb 29 00:03:46 2012 +1100
@@ -1,3 +1,6 @@
+ o Fix backslash escaping mechanism so double backslash can't effectively
+   recurse and form part of another escape sequence, other non-unicode escapes
+   aren't ignored, and first lines of included files don't 'escape' escaping.
  o Add ability to subscribe to both (normal and digest).
  o Fix access logic so subonlypost doesn't override a send access rule.
  o Make +unsubscribe remove the requester from all versions of the list.
--- a/include/unistr.h	Wed Feb 22 00:11:07 2012 +1100
+++ b/include/unistr.h	Wed Feb 29 00:03:46 2012 +1100
@@ -34,16 +34,16 @@
 
 unistr *unistr_new(void);
 void unistr_free(unistr *str);
-int unistr_cmp(unistr *str1, unistr *str2);
-unistr *unistr_dup(unistr *str);
+int unistr_cmp(const unistr *str1, const unistr *str2);
+unistr *unistr_dup(const unistr *str);
 void unistr_append_char(unistr *str, unistr_char uc);
-void unistr_append_usascii(unistr *str, char *binary, size_t bin_len);
-void unistr_append_utf8(unistr *str, char *binary, size_t bin_len);
-void unistr_append_iso88591(unistr *str, char *binary, size_t bin_len);
-void unistr_dump(unistr *str);
-char *unistr_to_utf8(unistr *str);
-char *unistr_header_to_utf8(char *str);
-char *unistr_utf8_to_header(char *str);
-char *unistr_escaped_to_utf8(char *str);
+void unistr_append_usascii(unistr *str, const char *binary, size_t bin_len);
+void unistr_append_utf8(unistr *str, const char *binary, size_t bin_len);
+void unistr_append_iso88591(unistr *str, const char *binary, size_t bin_len);
+void unistr_dump(const unistr *str);
+char *unistr_to_utf8(const unistr *str);
+char *unistr_header_to_utf8(const char *str);
+char *unistr_utf8_to_header(const char *str);
+char *unistr_escaped_to_utf8(const char *str);
 
 #endif
--- a/src/prepstdreply.c	Wed Feb 22 00:11:07 2012 +1100
+++ b/src/prepstdreply.c	Wed Feb 29 00:03:46 2012 +1100
@@ -562,10 +562,10 @@
 
 
 static void begin_new_source_file(text *txt, char **line_p, char **pos_p,
-		const char *filename) {
+		const char *filename, int transparent) {
 	char *line = *line_p;
 	char *pos = *pos_p;
-	char *tmp;
+	char *tmp, *esc;
 	source *src;
 	int fd;
 	size_t len;
@@ -593,7 +593,7 @@
 	src->suffix = NULL;
 	src->fd = fd;
 	src->fmt = NULL;
-	src->transparent = 0;
+	src->transparent = transparent;
 	src->limit = -1;
 	txt->src = src;
 	tmp = mygetline(fd);
@@ -602,6 +602,11 @@
 		**pos_p = '\0';
 		return;
 	}
+	if (!transparent) {
+		esc = unistr_escaped_to_utf8(tmp);
+		myfree(tmp);
+		tmp = esc;
+	}
 	line = concatstr(2, line, tmp);
 	*pos_p = line + (*pos_p - *line_p);
 	myfree(*line_p);
@@ -611,7 +616,7 @@
 
 
 static void begin_new_formatted_source(text *txt, char **line_p, char **pos_p,
-		char *suffix, formatted *fmt) {
+		char *suffix, formatted *fmt, int transparent) {
 	char *line = *line_p;
 	char *pos = *pos_p;
 	const char *str;
@@ -640,7 +645,7 @@
 	}
 	src->fd = -1;
 	src->fmt = fmt;
-	src->transparent = 0;
+	src->transparent = transparent;
 	src->limit = -1;
 	txt->src = src;
 	str = (*fmt->get)(fmt->state);
@@ -650,6 +655,7 @@
 		*pos_p = *line_p;
 		return;
 	}
+	if (!transparent) str = unistr_escaped_to_utf8(str);
 	line = concatstr(2, line, str);
 	/* The suffix will be added back in get_processed_text_line() */
 	*pos_p = line + strlen(line);
@@ -911,7 +917,7 @@
 		token = filename_token(token + 8);
 		if (token != NULL) {
 			filename = concatstr(3, listdir, "/control/", token);
-			begin_new_source_file(txt, line_p, pos_p, filename);
+			begin_new_source_file(txt, line_p, pos_p, filename, 0);
 			myfree(filename);
 			return 0;
 		}
@@ -919,7 +925,7 @@
 		token = filename_token(token + 5);
 		if (token != NULL) {
 			filename = concatstr(3, listdir, "/text/", token);
-			begin_new_source_file(txt, line_p, pos_p, filename);
+			begin_new_source_file(txt, line_p, pos_p, filename, 0);
 			myfree(filename);
 			return 0;
 		}
@@ -938,8 +944,7 @@
 		}
 		if (limit != 0) {
 			begin_new_source_file(txt, line_p, pos_p,
-					txt->mailname);
-			txt->src->transparent = 1;
+					txt->mailname, 1);
 			if (limit == -1) txt->src->limit = -1;
 			else txt->src->limit = limit - 1;
 			return 0;
@@ -958,7 +963,7 @@
 	while (fmt != NULL) {
 		if (strcmp(token, fmt->token) == 0) {
 			begin_new_formatted_source(txt, line_p, pos_p,
-					endpos + 1, fmt);
+					endpos + 1, fmt, 0);
 			return 0;
 		}
 		fmt = fmt->next;
@@ -1004,17 +1009,25 @@
 			}
 			if (txt->src->limit != 0) {
 				if (txt->src->fd != -1) {
-					txt->src->upcoming =
-							mygetline(txt->src->fd);
+					tmp = mygetline(txt->src->fd);
 				} else if (txt->src->fmt != NULL) {
 					item = (*txt->src->fmt->get)(
 							txt->src->fmt->state);
-					if (item==NULL) txt->src->upcoming=NULL;
-					else txt->src->upcoming=mystrdup(item);
+					if (item==NULL) tmp = NULL;
+					else tmp = mystrdup(item);
 				} else {
-					txt->src->upcoming = NULL;
+					tmp = NULL;
 				}
 				if (txt->src->limit > 0) txt->src->limit--;
+				if (tmp == NULL) {
+					txt->src->upcoming = NULL;
+				} else if (txt->src->transparent) {
+					txt->src->upcoming = tmp;
+				} else {
+					txt->src->upcoming =
+						unistr_escaped_to_utf8(tmp);
+					myfree(tmp);
+				}
 			} else {
 				txt->src->upcoming = NULL;
 			}
@@ -1027,10 +1040,6 @@
 			return NULL;
 		}
 
-		tmp = unistr_escaped_to_utf8(line);
-		myfree(line);
-		line = tmp;
-
 		if (prev != NULL) {
 			/* Wrapping */
 			len = strlen(prev);
@@ -1120,23 +1129,28 @@
 					spc = pos - line;
 					spcnext = spc + 1;
 				}
-			} else if (*pos == '\\' && *(pos + 1) == ' ') {
-				if (txt->skip == NULL) {
-					spc = pos - line - 1;
-					spcnext = spc + 1;
-				}
-				*pos = '\0';
-				tmp = concatstr(2, line, pos + 2);
-				pos = tmp + (pos - line);
-				myfree(line);
-				line = tmp;
-				continue;
 			} else if (*pos == '\t') {
 				/* Avoid breaking due to peeking */
 			} else if (txt->src->transparent) {
 				/* Do nothing if the file is to be included
 			 	 * transparently */
 				if (peeking && txt->skip == NULL) break;
+			} else if (*pos == '\\' && txt->skip == NULL) {
+				if (peeking) break;
+				if (*(pos + 1) == ' ') {
+					spc = len - 1;
+					tmp = pos + 2;
+				} else {
+					/* Includes backslash */
+					tmp = pos + 1;
+				}
+				*pos = '\0';
+				tmp = concatstr(2, line, tmp);
+				pos = tmp + len;
+				myfree(line);
+				line = tmp;
+				skipwhite = 0;
+				continue;
 			} else if (*pos == '$' && txt->skip == NULL) {
 				if (peeking) break;
 				substitute_one(&line, &pos, listaddr,
--- a/src/unistr.c	Wed Feb 22 00:11:07 2012 +1100
+++ b/src/unistr.c	Wed Feb 29 00:03:46 2012 +1100
@@ -59,7 +59,7 @@
 }
 
 
-int unistr_cmp(unistr *str1, unistr *str2)
+int unistr_cmp(const unistr *str1, const unistr *str2)
 {
 	unsigned int i;
 
@@ -77,7 +77,7 @@
 }
 
 
-unistr *unistr_dup(unistr *str)
+unistr *unistr_dup(const unistr *str)
 {
 	unistr *ret;
 	unsigned int i;
@@ -101,7 +101,7 @@
 }
 
 
-void unistr_append_usascii(unistr *str, char *binary, size_t bin_len)
+void unistr_append_usascii(unistr *str, const char *binary, size_t bin_len)
 {
 	unsigned int i;
 
@@ -115,7 +115,7 @@
 }
 
 
-void unistr_append_utf8(unistr *str, char *binary, size_t bin_len)
+void unistr_append_utf8(unistr *str, const char *binary, size_t bin_len)
 {
 	unsigned int i, j;
 	unistr_char ch;
@@ -166,7 +166,7 @@
 }
 
 
-void unistr_append_iso88591(unistr *str, char *binary, size_t bin_len)
+void unistr_append_iso88591(unistr *str, const char *binary, size_t bin_len)
 {
 	unsigned int i;
 
@@ -180,7 +180,7 @@
 }
 
 
-void unistr_dump(unistr *str)
+void unistr_dump(const unistr *str)
 {
 	unsigned int i;
 
@@ -199,7 +199,7 @@
 }
 
 
-char *unistr_to_utf8(unistr *str)
+char *unistr_to_utf8(const unistr *str)
 {
 	unsigned int i;
 	size_t len = 0;
@@ -433,7 +433,7 @@
 /* IN: "=?iso-8859-1?Q?hyggem=F8de?= torsdag"
  * OUT: "hyggem\xC3\xB8de torsdag"
  */
-char *unistr_header_to_utf8(char *str)
+char *unistr_header_to_utf8(const char *str)
 {
 	char *my_str;
 	char *word;
@@ -479,11 +479,11 @@
 /* IN: "hyggem\xC3\xB8de torsdag"
  * OUT: "=?utf-8?Q?hyggem=C3=B8de_torsdag?="
  */
-char *unistr_utf8_to_header(char *str)
+char *unistr_utf8_to_header(const char *str)
 {
 	unistr *us;
 	char *ret;
-	char *p;
+	const char *p;
 	int clean;
 	char buf[4];
 
@@ -524,24 +524,21 @@
 /* IN: "hyggem\\u00F8de torsdag"
  * OUT: "hyggem\xC3\xB8de torsdag"
  */
-char *unistr_escaped_to_utf8(char *str)
+char *unistr_escaped_to_utf8(const char *str)
 {
 	unistr_char ch;
 	unistr *us;
 	char *ret;
 	char u[5];
 	int len;
+	int skip = 0;
 
 	us = unistr_new();
 
 	while (*str) {
 		if (*str == '\\') {
 			str++;
-			if (*str == '\\') {
-				str++;
-				unistr_append_char(us, '\\');
-				continue;
-			} else if (*str == 'u') {
+			if (*str == 'u' && !skip) {
 				str++;
 				if (!isxdigit(str[0]) ||
 						!isxdigit(str[1]) ||
@@ -559,7 +556,11 @@
 				unistr_append_char(us, ch);
 				continue;
 			} else {
-				unistr_append_char(us, '?');
+				unistr_append_char(us, '\\');
+				/* Avoid processing the second backslash of a
+				 * double-backslash; but if this was such a
+				 * one, go back to normal */
+				skip = !skip;
 				continue;
 			}
 		} else {