changeset 652:00a1c5697fa6

Added unicode support to listtexts
author mortenp
date Thu, 14 Jun 2007 06:54:41 +1000
parents dc9fb2d9f035
children 49388aa75fd5
files ChangeLog include/prepstdreply.h include/unistr.h src/Makefile.am src/Makefile.in src/prepstdreply.c src/send_digest.c src/unistr.c
diffstat 8 files changed, 238 insertions(+), 55 deletions(-) [+]
line wrap: on
line diff
--- a/ChangeLog	Thu Jun 14 06:48:44 2007 +1000
+++ b/ChangeLog	Thu Jun 14 06:54:41 2007 +1000
@@ -1,3 +1,4 @@
+ o Added unicode support to listtexts
  o Fixed build outside the source directory (Ansgar Burchardt)
  o Moved English listtexts and install all languages (Ansgar Burchardt)
  o Fixed missing exit()s in case of failed execl() calls (Ansgar Burchardt)
--- a/include/prepstdreply.h	Thu Jun 14 06:48:44 2007 +1000
+++ b/include/prepstdreply.h	Thu Jun 14 06:54:41 2007 +1000
@@ -28,6 +28,7 @@
 		 size_t datacount, char **data);
 char *substitute_one(const char *line, const char *listaddr,
 		 const char *listdelim, size_t datacount, char **data);
+int open_listtext(const char *listdir, const char *filename);
 char *prepstdreply(const char *listdir, const char *filename, const char *from,
 		   const char *to, const char *replyto, size_t tokencount,
 		   char **data, char *customheaders);
--- a/include/unistr.h	Thu Jun 14 06:48:44 2007 +1000
+++ b/include/unistr.h	Thu Jun 14 06:54:41 2007 +1000
@@ -43,5 +43,7 @@
 void unistr_dump(unistr *str);
 char *unistr_to_utf8(unistr *str);
 char *unistr_header_to_utf8(char *str);
+char *unistr_utf8_to_header(char *str);
+char *unistr_escaped_to_utf8(char *str);
 
 #endif
--- a/src/Makefile.am	Thu Jun 14 06:48:44 2007 +1000
+++ b/src/Makefile.am	Thu Jun 14 06:54:41 2007 +1000
@@ -30,25 +30,26 @@
 			do_all_the_voodo_here.c mygetline.c gethdrline.c \
 			log_error.c statctrl.c ctrlvalue.c dumpfd2fd.c \
 			subscriberfuncs.c ctrlvalues.c memory.c log_oper.c \
-			send_list.c readn.c getlistdelim.c
+			send_list.c readn.c getlistdelim.c unistr.c
 
 mlmmj_sub_SOURCES = mlmmj-sub.c writen.c mylocking.c \
 			getlistaddr.c chomp.c random-int.c strgen.c \
 			subscriberfuncs.c print-version.c \
 			log_error.c mygetline.c prepstdreply.c memory.c \
-			statctrl.c readn.c getlistdelim.c ctrlvalues.c
+			statctrl.c readn.c getlistdelim.c ctrlvalues.c \
+			unistr.c
 
 mlmmj_unsub_SOURCES = mlmmj-unsub.c writen.c mylocking.c \
 			getlistaddr.c chomp.c subscriberfuncs.c random-int.c \
 			strgen.c print-version.c log_error.c mygetline.c \
 			prepstdreply.c memory.c statctrl.c readn.c \
-			getlistdelim.c
+			getlistdelim.c unistr.c
 			
 mlmmj_bounce_SOURCES = mlmmj-bounce.c print-version.c log_error.c \
 		       subscriberfuncs.c strgen.c random-int.c writen.c \
 		       prepstdreply.c mygetline.c chomp.c getlistaddr.c \
 		       memory.c find_email_adr.c gethdrline.c readn.c \
-		       getlistdelim.c
+		       getlistdelim.c unistr.c
 
 mlmmj_maintd_SOURCES = mlmmj-maintd.c print-version.c log_error.c mygetline.c \
 		       strgen.c random-int.c chomp.c writen.c memory.c \
--- a/src/Makefile.in	Thu Jun 14 06:48:44 2007 +1000
+++ b/src/Makefile.in	Thu Jun 14 06:54:41 2007 +1000
@@ -140,28 +140,29 @@
 			do_all_the_voodo_here.c mygetline.c gethdrline.c \
 			log_error.c statctrl.c ctrlvalue.c dumpfd2fd.c \
 			subscriberfuncs.c ctrlvalues.c memory.c log_oper.c \
-			send_list.c readn.c getlistdelim.c
+			send_list.c readn.c getlistdelim.c unistr.c
 
 
 mlmmj_sub_SOURCES = mlmmj-sub.c writen.c mylocking.c \
 			getlistaddr.c chomp.c random-int.c strgen.c \
 			subscriberfuncs.c print-version.c \
 			log_error.c mygetline.c prepstdreply.c memory.c \
-			statctrl.c readn.c getlistdelim.c ctrlvalues.c
+			statctrl.c readn.c getlistdelim.c ctrlvalues.c \
+			unistr.c
 
 
 mlmmj_unsub_SOURCES = mlmmj-unsub.c writen.c mylocking.c \
 			getlistaddr.c chomp.c subscriberfuncs.c random-int.c \
 			strgen.c print-version.c log_error.c mygetline.c \
 			prepstdreply.c memory.c statctrl.c readn.c \
-			getlistdelim.c
+			getlistdelim.c unistr.c
 
 
 mlmmj_bounce_SOURCES = mlmmj-bounce.c print-version.c log_error.c \
 		       subscriberfuncs.c strgen.c random-int.c writen.c \
 		       prepstdreply.c mygetline.c chomp.c getlistaddr.c \
 		       memory.c find_email_adr.c gethdrline.c readn.c \
-		       getlistdelim.c
+		       getlistdelim.c unistr.c
 
 
 mlmmj_maintd_SOURCES = mlmmj-maintd.c print-version.c log_error.c mygetline.c \
@@ -189,7 +190,7 @@
 	random-int.$(OBJEXT) writen.$(OBJEXT) prepstdreply.$(OBJEXT) \
 	mygetline.$(OBJEXT) chomp.$(OBJEXT) getlistaddr.$(OBJEXT) \
 	memory.$(OBJEXT) find_email_adr.$(OBJEXT) gethdrline.$(OBJEXT) \
-	readn.$(OBJEXT) getlistdelim.$(OBJEXT)
+	readn.$(OBJEXT) getlistdelim.$(OBJEXT) unistr.$(OBJEXT)
 mlmmj_bounce_OBJECTS = $(am_mlmmj_bounce_OBJECTS)
 mlmmj_bounce_LDADD = $(LDADD)
 mlmmj_bounce_DEPENDENCIES =
@@ -223,7 +224,7 @@
 	statctrl.$(OBJEXT) ctrlvalue.$(OBJEXT) dumpfd2fd.$(OBJEXT) \
 	subscriberfuncs.$(OBJEXT) ctrlvalues.$(OBJEXT) memory.$(OBJEXT) \
 	log_oper.$(OBJEXT) send_list.$(OBJEXT) readn.$(OBJEXT) \
-	getlistdelim.$(OBJEXT)
+	getlistdelim.$(OBJEXT) unistr.$(OBJEXT)
 mlmmj_process_OBJECTS = $(am_mlmmj_process_OBJECTS)
 mlmmj_process_LDADD = $(LDADD)
 mlmmj_process_DEPENDENCIES =
@@ -253,7 +254,8 @@
 	random-int.$(OBJEXT) strgen.$(OBJEXT) subscriberfuncs.$(OBJEXT) \
 	print-version.$(OBJEXT) log_error.$(OBJEXT) mygetline.$(OBJEXT) \
 	prepstdreply.$(OBJEXT) memory.$(OBJEXT) statctrl.$(OBJEXT) \
-	readn.$(OBJEXT) getlistdelim.$(OBJEXT) ctrlvalues.$(OBJEXT)
+	readn.$(OBJEXT) getlistdelim.$(OBJEXT) ctrlvalues.$(OBJEXT) \
+	unistr.$(OBJEXT)
 mlmmj_sub_OBJECTS = $(am_mlmmj_sub_OBJECTS)
 mlmmj_sub_LDADD = $(LDADD)
 mlmmj_sub_DEPENDENCIES =
@@ -263,7 +265,7 @@
 	subscriberfuncs.$(OBJEXT) random-int.$(OBJEXT) strgen.$(OBJEXT) \
 	print-version.$(OBJEXT) log_error.$(OBJEXT) mygetline.$(OBJEXT) \
 	prepstdreply.$(OBJEXT) memory.$(OBJEXT) statctrl.$(OBJEXT) \
-	readn.$(OBJEXT) getlistdelim.$(OBJEXT)
+	readn.$(OBJEXT) getlistdelim.$(OBJEXT) unistr.$(OBJEXT)
 mlmmj_unsub_OBJECTS = $(am_mlmmj_unsub_OBJECTS)
 mlmmj_unsub_LDADD = $(LDADD)
 mlmmj_unsub_DEPENDENCIES =
--- a/src/prepstdreply.c	Thu Jun 14 06:48:44 2007 +1000
+++ b/src/prepstdreply.c	Thu Jun 14 06:54:41 2007 +1000
@@ -1,4 +1,5 @@
 /* Copyright (C) 2004 Mads Martin Joergensen <mmj at mmj.dk>
+ * Copyright (C) 2007 Morten K. Poulsen <morten at afdelingp.dk>
  *
  * $Id$
  *
@@ -40,6 +41,7 @@
 #include "getlistaddr.h"
 #include "mlmmj.h"
 #include "getlistdelim.h"
+#include "unistr.h"
 
 char *substitute(const char *line, const char *listaddr, const char *listdelim,
 		 size_t datacount, char **data)
@@ -154,47 +156,80 @@
 	return retstr;
 }
 
+
+/* Open the listtext 'filename' read-only. Tries listdir/text/ first,
+ * then DEFAULTTEXTDIR/default/ and finally DEFAULTTEXTDIR/en/. Returns
+ * the first descriptor that opens; logs an error and returns -1 if none. */
+int open_listtext(const char *listdir, const char *filename)
+{
+	char *path;
+	int fd, attempt;
+
+	for (attempt = 0; attempt < 3; attempt++) {
+		if (attempt == 0)
+			path = concatstr(3, listdir, "/text/", filename);
+		else if (attempt == 1)
+			path = concatstr(2, DEFAULTTEXTDIR "/default/",
+					filename);
+		else
+			path = concatstr(2, DEFAULTTEXTDIR "/en/", filename);
+
+		fd = open(path, O_RDONLY);
+		myfree(path);
+		if (fd >= 0)
+			return fd;
+	}
+
+	log_error(LOG_ARGS, "Could not open listtext '%s'", filename);
+	return -1;
+}
+
+
 char *prepstdreply(const char *listdir, const char *filename, const char *from,
 		   const char *to, const char *replyto, size_t tokencount,
 		   char **data, char *customheaders)
 {
 	int infd, outfd;
 	char *listaddr, *listdelim, *myfrom, *tmp, *subject, *retstr = NULL;
-	char *listfqdn;
+	char *listfqdn, *line, *utfline, *utfsub, *utfsub2;
 	char *myreplyto, *myto, *str = NULL, *mydate, *mymsgid;
 
-	tmp = concatstr(3, listdir, "/text/", filename);
-	infd = open(tmp, O_RDONLY);
-	myfree(tmp);
-	if(infd < 0) {
-		tmp = concatstr(2, DEFAULTTEXTDIR "/default/", filename);
-		infd = open(tmp, O_RDONLY);
-		myfree(tmp);
-		if(infd < 0) {
-			tmp = concatstr(2, DEFAULTTEXTDIR "/en/", filename);
-			infd = open(tmp, O_RDONLY);
-			myfree(tmp);
-			if(infd < 0) {
-				log_error(LOG_ARGS, "Could not open std mail %s", filename);
+	if ((infd = open_listtext(listdir, filename)) < 0) {
 				return NULL;
 			}
-		}
-	}
 
 	listaddr = getlistaddr(listdir);
 	listdelim = getlistdelim(listdir);
 	listfqdn = genlistfqdn(listaddr);
 
-	tmp = mygetline(infd);
-	if(strncasecmp(tmp, "Subject:", 8) != 0) {
-		log_error(LOG_ARGS, "No Subject in listtexts. Using "
-				"standard subject");
-		subject = mystrdup("Subject: mlmmj administrativa\n");
-	} else
-		subject = substitute(tmp, listaddr, listdelim, tokencount,
+	line = mygetline(infd);
+	if(!line || (strncasecmp(line, "Subject: ", 9) != 0)) {
+		log_error(LOG_ARGS, "No Subject in '%s' listtext. Using "
+				"standard subject", filename);
+		subject = mystrdup("mlmmj administrativa");
+	} else {
+		chomp(line);
+		utfsub = unistr_escaped_to_utf8(line + 9);
+		utfsub2 = substitute(utfsub, listaddr, listdelim, tokencount,
 				     data);
+		subject = unistr_utf8_to_header(utfsub2);
+		myfree(utfsub);
+		myfree(utfsub2);
+		myfree(line);
 
-	myfree(tmp);
+		/* skip empty line after subject */
+		line = mygetline(infd);
+		if (line && (line[0] == '\n')) {
+			myfree(line);
+			line = NULL;
+		}
+	}
+	if (line) {
+		utfline = unistr_escaped_to_utf8(line);
+		myfree(line);
+	} else {
+		utfline = NULL;
+	}
 	
 	myfrom = substitute(from, listaddr, listdelim, tokencount, data);
 	myto = substitute(to, listaddr, listdelim, tokencount, data);
@@ -226,11 +261,23 @@
 		myfree(listaddr);
 		myfree(listdelim);
 		myfree(listfqdn);
+		myfree(utfline);
 		return NULL;
 	}
 
-	str = concatstr(10, "From: ", myfrom, "\nTo: ", myto, "\n", myreplyto,
-			   mymsgid, mydate, subject, customheaders);
+	str = concatstr(14,
+			"From: ", myfrom,
+			"\nTo: ", myto,
+			"\n", myreplyto,
+			mymsgid,
+			mydate,
+			"Subject: ", subject,
+			"\nContent-Type: text/plain; charset=utf-8"
+			"\nContent-Encoding: 8bit"
+			"\n", customheaders,
+			"\n", utfline);
+
+	myfree(utfline);
 
 	if(writen(outfd, str, strlen(str)) < 0) {
 		log_error(LOG_ARGS, "Could not write std mail");
@@ -245,8 +292,13 @@
 
 	while((str = mygetline(infd))) {
 		tmp = str;
-		str = substitute(tmp, listaddr, listdelim, tokencount, data);
+		utfline = unistr_escaped_to_utf8(str);
 		myfree(tmp);
+
+		tmp = utfline;
+		str = substitute(utfline, listaddr, listdelim, tokencount, data);
+		myfree(tmp);
+
 		if(writen(outfd, str, strlen(str)) < 0) {
 			myfree(str);
 			myfree(listaddr);
--- a/src/send_digest.c	Thu Jun 14 06:48:44 2007 +1000
+++ b/src/send_digest.c	Thu Jun 14 06:54:41 2007 +1000
@@ -44,6 +44,7 @@
 #include "gethdrline.h"
 #include "statctrl.h"
 #include "unistr.h"
+#include "chomp.h"
 
 
 struct mail {
@@ -182,6 +183,7 @@
 	int i, fd, archivefd, status, hdrfd, txtfd;
 	char buf[45];
 	char *tmp, *queuename = NULL, *archivename, *subject, *line = NULL;
+	char *utfsub, *utfsub2, *utfline;
 	char *boundary, *listaddr, *listdelim, *listname, *listfqdn;
 	char *subst_data[10];
 	pid_t childpid, pid;
@@ -222,9 +224,7 @@
 	listfqdn = genlistfqdn(listaddr);
 	listdelim = getlistdelim(listdir);
 	
-	tmp = concatstr(2, listdir, "/text/digest");
-	txtfd = open(tmp, O_RDONLY);
-	myfree(tmp);
+	txtfd = open_listtext(listdir, "digest");
 	if (txtfd < 0) {
 		log_error(LOG_ARGS, "Notice: Could not open std mail digest");
 	}
@@ -252,22 +252,28 @@
 	subst_data[8] = "digestthreads";
 	subst_data[9] = thread_list(listdir, firstindex, lastindex);
 
-	if ((txtfd > 0) && (line = mygetline(txtfd)) &&
-			(strncasecmp(line, "Subject: ", 9) == 0)) {
-		subject = substitute(line + 9, listaddr, listdelim,
-				5, subst_data);
+	if ((txtfd < 0) || !(line = mygetline(txtfd)) ||
+			(strncasecmp(line, "Subject: ", 9) != 0)) {
+
+		utfsub = mystrdup("Digest of $listaddr$ issue $digestissue$"
+				" ($digestinterval$)");
 	} else {
-		subject = substitute("Digest of $listaddr$ issue $digestissue$"
-				" ($digestinterval$)\n", listaddr, listdelim,
-				5, subst_data);
+
+		chomp(line);
+		utfsub = unistr_escaped_to_utf8(line + 9);
 	}
 
-	tmp = concatstr(9, "From: ", listname, listdelim, "help@", listfqdn,
+	utfsub2 = substitute(utfsub, listaddr, listdelim, 5, subst_data);
+	subject = unistr_utf8_to_header(utfsub2);
+	myfree(utfsub);
+	myfree(utfsub2);
+
+	tmp = concatstr(10, "From: ", listname, listdelim, "help@", listfqdn,
 			   "\nMIME-Version: 1.0"
 			   "\nContent-Type: multipart/" DIGESTMIMETYPE "; "
 			   "boundary=", boundary,
-			   "\nSubject: ", subject);
-	/* subject includes a newline */
+			   "\nSubject: ", subject,
+			   "\n");
 
 	myfree(listfqdn);
 	myfree(subject);
@@ -315,6 +321,7 @@
 
 		tmp = concatstr(3, "\n--", boundary,
 				"\nContent-Type: text/plain; charset=UTF-8"
+				"\nContent-Encoding: 8bit"
 				"\n\n");
 		if (writen(fd, tmp, strlen(tmp)) == -1) {
 			log_error(LOG_ARGS, "Could not write digest text/plain"
@@ -351,9 +358,13 @@
 
 		if (line) {
 			do {
-				tmp = substitute(line, listaddr, listdelim,
+				utfline = unistr_escaped_to_utf8(line);
+				myfree(line);
+
+				tmp = substitute(utfline, listaddr, listdelim,
 						5, subst_data);
-				myfree(line);
+				myfree(utfline);
+
 				if(writen(fd, tmp, strlen(tmp)) < 0) {
 					myfree(tmp);
 					log_error(LOG_ARGS, "Could not write"
--- a/src/unistr.c	Thu Jun 14 06:48:44 2007 +1000
+++ b/src/unistr.c	Thu Jun 14 06:54:41 2007 +1000
@@ -459,3 +459,116 @@
 
 	return ret;
 }
+
+
+/* Returns 1 if ch is an ASCII letter or digit, a space, or one of the
+ * characters . , : ; - and returns 0 for every other character
+ * (including any byte outside the ASCII range). */
+static int is_ok_in_header(char ch)
+{
+	if ((ch >= '0') && (ch <= '9')) return 1;
+	if ((ch >= 'A') && (ch <= 'Z')) return 1;
+	if ((ch >= 'a') && (ch <= 'z')) return 1;
+	switch (ch) {
+	case ' ': case '-': case '.': case ',': case ':': case ';': return 1;
+	default: return 0;
+	}
+}
+
+
+/* Encode a UTF-8 string for use as a mail header value.
+ * A string made up solely of characters accepted by is_ok_in_header()
+ * is returned as a copy made with mystrdup(). Otherwise the result is
+ * one "=?utf-8?q?...?=" word: spaces become '_', accepted characters
+ * are copied, and every other byte is written as "=XX" (uppercase hex).
+ * IN: "hyggem\xC3\xB8de torsdag"
+ * OUT: "=?utf-8?q?hyggem=C3=B8de_torsdag?="
+ */
+char *unistr_utf8_to_header(char *str)
+{
+	unistr *encoded;
+	char *result;
+	char *cur;
+	char hex[4];
+
+	/* find the first character, if any, that needs encoding */
+	cur = str;
+	while (*cur && is_ok_in_header(*cur))
+		cur++;
+	if (*cur == '\0')
+		return mystrdup(str);
+
+	encoded = unistr_new();
+	unistr_append_usascii(encoded, "=?utf-8?q?", 10);
+
+	for (cur = str; *cur; cur++) {
+		if (*cur == 0x20) {
+			unistr_append_char(encoded, '_');
+			continue;
+		}
+		if (is_ok_in_header(*cur)) {
+			unistr_append_char(encoded, *cur);
+			continue;
+		}
+		snprintf(hex, sizeof(hex), "=%02X", (unsigned char)*cur);
+		unistr_append_usascii(encoded, hex, 3);
+	}
+	unistr_append_usascii(encoded, "?=", 2);
+
+	result = unistr_to_utf8(encoded);
+	unistr_free(encoded);
+	return result;
+}
+
+
+/* Expand escapes in str: a backslash followed by 'u' and exactly four
+ * hex digits becomes that character, a doubled backslash becomes one
+ * backslash, and any other backslash is replaced by '?'. All remaining
+ * bytes go one at a time to unistr_append_usascii(). Returns the
+ * string produced by unistr_to_utf8().
+ * IN: "hyggem\\u00F8de torsdag"
+ * OUT: "hyggem\xC3\xB8de torsdag"
+ */
+char *unistr_escaped_to_utf8(char *str)
+{
+	unistr_char ch;
+	unistr *us;
+	char *ret;
+	char u[5];
+	int i;
+
+	us = unistr_new();
+
+	while (*str) {
+		if (*str != '\\') {
+			unistr_append_usascii(us, str, 1);
+			str++;
+			continue;
+		}
+		str++;
+		if (*str == '\\') {
+			str++;
+			unistr_append_char(us, '\\');
+			continue;
+		}
+		if (*str != 'u') {
+			unistr_append_char(us, '?');
+			continue;
+		}
+		str++;
+		/* isxdigit() needs an unsigned char value: a negative char is
+		 * undefined. Stopping at a non-digit never reads past NUL. */
+		for (i = 0; i < 4 && isxdigit((unsigned char)str[i]); i++)
+			u[i] = str[i];
+		if (i < 4) {
+			unistr_append_char(us, '?');
+			continue;
+		}
+		u[4] = '\0';
+		str += 4;
+		ch = strtol(u, NULL, 16);
+		unistr_append_char(us, ch);
+	}
+
+	ret = unistr_to_utf8(us);
+	unistr_free(us);
+	return ret;
+}