# vi: ft=diff This is the assumed_charset patch by Takashi TAKIZAWA . The home page for this patch is: http://www.emaillab.org/mutt/download15.html.en * Patch last synced with upstream: - Date: 2004-07-22 - File: http://www.emaillab.org/mutt/1.5/patch-1.5.6.tt.assumed_charset.1.gz * Changes made: NONE. * Notes: - the adjust_line and adjust_edited_file patches found in the above page were formerly applied to the debian package too, but as of 1.5.8 they are merged upstream. == END PATCH diff -uNr mutt-1.5.6.orig/charset.c mutt-1.5.6/charset.c --- mutt-1.5.6.orig/charset.c Tue Jan 21 21:25:21 2003 +++ mutt-1.5.6/charset.c Sun Feb 15 15:13:26 2004 @@ -581,3 +581,86 @@ iconv_close (fc->cd); FREE (_fc); } + +char *mutt_get_first_charset (const char *charset) +{ + static char fcharset[SHORT_STRING]; + const char *c, *c1; + + c = charset; + if (!mutt_strlen(c)) + return "us-ascii"; + if (!(c1 = strchr (c, ':'))) + return charset; + strfcpy (fcharset, c, c1 - c + 1); + return fcharset; +} + +static size_t convert_string (ICONV_CONST char *f, size_t flen, + const char *from, const char *to, + char **t, size_t *tlen) +{ + iconv_t cd; + char *buf, *ob; + size_t obl, n; + int e; + + cd = mutt_iconv_open (to, from, 0); + if (cd == (iconv_t)(-1)) + return (size_t)(-1); + obl = 4 * flen + 1; + ob = buf = safe_malloc (obl); + n = iconv (cd, &f, &flen, &ob, &obl); + if (n == (size_t)(-1) || iconv (cd, 0, 0, &ob, &obl) == (size_t)(-1)) + { + e = errno; + FREE (&buf); + iconv_close (cd); + errno = e; + return (size_t)(-1); + } + *ob = '\0'; + + *tlen = ob - buf; + + safe_realloc ((void **) &buf, ob - buf + 1); + *t = buf; + iconv_close (cd); + + return n; +} + +int mutt_convert_nonmime_string (char **ps) +{ + const char *c, *c1; + + for (c = AssumedCharset; c; c = c1 ? c1 + 1 : 0) + { + char *u = *ps; + char *s; + char *fromcode; + size_t m, n; + size_t ulen = mutt_strlen (*ps); + size_t slen; + + if (!u || !*u) + return 0; + + c1 = strchr (c, ':'); + n = c1 ? 
c1 - c : mutt_strlen (c); + if (!n) + continue; + fromcode = safe_malloc (n + 1); + strfcpy (fromcode, c, n + 1); + m = convert_string (u, ulen, fromcode, Charset, &s, &slen); + FREE (&fromcode); + if (m != (size_t)(-1)) + { + FREE (ps); + *ps = s; + return 0; + } + } + return -1; +} + diff -uNr mutt-1.5.6.orig/charset.h mutt-1.5.6/charset.h --- mutt-1.5.6.orig/charset.h Tue Mar 4 16:49:43 2003 +++ mutt-1.5.6/charset.h Sun Feb 15 15:06:19 2004 @@ -35,6 +35,8 @@ #endif int mutt_convert_string (char **, const char *, const char *, int); +char *mutt_get_first_charset (const char *); +int mutt_convert_nonmime_string (char **); iconv_t mutt_iconv_open (const char *, const char *, int); size_t mutt_iconv (iconv_t, ICONV_CONST char **, size_t *, char **, size_t *, ICONV_CONST char **, const char *); diff -uNr mutt-1.5.6.orig/globals.h mutt-1.5.6/globals.h --- mutt-1.5.6.orig/globals.h Mon Feb 2 02:15:17 2004 +++ mutt-1.5.6/globals.h Sun Feb 15 15:06:19 2004 @@ -32,6 +32,7 @@ WHERE char *AliasFile; WHERE char *AliasFmt; +WHERE char *AssumedCharset; WHERE char *AttachSep; WHERE char *Attribution; WHERE char *AttachFormat; @@ -45,6 +46,7 @@ WHERE char *DsnReturn; WHERE char *Editor; WHERE char *EscChar; +WHERE char *FileCharset; WHERE char *FolderFormat; WHERE char *ForwFmt; WHERE char *Fqdn; diff -uNr mutt-1.5.6.orig/handler.c mutt-1.5.6/handler.c --- mutt-1.5.6.orig/handler.c Wed Nov 5 18:41:31 2003 +++ mutt-1.5.6/handler.c Sun Feb 15 15:06:19 2004 @@ -1718,11 +1718,21 @@ int istext = mutt_is_text_part (b); iconv_t cd = (iconv_t)(-1); - if (istext && s->flags & M_CHARCONV) + if (istext) { - char *charset = mutt_get_parameter ("charset", b->parameter); - if (charset && Charset) - cd = mutt_iconv_open (Charset, charset, M_ICONV_HOOK_FROM); + if(s->flags & M_CHARCONV) + { + char *charset = mutt_get_parameter ("charset", b->parameter); + if (!option (OPTSTRICTMIME) && !charset) + charset = mutt_get_first_charset (AssumedCharset); + if (charset && Charset) + cd = 
mutt_iconv_open (Charset, charset, M_ICONV_HOOK_FROM); + } + else + { + if (b->file_charset) + cd = mutt_iconv_open (Charset, b->file_charset, M_ICONV_HOOK_FROM); + } } fseek (s->fpin, b->offset, 0); diff -uNr mutt-1.5.6.orig/init.h mutt-1.5.6/init.h --- mutt-1.5.6.orig/init.h Mon Feb 2 02:15:17 2004 +++ mutt-1.5.6/init.h Sun Feb 15 15:07:40 2004 @@ -184,6 +184,23 @@ ** If set, Mutt will prompt you for carbon-copy (Cc) recipients before ** editing the body of an outgoing message. */ + { "assumed_charset", DT_STR, R_NONE, UL &AssumedCharset, UL "us-ascii"}, + /* + ** .pp + ** This variable is a colon-separated list of character encoding + ** schemes for messages without character encoding indication. + ** Header field values and message body content without character encoding + ** indication are assumed to be written in one of the encodings in this list. + ** By default, all the header fields and message body without any charset + ** indication are assumed to be in "us-ascii". + ** .pp + ** For example, Japanese users might prefer this: + ** .pp + ** set assumed_charset="iso-2022-jp:euc-jp:shift_jis:utf-8" + ** .pp + ** However, only the first entry is used for the message body. + ** This variable is valid only if $$strict_mime is unset. + */ { "attach_format", DT_STR, R_NONE, UL &AttachFormat, UL "%u%D%I %t%4n %T%.40d%> [%.7m/%.10M, %.6e%?C?, %C?, %s] " }, /* ** .pp @@ -532,6 +549,20 @@ ** signed. ** (PGP only) */ + { "file_charset", DT_STR, R_NONE, UL &FileCharset, UL 0 }, + /* + ** .pp + ** This variable is a colon-separated list of character encoding + ** schemes for text file attachments. + ** If unset, $$charset value will be used instead. + ** For example, the following configuration would work for Japanese + ** text handling: + ** .pp + ** set file_charset="iso-2022-jp:euc-jp:shift_jis:utf-8" + ** .pp + ** Note: "iso-2022-*" must be put at the head of the value as shown above + ** if included. 
+ */ { "folder", DT_PATH, R_NONE, UL &Maildir, UL "~/Mail" }, /* ** .pp @@ -2476,6 +2507,19 @@ ** Setting this variable causes the ``status bar'' to be displayed on ** the first line of the screen rather than near the bottom. */ + { "strict_mime", DT_BOOL, R_NONE, OPTSTRICTMIME, 1 }, + /* + ** .pp + ** When unset, non MIME-compliant messages that don't have any + ** charset indication in ``Content-Type'' field can be displayed + ** (non MIME-compliant messages are often generated by old mailers + ** or buggy mailers like MS Outlook Express). + ** See also $$assumed_charset. + ** .pp + ** This option also replaces linear-white-space between encoded-word + ** and *text with a single space to prevent the display of MIME-encoded + ** ``Subject'' field from being divided into multiple lines. + */ { "strict_threads", DT_BOOL, R_RESORT|R_RESORT_INIT|R_INDEX, OPTSTRICTTHREADS, 0 }, /* ** .pp diff -uNr mutt-1.5.6.orig/mutt.h mutt-1.5.6/mutt.h --- mutt-1.5.6.orig/mutt.h Mon Feb 2 02:15:17 2004 +++ mutt-1.5.6/mutt.h Sun Feb 15 15:06:19 2004 @@ -406,6 +406,7 @@ OPTSIGONTOP, OPTSORTRE, OPTSTATUSONTOP, + OPTSTRICTMIME, OPTSTRICTTHREADS, OPTSUSPEND, OPTTEXTFLOWED, @@ -599,6 +600,7 @@ * If NULL, filename is used * instead. */ + char *file_charset; /* charset of attached file */ CONTENT *content; /* structure used to store detailed info about * the content of the attachment. this is used * to determine what content-transfer-encoding diff -uNr mutt-1.5.6.orig/parse.c mutt-1.5.6/parse.c --- mutt-1.5.6.orig/parse.c Wed Nov 5 18:41:33 2003 +++ mutt-1.5.6/parse.c Sun Feb 15 15:06:19 2004 @@ -208,9 +208,23 @@ if (*s == '"') { + int state_ascii = 1; s++; - for (i=0; *s && *s != '"' && i < sizeof (buffer) - 1; i++, s++) + for (i=0; *s && i < sizeof (buffer) - 1; i++, s++) { + if (!option (OPTSTRICTMIME)) { + /* As iso-2022-* has a character of '"' with non-ascii state, + * ignore it. 
*/ + if (*s == 0x1b && i < sizeof (buffer) - 2) + { + if (s[1] == '(' && (s[2] == 'B' || s[2] == 'J')) + state_ascii = 1; + else + state_ascii = 0; + } + } + if (state_ascii && *s == '"') + break; if (*s == '\\') { /* Quote the next character */ @@ -379,7 +393,9 @@ if (ct->type == TYPETEXT) { if (!(pc = mutt_get_parameter ("charset", ct->parameter))) - mutt_set_parameter ("charset", "us-ascii", &ct->parameter); + mutt_set_parameter ("charset", option (OPTSTRICTMIME) ? "us-ascii" : + (const char *) mutt_get_first_charset (AssumedCharset), + &ct->parameter); } } diff -uNr mutt-1.5.6.orig/rfc2047.c mutt-1.5.6/rfc2047.c --- mutt-1.5.6.orig/rfc2047.c Wed Nov 5 18:41:33 2003 +++ mutt-1.5.6/rfc2047.c Sun Feb 15 15:13:58 2004 @@ -706,13 +706,54 @@ return 0; } +/* return length of linear white space */ +static size_t lwslen (const char *s, size_t n) +{ + const char *p = s; + size_t len = n; + + if (n <= 0) + return 0; + + for (; p < s + n; p++) + if (!strchr (" \t\r\n", *p)) + { + len = (size_t)(p - s); + break; + } + if (strchr ("\r\n", *(p-1))) /* LWS doesn't end with CRLF */ + len = (size_t)0; + return len; +} + +/* return length of linear white space : reverse */ +static size_t lwsrlen (const char *s, size_t n) +{ + const char *p = s + n - 1; + size_t len = n; + + if (n <= 0) + return 0; + + if (strchr ("\r\n", *p)) /* LWS doesn't end with CRLF */ + return (size_t)0; + + for (; p >= s; p--) + if (!strchr (" \t\r\n", *p)) + { + len = (size_t)(s + n - 1 - p); + break; + } + return len; +} + /* try to decode anything that looks like a valid RFC2047 encoded * header field, ignoring RFC822 parsing rules */ void rfc2047_decode (char **pd) { const char *p, *q; - size_t n; + size_t m, n; int found_encoded = 0; char *d0, *d; const char *s = *pd; @@ -729,6 +770,37 @@ if (!(p = find_encoded_word (s, &q))) { /* no encoded words */ + if (!option (OPTSTRICTMIME)) + { + n = mutt_strlen (s); + if (found_encoded && (m = lwslen (s, n)) != 0) + { + if (m != n) + *d = ' ', d++, dlen--; + n 
-= m, s += m; + } + if (ascii_strcasecmp (AssumedCharset, "us-ascii")) + { + char *t; + size_t tlen; + + t = safe_malloc (n + 1); + strfcpy (t, s, n + 1); + if (mutt_convert_nonmime_string (&t) == 0) + { + tlen = mutt_strlen (t); + strncpy (d, t, tlen); + d += tlen; + } + else + { + strncpy (d, s, n); + d += n; + } + FREE (&t); + break; + } + } strncpy (d, s, dlen); d += dlen; break; @@ -737,15 +809,37 @@ if (p != s) { n = (size_t) (p - s); - /* ignore spaces between encoded words */ - if (!found_encoded || strspn (s, " \t\r\n") != n) + /* ignore spaces between encoded words + * and linear white spaces between encoded word and *text */ + if (!option (OPTSTRICTMIME)) { - if (n > dlen) - n = dlen; - memcpy (d, s, n); - d += n; - dlen -= n; + if (found_encoded && (m = lwslen (s, n)) != 0) + { + if (m != n) + *d = ' ', d++, dlen--; + n -= m, s += m; + } + + if ((m = n - lwsrlen (s, n)) != 0) + { + if (m > dlen) + m = dlen; + memcpy (d, s, m); + d += m; + dlen -= m; + if (m != n) + *d = ' ', d++, dlen--; + } } + else if (!found_encoded || strspn (s, " \t\r\n") != n) + { + if (n > dlen) + n = dlen; + memcpy (d, s, n); + d += n; + dlen -= n; + } + } rfc2047_decode_word (d, p, dlen); @@ -766,7 +860,7 @@ { while (a) { - if (a->personal && strstr (a->personal, "=?") != NULL) + if (a->personal) rfc2047_decode (&a->personal); #ifdef EXACT_ADDRESS if (a->val && strstr (a->val, "=?") != NULL) diff -uNr mutt-1.5.6.orig/rfc2231.c mutt-1.5.6/rfc2231.c --- mutt-1.5.6.orig/rfc2231.c Wed Nov 5 18:41:33 2003 +++ mutt-1.5.6/rfc2231.c Sun Feb 15 15:06:19 2004 @@ -113,6 +113,11 @@ if (option (OPTRFC2047PARAMS) && p->value && strstr (p->value, "=?")) rfc2047_decode (&p->value); + else if (!option (OPTSTRICTMIME)) + { + if (ascii_strcasecmp (AssumedCharset, "us-ascii")) + mutt_convert_nonmime_string (&p->value); + } *last = p; last = &p->next; diff -uNr mutt-1.5.6.orig/sendlib.c mutt-1.5.6/sendlib.c --- mutt-1.5.6.orig/sendlib.c Wed Nov 5 18:41:33 2003 +++ mutt-1.5.6/sendlib.c Sun Feb 15 
15:11:33 2004 @@ -496,7 +496,7 @@ } if (a->type == TYPETEXT && (!a->noconv)) - fc = fgetconv_open (fpin, Charset, + fc = fgetconv_open (fpin, a->file_charset, mutt_get_body_charset (send_charset, sizeof (send_charset), a), 0); else @@ -896,6 +896,7 @@ CONTENT *info; CONTENT_STATE state; FILE *fp = NULL; + char *fromcode; char *tocode; char buffer[100]; char chsbuf[STRING]; @@ -930,15 +931,18 @@ if (b != NULL && b->type == TYPETEXT && (!b->noconv && !b->force_charset)) { char *chs = mutt_get_parameter ("charset", b->parameter); + char *fchs = b->use_disp ? ((FileCharset && *FileCharset) ? + FileCharset : Charset) : Charset; if (Charset && (chs || SendCharset) && - convert_file_from_to (fp, Charset, chs ? chs : SendCharset, - 0, &tocode, info) != (size_t)(-1)) + convert_file_from_to (fp, fchs, chs ? chs : SendCharset, + &fromcode, &tocode, info) != (size_t)(-1)) { if (!chs) { mutt_canonical_charset (chsbuf, sizeof (chsbuf), tocode); mutt_set_parameter ("charset", chsbuf, &b->parameter); } + b->file_charset = fromcode; FREE (&tocode); safe_fclose (&fp); return info; @@ -1318,6 +1322,7 @@ body->unlink = 1; body->use_disp = 0; body->disposition = DISPINLINE; + body->noconv = 1; mutt_parse_mime_message (ctx, hdr); diff -uNr mutt-1.5.6.orig/PATCHES mutt-1.5.6/PATCHES --- mutt-1.5.6.orig/PATCHES Mon Feb 2 02:42:47 2004 +++ mutt-1.5.6/PATCHES Sun Feb 15 15:14:55 2004 @@ -0,0 +1 @@ +patch-1.5.6.tt.assumed_charset.1