2 This is the assumed_charset patch by Takashi TAKIZAWA <taki@cyber.email.ne.jp>.
4 The home page for this patch is:
6 http://www.emaillab.org/mutt/download15.html.en
8 * Patch last synced with upstream:
10 - File: http://www.emaillab.org/mutt/1.5/patch-1.5.6.tt.assumed_charset.1.gz
15 - the adjust_line and adjust_edited_file patches found in the above
16 page were formerly applied to the Debian package too, but as of
17 1.5.8 they are merged upstream.
20 diff -uNr mutt-1.5.6.orig/charset.c mutt-1.5.6/charset.c
21 --- mutt-1.5.6.orig/charset.c Tue Jan 21 21:25:21 2003
22 +++ mutt-1.5.6/charset.c Sun Feb 15 15:13:26 2004
28 +char *mutt_get_first_charset (const char *charset)
30 + static char fcharset[SHORT_STRING];
34 + if (!mutt_strlen(c))
36 + if (!(c1 = strchr (c, ':')))
38 + strfcpy (fcharset, c, c1 - c + 1);
42 +static size_t convert_string (ICONV_CONST char *f, size_t flen,
43 + const char *from, const char *to,
44 + char **t, size_t *tlen)
51 + cd = mutt_iconv_open (to, from, 0);
52 + if (cd == (iconv_t)(-1))
53 + return (size_t)(-1);
55 + ob = buf = safe_malloc (obl);
56 + n = iconv (cd, &f, &flen, &ob, &obl);
57 + if (n == (size_t)(-1) || iconv (cd, 0, 0, &ob, &obl) == (size_t)(-1))
63 + return (size_t)(-1);
69 + safe_realloc ((void **) &buf, ob - buf + 1);
76 +int mutt_convert_nonmime_string (char **ps)
80 + for (c = AssumedCharset; c; c = c1 ? c1 + 1 : 0)
86 + size_t ulen = mutt_strlen (*ps);
92 + c1 = strchr (c, ':');
93 + n = c1 ? c1 - c : mutt_strlen (c);
96 + fromcode = safe_malloc (n + 1);
97 + strfcpy (fromcode, c, n + 1);
98 + m = convert_string (u, ulen, fromcode, Charset, &s, &slen);
100 + if (m != (size_t)(-1))
110 diff -uNr mutt-1.5.6.orig/charset.h mutt-1.5.6/charset.h
111 --- mutt-1.5.6.orig/charset.h Tue Mar 4 16:49:43 2003
112 +++ mutt-1.5.6/charset.h Sun Feb 15 15:06:19 2004
116 int mutt_convert_string (char **, const char *, const char *, int);
117 +char *mutt_get_first_charset (const char *);
118 +int mutt_convert_nonmime_string (char **);
120 iconv_t mutt_iconv_open (const char *, const char *, int);
121 size_t mutt_iconv (iconv_t, ICONV_CONST char **, size_t *, char **, size_t *, ICONV_CONST char **, const char *);
122 diff -uNr mutt-1.5.6.orig/globals.h mutt-1.5.6/globals.h
123 --- mutt-1.5.6.orig/globals.h Mon Feb 2 02:15:17 2004
124 +++ mutt-1.5.6/globals.h Sun Feb 15 15:06:19 2004
127 WHERE char *AliasFile;
128 WHERE char *AliasFmt;
129 +WHERE char *AssumedCharset;
130 WHERE char *AttachSep;
131 WHERE char *Attribution;
132 WHERE char *AttachFormat;
134 WHERE char *DsnReturn;
137 +WHERE char *FileCharset;
138 WHERE char *FolderFormat;
141 diff -uNr mutt-1.5.6.orig/handler.c mutt-1.5.6/handler.c
142 --- mutt-1.5.6.orig/handler.c Wed Nov 5 18:41:31 2003
143 +++ mutt-1.5.6/handler.c Sun Feb 15 15:06:19 2004
144 @@ -1718,11 +1718,21 @@
145 int istext = mutt_is_text_part (b);
146 iconv_t cd = (iconv_t)(-1);
148 - if (istext && s->flags & M_CHARCONV)
151 - char *charset = mutt_get_parameter ("charset", b->parameter);
152 - if (charset && Charset)
153 - cd = mutt_iconv_open (Charset, charset, M_ICONV_HOOK_FROM);
154 + if(s->flags & M_CHARCONV)
156 + char *charset = mutt_get_parameter ("charset", b->parameter);
157 + if (!option (OPTSTRICTMIME) && !charset)
158 + charset = mutt_get_first_charset (AssumedCharset);
159 + if (charset && Charset)
160 + cd = mutt_iconv_open (Charset, charset, M_ICONV_HOOK_FROM);
164 + if (b->file_charset)
165 + cd = mutt_iconv_open (Charset, b->file_charset, M_ICONV_HOOK_FROM);
169 fseek (s->fpin, b->offset, 0);
170 diff -uNr mutt-1.5.6.orig/init.h mutt-1.5.6/init.h
171 --- mutt-1.5.6.orig/init.h Mon Feb 2 02:15:17 2004
172 +++ mutt-1.5.6/init.h Sun Feb 15 15:07:40 2004
174 ** If set, Mutt will prompt you for carbon-copy (Cc) recipients before
175 ** editing the body of an outgoing message.
177 + { "assumed_charset", DT_STR, R_NONE, UL &AssumedCharset, UL "us-ascii"},
180 + ** This variable is a colon-separated list of character encoding
181 + ** schemes for messages without character encoding indication.
182 + ** Header field values and message body content without character encoding
183 + ** indication are assumed to be written in one of the charsets in this list.
184 + ** By default, all the header fields and message body without any charset
185 + ** indication are assumed to be in "us-ascii".
187 + ** For example, Japanese users might prefer this:
189 + ** set assumed_charset="iso-2022-jp:euc-jp:shift_jis:utf-8"
191 + ** However, only the first charset in the list is used for the message body.
192 + ** This variable is valid only if $$strict_mime is unset.
194 { "attach_format", DT_STR, R_NONE, UL &AttachFormat, UL "%u%D%I %t%4n %T%.40d%> [%.7m/%.10M, %.6e%?C?, %C?, %s] " },
201 + { "file_charset", DT_STR, R_NONE, UL &FileCharset, UL 0 },
204 + ** This variable is a colon-separated list of character encoding
205 + ** schemes for text file attachments.
206 + ** If unset, $$charset value will be used instead.
207 + ** For example, the following configuration would work for Japanese
210 + ** set file_charset="iso-2022-jp:euc-jp:shift_jis:utf-8"
212 + ** Note: "iso-2022-*" must be put at the head of the value as shown above
215 { "folder", DT_PATH, R_NONE, UL &Maildir, UL "~/Mail" },
218 @@ -2476,6 +2507,19 @@
219 ** Setting this variable causes the ``status bar'' to be displayed on
220 ** the first line of the screen rather than near the bottom.
222 + { "strict_mime", DT_BOOL, R_NONE, OPTSTRICTMIME, 1 },
225 + ** When unset, non MIME-compliant messages that don't have any
226 + ** charset indication in ``Content-Type'' field can be displayed
227 + ** (non MIME-compliant messages are often generated by old mailers
228 + ** or buggy mailers like MS Outlook Express).
229 + ** See also $$assumed_charset.
231 + ** This option also replaces linear-white-space between encoded-word
232 + ** and *text with a single space to prevent the display of MIME-encoded
233 + ** ``Subject'' field from being divided into multiple lines.
235 { "strict_threads", DT_BOOL, R_RESORT|R_RESORT_INIT|R_INDEX, OPTSTRICTTHREADS, 0 },
238 diff -uNr mutt-1.5.6.orig/mutt.h mutt-1.5.6/mutt.h
239 --- mutt-1.5.6.orig/mutt.h Mon Feb 2 02:15:17 2004
240 +++ mutt-1.5.6/mutt.h Sun Feb 15 15:06:19 2004
250 * If NULL, filename is used
253 + char *file_charset; /* charset of attached file */
254 CONTENT *content; /* structure used to store detailed info about
255 * the content of the attachment. this is used
256 * to determine what content-transfer-encoding
257 diff -uNr mutt-1.5.6.orig/parse.c mutt-1.5.6/parse.c
258 --- mutt-1.5.6.orig/parse.c Wed Nov 5 18:41:33 2003
259 +++ mutt-1.5.6/parse.c Sun Feb 15 15:06:19 2004
264 + int state_ascii = 1;
266 - for (i=0; *s && *s != '"' && i < sizeof (buffer) - 1; i++, s++)
267 + for (i=0; *s && i < sizeof (buffer) - 1; i++, s++)
269 + if (!option (OPTSTRICTMIME)) {
270 + /* As iso-2022-* has a character of '"' with non-ascii state,
272 + if (*s == 0x1b && i < sizeof (buffer) - 2)
274 + if (s[1] == '(' && (s[2] == 'B' || s[2] == 'J'))
280 + if (state_ascii && *s == '"')
284 /* Quote the next character */
286 if (ct->type == TYPETEXT)
288 if (!(pc = mutt_get_parameter ("charset", ct->parameter)))
289 - mutt_set_parameter ("charset", "us-ascii", &ct->parameter);
290 + mutt_set_parameter ("charset", option (OPTSTRICTMIME) ? "us-ascii" :
291 + (const char *) mutt_get_first_charset (AssumedCharset),
296 diff -uNr mutt-1.5.6.orig/rfc2047.c mutt-1.5.6/rfc2047.c
297 --- mutt-1.5.6.orig/rfc2047.c Wed Nov 5 18:41:33 2003
298 +++ mutt-1.5.6/rfc2047.c Sun Feb 15 15:13:58 2004
299 @@ -706,13 +706,54 @@
303 +/* return length of linear white space */
304 +static size_t lwslen (const char *s, size_t n)
312 + for (; p < s + n; p++)
313 + if (!strchr (" \t\r\n", *p))
315 + len = (size_t)(p - s);
318 + if (strchr ("\r\n", *(p-1))) /* LWS doesn't end with CRLF */
323 +/* return length of linear white space : reverse */
324 +static size_t lwsrlen (const char *s, size_t n)
326 + const char *p = s + n - 1;
332 + if (strchr ("\r\n", *p)) /* LWS doesn't end with CRLF */
335 + for (; p >= s; p--)
336 + if (!strchr (" \t\r\n", *p))
338 + len = (size_t)(s + n - 1 - p);
344 /* try to decode anything that looks like a valid RFC2047 encoded
345 * header field, ignoring RFC822 parsing rules
347 void rfc2047_decode (char **pd)
352 int found_encoded = 0;
356 if (!(p = find_encoded_word (s, &q)))
358 /* no encoded words */
359 + if (!option (OPTSTRICTMIME))
361 + n = mutt_strlen (s);
362 + if (found_encoded && (m = lwslen (s, n)) != 0)
365 + *d = ' ', d++, dlen--;
368 + if (ascii_strcasecmp (AssumedCharset, "us-ascii"))
373 + t = safe_malloc (n + 1);
374 + strfcpy (t, s, n + 1);
375 + if (mutt_convert_nonmime_string (&t) == 0)
377 + tlen = mutt_strlen (t);
378 + strncpy (d, t, tlen);
390 strncpy (d, s, dlen);
393 @@ -737,15 +809,37 @@
396 n = (size_t) (p - s);
397 - /* ignore spaces between encoded words */
398 - if (!found_encoded || strspn (s, " \t\r\n") != n)
399 + /* ignore spaces between encoded words
400 + * and linear white spaces between encoded word and *text */
401 + if (!option (OPTSTRICTMIME))
408 + if (found_encoded && (m = lwslen (s, n)) != 0)
411 + *d = ' ', d++, dlen--;
415 + if ((m = n - lwsrlen (s, n)) != 0)
423 + *d = ' ', d++, dlen--;
426 + else if (!found_encoded || strspn (s, " \t\r\n") != n)
437 rfc2047_decode_word (d, p, dlen);
442 - if (a->personal && strstr (a->personal, "=?") != NULL)
444 rfc2047_decode (&a->personal);
446 if (a->val && strstr (a->val, "=?") != NULL)
447 diff -uNr mutt-1.5.6.orig/rfc2231.c mutt-1.5.6/rfc2231.c
448 --- mutt-1.5.6.orig/rfc2231.c Wed Nov 5 18:41:33 2003
449 +++ mutt-1.5.6/rfc2231.c Sun Feb 15 15:06:19 2004
452 if (option (OPTRFC2047PARAMS) && p->value && strstr (p->value, "=?"))
453 rfc2047_decode (&p->value);
454 + else if (!option (OPTSTRICTMIME))
456 + if (ascii_strcasecmp (AssumedCharset, "us-ascii"))
457 + mutt_convert_nonmime_string (&p->value);
462 diff -uNr mutt-1.5.6.orig/sendlib.c mutt-1.5.6/sendlib.c
463 --- mutt-1.5.6.orig/sendlib.c Wed Nov 5 18:41:33 2003
464 +++ mutt-1.5.6/sendlib.c Sun Feb 15 15:11:33 2004
468 if (a->type == TYPETEXT && (!a->noconv))
469 - fc = fgetconv_open (fpin, Charset,
470 + fc = fgetconv_open (fpin, a->file_charset,
471 mutt_get_body_charset (send_charset, sizeof (send_charset), a),
482 @@ -930,15 +931,18 @@
483 if (b != NULL && b->type == TYPETEXT && (!b->noconv && !b->force_charset))
485 char *chs = mutt_get_parameter ("charset", b->parameter);
486 + char *fchs = b->use_disp ? ((FileCharset && *FileCharset) ?
487 + FileCharset : Charset) : Charset;
488 if (Charset && (chs || SendCharset) &&
489 - convert_file_from_to (fp, Charset, chs ? chs : SendCharset,
490 - 0, &tocode, info) != (size_t)(-1))
491 + convert_file_from_to (fp, fchs, chs ? chs : SendCharset,
492 + &fromcode, &tocode, info) != (size_t)(-1))
496 mutt_canonical_charset (chsbuf, sizeof (chsbuf), tocode);
497 mutt_set_parameter ("charset", chsbuf, &b->parameter);
499 + b->file_charset = fromcode;
503 @@ -1318,6 +1322,7 @@
506 body->disposition = DISPINLINE;
509 mutt_parse_mime_message (ctx, hdr);
511 diff -uNr mutt-1.5.6.orig/PATCHES mutt-1.5.6/PATCHES
512 --- mutt-1.5.6.orig/PATCHES Mon Feb 2 02:42:47 2004
513 +++ mutt-1.5.6/PATCHES Sun Feb 15 15:14:55 2004
515 +patch-1.5.6.tt.assumed_charset.1