# vi: ft=diff
This is the assumed_charset patch by Takashi TAKIZAWA <taki@cyber.email.ne.jp>.

The home page for this patch is:

  http://www.emaillab.org/mutt/download15.html.en

* Patch last synced with upstream:
  - Date: 2004-07-22
  - File: http://www.emaillab.org/mutt/1.5/patch-1.5.6.tt.assumed_charset.1.gz

* Changes made: NONE.

* Notes:
  - the adjust_line and adjust_edited_file patches found in the above
    page were formerly applied to the Debian package too, but as of
    1.5.8 they are merged upstream.

== END PATCH
diff -uNr mutt-1.5.6.orig/charset.c mutt-1.5.6/charset.c
--- mutt-1.5.6.orig/charset.c	Tue Jan 21 21:25:21 2003
+++ mutt-1.5.6/charset.c	Sun Feb 15 15:13:26 2004
@@ -581,3 +581,86 @@
     iconv_close (fc->cd);
   FREE (_fc);
 }
+
+/* Return the first entry of the colon-separated charset list `charset'. */
+char *mutt_get_first_charset (const char *charset)
+{
+  static char fcharset[SHORT_STRING]; /* result buffer, reused by each call */
+  const char *c1;
+  if (!mutt_strlen (charset))
+    return "us-ascii"; /* default when mutt_strlen() reports no list */
+  if (!(c1 = strchr (charset, ':')))
+    return (char *) charset; /* single entry: hand back the caller's string */
+  strfcpy (fcharset, charset, /* size clamped so fcharset is never overrun */
+           (c1 - charset < SHORT_STRING ? c1 - charset + 1 : SHORT_STRING));
+  return fcharset;
+}
+
+/* Convert the flen bytes at f from charset `from' to charset `to'.
+ * On success store the NUL-terminated result (from safe_malloc) in *t and
+ * its length in *tlen, and return iconv()'s result.  On failure return
+ * (size_t)(-1) with errno restored, leaving *t and *tlen untouched. */
+static size_t convert_string (ICONV_CONST char *f, size_t flen,
+                             const char *from, const char *to,
+                             char **t, size_t *tlen)
+{
+  char *out, *outp;
+  size_t outleft, rc;
+  int saved_errno;
+  iconv_t cd = mutt_iconv_open (to, from, 0);
+
+  if (cd == (iconv_t)(-1))
+    return (size_t)(-1);
+  outleft = 4 * flen + 1; /* four output bytes per input byte, plus NUL */
+  outp = out = safe_malloc (outleft);
+  rc = iconv (cd, &f, &flen, &outp, &outleft);
+  if (rc != (size_t)(-1) && iconv (cd, 0, 0, &outp, &outleft) != (size_t)(-1))
+  {
+    *outp = '\0';
+    *tlen = outp - out;
+    safe_realloc ((void **) &out, outp - out + 1); /* trim to what was used */
+    *t = out;
+    iconv_close (cd);
+    return rc;
+  }
+  saved_errno = errno; /* report the conversion error, not a cleanup one */
+  FREE (&out);
+  iconv_close (cd);
+  errno = saved_errno;
+  return (size_t)(-1);
+}
+
+/* Try each entry of the colon-separated AssumedCharset list as the source
+ * charset of *ps, converting to Charset; on the first success replace *ps
+ * and return 0.  A NULL or empty *ps is left alone and also yields 0.
+ * Returns -1 if AssumedCharset is NULL (checked first) or nothing converts. */
+int mutt_convert_nonmime_string (char **ps)
+{
+  const char *entry = AssumedCharset, *colon;
+  char *src = *ps, *converted, *name;
+  size_t srclen, namelen, outlen, rc;
+
+  if (!entry)
+    return -1; /* no list to try */
+  if (!src || !*src)
+    return 0;
+  srclen = mutt_strlen (src);
+  for (; entry; entry = colon ? colon + 1 : 0)
+  {
+    colon = strchr (entry, ':');
+    namelen = colon ? (size_t) (colon - entry) : mutt_strlen (entry);
+    if (!namelen)
+      continue; /* empty list entry */
+    name = safe_malloc (namelen + 1);
+    strfcpy (name, entry, namelen + 1);
+    rc = convert_string (src, srclen, name, Charset, &converted, &outlen);
+    FREE (&name);
+    if (rc == (size_t)(-1))
+      continue; /* conversion failed; try the next entry */
+    FREE (ps);
+    *ps = converted;
+    return 0;
+  }
+  return -1;
+}
+
diff -uNr mutt-1.5.6.orig/charset.h mutt-1.5.6/charset.h
--- mutt-1.5.6.orig/charset.h	Tue Mar  4 16:49:43 2003
+++ mutt-1.5.6/charset.h	Sun Feb 15 15:06:19 2004
@@ -35,6 +35,8 @@
 #endif
 
 int mutt_convert_string (char **, const char *, const char *, int);
+char *mutt_get_first_charset (const char *);
+int mutt_convert_nonmime_string (char **);
 
 iconv_t mutt_iconv_open (const char *, const char *, int);
 size_t mutt_iconv (iconv_t, ICONV_CONST char **, size_t *, char **, size_t *, ICONV_CONST char **, const char *);
diff -uNr mutt-1.5.6.orig/globals.h mutt-1.5.6/globals.h
--- mutt-1.5.6.orig/globals.h	Mon Feb  2 02:15:17 2004
+++ mutt-1.5.6/globals.h	Sun Feb 15 15:06:19 2004
@@ -32,6 +32,7 @@
 
 WHERE char *AliasFile;
 WHERE char *AliasFmt;
+WHERE char *AssumedCharset;
 WHERE char *AttachSep;
 WHERE char *Attribution;
 WHERE char *AttachFormat;
@@ -45,6 +46,7 @@
 WHERE char *DsnReturn;
 WHERE char *Editor;
 WHERE char *EscChar;
+WHERE char *FileCharset;
 WHERE char *FolderFormat;
 WHERE char *ForwFmt;
 WHERE char *Fqdn;
diff -uNr mutt-1.5.6.orig/handler.c mutt-1.5.6/handler.c
--- mutt-1.5.6.orig/handler.c	Wed Nov  5 18:41:31 2003
+++ mutt-1.5.6/handler.c	Sun Feb 15 15:06:19 2004
@@ -1718,11 +1718,21 @@
   int istext = mutt_is_text_part (b);
   iconv_t cd = (iconv_t)(-1);
 
-  if (istext && s->flags & M_CHARCONV)
+  if (istext)
   {
-    char *charset = mutt_get_parameter ("charset", b->parameter);
-    if (charset && Charset)
-      cd = mutt_iconv_open (Charset, charset, M_ICONV_HOOK_FROM);
+    if(s->flags & M_CHARCONV)
+    {
+      char *charset = mutt_get_parameter ("charset", b->parameter);
+      if (!option (OPTSTRICTMIME) && !charset)
+        charset = mutt_get_first_charset (AssumedCharset);
+      if (charset && Charset)
+        cd = mutt_iconv_open (Charset, charset, M_ICONV_HOOK_FROM);
+    }
+    else
+    {
+      if (b->file_charset)
+        cd = mutt_iconv_open (Charset, b->file_charset, M_ICONV_HOOK_FROM);
+    }
   }
 
   fseek (s->fpin, b->offset, 0);
diff -uNr mutt-1.5.6.orig/init.h mutt-1.5.6/init.h
--- mutt-1.5.6.orig/init.h	Mon Feb  2 02:15:17 2004
+++ mutt-1.5.6/init.h	Sun Feb 15 15:07:40 2004
@@ -184,6 +184,23 @@
   ** If set, Mutt will prompt you for carbon-copy (Cc) recipients before
   ** editing the body of an outgoing message.
   */  
+  { "assumed_charset", DT_STR, R_NONE, UL &AssumedCharset, UL "us-ascii"},
+  /*
+  ** .pp
+  ** This variable is a colon-separated list of character encoding
+  ** schemes for messages without character encoding indication.
+  ** Header field values and message body content without character encoding
+  ** indication are assumed to be written in one of the listed encodings.
+  ** By default, all the header fields and message body without any charset
+  ** indication are assumed to be in "us-ascii".
+  ** .pp
+  ** For example, Japanese users might prefer this:
+  ** .pp
+  **   set assumed_charset="iso-2022-jp:euc-jp:shift_jis:utf-8"
+  ** .pp
+  ** However, only the first entry is used for the message body.
+  ** This variable is valid only if $$strict_mime is unset.
+  */
   { "attach_format",	DT_STR,  R_NONE, UL &AttachFormat, UL "%u%D%I %t%4n %T%.40d%> [%.7m/%.10M, %.6e%?C?, %C?, %s] " },
   /*
   ** .pp
@@ -532,6 +549,20 @@
   ** signed.
   ** (PGP only)
   */
+  { "file_charset",    DT_STR,  R_NONE, UL &FileCharset, UL 0 },
+  /*
+  ** .pp
+  ** This variable is a colon-separated list of character encoding
+  ** schemes for text file attachments.
+  ** If unset, $$charset value will be used instead.
+  ** For example, the following configuration would work for Japanese
+  ** text handling:
+  ** .pp
+  **   set file_charset="iso-2022-jp:euc-jp:shift_jis:utf-8"
+  ** .pp
+  ** Note: "iso-2022-*" must be put at the head of the value as shown above
+  ** if included.
+  */
   { "folder",		DT_PATH, R_NONE, UL &Maildir, UL "~/Mail" },
   /*
   ** .pp
@@ -2476,6 +2507,19 @@
   ** Setting this variable causes the ``status bar'' to be displayed on
   ** the first line of the screen rather than near the bottom.
   */
+  { "strict_mime",    DT_BOOL, R_NONE, OPTSTRICTMIME, 1 },
+  /*
+  ** .pp
+  ** When unset, non MIME-compliant messages that don't have any
+  ** charset indication in the ``Content-Type'' field can be displayed
+  ** (non MIME-compliant messages are often generated by old mailers
+  ** or buggy mailers like MS Outlook Express).
+  ** See also $$assumed_charset.
+  ** .pp
+  ** This option also replaces linear-white-space between encoded-word
+  ** and *text with a single space to prevent the display of a MIME-encoded
+  ** ``Subject'' field from being divided into multiple lines.
+  */
   { "strict_threads",	DT_BOOL, R_RESORT|R_RESORT_INIT|R_INDEX, OPTSTRICTTHREADS, 0 },
   /*
   ** .pp
diff -uNr mutt-1.5.6.orig/mutt.h mutt-1.5.6/mutt.h
--- mutt-1.5.6.orig/mutt.h	Mon Feb  2 02:15:17 2004
+++ mutt-1.5.6/mutt.h	Sun Feb 15 15:06:19 2004
@@ -406,6 +406,7 @@
   OPTSIGONTOP,
   OPTSORTRE,
   OPTSTATUSONTOP,
+  OPTSTRICTMIME,
   OPTSTRICTTHREADS,
   OPTSUSPEND,
   OPTTEXTFLOWED,
@@ -599,6 +600,7 @@
 				 * If NULL, filename is used 
 				 * instead.
 				 */
+  char *file_charset;           /* charset of attached file */
   CONTENT *content;             /* structure used to store detailed info about
 				 * the content of the attachment.  this is used
 				 * to determine what content-transfer-encoding
diff -uNr mutt-1.5.6.orig/parse.c mutt-1.5.6/parse.c
--- mutt-1.5.6.orig/parse.c	Wed Nov  5 18:41:33 2003
+++ mutt-1.5.6/parse.c	Sun Feb 15 15:06:19 2004
@@ -208,9 +208,23 @@
 
       if (*s == '"')
       {
+        int state_ascii = 1;
 	s++;
-	for (i=0; *s && *s != '"' && i < sizeof (buffer) - 1; i++, s++)
+	for (i=0; *s && i < sizeof (buffer) - 1; i++, s++)
 	{
+	  if (!option (OPTSTRICTMIME)) {
+            /* As iso-2022-* has a character of '"' with non-ascii state,
+	     * ignore it. */
+            if (*s == 0x1b && i < sizeof (buffer) - 2)
+            {
+              if (s[1] == '(' && (s[2] == 'B' || s[2] == 'J'))
+                state_ascii = 1;
+              else
+                state_ascii = 0;
+            }
+          }
+          if (state_ascii && *s == '"')
+            break;
 	  if (*s == '\\')
 	  {
 	    /* Quote the next character */
@@ -379,7 +393,9 @@
   if (ct->type == TYPETEXT)
   {
     if (!(pc = mutt_get_parameter ("charset", ct->parameter)))
-      mutt_set_parameter ("charset", "us-ascii", &ct->parameter);
+      mutt_set_parameter ("charset", option (OPTSTRICTMIME) ? "us-ascii" :
+                         (const char *) mutt_get_first_charset (AssumedCharset),
+                         &ct->parameter);
   }
 
 }
diff -uNr mutt-1.5.6.orig/rfc2047.c mutt-1.5.6/rfc2047.c
--- mutt-1.5.6.orig/rfc2047.c	Wed Nov  5 18:41:33 2003
+++ mutt-1.5.6/rfc2047.c	Sun Feb 15 15:13:58 2004
@@ -706,13 +706,54 @@
   return 0;
 }
 
+/* Return the length of the linear white space (SP, HTAB, CR, LF) at the
+ * start of the n bytes at s.  The result is 0 when there is none, or when
+ * that run ends in CR or LF: LWS doesn't end with CRLF. */
+static size_t lwslen (const char *s, size_t n)
+{
+  size_t len = 0;
+
+  /* Count leading white space, never looking past the n bytes given. */
+  while (len < n && strchr (" \t\r\n", s[len]))
+    len++;
+
+  /* s[len - 1] is the last byte of the run.  It exists only when len > 0;
+   * checking len first avoids reading s[-1] when s starts with a
+   * non-blank byte (the answer is 0 in that case anyway). */
+  if (len && strchr ("\r\n", s[len - 1]))
+    len = 0;
+
+  return len;
+}
+
+/* Return the length of the linear white space (SP, HTAB, CR, LF) at the
+ * end of the n bytes at s.  The result is 0 when there is none, or when
+ * the last byte is CR or LF: LWS doesn't end with CRLF. */
+static size_t lwsrlen (const char *s, size_t n)
+{
+  size_t len = 0;
+
+  /* Test n before touching s[n - 1], so an empty range is never read. */
+  if (n == 0)
+    return 0;
+  if (strchr ("\r\n", s[n - 1]))
+    return 0;
+
+  /* Walk backwards by index rather than by pointer: a pointer loop of the
+   * form "p >= s; p--" steps below s on an all-blank range, which is
+   * undefined behaviour. */
+  while (len < n && strchr (" \t\r\n", s[n - 1 - len]))
+    len++;
+  return len;
+}
+
 /* try to decode anything that looks like a valid RFC2047 encoded
  * header field, ignoring RFC822 parsing rules
  */
 void rfc2047_decode (char **pd)
 {
   const char *p, *q;
-  size_t n;
+  size_t m, n;
   int found_encoded = 0;
   char *d0, *d;
   const char *s = *pd;
@@ -729,6 +770,37 @@
     if (!(p = find_encoded_word (s, &q)))
     {
       /* no encoded words */
+      if (!option (OPTSTRICTMIME))
+      {
+        n = mutt_strlen (s);
+        if (found_encoded && (m = lwslen (s, n)) != 0)
+        {
+          if (m != n)
+            *d = ' ', d++, dlen--;
+          n -= m, s += m;
+        }
+        if (ascii_strcasecmp (AssumedCharset, "us-ascii"))
+        {
+          char *t;
+          size_t tlen;
+
+          t = safe_malloc (n + 1);
+          strfcpy (t, s, n + 1);
+          if (mutt_convert_nonmime_string (&t) == 0)
+          {
+            tlen = mutt_strlen (t);
+            strncpy (d, t, tlen);
+            d += tlen;
+          }
+          else
+          {
+            strncpy (d, s, n);
+            d += n;
+          }
+          FREE (&t);
+          break;
+        }
+      }
       strncpy (d, s, dlen);
       d += dlen;
       break;
@@ -737,15 +809,37 @@
     if (p != s)
     {
       n = (size_t) (p - s);
-      /* ignore spaces between encoded words */
-      if (!found_encoded || strspn (s, " \t\r\n") != n)
+      /* ignore spaces between encoded words
+       * and linear white spaces between encoded word and *text */
+      if (!option (OPTSTRICTMIME))
       {
-	if (n > dlen)
-	  n = dlen;
-	memcpy (d, s, n);
-	d += n;
-	dlen -= n;
+        if (found_encoded && (m = lwslen (s, n)) != 0)
+        {
+          if (m != n)
+            *d = ' ', d++, dlen--;
+          n -= m, s += m;
+        }
+
+        if ((m = n - lwsrlen (s, n)) != 0)
+        {
+          if (m > dlen)
+            m = dlen;
+          memcpy (d, s, m);
+          d += m;
+          dlen -= m;
+          if (m != n)
+            *d = ' ', d++, dlen--;
+        }
       }
+      else if (!found_encoded || strspn (s, " \t\r\n") != n)
+      {
+        if (n > dlen)
+          n = dlen;
+        memcpy (d, s, n);
+        d += n;
+        dlen -= n;
+      }
+
     }
 
     rfc2047_decode_word (d, p, dlen);
@@ -766,7 +860,7 @@
 {
   while (a)
   {
-    if (a->personal && strstr (a->personal, "=?") != NULL)
+    if (a->personal)
       rfc2047_decode (&a->personal);
 #ifdef EXACT_ADDRESS
     if (a->val && strstr (a->val, "=?") != NULL)
diff -uNr mutt-1.5.6.orig/rfc2231.c mutt-1.5.6/rfc2231.c
--- mutt-1.5.6.orig/rfc2231.c	Wed Nov  5 18:41:33 2003
+++ mutt-1.5.6/rfc2231.c	Sun Feb 15 15:06:19 2004
@@ -113,6 +113,11 @@
 
       if (option (OPTRFC2047PARAMS) && p->value && strstr (p->value, "=?"))
 	rfc2047_decode (&p->value);
+      else if (!option (OPTSTRICTMIME))
+      {
+        if (ascii_strcasecmp (AssumedCharset, "us-ascii"))
+          mutt_convert_nonmime_string (&p->value);
+      }
 
       *last = p;
       last = &p->next;
diff -uNr mutt-1.5.6.orig/sendlib.c mutt-1.5.6/sendlib.c
--- mutt-1.5.6.orig/sendlib.c	Wed Nov  5 18:41:33 2003
+++ mutt-1.5.6/sendlib.c	Sun Feb 15 15:11:33 2004
@@ -496,7 +496,7 @@
   }
 
   if (a->type == TYPETEXT && (!a->noconv))
-    fc = fgetconv_open (fpin, Charset, 
+    fc = fgetconv_open (fpin, a->file_charset, 
 			mutt_get_body_charset (send_charset, sizeof (send_charset), a),
 			0);
   else
@@ -896,6 +896,7 @@
   CONTENT *info;
   CONTENT_STATE state;
   FILE *fp = NULL;
+  char *fromcode;
   char *tocode;
   char buffer[100];
   char chsbuf[STRING];
@@ -930,15 +931,18 @@
   if (b != NULL && b->type == TYPETEXT && (!b->noconv && !b->force_charset))
   {
     char *chs = mutt_get_parameter ("charset", b->parameter);
+    char *fchs = b->use_disp ? ((FileCharset && *FileCharset) ?
+                                FileCharset : Charset) : Charset;
     if (Charset && (chs || SendCharset) &&
-	convert_file_from_to (fp, Charset, chs ? chs : SendCharset,
-			      0, &tocode, info) != (size_t)(-1))
+        convert_file_from_to (fp, fchs, chs ? chs : SendCharset,
+                              &fromcode, &tocode, info) != (size_t)(-1))
     {
       if (!chs)
       {
 	mutt_canonical_charset (chsbuf, sizeof (chsbuf), tocode);
 	mutt_set_parameter ("charset", chsbuf, &b->parameter);
       }
+      b->file_charset = fromcode;
       FREE (&tocode);
       safe_fclose (&fp);
       return info;
@@ -1318,6 +1322,7 @@
   body->unlink = 1;
   body->use_disp = 0;
   body->disposition = DISPINLINE;
+  body->noconv = 1;
 
   mutt_parse_mime_message (ctx, hdr);
 
diff -uNr mutt-1.5.6.orig/PATCHES mutt-1.5.6/PATCHES
--- mutt-1.5.6.orig/PATCHES	Mon Feb  2 02:42:47 2004
+++ mutt-1.5.6/PATCHES	Sun Feb 15 15:14:55 2004
@@ -0,0 +1 @@
+patch-1.5.6.tt.assumed_charset.1