	Parsing MIME headers

	Parses the string [[buf]] into [[header]]. The fields
	[[subject]], [[title]], [[from]], [[newsgroups]],
	[[content_type]], [[lines]], [[reply_to]] and [[date]] will
	hold (a copy of) the contents of the corresponding header
	line. The keyword itself (`Subject:', `From:', etc.) is not
	copied.

	The field [[boundary]] will be [[NULL]], unless there was a
	Content-Type with a `boundary=' parameter.

	The field [[author]] will be set to the real name of the
	article's author, if available. Otherwise it will be a copy
	of the [[from]] field. E.g., when a header line is of the form
	[[From: John Doe <john@doe.xx>]] or [[From: john@doe.xx (John
	Doe)]] the author `John Doe' can be extracted.

	The parser also tries to (heuristically) determine if the
	article came via a mailing list. If so, it stores the name of
	the list in [[mailing_list]]; otherwise this field is [[NULL]].
	Give-aways for mailing lists are the lines
	[[X-Listprocessor-Version:]] and [[To: Multiple recipients]].

	The [[charset]] is extracted from the [[content_type]] field;
	the default is [[US-ASCII]] (=0).

	The default for the [[content_transfer_encoding]] header is
	[[MIME_7bit]].

	The [[base]] header line is not a normal header, but is
	inserted by the Argo agents to save the URL of the document
	itself. It conforms to the [[<BASE>]] element of HTML and most
	filters will use it to generate such a [[<BASE>]] element.

<<*>>=
#include <config.h>
#include <str.h>
#include "ISOcharset.e"
#include "MIMEenc.e"
#include "MIMEhead.e"

/*
 * store_field -- store a copy of value (leading blanks skipped) in *slot.
 *
 * A previous value in *slot is disposed of first, so a header line that
 * occurs more than once no longer leaks the earlier copy; the last
 * occurrence wins, as before. Relies on dispose() accepting NULL, which
 * the dispose() calls at the end of parse_header already require.
 */
static void store_field(char **slot, char *value)
{
    dispose(*slot);
    *slot = newstring(skip_spaces(value));
}

/*
 * copy_param -- return a fresh copy of the value of a `key=value'
 * parameter in field, or NULL if field is NULL or holds no such key.
 *
 * key must include the `=' and keylen is its length. A value in double
 * quotes is returned without the quotes; an unquoted value ends at the
 * first blank, tab or `;'. The caller owns the returned string.
 */
static char *copy_param(char *field, char *key, int keylen)
{
    char *start, *end, *value;

    if (!field || !(start = strstr(field, key))) return NULL;
    start += keylen;
    if (*start == '"') {
        start++;                                /* Skip opening quote */
        end = strchr(start, '"');
    } else {
        end = strpbrk(start, " \t;");
    }
    value = newstring(start);
    if (value && end) value[end - start] = '\0';
    return value;
}

/*
 * parse_header -- parse the header lines in buf into *header.
 *
 * buf is not modified; the function works on a private copy. *header is
 * cleared first, so every field that is not found stays NULL/0. Each
 * string stored in header->head[] is a separate copy made with
 * newstring().
 *
 * If there is no From: line, the Author field (and, for mailing lists
 * without a Sender: line, the Mailing_List field) is left NULL.
 */
EXPORT void parse_header(char *buf, MIME_header *header)
{
    char *p, *q, *charset, *encoding = NULL;
    Bool mailinglist = FALSE;

    assert(buf != NULL);
    assert(header != NULL);

    buf = newstring(buf);                       /* Local copy */
    memset(header, '\0', sizeof(*header));      /* Initialize */
    p = buf;
    while (p) {
        /*
         * Concatenate any continuation lines: a line break followed by
         * white space is turned into a blank. The cast keeps isspace()
         * defined for characters with the high bit set.
         */
        q = p;
        while ((q = strpbrk(q, "\r\n")) && isspace((unsigned char) q[1]))
            *(q++) = ' ';
        if (q) *(q++) = '\0';                   /* End the line */

        /*
         * Save some special headers
         */
        if (case_n_eq(p, "Subject:", 8))
            store_field(&header->head[Subject], p + 8);
        else if (case_n_eq(p, "Title:", 6))
            store_field(&header->head[Title], p + 6);
        else if (case_n_eq(p, "Expires:", 8))
            store_field(&header->head[Expires], p + 8);
        else if (case_n_eq(p, "From:", 5))
            store_field(&header->head[From], p + 5);
        else if (case_n_eq(p, "Base:", 5))
            store_field(&header->head[Base], p + 5);
        else if (case_n_eq(p, "Newsgroups:", 11))
            store_field(&header->head[Newsgroups], p + 11);
        else if (case_n_eq(p, "Content-Length:", 15))
            store_field(&header->head[Content_Length], p + 15);
        else if (case_n_eq(p, "Content-Type:", 13))
            store_field(&header->head[Content_Type], p + 13);
        else if (case_n_eq(p, "Reply-To:", 9))
            store_field(&header->head[Reply_To], p + 9);
        else if (case_n_eq(p, "References:", 11))
            store_field(&header->head[References], p + 11);
        else if (case_n_eq(p, "Date:", 5))
            store_field(&header->head[Date], p + 5);
        else if (case_n_eq(p, "Lines:", 6))
            store_field(&header->head[Lines], p + 6);
        else if (case_n_eq(p, "X-Listproc", 10))
            mailinglist = TRUE;
        else if (case_n_eq(p, "To: Multiple", 12))
            mailinglist = TRUE;
        else if (case_n_eq(p, "Sender:", 7))
            store_field(&header->head[Sender], p + 7);
        else if (case_n_eq(p, "Location:", 9))
            store_field(&header->head[Location], p + 9);
        else if (case_n_eq(p, "Content-Transfer-Encoding:", 26))
            store_field(&encoding, p + 26);
        else if (case_n_eq(p, "Content-Encoding:", 17))
            store_field(&encoding, p + 17);

        p = q;                                  /* Next line, or NULL */
    }

    /*
     * Trim the saved values. With CR-LF line ends the CR has been
     * turned into a blank by the loop above, so every saved field
     * (Expires included) needs this.
     */
    del_trailing_blanks(header->head[Subject]);
    del_trailing_blanks(header->head[Title]);
    del_trailing_blanks(header->head[Expires]);
    del_trailing_blanks(header->head[Sender]);
    del_trailing_blanks(header->head[From]);
    del_trailing_blanks(header->head[Base]);
    del_trailing_blanks(header->head[Newsgroups]);
    del_trailing_blanks(header->head[Content_Type]);
    del_trailing_blanks(header->head[Content_Length]);
    del_trailing_blanks(header->head[Reply_To]);
    del_trailing_blanks(header->head[References]);
    del_trailing_blanks(header->head[Date]);
    del_trailing_blanks(header->head[Lines]);
    del_trailing_blanks(header->head[Location]);
    del_trailing_blanks(encoding);

    /*
     * Is it a mailing list? Prefer the Sender as its name, fall back
     * to From.
     */
    if (mailinglist) {
        if (header->head[Sender])
            header->head[Mailing_List] = newstring(header->head[Sender]);
        else if (header->head[From])
            header->head[Mailing_List] = newstring(header->head[From]);
    }

    /*
     * Look for real (human) author: either the text in parentheses or
     * the text in front of the <address>. Without a From line there is
     * nothing to search, and Author stays NULL.
     */
    if (header->head[From]) {
        if ((p = strchr(header->head[From], '('))) {
            header->head[Author] = newstring(p + 1);
            if ((p = strchr(header->head[Author], ')'))) *p = '\0';
        } else {
            header->head[Author] = newstring(header->head[From]);
            if ((p = strchr(header->head[Author], '<'))) *p = '\0';
        }
        /* `John Doe <john@doe.xx>' would otherwise yield `John Doe ' */
        del_trailing_blanks(header->head[Author]);
    }

    /*
     * Look for charset; header->charset keeps its zero default if the
     * Content-Type has no charset parameter.
     */
    charset = copy_param(header->head[Content_Type], "charset=", 8);
    if (charset) header->charset = str2charset(charset);

    /*
     * Look for boundary if multipart message
     */
    header->head[Boundary] =
        copy_param(header->head[Content_Type], "boundary=", 9);

    /*
     * Look up the encoding
     */
    header->content_transfer_encoding = lookup_encoding(encoding);

    dispose(buf);
    dispose(charset);
    dispose(encoding);
}
