	Retrieving documents with HTTP

	This agent conforms to W3A/A. It uses some utility routines
	that are not shown.

	TO DO: set [[O_NONBLOCK]] before reading the MIME header.
	(Currently, if [[O_NONBLOCK]] is requested, it is only applied
	after the MIME header has been read. This can still cause
	blocking.)

	TO DO: handle content encodings.

	TO DO: fix handling of old HTTP servers; currently their first
	line is lost.

<<*>>=
#include <config.h>
#include <errno.h>				/* errno, EMFILE */
#include <fcntl.h>
#include <unistd.h>				/* close(), getuid(), gethostname() */
#include <sys/socket.h>
#include <netinet/in.h>
#include <netdb.h>
#include <pwd.h>				/* To find who we are */
#include <w3a.h>
#include <tcp.h>				/* connectTCP() */
#include <str.h>				/* String and heap functions */
#include <url.h>				/* URL parsing */
#include <mime.h>				/* Read/parse MIME headers */

/*
 * Table of open HTTP connections, indexed by socket descriptor. Each
 * used slot points to the document info gathered by openHTTP and the
 * stdio stream wrapped around the socket. The array has static
 * storage, so all slots start out as NULL.
 */
static struct {
    W3ADocumentInfo info;			/* Filled in by openHTTP, copied out by infoHTTP */
    FILE *f;					/* Stream used by readHTTP/writeHTTP/closeHTTP */
} *conn_info[FD_SETSIZE];
@

	[[send_HTTP_request]] sends a request to an HTTP server.
	Arguments are: [[f]] = the stream opened on the socket;
	[[uri]] = the parsed URL, whose path and search parts are
	requested; [[method]] = the HTTP method to use; [[referer]] =
	the URL of the document that contained the hyperlink.

	An auxiliary function [[send_HTRQ_headers]] sends the MIME
	headers for an HTTP request. The [[From:]] header should give
	the E-mail address of the user. The [[Accept:]] header gives a
	list of accepted file formats. There may be more than one
	[[Accept:]] header. [[User-Agent:]] and [[Referer:]] are also sent.

<<*>>=
#define HTTPVERSION "HTTP/1.0"			/* HTTP protocol version */

/*
 * Size of the buffer that receives the local host name. Some systems
 * already define MAXHOSTNAMELEN in their own headers, so only supply
 * a fallback value when it is missing.
 */
#ifndef MAXHOSTNAMELEN
#define MAXHOSTNAMELEN 256
#endif


/*
 * send_HTRQ_headers -- write the MIME headers of an HTTP request to f
 *
 * Writes, in this order:
 *   From:        user@host, only if the user name, the local host name
 *                and its host entry can all be looked up
 *   Accept:      one header per format reported by W3AbrowserInfo; a
 *                "q=" parameter is added unless the preference is 1.0
 *   User-Agent:  the version string reported by W3AbrowserInfo
 *   Referer:     only if referer is not NULL
 *
 * Every line ends in CR LF, written as explicit octal escapes so the
 * bytes on the wire do not depend on the execution character set.
 * The empty line that ends the header block is NOT written here.
 */
static void send_HTRQ_headers(FILE *f, const char *referer)
{
    struct passwd *pwent;			/* Info about user */
    struct hostent *phe;			/* Info about localhost */
    char host[MAXHOSTNAMELEN + 1];		/* Name of local machine */
    W3ABrowserInfo info;			/* Accepted formats */
    int i;

    /* gethostname() need not NUL-terminate a truncated name, so keep
     * one byte in reserve and terminate the buffer ourselves. */
    host[sizeof(host) - 1] = '\0';
    if ((pwent = getpwuid(getuid()))
	&& (gethostname(host, sizeof(host) - 1) == 0)
	&& (phe = gethostbyname(host)))
	fprintf(f, "From: %s@%s\015\012", pwent->pw_name, phe->h_name);

    W3AbrowserInfo(&info);
    for (i = 0; i < info.nformats; i++) {
	if (info.preferences[i] == 1.0)
	    fprintf(f, "Accept: %s\015\012", info.formats[i]);
	else
	    /* Quality values carry at most three decimals */
	    fprintf(f, "Accept: %s; q=%.3f\015\012", info.formats[i],
		    info.preferences[i]);
    }
    fprintf(f, "User-Agent: %s\015\012", info.version);
    if (referer)
	fprintf(f, "Referer: %s\015\012", referer);
}


/*
 * send_HTTP_request -- send a complete HTTP request over f
 *
 * f        stream connected to the server
 * uri      parsed URL; its path and, if present, its search part make
 *          up the request target ("path" or "path?search")
 * method   GET_METHOD, PUT_METHOD, POST_METHOD or HEAD_METHOD
 * referer  passed on to send_HTRQ_headers; may be NULL
 *
 * Writes the request line, the headers from send_HTRQ_headers and the
 * empty line that ends the headers, then flushes the stream so that
 * the caller can start reading the response.
 *
 * Returns TRUE on success. Returns FALSE with errno set to EMETHOD if
 * method is not one of the four known methods (nothing is written in
 * that case), and FALSE if flushing the stream fails.
 */
static Bool send_HTTP_request(FILE *f, URI uri, int method,
			      const char *referer)
{
    const char *meth;
    char *path, *search;

    /* Validate the method first, before any string is converted */
    switch (method) {
    case GET_METHOD: meth = "GET"; break;
    case PUT_METHOD: meth = "PUT"; break;
    case POST_METHOD: meth = "POST"; break;
    case HEAD_METHOD: meth = "HEAD"; break;
    default: errno = EMETHOD; return FALSE;	/* Illegal method */
    }

    /* NOTE(review): strip2str presumably returns freshly allocated
     * strings; nothing here releases them -- confirm the ownership
     * rules in str.h. */
    path = strip2str(uri.path);
    search = uri.search ? strip2str(uri.search) : NULL;

    fprintf(f, "%s %s%s%s %s\015\012", meth, path,
	    search ? "?" : "", search ? search : "", HTTPVERSION);
    send_HTRQ_headers(f, referer);
    fprintf(f, "\015\012");			/* End of headers */

    if (fflush(f) == EOF)			/* Make ready for read */
	return FALSE;				/* I/O error */
    return TRUE;
}
@

	The exported functions are: [[initHTTP]], [[openHTTP]],
	[[readHTTP]], [[writeHTTP]], [[infoHTTP]], [[closeHTTP]], and
	[[deleteHTTP]]. Deleting a document is not implemented yet.

<<*>>=
Bool initHTTP()
{
    /* Nothing to initialize */
}


/*
 * openHTTP -- connect to the server named in url, send the request and
 * read the status line and MIME header of the response
 *
 * url      URL of the document; must be accepted by URL_parse
 * method   request method, as understood by send_HTTP_request
 * flags    if O_NONBLOCK is set, the socket is switched to
 *          non-blocking mode, but only after the header has been read
 * referer  URL sent in the request and recorded for infoHTTP
 *
 * On success the information found in the header is recorded in
 * conn_info[] and the socket descriptor is returned; it identifies
 * the connection in the other ...HTTP functions.
 *
 * Returns -1 on failure. errno is EURL for a malformed URL, EMFILE if
 * the descriptor does not fit in conn_info[], and otherwise whatever
 * the failing call left behind. The socket is closed on every failure
 * after the connection was made.
 *
 * Known limitation: a server that does not answer with an "HTTP..."
 * status line is treated as an old server, but its first line is lost
 * and the header reader is still run on what follows.
 */
int openHTTP(const char *url, int method, int flags, const char *referer)
{
    URI uri;
    char *host, *port;
    int s, i, saved_errno;
    FILE *f;
    char buf[BUFSIZ];
    MIME_header header;

    if (! URL_parse(url, &uri)) {
	errno = EURL;				/* Bad URL syntax */
	return -1;
    }

    port = uri.port ? strip2str(uri.port) : "80";
    host = strip2str(uri.host);
    if ((s = connectTCP(host, port)) == -1)
	return -1;				/* Could not connect */

    if (s >= FD_SETSIZE) {			/* No slot in conn_info[] */
	close(s);
	errno = EMFILE;
	return -1;
    }
    if (! (f = fdopen(s, "r+"))) {
	saved_errno = errno;
	close(s);				/* Not owned by a stream yet */
	errno = saved_errno;
	return -1;				/* I/O error */
    }

    if (! send_HTTP_request(f, uri, method, referer))
	goto fail;				/* Illegal method or I/O error */

    if (! fgets(buf, sizeof(buf), f))		/* Read status line */
	goto fail;				/* I/O error */

    read_header(f, &header, NULL);

    if (flags & O_NONBLOCK) {
	/* Add O_NONBLOCK to the current flags instead of replacing them */
	int fl = fcntl(s, F_GETFL, 0);
	if (fl == -1 || fcntl(s, F_SETFL, fl | O_NONBLOCK) == -1)
	    goto fail;				/* I/O error */
    }

    /* Nothing can fail from here on, so record the connection now.
     * Each string is allocated exactly once. */
    new(conn_info[s]);
    conn_info[s]->f = f;
    conn_info[s]->info.url =
	newstring(header.head[Base] ? header.head[Base] : url);
    conn_info[s]->info.mime_type =
	newstring(header.head[Content_Type]
		  ? header.head[Content_Type] : "text/html");
    conn_info[s]->info.mime_params = NULL;
    /* Something with mime_params, charset?... */
    conn_info[s]->info.title =
	header.head[Title] ? newstring(header.head[Title]) : NULL;
    conn_info[s]->info.referer = newstring(referer);
    conn_info[s]->info.status = NULL;

    if (n_eq(buf, "HTTP", 4)) {			/* HTTP/1.0 or newer */
	/* Skip the version token and the blanks after it; stop at the
	 * terminating NUL so a malformed line cannot be overrun. */
	i = 4;
	while (buf[i] && !isspace((unsigned char) buf[i])) i++;
	while (isspace((unsigned char) buf[i])) i++;
	conn_info[s]->info.status = newstring(buf + i);
    }
    /* else: old server; sorry, we lose the first line... */

    return s;

fail:
    saved_errno = errno;
    fclose(f);					/* Also closes the socket */
    errno = saved_errno;
    return -1;
}


/*
 * readHTTP -- read up to nbytes bytes of the document body into buf
 *
 * fd must be a descriptor for which conn_info[] holds a connection.
 *
 * Returns the number of bytes read, 0 at end of file, or -1 if
 * nothing could be read because of an error (errno as left by the
 * read; with O_NONBLOCK this includes "no data available yet"). The
 * stream's error indicator is cleared in that case so that a later
 * call is not misreported.
 */
int readHTTP(int fd, char *buf, size_t nbytes)
{
    FILE *f;
    size_t got;

    assert(fd >= 0 && fd < FD_SETSIZE);
    assert(conn_info[fd]);

    f = conn_info[fd]->f;
    got = fread(buf, 1, nbytes, f);
    if (got == 0 && nbytes != 0 && ferror(f)) {
	int saved_errno = errno;
	clearerr(f);
	errno = saved_errno;
	return -1;				/* I/O error, not EOF */
    }
    return (int) got;
}


/*
 * writeHTTP -- write nbytes bytes from buf to the connection
 *
 * fd must be a descriptor for which conn_info[] holds a connection.
 * The data goes through the stdio stream and may stay buffered there.
 *
 * Returns the number of bytes accepted, or -1 if nothing could be
 * written although nbytes was not zero.
 */
int writeHTTP(int fd, const char *buf, size_t nbytes)
{
    size_t put;

    assert(fd >= 0 && fd < FD_SETSIZE);
    assert(conn_info[fd]);

    put = fwrite(buf, 1, nbytes, conn_info[fd]->f);
    if (put == 0 && nbytes != 0)
	return -1;				/* I/O error */
    return (int) put;
}


/*
 * infoHTTP -- copy the document info of connection fd into *buf
 *
 * fd must be a descriptor for which conn_info[] holds a connection;
 * this is asserted, as in readHTTP, writeHTTP and closeHTTP.
 *
 * Every field of *buf receives its own copy made with newstring, so
 * the result does not share storage with the connection table.
 * Always returns TRUE.
 */
Bool infoHTTP(int fd, W3ADocumentInfo *buf)
{
    assert(fd >= 0 && fd < FD_SETSIZE);
    assert(conn_info[fd]);

    buf->url = newstring(conn_info[fd]->info.url);
    buf->mime_type = newstring(conn_info[fd]->info.mime_type);
    buf->mime_params = newstring(conn_info[fd]->info.mime_params);
    buf->title = newstring(conn_info[fd]->info.title);
    buf->referer = newstring(conn_info[fd]->info.referer);
    buf->status = newstring(conn_info[fd]->info.status);
    return TRUE;
}


/*
 * closeHTTP -- close connection fd and drop its table entry
 *
 * Closes the stream (and with it the socket), releases the entry in
 * conn_info[] and resets the slot to NULL so that the assertions in
 * the other functions catch any later use of the stale descriptor.
 *
 * Returns TRUE if the stream was closed without error.
 *
 * NOTE(review): the strings stored in the entry's info field are not
 * released here; confirm whether they should be.
 */
Bool closeHTTP(int fd)
{
    int status;

    assert(conn_info[fd]);
    status = fclose(conn_info[fd]->f);
    dispose(conn_info[fd]);
    conn_info[fd] = NULL;			/* Harmless if dispose already did */
    return status != EOF;			/* fclose reports failure as EOF */
}


/*
 * deleteHTTP -- delete the document at url (not implemented)
 *
 * Always fails: sets errno to ENYI and returns FALSE. The argument is
 * not looked at.
 */
Bool deleteHTTP(const char *url)
{
    (void) url;					/* Unused until implemented */

    errno = ENYI;
    return FALSE;
}
