ksgi
/
child.c
2264 строки · 52.7 Кб
1/* $Id$ */
2/*
3* Copyright (c) 2012, 2014--2020 Kristaps Dzonsons <kristaps@bsd.lv>
4*
5* Permission to use, copy, modify, and distribute this software for any
6* purpose with or without fee is hereby granted, provided that the above
7* copyright notice and this permission notice appear in all copies.
8*
9* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16*/
17#include "config.h"18
19#include <arpa/inet.h>20
21#include <assert.h>22#include <ctype.h>23#include <errno.h>24#include <inttypes.h>25#include <limits.h>26#if HAVE_MD527# include <sys/types.h>28# include <md5.h>29#endif30#include <poll.h>31#include <stdarg.h>32#include <stdio.h>33#include <stddef.h>34#include <stdint.h>35#include <stdlib.h>36#include <string.h>37#include <unistd.h>38
39#include "kcgi.h"40#include "extern.h"41
/*
 * Convenience wrapper around MD5Update() that casts the data pointer
 * "_b" to the "const uint8_t *" the hashing routine is given here, so
 * that callers may pass plain character buffers.
 */
#define MD5Updatec(_ctx, _b, _sz) \
	MD5Update((_ctx), (const uint8_t *)(_b), (_sz))
/*
 * The kind of MIME header line last recognised by mime_parse().
 * Only the content-type and content-disposition kinds have their
 * parameters (sub-parts) examined; everything else is
 * MIMETYPE_UNKNOWN.
 */
enum	mimetype {
	MIMETYPE_UNKNOWN, /* header we do not care about */
	MIMETYPE_TRANSFER_ENCODING, /* content-transfer-encoding */
	MIMETYPE_DISPOSITION, /* content-disposition */
	MIMETYPE_TYPE /* content-type */
};
/*
 * For handling HTTP multipart forms.
 * This consists of data for a single multipart form entry.
 * The string members are heap-allocated copies owned by the structure
 * (set with mime_reset(), released with mime_free()).
 */
struct	mime {
	char	 *disp; /* content disposition */
	char	 *name; /* name of form entry */
	size_t	  namesz; /* size of "name" string */
	char	 *file; /* "filename" of the entry or NULL if not given */
	char	 *ctype; /* content type */
	size_t	  ctypepos; /* position of ctype in mimes */
	char	 *xcode; /* encoding type */
	char	 *bound; /* form entry boundary */
};
/*
 * Both CGI and FastCGI use an environment for their HTTP parameters.
 * CGI gets it from the actual environment; FastCGI from a transmitted
 * environment.
 * We use an abstract representation of those key-value pairs here so
 * that we can use the same functions for both.
 * Keys and values are handled as NUL-terminated strings (they are
 * compared with strcmp(3)), with their byte lengths kept alongside.
 */
struct	env {
	char	*key; /* key (e.g., HTTP_HOST) */
	size_t	 keysz; /* length of "key" in bytes */
	char	*val; /* value (e.g., `foo.com') */
	size_t	 valsz; /* length of "val" in bytes */
};
/*
 * Types of FastCGI requests.
 * Defined in the FastCGI v1.0 spec, section 8.
 * The numeric values are those of the specification, so they must not
 * be reordered.
 */
enum	fcgi_type {
	FCGI_BEGIN_REQUEST = 1,
	FCGI_ABORT_REQUEST = 2,
	FCGI_END_REQUEST = 3,
	FCGI_PARAMS = 4,
	FCGI_STDIN = 5,
	FCGI_STDOUT = 6,
	FCGI_STDERR = 7,
	FCGI_DATA = 8,
	FCGI_GET_VALUES = 9,
	FCGI_GET_VALUES_RESULT = 10,
	FCGI_UNKNOWN_TYPE = 11,
	FCGI__MAX /* one past the last valid type */
};
/*
 * The FastCGI `FCGI_Header' header layout.
 * Defined in the FastCGI v1.0 spec, section 8.
 */
struct	fcgi_hdr {
	uint8_t	 version; /* protocol version */
	uint8_t	 type; /* record type (see enum fcgi_type) */
	uint16_t requestId; /* request the record belongs to */
	uint16_t contentLength; /* bytes of content following */
	uint8_t	 paddingLength; /* bytes of padding after content */
	uint8_t	 reserved;
};
/*
 * The FastCGI `FCGI_BeginRequestBody' header layout.
 * Defined in the FastCGI v1.0 spec, section 8.
 */
struct	fcgi_bgn {
	uint16_t role; /* role requested of the application */
	uint8_t	 flags; /* request flags */
	uint8_t	 res[5]; /* reserved */
};
/*
 * A buffer of reads from kfcgi_control().
 */
struct	fcgi_buf {
	size_t	 sz; /* bytes in buffer */
	size_t	 pos; /* current position (from last read) */
	int	 fd; /* file descriptor */
	char	*buf; /* buffer itself */
};
133/*
134* Parameters required to validate fields.
135*/
136struct parms {137int fd;138const char *const *mimes;139size_t mimesz;140const struct kvalid *keys;141size_t keysz;142enum input type;143};144
145const char *const kmethods[KMETHOD__MAX] = {146"ACL", /* KMETHOD_ACL */147"CONNECT", /* KMETHOD_CONNECT */148"COPY", /* KMETHOD_COPY */149"DELETE", /* KMETHOD_DELETE */150"GET", /* KMETHOD_GET */151"HEAD", /* KMETHOD_HEAD */152"LOCK", /* KMETHOD_LOCK */153"MKCALENDAR", /* KMETHOD_MKCALENDAR */154"MKCOL", /* KMETHOD_MKCOL */155"MOVE", /* KMETHOD_MOVE */156"OPTIONS", /* KMETHOD_OPTIONS */157"POST", /* KMETHOD_POST */158"PROPFIND", /* KMETHOD_PROPFIND */159"PROPPATCH", /* KMETHOD_PROPPATCH */160"PUT", /* KMETHOD_PUT */161"REPORT", /* KMETHOD_REPORT */162"TRACE", /* KMETHOD_TRACE */163"UNLOCK", /* KMETHOD_UNLOCK */164};165
166static const char *const krequs[KREQU__MAX] = {167"HTTP_ACCEPT", /* KREQU_ACCEPT */168"HTTP_ACCEPT_CHARSET", /* KREQU_ACCEPT_CHARSET */169"HTTP_ACCEPT_ENCODING", /* KREQU_ACCEPT_ENCODING */170"HTTP_ACCEPT_LANGUAGE", /* KREQU_ACCEPT_LANGUAGE */171"HTTP_AUTHORIZATION", /* KREQU_AUTHORIZATION */172"HTTP_DEPTH", /* KREQU_DEPTH */173"HTTP_FROM", /* KREQU_FROM */174"HTTP_HOST", /* KREQU_HOST */175"HTTP_IF", /* KREQU_IF */176"HTTP_IF_MODIFIED_SINCE", /* KREQU_IF_MODIFIED_SINCE */177"HTTP_IF_MATCH", /* KREQU_IF_MATCH */178"HTTP_IF_NONE_MATCH", /* KREQU_IF_NONE_MATCH */179"HTTP_IF_RANGE", /* KREQU_IF_RANGE */180"HTTP_IF_UNMODIFIED_SINCE", /* KREQU_IF_UNMODIFIED_SINCE */181"HTTP_MAX_FORWARDS", /* KREQU_MAX_FORWARDS */182"HTTP_PROXY_AUTHORIZATION", /* KREQU_PROXY_AUTHORIZATION */183"HTTP_RANGE", /* KREQU_RANGE */184"HTTP_REFERER", /* KREQU_REFERER */185"HTTP_USER_AGENT", /* KREQU_USER_AGENT */186};187
188static const char *const kauths[KAUTH_UNKNOWN] = {189NULL, /* KAUTH_NONE */190"basic", /* KAUTH_BASIC */191"digest", /* KAUTH_DIGEST */192"bearer", /* KAUTH_BEARER */193};194
195/*
196* Parse the type/subtype field out of a content-type.
197* The content-type is defined (among other places) in RFC 822, and is
198* either the whole string or up until the ';', which marks the
199* beginning of the parameters.
200*/
201static size_t202str2ctype(const struct parms *pp, const char *ctype)203{
204size_t i, sz;205
206if (NULL == ctype)207return(pp->mimesz);208
209/* Stop at the content-type parameters. */210sz = strcspn(ctype, ";");211
212for (i = 0; i < pp->mimesz; i++)213if (sz == strlen(pp->mimes[i]) &&2140 == strncasecmp(pp->mimes[i], ctype, sz))215break;216
217return(i);218}
219
220/*
221* Given a parsed field "key" with value "val" of size "valsz" and MIME
222* information "mime", first try to look it up in the array of
223* recognised keys ("pp->keys") and optionally validate.
224* Then output the type, parse status (key, type, etc.), and values read
225* by the parent input() function.
226*/
227static void228output(const struct parms *pp, char *key,229char *val, size_t valsz, struct mime *mime)230{
231size_t i;232ptrdiff_t diff;233char *save;234struct kpair pair;235
236memset(&pair, 0, sizeof(struct kpair));237
238pair.key = key;239pair.val = save = val;240pair.valsz = valsz;241pair.file = NULL == mime ? NULL : mime->file;242pair.ctype = NULL == mime ? NULL : mime->ctype;243pair.xcode = NULL == mime ? NULL : mime->xcode;244pair.ctypepos = NULL == mime ? pp->mimesz : mime->ctypepos;245pair.type = KPAIR__MAX;246
247/*248* Look up the key name in our key table.
249* If we find it and it has a validator, then run the validator
250* and record the output.
251* If we fail, reset the type and clear the results.
252* Either way, the keypos parameter is going to be the key
253* identifier or keysz if none is found.
254*/
255
256for (i = 0; i < pp->keysz; i++) {257if (strcmp(pp->keys[i].name, pair.key))258continue;259if (NULL == pp->keys[i].valid)260break;261if ( ! pp->keys[i].valid(&pair)) {262pair.state = KPAIR_INVALID;263pair.type = KPAIR__MAX;264memset(&pair.parsed, 0, sizeof(union parsed));265} else266pair.state = KPAIR_VALID;267break;268}269pair.keypos = i;270
271fullwrite(pp->fd, &pp->type, sizeof(enum input));272fullwriteword(pp->fd, pair.key);273fullwrite(pp->fd, &pair.valsz, sizeof(size_t));274fullwrite(pp->fd, pair.val, pair.valsz);275fullwrite(pp->fd, &pair.state, sizeof(enum kpairstate));276fullwrite(pp->fd, &pair.type, sizeof(enum kpairtype));277fullwrite(pp->fd, &pair.keypos, sizeof(size_t));278
279if (KPAIR_VALID == pair.state)280switch (pair.type) {281case (KPAIR_DOUBLE):282fullwrite(pp->fd,283&pair.parsed.d, sizeof(double));284break;285case (KPAIR_INTEGER):286fullwrite(pp->fd,287&pair.parsed.i, sizeof(int64_t));288break;289case (KPAIR_STRING):290assert(pair.parsed.s >= pair.val);291assert(pair.parsed.s <= pair.val + pair.valsz);292diff = pair.val - pair.parsed.s;293fullwrite(pp->fd, &diff, sizeof(ptrdiff_t));294break;295default:296break;297}298
299fullwriteword(pp->fd, pair.file);300fullwriteword(pp->fd, pair.ctype);301fullwrite(pp->fd, &pair.ctypepos, sizeof(size_t));302fullwriteword(pp->fd, pair.xcode);303
304/*305* We can write a new "val" in the validator allocated on the
306* heap: if we do, free it here.
307*/
308
309if (save != pair.val)310free(pair.val);311}
312
313/*
314* Read full stdin request into memory.
315* This reads at most "len" bytes and NUL-terminates the results, the
316* length of which may be less than "len" and is stored in *szp if not
317* NULL.
318* Returns the pointer to the data.
319* NOTE: we can't use fullread() here because we may not get the total
320* number of bytes requested.
321* NOTE: "szp" can legit be set to zero.
322*/
323static char *324scanbuf(size_t len, size_t *szp)325{
326ssize_t ssz;327size_t sz;328char *p;329int rc;330struct pollfd pfd;331
332pfd.fd = STDIN_FILENO;333pfd.events = POLLIN;334
335/* Allocate the entire buffer here. */336
337if ((p = kxmalloc(len + 1)) == NULL)338_exit(EXIT_FAILURE);339
340/*341* Keep reading til we get all the data or the sender stops
342* giving us data---whichever comes first.
343* Use kutil_warn[x] and _exit to avoid flushing buffers.
344*/
345
346for (sz = 0; sz < len; sz += (size_t)ssz) {347if ((rc = poll(&pfd, 1, INFTIM)) < 0) {348kutil_warn(NULL, NULL, "poll");349_exit(EXIT_FAILURE);350} else if (0 == rc) {351kutil_warnx(NULL, NULL, "poll: timeout!?");352ssz = 0;353continue;354}355
356if (!(pfd.revents & POLLIN))357break;358
359if ((ssz = read(STDIN_FILENO, p + sz, len - sz)) < 0) {360kutil_warn(NULL, NULL, "read");361_exit(EXIT_FAILURE);362} else if (ssz == 0)363break;364}365
366if (sz < len)367kutil_warnx(NULL, NULL, "content size mismatch: "368"have %zu while %zu specified", sz, len);369
370/* ALWAYS NUL-terminate. */371
372p[sz] = '\0';373
374if (szp != NULL)375*szp = sz;376
377return p;378}
379
/*
 * Replace a MIME component with a copy of "src".
 * The same header may occur more than once, so whatever was stored
 * before is freed first.
 * Exits the process if the copy cannot be allocated.
 */
static void
mime_reset(char **dst, const char *src)
{

	free(*dst);
	*dst = kxstrdup(src);
	if (*dst == NULL)
		_exit(EXIT_FAILURE);
}
392
393/*
394* Free up all MIME headers.
395* We might call this more than once, so make sure that it can be
396* invoked again by setting the memory to zero.
397*/
398static void399mime_free(struct mime *mime)400{
401
402free(mime->disp);403free(mime->name);404free(mime->file);405free(mime->ctype);406free(mime->xcode);407free(mime->bound);408memset(mime, 0, sizeof(struct mime));409}
410
411/*
412* Parse out all MIME headers.
413* This is defined by RFC 2045.
414* This returns TRUE if we've parsed up to (and including) the last
415* empty CRLF line, or FALSE if something has gone wrong (e.g., parse
416* error, out of memory).
417* If FALSE, parsing should stop immediately.
418*/
419static int420mime_parse(const struct parms *pp, struct mime *mime,421char *buf, size_t len, size_t *pos)422{
423char *key, *val, *keyend, *end, *start, *line;424enum mimetype type;425int rc = 0;426
427mime_free(mime);428
429while (*pos < len) {430/* Each MIME line ends with a CRLF. */431
432start = &buf[*pos];433end = memmem(start, len - *pos, "\r\n", 2);434if (end == NULL) {435kutil_warnx(NULL, NULL, "RFC error: "436"MIME header line without CRLF");437return 0;438}439
440/*441* NUL-terminate to make a nice line.
442* Then re-set our starting position.
443*/
444
445*end = '\0';446*pos += (end - start) + 2;447
448/* Empty CRLF line: we're done here! */449
450if (*start == '\0') {451rc = 1;452break;453}454
455/*456* Find end of MIME statement name.
457* The RFCs disagree on white-space before the colon,
458* but as it's allowed in the original RFC 822 and
459* obsolete syntax should be supported, we do so here.
460*/
461
462key = start;463if ((val = strchr(key, ':')) == NULL) {464kutil_warnx(NULL, NULL, "RFC error: "465"MIME header without colon separator");466return 0;467} else if (key != val) {468keyend = val - 1;469while (keyend >= key && *keyend == ' ')470*keyend-- = '\0';471}472
473*val++ = '\0';474while (*val == ' ')475val++;476
477if (*key == '\0')478kutil_warnx(NULL, NULL, "RFC "479"warning: empty MIME header name");480
481/*482* Set "line" to be at the MIME value subpart, for
483* example, "Content-type: text/plain; charset=us-ascii"
484* would put us at the parts before "charset".
485*/
486
487line = NULL;488if ((line = strchr(val, ';')) != NULL)489*line++ = '\0';490
491/*492* Allow these specific MIME header statements.
493* We'll follow up by parsing specific information from
494* the header values, so remember what we parsed.
495*/
496
497if (strcasecmp(key, "content-transfer-encoding") == 0) {498mime_reset(&mime->xcode, val);499type = MIMETYPE_TRANSFER_ENCODING;500} else if (strcasecmp(key, "content-disposition") == 0) {501mime_reset(&mime->disp, val);502type = MIMETYPE_DISPOSITION;503} else if (strcasecmp(key, "content-type") == 0) {504mime_reset(&mime->ctype, val);505type = MIMETYPE_TYPE;506} else507type = MIMETYPE_UNKNOWN;508
509/*510* Process subpart only for content-type and
511* content-disposition.
512* The rest have no information we want: silently ignore them.
513*/
514
515if (type != MIMETYPE_TYPE &&516type != MIMETYPE_DISPOSITION)517continue;518
519while ((key = line) != NULL) {520while (*key == ' ')521key++;522if (*key == '\0')523break;524
525/*526* It's not clear whether we're allowed to have
527* OWS before the separator, but allow for it
528* anyway.
529*/
530
531if ((val = strchr(key, '=')) == NULL) {532kutil_warnx(NULL, NULL, "RFC error: "533"MIME header without sub-part "534"separator");535return 0;536} else if (key != val) {537keyend = val - 1;538while (keyend >= key && *keyend == ' ')539*keyend-- = '\0';540}541
542*val++ = '\0';543
544if (*key == '\0')545kutil_warnx(NULL, NULL, "RFC warning: "546"empty MIME sub-part name");547
548/* Quoted string. */549
550if (*val == '"') {551val++;552line = strchr(val, '"');553if (line == NULL) {554kutil_warnx(NULL, NULL, "RFC "555"error: quoted MIME "556"header sub-part not "557"terminated");558return 0;559}560*line++ = '\0';561
562/*563* It's unclear as to whether this is
564* allowed (white-space before the
565* semicolon separator), but let's
566* accommodate for it anyway.
567*/
568
569while (*line == ' ')570line++;571if (*line == ';')572line++;573} else if ((line = strchr(val, ';')) != NULL)574*line++ = '\0';575
576/* White-listed sub-commands. */577
578if (type == MIMETYPE_DISPOSITION) {579if (strcasecmp(key, "filename") == 0)580mime_reset(&mime->file, val);581else if (strcasecmp(key, "name") == 0)582mime_reset(&mime->name, val);583} else if (type == MIMETYPE_TYPE) {584if (strcasecmp(key, "boundary") == 0)585mime_reset(&mime->bound, val);586}587}588}589
590mime->ctypepos = str2ctype(pp, mime->ctype);591
592if (!rc)593kutil_warnx(NULL, NULL, "RFC error: unexpected "594"end of file while parsing MIME headers");595
596return rc;597}
598
/*
 * Parse keys and values separated by newlines (text/plain forms).
 * I'm not aware of any standard that defines this, but the W3
 * guidelines for HTML give a rough idea.
 * Each CRLF-terminated line is a "key=value" pair; the string "p" is
 * modified in place and may be NULL.
 * A line without '=' is skipped with a warning and never affects the
 * lines that follow it.
 * FIXME: deprecate this.
 */
static void
parse_pairs_text(const struct parms *pp, char *p)
{
	char	*key, *val;

	kutil_warnx(NULL, NULL, "RFC warning: "
		"text/plain encoding is deprecated");

	while (p != NULL && *p != '\0') {
		while (*p == ' ')
			p++;

		/*
		 * Cut off the current line before looking for the
		 * separator, so that a line lacking one can't borrow
		 * the '=' of a later line.
		 */

		key = p;
		if ((p = strstr(key, "\r\n")) != NULL) {
			*p = '\0';
			p += 2;
		}

		/*
		 * Key/value pair.
		 * No value is a warning (not processed).
		 */

		if ((val = strchr(key, '=')) == NULL) {
			kutil_warnx(NULL, NULL, "RFC warning: "
				"key with no value");
			continue;
		}
		*val++ = '\0';

		if (*key == '\0')
			kutil_warnx(NULL, NULL, "RFC warning: "
				"zero-length key");
		else
			output(pp, key, val, strlen(val), NULL);
	}
}
648
649/*
650* Parse an HTTP message that has a given content-type.
651* This happens with, e.g., PUT requests.
652* We fake up a "name" for this (it's not really a key-value pair) of an
653* empty string, then pass that to the validator and forwarder.
654*/
655static void656parse_body(const char *ct, const struct parms *pp, char *b, size_t bsz)657{
658char name;659struct mime mime;660
661memset(&mime, 0, sizeof(struct mime));662
663if ((mime.ctype = kxstrdup(ct)) == NULL)664_exit(EXIT_FAILURE);665mime.ctypepos = str2ctype(pp, mime.ctype);666
667name = '\0';668output(pp, &name, b, bsz, &mime);669free(mime.ctype);670}
671
/*
 * Parse out key-value pairs from an HTTP cookie.
 * These are not URL encoded (at this phase): they're just simple
 * key-values "crumbs" with opaque values, separated by semicolons.
 * This is defined by RFC 6265, however, we don't [yet] do the
 * quoted-string implementation, nor do we check for accepted
 * characters so long as the delimiters aren't used.
 * The string "p" is modified in place and may be NULL.
 * A crumb without '=' is skipped with an error message and never
 * affects the crumbs that follow it.
 */
static void
parse_pairs(const struct parms *pp, char *p)
{
	char	*key, *val;

	while (p != NULL && *p != '\0') {
		while (*p == ' ')
			p++;

		/*
		 * Cut off the current crumb before looking for the
		 * separator, so that a crumb lacking one can't borrow
		 * the '=' of a later crumb.
		 */

		key = p;
		if ((p = strchr(key, ';')) != NULL)
			*p++ = '\0';

		/*
		 * Don't allow key-pair without a value.
		 * Keys shouldn't be zero-length.
		 */

		if ((val = strchr(key, '=')) == NULL) {
			kutil_warnx(NULL, NULL, "RFC error: "
				"cookie key pair without value");
			continue;
		}
		*val++ = '\0';

		/* This is sort-of allowed. */

		if (*key == '\0')
			kutil_warnx(NULL, NULL, "RFC warning: "
				"cookie zero-length key");
		else
			output(pp, key, val, strlen(val), NULL);
	}
}
718
719/*
720* Parse out key-value pairs from an HTTP request variable.
721* This is either a POST or GET string.
722* This MUST be a non-binary (i.e., NUL-terminated) string!
723*/
724static void725parse_pairs_urlenc(const struct parms *pp, char *p)726{
727char *key, *val;728
729assert(p != NULL);730
731while (*p != '\0') {732while (*p == ' ')733p++;734
735key = p;736
737/*738* Look ahead to either '=' or one of the key-value
739* terminators (or the end of the string).
740* If we have the equal sign, then we're a key-value
741* pair; otherwise, we're a standalone key value.
742*/
743
744p += strcspn(p, "=;&");745
746if (*p == '=') {747*p++ = '\0';748val = p;749p += strcspn(p, ";&");750} else751val = p;752
753if (*p != '\0')754*p++ = '\0';755
756/*757* Both the key and the value can be URL encoded, so
758* decode those into the character string now.
759* If decoding fails, don't decode the given pair, but
760* instead move on to the next one after logging the
761* failure.
762*/
763
764if (*key == '\0')765kutil_warnx(NULL, NULL, "RFC warning: "766"zero-length URL-encoded key");767else if (khttp_urldecode_inplace(key) == KCGI_FORM)768kutil_warnx(NULL, NULL, "RFC warning: "769"malformed key URL-encoding");770else if (khttp_urldecode_inplace(val) == KCGI_FORM)771kutil_warnx(NULL, NULL, "RFC warning: "772"malformed value URL-encoding");773else774output(pp, key, val, strlen(val), NULL);775}776}
777
778/*
779* This is described by the "multipart-body" BNF part of RFC 2046,
780* section 5.1.1.
781* We return TRUE if the parse was ok, FALSE if errors occurred (all
782* calling parsers should bail too).
783*/
784static int785parse_multiform(const struct parms *pp, char *name,786const char *bound, char *buf, size_t len, size_t *pos)787{
788struct mime mime;789size_t endpos, bbsz, partsz;790char *ln, *bb;791int rc, first;792
793/* Define our buffer boundary. */794
795if ((rc = kxasprintf(&bb, "\r\n--%s", bound)) == -1)796_exit(EXIT_FAILURE);797
798assert(rc > 0);799bbsz = rc;800rc = 0;801
802memset(&mime, 0, sizeof(struct mime));803
804/* Read to the next instance of a buffer boundary. */805
806for (first = 1; *pos < len; first = 0, *pos = endpos) {807/*808* The (first ? 2 : 0) is because the first prologue
809* boundary will not incur an initial CRLF, so our bb is
810* past the CRLF and two bytes smaller.
811*/
812
813ln = memmem(&buf[*pos], len - *pos,814bb + (first ? 2 : 0),815bbsz - (first ? 2 : 0));816
817if (ln == NULL) {818kutil_warnx(NULL, NULL, "RFC error: "819"EOF when scanning for boundary");820goto out;821}822
823/*824* Set "endpos" to point to the beginning of the next
825* multipart component, i.e, the end of the boundary
826* "bb" string.
827* Again, be respectful of whether we should scan after
828* the lack of initial CRLF.
829*/
830
831endpos = *pos + (ln - &buf[*pos]) +832bbsz - (first ? 2 : 0);833
834/* Check buffer space. */835
836if (endpos > len - 2) {837kutil_warnx(NULL, NULL, "RFC error: multipart "838"section writes into trailing CRLF");839goto out;840}841
842/*843* Terminating boundary has an initial trailing "--".
844* If not terminating, must be followed by a CRLF.
845* If terminating, RFC 1341 says we can ignore whatever
846* comes after the last boundary.
847*/
848
849if (memcmp(&buf[endpos], "--", 2)) {850while (endpos < len && buf[endpos] == ' ')851endpos++;852if (endpos > len - 2 ||853memcmp(&buf[endpos], "\r\n", 2)) {854kutil_warnx(NULL, NULL, "RFC error: "855"multipart boundary without "856"CRLF");857goto out;858}859endpos += 2;860} else861endpos = len;862
863/* First section: jump directly to reprocess. */864
865if (first)866continue;867
868/*869* Zero-length part.
870* This shouldn't occur, but if it does, it'll screw up
871* the MIME parsing (which requires a blank CRLF before
872* considering itself finished).
873*/
874
875if ((partsz = ln - &buf[*pos]) == 0) {876kutil_warnx(NULL, NULL, "RFC error: "877"zero-length multipart section");878continue;879}880
881/* We now read our MIME headers, bailing on error. */882
883if (!mime_parse(pp, &mime, buf, *pos + partsz, pos)) {884kutil_warnx(NULL, NULL, "RFC error: "885"nested error parsing MIME headers");886goto out;887}888
889/*890* As per RFC 2388, we need a name and disposition.
891* Note that multipart/mixed bodies will inherit the
892* name of their parent, so the mime.name is ignored.
893*/
894
895if (mime.name == NULL && name == NULL) {896kutil_warnx(NULL, NULL,897"RFC error: no MIME name");898continue;899} else if (mime.disp == NULL) {900kutil_warnx(NULL, NULL,901"RFC error: no MIME disposition");902continue;903}904
905/*906* As per RFC 2045, we default to text/plain.
907* We then re-lookup the ctypepos after doing so.
908*/
909
910if (mime.ctype == NULL) {911mime.ctype = kxstrdup("text/plain");912if (mime.ctype == NULL)913_exit(EXIT_FAILURE);914mime.ctypepos = str2ctype(pp, mime.ctype);915}916
917partsz = ln - &buf[*pos];918
919/*920* Multipart sub-handler.
921* We only recognise the multipart/mixed handler.
922* This will route into our own function, inheriting the
923* current name for content.
924*/
925
926if (strcasecmp(mime.ctype, "multipart/mixed") == 0) {927if (mime.bound == NULL) {928kutil_warnx(NULL, NULL, "RFC error: "929"no mixed multipart boundary");930goto out;931}932if (!parse_multiform(pp,933name != NULL ? name : mime.name,934mime.bound, buf, *pos + partsz, pos)) {935kutil_warnx(NULL, NULL, "RFC error: "936"nested error parsing mixed "937"multipart section");938goto out;939}940continue;941}942
943assert(buf[*pos + partsz] == '\r' ||944buf[*pos + partsz] == '\0');945
946if (buf[*pos + partsz] != '\0')947buf[*pos + partsz] = '\0';948
949/* Assign all of our key-value pair data. */950
951output(pp, name != NULL ? name : mime.name,952&buf[*pos], partsz, &mime);953}954
955/*956* According to the specification, we can have transport
957* padding, a CRLF, then the epilogue.
958* But since we don't care about that crap, just pretend that
959* everything's fine and exit.
960*/
961
962rc = 1;963out:964free(bb);965mime_free(&mime);966return rc;967}
968
/*
 * Parse the boundary from a multipart CONTENT_TYPE and pass it to the
 * actual parsing engine.
 * Here "line" is what follows the type/subtype of the content-type,
 * e.g., "; boundary=xxx", and is modified in place; "b" is the body of
 * "bsz" bytes.
 * The "boundary" parameter name is matched without regard to case, as
 * with all MIME parameter names.
 * This doesn't actually handle any part of the MIME specification.
 */
static void
parse_multi(const struct parms *pp, char *line, char *b, size_t bsz)
{
	char	*cp;
	size_t	 len = 0;

	while (*line == ' ')
		line++;

	if (*line++ != ';') {
		kutil_warnx(NULL, NULL, "RFC error: expected "
			"semicolon following multipart declaration");
		return;
	}

	while (*line == ' ')
		line++;

	/* We absolutely need the boundary marker. */

	if (strncasecmp(line, "boundary", 8)) {
		kutil_warnx(NULL, NULL, "RFC error: expected "
			"boundary following multipart declaration");
		return;
	}

	line += 8;

	while (*line == ' ')
		line++;

	if (*line++ != '=') {
		kutil_warnx(NULL, NULL, "RFC error: expected "
			"key-value for multipart boundary");
		return;
	}

	while (*line == ' ')
		line++;

	/*
	 * Make sure the line is terminated in the right place.
	 * A quoted boundary runs to its closing quote.
	 * XXX: for an unquoted one, what we do may not properly follow
	 * RFC 2046, 5.1.1, which specifically lays out the boundary
	 * characters.
	 * We simply stop at the first space or at the semicolon that
	 * would begin another parameter.
	 */

	if (*line == '"') {
		if ((cp = strchr(++line, '"')) == NULL) {
			kutil_warnx(NULL, NULL, "RFC error: "
				"unterminated boundary quoted string");
			return;
		}
		*cp = '\0';
	} else
		line[strcspn(line, " ;")] = '\0';

	/*
	 * If we have data following the boundary declaration, we simply
	 * ignore it.
	 * The RFC mandates the existence of the boundary, but is silent
	 * as to whether anything can come after it.
	 */

	parse_multiform(pp, NULL, line, b, bsz, &len);
}
1041
1042/*
1043* Output all of the HTTP_xxx headers.
1044* This transforms the HTTP_xxx header (CGI form) into HTTP form, which
1045* is the second part title-cased, e.g., HTTP_FOO = Foo.
1046* Disallow zero-length values as per RFC 3875, 4.1.18.
1047*/
1048static void1049kworker_child_env(const struct env *env, int fd, size_t envsz)1050{
1051size_t i, j, sz, reqs;1052int first;1053enum krequ requ;1054char c;1055const char *cp;1056
1057for (reqs = i = 0; i < envsz; i++)1058if (strncmp(env[i].key, "HTTP_", 5) == 0 &&1059env[i].key[5] != '\0')1060reqs++;1061
1062fullwrite(fd, &reqs, sizeof(size_t));1063
1064/*1065* Process known headers (starting with HTTP_).
1066* We must have non-zero-length keys.
1067*/
1068
1069for (i = 0; i < envsz; i++) {1070if (strncmp(env[i].key, "HTTP_", 5) ||1071env[i].key[5] == '\0')1072continue;1073
1074for (requ = 0; requ < KREQU__MAX; requ++)1075if (strcmp(krequs[requ], env[i].key) == 0)1076break;1077
1078fullwrite(fd, &requ, sizeof(enum krequ));1079
1080/*1081* According to RFC 3875, 4.1.18, HTTP headers are
1082* re-written into CGI environment variables by
1083* uppercasing and converting dashes to underscores.
1084* In this part, we [try to] reverse that so that the
1085* headers are properly identified.
1086* (We also skip the HTTP_ leading part.)
1087*/
1088
1089sz = env[i].keysz - 5;1090cp = env[i].key + 5;1091fullwrite(fd, &sz, sizeof(size_t));1092
1093for (j = 0, first = 1; j < sz; j++) {1094if (cp[j] == '_') {1095c = '-';1096first = 1;1097} else if (first) {1098c = cp[j];1099first = 0;1100} else1101c = tolower((unsigned char)cp[j]);1102
1103fullwrite(fd, &c, 1);1104}1105
1106fullwrite(fd, &env[i].valsz, sizeof(size_t));1107fullwrite(fd, env[i].val, env[i].valsz);1108}1109}
1110
1111/*
1112* Like getenv() but for our env structure.
1113*/
1114static char *1115kworker_env(struct env *env, size_t envsz, const char *key)1116{
1117size_t i;1118
1119for (i = 0; i < envsz; i++)1120if (strcmp(env[i].key, key) == 0)1121return env[i].val;1122return NULL;1123}
1124
1125/*
1126* Output the method found in our environment.
1127* Returns the method.
1128* Defaults to KMETHOD_GET, uses KETHOD__MAX if the method was bad.
1129*/
1130static enum kmethod1131kworker_child_method(struct env *env, int fd, size_t envsz)1132{
1133enum kmethod meth;1134const char *cp;1135
1136/* RFC 3875, 4.1.12. */1137/* We assume GET if not supplied. */1138
1139meth = KMETHOD_GET;1140if ((cp = kworker_env(env, envsz, "REQUEST_METHOD")) != NULL)1141for (meth = 0; meth < KMETHOD__MAX; meth++)1142if (strcmp(kmethods[meth], cp) == 0)1143break;1144
1145fullwrite(fd, &meth, sizeof(enum kmethod));1146return meth;1147}
1148
1149/*
1150* Output the web server's authentication.
1151* Defaults to KAUTH_NONE.
1152*/
1153static void1154kworker_child_auth(struct env *env, int fd, size_t envsz)1155{
1156enum kauth auth = KAUTH_NONE;1157const char *cp;1158
1159/* Determine authentication: RFC 3875, 4.1.1. */1160
1161if ((cp = kworker_env(env, envsz, "AUTH_TYPE")) != NULL)1162for (auth = 0; auth < KAUTH_UNKNOWN; auth++) {1163if (kauths[auth] == NULL)1164continue;1165if (strcmp(kauths[auth], cp) == 0)1166break;1167}1168
1169fullwrite(fd, &auth, sizeof(enum kauth));1170}
1171
/*
 * Send the raw (i.e., un-webserver-filtered) authorisation to the
 * parent: this is the HTTP_AUTHORIZATION variable, which may not be
 * set at all, handed to kworker_auth_child().
 * Most web servers will `handle this for us'. Ugh.
 * Returns whatever kworker_auth_child() returns.
 */
static int
kworker_child_rawauth(struct env *env, int fd, size_t envsz)
{
	char	*auth;

	auth = kworker_env(env, envsz, "HTTP_AUTHORIZATION");
	return kworker_auth_child(fd, auth);
}
1184
1185/*
1186* Send our HTTP scheme (secure or not) to the parent.
1187*/
1188static void1189kworker_child_scheme(struct env *env, int fd, size_t envsz)1190{
1191const char *cp;1192enum kscheme scheme;1193
1194/*1195* This isn't defined in any RFC.
1196* It seems to be the best way of getting whether we're HTTPS,
1197* as the SERVER_PROTOCOL (RFC 3875, 4.1.16) doesn't reliably
1198* return the scheme.
1199*/
1200
1201if ((cp = kworker_env(env, envsz, "HTTPS")) == NULL)1202cp = "off";1203
1204scheme = strcasecmp(cp, "on") == 0 ?1205KSCHEME_HTTPS : KSCHEME_HTTP;1206fullwrite(fd, &scheme, sizeof(enum kscheme));1207}
1208
/*
 * Send remote address (REMOTE_ADDR) to the parent.
 * This is required by RFC 3875, 4.1.8.
 * If it's not set, which violates the protocol, warn and send
 * 127.0.0.1 instead.
 */
static void
kworker_child_remote(struct env *env, int fd, size_t envsz)
{
	const char	*addr;

	addr = kworker_env(env, envsz, "REMOTE_ADDR");

	if (addr == NULL) {
		kutil_warnx(NULL, NULL, "RFC warning: "
			"remote address not set");
		addr = "127.0.0.1";
	}

	fullwriteword(fd, addr);
}
1227
/*
 * Parse and send the port (SERVER_PORT) to the parent.
 * This is required by RFC 3875, 4.1.15.
 * Warn and use port 80 if it's not provided or is not a number
 * between zero and UINT16_MAX.
 */
static void
kworker_child_port(struct env *env, int fd, size_t envsz)
{
	const char	*cp, *er;
	uint16_t	 port = 80;

	cp = kworker_env(env, envsz, "SERVER_PORT");

	if (cp == NULL) {
		kutil_warnx(NULL, NULL, "RFC warning: "
			"server port not set");
	} else {
		port = strtonum(cp, 0, UINT16_MAX, &er);
		if (er != NULL) {
			kutil_warnx(NULL, NULL, "RFC warning: "
				"invalid server port value");
			port = 80;
		}
	}

	fullwrite(fd, &port, sizeof(uint16_t));
}
1252
/*
 * Send requested host (HTTP_HOST) to the parent.
 * This is required by RFC 7230, 5.4.
 * If it's not provided, warn and send "localhost" instead.
 */
static void
kworker_child_httphost(struct env *env, int fd, size_t envsz)
{
	const char	*host;

	host = kworker_env(env, envsz, "HTTP_HOST");

	if (host == NULL) {
		kutil_warnx(NULL, NULL, "RFC warning: host not set");
		host = "localhost";
	}

	fullwriteword(fd, host);
}
1270
/*
 * Send script name (SCRIPT_NAME) to the parent.
 * This is required by RFC 3875, 4.1.13.
 * If it's not set, warn and send the empty string instead.
 */
static void
kworker_child_scriptname(struct env *env, int fd, size_t envsz)
{
	const char	*script;

	script = kworker_env(env, envsz, "SCRIPT_NAME");

	if (script == NULL) {
		kutil_warnx(NULL, NULL, "RFC warning: "
			"script name not set");
		script = "";
	}

	fullwriteword(fd, script);
}
1289
/*
 * Parse all path information (PATH_INFO) and send it to the parent.
 * Four items are written: the full path as given, then the file
 * suffix, the first path component, and the rest of the path.
 * For example, "/foo/bar.html" gives the suffix "html", the component
 * "foo", and the remainder "bar".
 * The last three are written as zero sizes if the path is not set or
 * is empty once its leading slash has been removed.
 * The environment's value is modified in place while being split.
 */
static void
kworker_child_path(struct env *env, int fd, size_t envsz)
{
	char	*path, *dot, *sub;
	size_t	 zero = 0;

	path = kworker_env(env, envsz, "PATH_INFO");
	fullwriteword(fd, path);

	/* Splitting starts after the leading slash, if any. */

	if (path != NULL && *path == '/')
		path++;

	if (path == NULL || *path == '\0') {
		/* Suffix, base path, and path part. */

		fullwrite(fd, &zero, sizeof(size_t));
		fullwrite(fd, &zero, sizeof(size_t));
		fullwrite(fd, &zero, sizeof(size_t));
		return;
	}

	/*
	 * Scan backward for the suffix: it's what follows the last
	 * period, provided no slash comes after that period.
	 */

	dot = path + strlen(path) - 1;
	while (dot > path && *dot != '/' && *dot != '.')
		dot--;

	/* Start with writing our suffix. */

	if (*dot == '.') {
		*dot++ = '\0';
		fullwriteword(fd, dot);
	} else
		fullwriteword(fd, NULL);

	/* Now cut the top-most path part off from the rest. */

	sub = strchr(path, '/');
	if (sub != NULL)
		*sub++ = '\0';

	/* Send the base path, then the path part (possibly NULL). */

	fullwriteword(fd, path);
	fullwriteword(fd, sub);
}
1348
1349/*
1350* Construct the body hash component of an HTTP digest hash.
1351* See khttpdigest_validatehash(3) for where this is used.
1352* See RFC 2617.
1353* We only do this if our authorisation requires it!
1354*/
1355static void1356kworker_child_bodymd5(int fd, const char *b, size_t bsz, int md5)1357{
1358MD5_CTX ctx;1359unsigned char hab[MD5_DIGEST_LENGTH];1360size_t sz;1361
1362if (!md5) {1363sz = 0;1364fullwrite(fd, &sz, sizeof(size_t));1365return;1366}1367
1368MD5Init(&ctx);1369MD5Updatec(&ctx, b, bsz);1370MD5Final(hab, &ctx);1371
1372/* This is a binary write! */1373
1374sz = MD5_DIGEST_LENGTH;1375fullwrite(fd, &sz, sizeof(size_t));1376fullwrite(fd, hab, sz);1377}
1378
1379/*
1380* Parse and send the body of the request to the parent.
1381* This is arguably the most complex part of the system.
1382*/
1383static void1384kworker_child_body(struct env *env, int fd, size_t envsz,1385struct parms *pp, enum kmethod meth, char *b,1386size_t bsz, unsigned int debugging, int md5)1387{
1388size_t i, len = 0, sz;1389char *cp, *bp = b;1390const char *end;1391int wrap;1392
1393/*1394* The CONTENT_LENGTH must be a valid integer.
1395* Since we're storing into "len", make sure it's in size_t.
1396* If there's an error, it will default to zero.
1397* Note that LLONG_MAX < SIZE_MAX.
1398* RFC 3875, 4.1.2.
1399*/
1400
1401if ((cp = kworker_env(env, envsz, "CONTENT_LENGTH")) != NULL)1402len = strtonum(cp, 0, LLONG_MAX, NULL);1403
1404/* If zero, remember to print our MD5 value. */1405
1406if (len == 0) {1407kworker_child_bodymd5(fd, "", 0, md5);1408return;1409}1410
1411/* Check FastCGI input lengths. */1412
1413if (bp != NULL && bsz != len)1414kutil_warnx(NULL, NULL, "RFC warning: real (%zu) "1415"and reported (%zu) content lengths differ",1416bsz, len);1417
1418/*1419* If a CONTENT_TYPE has been specified (i.e., POST or GET has
1420* been set -- we don't care which), then switch on that type
1421* for parsing out key value pairs.
1422* RFC 3875, 4.1.3.
1423* HTML5, 4.10.
1424* We only support the main three content types.
1425*/
1426
1427pp->type = IN_FORM;1428cp = kworker_env(env, envsz, "CONTENT_TYPE");1429
1430/*1431* If we're CGI, read the request now.
1432* Note that the "bsz" can come out as zero.
1433*/
1434
1435if (b == NULL)1436b = scanbuf(len, &bsz);1437
1438assert(b != NULL);1439
1440/* If requested, print our MD5 value. */1441
1442kworker_child_bodymd5(fd, b, bsz, md5);1443
1444/*1445* If we're debugging read bodies, emit the body line by line
1446* (or split at the 80-character mark).
1447*/
1448
1449if (bsz && (debugging & KREQ_DEBUG_READ_BODY)) {1450i = 0;1451do {1452if ((end = memchr(&b[i], '\n', bsz - i)) == NULL)1453sz = bsz - i;1454else1455sz = (size_t)(end - &b[i]);1456if ((wrap = sz > 80))1457sz = 80;1458kutil_info(NULL, NULL, "%lu-rx: %.*s%s",1459(unsigned long)getpid(), (int)sz,1460&b[i], wrap ? "..." : "");1461
1462i += wrap ? sz : sz + 1;1463} while (i < bsz);1464kutil_info(NULL, NULL, "%lu-rx: %zu B",1465(unsigned long)getpid(), bsz);1466}1467
1468if (cp != NULL) {1469if (strcasecmp(cp, "application/x-www-form-urlencoded") == 0)1470parse_pairs_urlenc(pp, b);1471else if (strncasecmp(cp, "multipart/form-data", 19) == 0)1472parse_multi(pp, cp + 19, b, bsz);1473else if (meth == KMETHOD_POST && strcasecmp(cp, "text/plain") == 0)1474parse_pairs_text(pp, b);1475else1476parse_body(cp, pp, b, bsz);1477} else1478parse_body(kmimetypes[KMIME_APP_OCTET_STREAM], pp, b, bsz);1479
1480/* Free CGI parsed buffer (FastCGI is done elsewhere). */1481
1482if (bp == NULL)1483free(b);1484}
1485
1486/*
1487* Send query string data to parent.
1488* Even POST requests are allowed to have QUERY_STRING elements.
1489* Note: both QUERY_STRING and CONTENT_TYPE fields share the same field
1490* space.
1491*/
1492static void1493kworker_child_query(struct env *env,1494int fd, size_t envsz, struct parms *pp)1495{
1496char *cp;1497
1498pp->type = IN_QUERY;1499if (NULL != (cp = kworker_env(env, envsz, "QUERY_STRING")))1500parse_pairs_urlenc(pp, cp);1501}
1502
1503/*
1504* Send cookies to our parent.
1505* These use the same syntax as QUERY_STRING elements, but don't share
1506* the same namespace (just as a means to differentiate the same names).
1507*/
1508static void1509kworker_child_cookies(struct env *env,1510int fd, size_t envsz, struct parms *pp)1511{
1512char *cp;1513
1514pp->type = IN_COOKIE;1515if ((cp = kworker_env(env, envsz, "HTTP_COOKIE")) != NULL)1516parse_pairs(pp, cp);1517}
1518
1519/*
1520* Terminate the input fields for the parent.
1521*/
1522static void1523kworker_child_last(int fd)1524{
1525enum input last = IN__MAX;1526
1527fullwrite(fd, &last, sizeof(enum input));1528}
1529
1530/*
1531* This is the child kcgi process that's going to do the unsafe reading
1532* of network data to parse input.
1533* When it parses a field, it outputs the key, key size, value, and
1534* value size along with the field type.
1535* We use the CGI specification in RFC 3875.
1536*/
1537enum kcgi_err1538kworker_child(int wfd,1539const struct kvalid *keys, size_t keysz,1540const char *const *mimes, size_t mimesz,1541unsigned int debugging)1542{
1543struct parms pp;1544char *cp;1545const char *start;1546char **evp;1547int md5;1548enum kmethod meth;1549size_t i;1550extern char **environ;1551struct env *envs = NULL;1552size_t envsz;1553
1554pp.fd = wfd;1555pp.keys = keys;1556pp.keysz = keysz;1557pp.mimes = mimes;1558pp.mimesz = mimesz;1559
1560/*1561* Pull the entire environment into an array.
1562*/
1563for (envsz = 0, evp = environ; NULL != *evp; evp++)1564envsz++;1565
1566if (envsz) {1567envs = kxcalloc(envsz, sizeof(struct env));1568if (envs == NULL)1569return KCGI_ENOMEM;1570}1571
1572/*1573* Pull all reasonable values from the environment into "envs".
1574* Filter out variables that don't meet RFC 3875, section 4.1.
1575* However, we're a bit more relaxed: we don't let through
1576* zero-length, non-ASCII, control characters, and whitespace.
1577*/
1578
1579for (i = 0, evp = environ; *evp != NULL; evp++) {1580if ((cp = strchr(*evp, '=')) == NULL || cp == *evp)1581continue;1582for (start = *evp; *start != '='; start++)1583if (!isascii((unsigned char)*start) ||1584!isgraph((unsigned char)*start))1585break;1586
1587/*1588* This means something is seriously wrong, so make sure
1589* that the operator knows.
1590*/
1591
1592if (*start != '=') {1593kutil_warnx(NULL, NULL, "RFC warning: "1594"bad character in environment pair");1595continue;1596}1597
1598assert(i < envsz);1599
1600if ((envs[i].key = kxstrdup(*evp)) == NULL)1601_exit(EXIT_FAILURE);1602envs[i].val = strchr(envs[i].key, '=');1603*envs[i].val++ = '\0';1604envs[i].keysz = strlen(envs[i].key);1605envs[i].valsz = strlen(envs[i].val);1606i++;1607}1608
1609/* Reset this, accounting for crappy entries. */1610
1611envsz = i;1612
1613/*1614* Now run a series of transmissions based upon what's in our
1615* environment.
1616*/
1617
1618kworker_child_env(envs, wfd, envsz);1619meth = kworker_child_method(envs, wfd, envsz);1620kworker_child_auth(envs, wfd, envsz);1621md5 = kworker_child_rawauth(envs, wfd, envsz);1622kworker_child_scheme(envs, wfd, envsz);1623kworker_child_remote(envs, wfd, envsz);1624kworker_child_path(envs, wfd, envsz);1625kworker_child_scriptname(envs, wfd, envsz);1626kworker_child_httphost(envs, wfd, envsz);1627kworker_child_port(envs, wfd, envsz);1628
1629/* And now the message body itself. */1630
1631kworker_child_body(envs, wfd, envsz,1632&pp, meth, NULL, 0, debugging, md5);1633kworker_child_query(envs, wfd, envsz, &pp);1634kworker_child_cookies(envs, wfd, envsz, &pp);1635kworker_child_last(wfd);1636
1637/* Note: the "val" is from within the key. */1638
1639for (i = 0; i < envsz; i++)1640free(envs[i].key);1641free(envs);1642return KCGI_OK;1643}
1644
1645/*
1646* Reads from the FastCGI control process, kfcgi_control(), are buffered
1647* according to what the control process can read from the web server.
1648* Here we read ahead til we have enough data for what currently needs
1649* to be read.
1650* Returns a pointer to the data of size "sz" or NULL if errors occured.
1651* If error is KCGI_OK, this *always* returns a buffer.
1652* The error is reported in "er".
1653*/
1654static char *1655kworker_fcgi_read(struct fcgi_buf *b, size_t nsz, enum kcgi_err *er)1656{
1657void *pp;1658int rc;1659size_t sz;1660
1661again:1662*er = KCGI_OK;1663if (b->pos + nsz <= b->sz) {1664b->pos += nsz;1665return &b->buf[b->pos - nsz];1666}1667
1668/* Fill up the next frame. */1669
1670rc = fullread(b->fd, &sz, sizeof(size_t), 0, er);1671if (rc <= 0) {1672kutil_warnx(NULL, NULL, "FastCGI: "1673"error reading frame size from control");1674return NULL;1675} else if (sz == 0) {1676kutil_warnx(NULL, NULL, "FastCGI: connection "1677"closed while reading frame size");1678*er = KCGI_HUP;1679return NULL;1680}1681
1682if ((pp = kxrealloc(b->buf, b->sz + sz)) == NULL) {1683*er = KCGI_ENOMEM;1684return NULL;1685}1686
1687b->buf = pp;1688rc = fullread(b->fd, b->buf + b->sz, sz, 0, er);1689if (rc <= 0) {1690kutil_warnx(NULL, NULL, "FastCGI: error "1691"reading frame data from control");1692return NULL;1693}1694
1695b->sz += sz;1696goto again;1697}
1698
1699
1700/*
1701* Read the FastCGI header (see section 8, Types and Contents,
1702* FCGI_Header, in the FastCGI Specification v1.0).
1703* Return KCGI_OK on success, KCGI_HUP on connection close, KCGI_FORM
1704* with FastCGI protocol errors, and a fatal error otherwise.
1705*/
1706static enum kcgi_err1707kworker_fcgi_header(struct fcgi_buf *b, struct fcgi_hdr *hdr)1708{
1709enum kcgi_err er;1710const char *cp;1711struct fcgi_hdr buf;1712
1713if ((cp = kworker_fcgi_read(b, 8, &er)) == NULL)1714return er;1715
1716memcpy(&buf, cp, 8);1717
1718/* Translate from network-byte order. */1719
1720hdr->version = buf.version;1721hdr->type = buf.type;1722hdr->requestId = ntohs(buf.requestId);1723hdr->contentLength = ntohs(buf.contentLength);1724hdr->paddingLength = buf.paddingLength;1725
1726if (hdr->version == 1)1727return KCGI_OK;1728
1729kutil_warnx(NULL, NULL, "FastCGI: bad header "1730"version: %" PRIu8 " (want 1)", hdr->version);1731return KCGI_FORM;1732}
1733
1734/*
1735* Read in the entire header and data for the begin sequence request.
1736* This is defined in section 5.1 of the v1.0 specification.
1737* Return KCGI_OK on success, KCGI_HUP on connection close, KCGI_FORM
1738* with FastCGI protocol errors, and a fatal error otherwise.
1739*/
1740static enum kcgi_err1741kworker_fcgi_begin(struct fcgi_buf *b, uint16_t *rid)1742{
1743struct fcgi_hdr hdr;1744const struct fcgi_bgn *ptr;1745const char *buf;1746enum kcgi_err er;1747
1748/* Read the header entry. */1749
1750if ((er = kworker_fcgi_header(b, &hdr)) != KCGI_OK)1751return er;1752
1753*rid = hdr.requestId;1754
1755if (hdr.type != FCGI_BEGIN_REQUEST) {1756kutil_warnx(NULL, NULL, "FastCGI: bad type: %" PRIu81757" (want %d)", hdr.type, FCGI_BEGIN_REQUEST);1758return KCGI_FORM;1759}1760
1761/* Read the "begin" content and discard padding. */1762
1763buf = kworker_fcgi_read(b,1764hdr.contentLength +1765hdr.paddingLength, &er);1766
1767ptr = (const struct fcgi_bgn *)buf;1768
1769if (ptr->flags) {1770kutil_warnx(NULL, NULL, "FastCGI: bad flags: %" PRId81771" (want 0)", ptr->flags);1772return KCGI_FORM;1773}1774
1775return KCGI_OK;1776}
1777
1778/*
1779* Read in a data stream as defined within section 5.3 of the v1.0
1780* specification.
1781* We might have multiple stdin buffers for the same data, so always
1782* append to the existing NUL-terminated buffer.
1783* Return KCGI_OK on success, KCGI_HUP on connection close, KCGI_FORM
1784* with FastCGI protocol errors, and a fatal error otherwise.
1785*/
1786static enum kcgi_err1787kworker_fcgi_stdin(struct fcgi_buf *b, const struct fcgi_hdr *hdr,1788unsigned char **sbp, size_t *ssz)1789{
1790enum kcgi_err er;1791void *ptr;1792char *bp;1793
1794/* Read the "begin" content and discard padding. */1795
1796bp = kworker_fcgi_read(b,1797hdr->contentLength +1798hdr->paddingLength, &er);1799
1800/*1801* Short-circuit: no data to read.
1802* The caller should detect this and stop reading from the
1803* FastCGI connection.
1804*/
1805
1806if (hdr->contentLength == 0)1807return KCGI_OK;1808
1809/*1810* Use another buffer for the stdin.
1811* This is because our buffer (b->buf) consists of FastCGI
1812* frames (data interspersed with control information).
1813* Obviously, we want to extract our data from that.
1814* Make sure it's NUL-terminated!
1815* FIXME: check for addition overflow.
1816*/
1817
1818ptr = kxrealloc(*sbp, *ssz + hdr->contentLength + 1);1819if (ptr == NULL)1820return KCGI_ENOMEM;1821
1822*sbp = ptr;1823memcpy(*sbp + *ssz, bp, hdr->contentLength);1824(*sbp)[*ssz + hdr->contentLength] = '\0';1825*ssz += hdr->contentLength;1826return KCGI_OK;1827}
1828
1829/*
1830* Read out a series of parameters contained within a FastCGI parameter
1831* request defined in section 5.2 of the v1.0 specification.
1832* Return KCGI_OK on success, KCGI_HUP on connection close, KCGI_FORM
1833* with FastCGI protocol errors, and a fatal error otherwise.
1834*/
1835static enum kcgi_err1836kworker_fcgi_params(struct fcgi_buf *buf, const struct fcgi_hdr *hdr,1837struct env **envs, size_t *envsz)1838{
1839size_t i, remain, pos, keysz, valsz;1840const unsigned char *b;1841enum kcgi_err er;1842void *ptr;1843
1844b = (unsigned char *)kworker_fcgi_read1845(buf, hdr->contentLength +1846hdr->paddingLength, &er);1847
1848if (b == NULL)1849return er;1850
1851/*1852* Loop through the string data that's laid out as a key length
1853* then value length, then key, then value.
1854* There can be arbitrarily many key-values per string.
1855*/
1856
1857remain = hdr->contentLength;1858pos = 0;1859
1860while (remain > 0) {1861/* First read the lengths. */1862assert(pos < hdr->contentLength);1863if (0 != b[pos] >> 7) {1864if (remain <= 3) {1865kutil_warnx(NULL, NULL,1866"FastCGI: bad parameter data");1867return 0;1868}1869keysz = ((b[pos] & 0x7f) << 24) +1870(b[pos + 1] << 16) +1871(b[pos + 2] << 8) + b[pos + 3];1872pos += 4;1873remain -= 4;1874} else {1875keysz = b[pos];1876pos++;1877remain--;1878}1879
1880if (remain < 1) {1881kutil_warnx(NULL, NULL,1882"FastCGI: bad parameter data");1883return KCGI_FORM;1884}1885
1886assert(pos < hdr->contentLength);1887if (0 != b[pos] >> 7) {1888if (remain <= 3) {1889kutil_warnx(NULL, NULL,1890"FastCGI: bad parameter data");1891return KCGI_FORM;1892}1893valsz = ((b[pos] & 0x7f) << 24) +1894(b[pos + 1] << 16) +1895(b[pos + 2] << 8) + b[pos + 3];1896pos += 4;1897remain -= 4;1898} else {1899valsz = b[pos];1900pos++;1901remain--;1902}1903
1904/* Make sure we have room for data. */1905
1906if (pos + keysz + valsz > hdr->contentLength) {1907kutil_warnx(NULL, NULL,1908"FastCGI: bad parameter data");1909return KCGI_FORM;1910}1911
1912remain -= keysz;1913remain -= valsz;1914
1915/*1916* First, make sure that the key is valid.
1917* There's no documented precedent for this, so we
1918* follow CGI's constraints in RFC 3875, sec. 4.1.
1919* If it's not valid, just skip it.
1920*/
1921
1922for (i = 0; i < keysz; i++)1923if (!isascii((unsigned char)b[pos + i]) ||1924!isgraph((unsigned char)b[pos + i]))1925break;1926
1927if (keysz == 0) {1928kutil_warnx(NULL, NULL, "FastCGI warning: "1929"empty environment parameter");1930pos += valsz;1931continue;1932} else if (i < keysz) {1933kutil_warnx(NULL, NULL, "RFC warning: bad "1934"character in environment parameters");1935pos += keysz + valsz;1936continue;1937}1938
1939/* Look up the key in our existing keys. */1940
1941for (i = 0; i < *envsz; i++) {1942if ((*envs)[i].keysz != keysz)1943continue;1944if (memcmp((*envs)[i].key, &b[pos], keysz) == 0)1945break;1946}1947
1948/*1949* If we don't have the key: expand our table.
1950* If we do, clear the current value.
1951*/
1952
1953if (i == *envsz) {1954ptr = kxreallocarray1955(*envs, *envsz + 1,1956sizeof(struct env));1957if (ptr == NULL)1958return KCGI_ENOMEM;1959
1960*envs = ptr;1961(*envs)[i].key = kxmalloc(keysz + 1);1962if ((*envs)[i].key == NULL)1963return KCGI_ENOMEM;1964
1965memcpy((*envs)[i].key, &b[pos], keysz);1966(*envs)[i].key[keysz] = '\0';1967(*envs)[i].keysz = keysz;1968(*envsz)++;1969} else1970free((*envs)[i].val);1971
1972pos += keysz;1973
1974/* Copy the value. */1975
1976(*envs)[i].val = kxmalloc(valsz + 1);1977if ((*envs)[i].val == NULL)1978return KCGI_ENOMEM;1979
1980memcpy((*envs)[i].val, &b[pos], valsz);1981(*envs)[i].val[valsz] = '\0';1982(*envs)[i].valsz = valsz;1983
1984pos += valsz;1985}1986
1987return KCGI_OK;1988}
1989
1990/*
1991* This is executed by the untrusted child for FastCGI setups.
1992* Throughout, we follow the FastCGI specification, version 1.0, 29
1993* April 1996.
1994*/
1995void
1996kworker_fcgi_child(int wfd, int work_ctl,1997const struct kvalid *keys, size_t keysz,1998const char *const *mimes, size_t mimesz,1999unsigned int debugging)2000{
2001struct parms pp;2002struct fcgi_hdr hdr;2003enum kcgi_err er;2004unsigned char *sbuf = NULL;2005struct env *envs = NULL;2006uint16_t rid;2007uint32_t cookie = 0;2008size_t i, ssz = 0, sz, envsz = 0;2009int rc, md5;2010enum kmethod meth;2011struct fcgi_buf fbuf;2012
2013memset(&fbuf, 0, sizeof(struct fcgi_buf));2014
2015pp.fd = wfd;2016pp.keys = keys;2017pp.keysz = keysz;2018pp.mimes = mimes;2019pp.mimesz = mimesz;2020
2021/*2022* Loop over all incoming sequences to this particular slave.
2023* Sequences must consist of a single FastCGI session as defined
2024* in the FastCGI version 1.0 reference document.
2025*
2026* If the connection closes out at any point, we receive a
2027* zero-length read from the control socket.
2028* The response to this should be to write an zero error code
2029* back to the control socket, then keep on listening.
2030* Otherwise, if we've read the full message, write a non-zero
2031* error code, then our identifier and cookie, then the rest
2032* goes directly to the parse routines in kworker_parent().
2033*/
2034
2035for (;;) {2036free(sbuf);2037for (i = 0; i < envsz; i++) {2038free(envs[i].key);2039free(envs[i].val);2040}2041free(envs);2042free(fbuf.buf);2043
2044sbuf = NULL;2045ssz = 0;2046envs = NULL;2047envsz = 0;2048cookie = 0;2049memset(&fbuf, 0, sizeof(struct fcgi_buf));2050fbuf.fd = work_ctl;2051
2052/*2053* Begin by reading our magic cookie.
2054* This is emitted by kfcgi_control() at the start of
2055* our sequence.
2056* When we've finished reading data with success, we'll
2057* respond with this value.
2058*/
2059
2060rc = fullread(fbuf.fd,2061&cookie, sizeof(uint32_t), 1, &er);2062if (rc < 0) {2063kutil_warnx(NULL, NULL, "FastCGI: "2064"error reading worker cookie");2065break;2066} else if (rc == 0) {2067kutil_warnx(NULL, NULL, "FastCGI: "2068"worker process termination");2069break;2070}2071
2072/* Now start the FastCGI sequence. */2073
2074er = kworker_fcgi_begin(&fbuf, &rid);2075if (er == KCGI_HUP) {2076kutil_warnx(NULL, NULL, "FastCGI: "2077"connection severed at start");2078/* Note: writing error code... */2079rc = 0;2080fullwrite(work_ctl, &rc, sizeof(int));2081continue;2082} else if (er != KCGI_OK) {2083kutil_warnx(NULL, NULL, "FastCGI: "2084"unrecoverable error at start");2085break;2086}2087
2088/*2089* Now read one or more parameters.
2090* We read them all at once, then do the parsing later
2091* after we've read all of our data.
2092* We read parameters til we no longer have the
2093* FCGI_PARAMS type on the current header.
2094*/
2095
2096er = KCGI_OK;2097envsz = 0;2098memset(&hdr, 0, sizeof(struct fcgi_hdr));2099
2100while (er == KCGI_OK) {2101er = kworker_fcgi_header(&fbuf, &hdr);2102if (er != KCGI_OK)2103break;2104if (rid != hdr.requestId) {2105kutil_warnx(NULL, NULL,2106"FastCGI: wrong request ID");2107er = KCGI_FORM;2108break;2109}2110if (hdr.type != FCGI_PARAMS)2111break;2112er = kworker_fcgi_params2113(&fbuf, &hdr, &envs, &envsz);2114}2115
2116if (er == KCGI_HUP) {2117kutil_warnx(NULL, NULL, "FastCGI: "2118"connection severed at parameters");2119/* Note: writing error code... */2120rc = 0;2121fullwrite(work_ctl, &rc, sizeof(int));2122continue;2123} else if (er != KCGI_OK) {2124kutil_warnx(NULL, NULL, "FastCGI: "2125"unrecoverable error at parameters");2126break;2127} else if (hdr.type != FCGI_STDIN) {2128kutil_warnx(NULL, NULL, "FastCGI: "2129"bad header type");2130er = KCGI_FORM;2131break;2132} else if (rid != hdr.requestId) {2133kutil_warnx(NULL, NULL, "FastCGI: "2134"wrong request ID");2135er = KCGI_FORM;2136break;2137}2138
2139/*2140* Lastly, we want to process the stdin content.
2141* These will end with a single zero-length record.
2142* Keep looping til we've flushed all input.
2143*/
2144
2145for (;;) {2146/*2147* Call this even if we have a zero-length data
2148* payload as specified by contentLength.
2149* This is because there might be padding, and
2150* we want to make sure we've drawn everything
2151* from the socket before exiting.
2152*/
2153
2154er = kworker_fcgi_stdin2155(&fbuf, &hdr, &sbuf, &ssz);2156if (er != KCGI_OK || hdr.contentLength == 0)2157break;2158
2159/* Now read the next header. */2160
2161er = kworker_fcgi_header(&fbuf, &hdr);2162if (er != KCGI_OK)2163break;2164if (rid != hdr.requestId) {2165kutil_warnx(NULL, NULL, "FastCGI: "2166"wrong FastCGI request ID");2167er = KCGI_FORM;2168break;2169}2170
2171if (hdr.type == FCGI_STDIN)2172continue;2173kutil_warnx(NULL, NULL,2174"FastCGI: bad header type");2175er = KCGI_FORM;2176break;2177}2178
2179if (er == KCGI_HUP) {2180kutil_warnx(NULL, NULL, "FastCGI: "2181"connection severed at stdin");2182/* Note: writing error code. */2183rc = 0;2184fullwrite(work_ctl, &rc, sizeof(int));2185continue;2186} else if (er != KCGI_OK) {2187kutil_warnx(NULL, NULL, "FastCGI: "2188"unrecoverable error at stdin");2189break;2190}2191
2192/*2193* Notify the control process that we've received all of
2194* our data by giving back the cookie and requestId.
2195* FIXME: merge cookie and rc.
2196*/
2197
2198rc = 1;2199fullwrite(work_ctl, &rc, sizeof(int));2200fullwrite(work_ctl, &cookie, sizeof(uint32_t));2201fullwrite(work_ctl, &rid, sizeof(uint16_t));2202
2203/*2204* Read our last zero-length frame.
2205* This is because kfcgi_control() always ends with an
2206* empty frame, regardless of whether we're in an error
2207* or not.
2208* So if we're this far, we've read the full request,
2209* and we should have an empty frame.
2210*/
2211
2212rc = fullread(fbuf.fd, &sz, sizeof(size_t), 0, &er);2213if (rc <= 0) {2214kutil_warnx(NULL, NULL,2215"FastCGI: error reading trailer");2216break;2217} else if (sz != 0) {2218kutil_warnx(NULL, NULL,2219"FastCGI: trailer not zero-length");2220er = KCGI_FORM;2221break;2222}2223
2224/*2225* Now we can reply to our request.
2226* See kworker_parent().
2227* These are in a very specific order.
2228*/
2229
2230kworker_child_env(envs, wfd, envsz);2231meth = kworker_child_method(envs, wfd, envsz);2232kworker_child_auth(envs, wfd, envsz);2233md5 = kworker_child_rawauth(envs, wfd, envsz);2234kworker_child_scheme(envs, wfd, envsz);2235kworker_child_remote(envs, wfd, envsz);2236kworker_child_path(envs, wfd, envsz);2237kworker_child_scriptname(envs, wfd, envsz);2238kworker_child_httphost(envs, wfd, envsz);2239kworker_child_port(envs, wfd, envsz);2240
2241/*2242* And now the message body itself.
2243* We must either have a NULL message or non-zero
2244* length.
2245*/
2246
2247assert(ssz == 0 || sbuf != NULL);2248kworker_child_body(envs, wfd, envsz, &pp,2249meth, (char *)sbuf, ssz, debugging, md5);2250kworker_child_query(envs, wfd, envsz, &pp);2251kworker_child_cookies(envs, wfd, envsz, &pp);2252kworker_child_last(wfd);2253}2254
2255/* The same as what we do at the loop start. */2256
2257free(sbuf);2258for (i = 0; i < envsz; i++) {2259free(envs[i].key);2260free(envs[i].val);2261}2262free(envs);2263free(fbuf.buf);2264}
2265