efl
2215 строк · 61.2 Кб
1/* Based on src/http/ngx_http_parse.c from NGINX copyright Igor Sysoev
2*
3* Additional changes are licensed under the same terms as NGINX and
4* copyright Joyent, Inc. and other Node contributors. All rights reserved.
5*
6* Permission is hereby granted, free of charge, to any person obtaining a copy
7* of this software and associated documentation files (the "Software"), to
8* deal in the Software without restriction, including without limitation the
9* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10* sell copies of the Software, and to permit persons to whom the Software is
11* furnished to do so, subject to the following conditions:
12*
13* The above copyright notice and this permission notice shall be included in
14* all copies or substantial portions of the Software.
15*
16* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22* IN THE SOFTWARE.
23*/
24#include "http_parser.h"25#include <assert.h>26#include <stddef.h>27#include <ctype.h>28#include <stdlib.h>29#include <string.h>30#include <limits.h>31
/* Fallback used only when ULLONG_MAX has not been defined already. */
#ifndef ULLONG_MAX
# define ULLONG_MAX               ((uint64_t) -1) /* 2^64-1 */
#endif

/* Smaller of the two arguments (an argument may be evaluated twice). */
#ifndef MIN
# define MIN(a,b)                 ((a) < (b) ? (a) : (b))
#endif

/* Element count of a true array (not of a pointer). */
#ifndef ARRAY_SIZE
# define ARRAY_SIZE(a)            (sizeof(a) / sizeof((a)[0]))
#endif

/* 1 if bit number `i` is set in the byte array `a`, 0 otherwise.
 * Byte i/8 is selected, then bit i%8 within it (least significant first).
 */
#ifndef BIT_AT
# define BIT_AT(a, i)                                                \
  (!!((unsigned int) (a)[(unsigned int) (i) >> 3] &                  \
   (1 << ((unsigned int) (i) & 7))))
#endif

/* a[i] when `i` is inside the array, otherwise the fallback value `v`. */
#ifndef ELEM_AT
# define ELEM_AT(a, i, v)         ((unsigned int) (i) < ARRAY_SIZE(a) ? (a)[(i)] : (v))
#endif

/* Store error code `e` in the `parser` variable of the enclosing scope. */
#define SET_ERRNO(e)                                                 \
do {                                                                 \
  parser->http_errno = (e);                                          \
} while(0)

59
/* Invoke the notification callback on_<FOR> when one is registered.
 * A non-zero result from the callback records HPE_CB_<FOR>. If the parser
 * errno is no longer HPE_OK after the call, the enclosing function returns
 * ER. Relies on `parser` and `settings` being in scope at the use site.
 */
#define CALLBACK_NOTIFY_(FOR, ER)                                    \
do {                                                                 \
  assert(HTTP_PARSER_ERRNO(parser) == HPE_OK);                       \
                                                                     \
  if (settings->on_##FOR) {                                          \
    if (settings->on_##FOR(parser) != 0) {                           \
      SET_ERRNO(HPE_CB_##FOR);                                       \
    }                                                                \
                                                                     \
    /* The callback failed or the parser got paused; bail out */     \
    if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {                       \
      return (ER);                                                   \
    }                                                                \
  }                                                                  \
} while (0)

/* Notification callback; on failure the current byte counts as consumed */
#define CALLBACK_NOTIFY(FOR)            CALLBACK_NOTIFY_(FOR, p - data + 1)

/* Notification callback; on failure the current byte is NOT consumed */
#define CALLBACK_NOTIFY_NOADVANCE(FOR)  CALLBACK_NOTIFY_(FOR, p - data)

/* Invoke the data callback on_<FOR> with LEN bytes starting at <FOR>_mark.
 * Nothing happens when the mark is unset. A non-zero callback result records
 * HPE_CB_<FOR>; if the parser errno is then not HPE_OK the enclosing function
 * returns ER. Otherwise the mark is cleared afterwards.
 */
#define CALLBACK_DATA_(FOR, LEN, ER)                                 \
do {                                                                 \
  assert(HTTP_PARSER_ERRNO(parser) == HPE_OK);                       \
                                                                     \
  if (FOR##_mark) {                                                  \
    if (settings->on_##FOR) {                                        \
      if (settings->on_##FOR(parser, FOR##_mark, (LEN)) != 0) {      \
        SET_ERRNO(HPE_CB_##FOR);                                     \
      }                                                              \
                                                                     \
      /* The callback failed or the parser got paused; bail out */   \
      if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {                     \
        return (ER);                                                 \
      }                                                              \
    }                                                                \
    FOR##_mark = NULL;                                               \
  }                                                                  \
} while (0)

/* Data callback; on failure the current byte counts as consumed */
#define CALLBACK_DATA(FOR)                                           \
    CALLBACK_DATA_(FOR, p - FOR##_mark, p - data + 1)

/* Data callback; on failure the current byte is NOT consumed */
#define CALLBACK_DATA_NOADVANCE(FOR)                                 \
    CALLBACK_DATA_(FOR, p - FOR##_mark, p - data)

/* Point <FOR>_mark at the current byte unless a mark is already set */
#define MARK(FOR)                                                    \
do {                                                                 \
  if (!FOR##_mark) {                                                 \
    FOR##_mark = p;                                                  \
  }                                                                  \
} while (0)

119
/* Lower-case header names that the parser matches against */
#define PROXY_CONNECTION    "proxy-connection"
#define CONNECTION          "connection"
#define CONTENT_LENGTH      "content-length"
#define TRANSFER_ENCODING   "transfer-encoding"
#define UPGRADE             "upgrade"

/* Lower-case header values that the parser matches against */
#define CHUNKED             "chunked"
#define KEEP_ALIVE          "keep-alive"
#define CLOSE               "close"

129
130static const char *method_strings[] =131{132#define XX(num, name, string) #string,133HTTP_METHOD_MAP(XX)134#undef XX135};136
137
/* Token characters as defined by RFC 2616, indexed by byte value.
 *
 *        token       = 1*<any CHAR except CTLs or separators>
 *     separators     = "(" | ")" | "<" | ">" | "@"
 *                    | "," | ";" | ":" | "\" | <">
 *                    | "/" | "[" | "]" | "?" | "="
 *                    | "{" | "}" | SP | HT
 *
 * A zero entry means the byte is not a token character. A non-zero entry is
 * the character itself, with upper-case letters mapped to lower case.
 * Entries 128..255 are not listed and are therefore zero.
 */
static const char tokens[256] = {
  /* 0x00 - 0x0f: control characters */
  0,    0,    0,    0,    0,    0,    0,    0,
  0,    0,    0,    0,    0,    0,    0,    0,
  /* 0x10 - 0x1f: control characters */
  0,    0,    0,    0,    0,    0,    0,    0,
  0,    0,    0,    0,    0,    0,    0,    0,
  /* 0x20 - 0x2f:  sp ! " # $ % & '  ( ) * + , - . / */
  0,    '!',  0,    '#',  '$',  '%',  '&',  '\'',
  0,    0,    '*',  '+',  0,    '-',  '.',  0,
  /* 0x30 - 0x3f:  0 1 2 3 4 5 6 7  8 9 : ; < = > ? */
  '0',  '1',  '2',  '3',  '4',  '5',  '6',  '7',
  '8',  '9',  0,    0,    0,    0,    0,    0,
  /* 0x40 - 0x4f:  @ A B C D E F G  H I J K L M N O */
  0,    'a',  'b',  'c',  'd',  'e',  'f',  'g',
  'h',  'i',  'j',  'k',  'l',  'm',  'n',  'o',
  /* 0x50 - 0x5f:  P Q R S T U V W  X Y Z [ \ ] ^ _ */
  'p',  'q',  'r',  's',  't',  'u',  'v',  'w',
  'x',  'y',  'z',  0,    0,    0,    '^',  '_',
  /* 0x60 - 0x6f:  ` a b c d e f g  h i j k l m n o */
  '`',  'a',  'b',  'c',  'd',  'e',  'f',  'g',
  'h',  'i',  'j',  'k',  'l',  'm',  'n',  'o',
  /* 0x70 - 0x7f:  p q r s t u v w  x y z { | } ~ del */
  'p',  'q',  'r',  's',  't',  'u',  'v',  'w',
  'x',  'y',  'z',  0,    '|',  0,    '~',  0
};

179
/* Hexadecimal digit values, indexed by byte value.
 *
 * '0'-'9' map to 0-9, 'A'-'F' and 'a'-'f' map to 10-15, and every other
 * byte maps to -1 ("not a hex digit").
 *
 * All 256 entries are spelled out on purpose: entries omitted from the
 * initializer would be zero-initialized, which would make bytes 0x80-0xFF
 * look like the valid digit 0 instead of being rejected.
 */
static const int8_t unhex[256] =
  {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x00 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x10 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x20 */
  , 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1   /* 0x30: '0'-'9' */
  ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x40: 'A'-'F' */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x50 */
  ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x60: 'a'-'f' */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x70 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x80 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x90 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0xa0 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0xb0 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0xc0 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0xd0 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0xe0 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0xf0 */
  };

191
/* T(v) keeps a bit only in non-strict builds; strict builds drop it. */
#if HTTP_PARSER_STRICT
# define T(v) 0
#else
# define T(v) v
#endif


/* Bitmap of bytes accepted as ordinary URL characters.
 *
 * Byte value `c` is represented by bit (c & 7) of element (c >> 3), so each
 * element below covers eight consecutive byte values, lowest value in the
 * least significant bit. Elements 16..31 (bytes 0x80-0xff) are not listed
 * and are therefore zero.
 */
static const uint8_t normal_url_char[32] = {
  /* 0x00-0x07  nul..bel: none accepted                               */ 0x00,
  /* 0x08-0x0f  bs..si: ht (0x09) and np (0x0c) in non-strict mode    */ 0x00 | T(0x02) | T(0x10),
  /* 0x10-0x17  dle..etb: none accepted                               */ 0x00,
  /* 0x18-0x1f  can..us: none accepted                                */ 0x00,
  /* 0x20-0x27  sp ! " # $ % & ' : all except sp and '#'              */ 0xf6,
  /* 0x28-0x2f  ( ) * + , - . /  : all                                */ 0xff,
  /* 0x30-0x37  0 1 2 3 4 5 6 7  : all                                */ 0xff,
  /* 0x38-0x3f  8 9 : ; < = > ?  : all except '?'                     */ 0x7f,
  /* 0x40-0x47  @ A B C D E F G  : all                                */ 0xff,
  /* 0x48-0x4f  H I J K L M N O  : all                                */ 0xff,
  /* 0x50-0x57  P Q R S T U V W  : all                                */ 0xff,
  /* 0x58-0x5f  X Y Z [ \ ] ^ _  : all                                */ 0xff,
  /* 0x60-0x67  ` a b c d e f g  : all                                */ 0xff,
  /* 0x68-0x6f  h i j k l m n o  : all                                */ 0xff,
  /* 0x70-0x77  p q r s t u v w  : all                                */ 0xff,
  /* 0x78-0x7f  x y z { | } ~ del: all except del                     */ 0x7f
};

#undef T

/* Parser states. Values are assigned sequentially starting from 1, and the
 * declaration order is significant: PARSING_HEADER() below compares against
 * s_headers_done.
 */
enum state
  { s_dead = 1 /* important that this is > 0 */

  /* start of a message and the response status line */
  , s_start_req_or_res
  , s_res_or_resp_H
  , s_start_res
  , s_res_H
  , s_res_HT
  , s_res_HTT
  , s_res_HTTP
  , s_res_first_http_major
  , s_res_http_major
  , s_res_first_http_minor
  , s_res_http_minor
  , s_res_first_status_code
  , s_res_status_code
  , s_res_status
  , s_res_line_almost_done

  /* request line */
  , s_start_req

  , s_req_method
  , s_req_spaces_before_url
  , s_req_schema
  , s_req_schema_slash
  , s_req_schema_slash_slash
  , s_req_server_start
  , s_req_server
  , s_req_server_with_at
  , s_req_path
  , s_req_query_string_start
  , s_req_query_string
  , s_req_fragment_start
  , s_req_fragment
  , s_req_http_start
  , s_req_http_H
  , s_req_http_HT
  , s_req_http_HTT
  , s_req_http_HTTP
  , s_req_first_http_major
  , s_req_http_major
  , s_req_first_http_minor
  , s_req_http_minor
  , s_req_line_almost_done

  /* header fields and values */
  , s_header_field_start
  , s_header_field
  , s_header_value_start
  , s_header_value
  , s_header_value_lws

  , s_header_almost_done

  , s_chunk_size_start
  , s_chunk_size
  , s_chunk_parameters
  , s_chunk_size_almost_done

  , s_headers_almost_done
  , s_headers_done

  /* Important: 's_headers_done' must be the last 'header' state. All
   * states beyond this must be 'body' states. It is used for overflow
   * checking. See the PARSING_HEADER() macro.
   */

  , s_chunk_data
  , s_chunk_data_almost_done
  , s_chunk_data_done

  , s_body_identity
  , s_body_identity_eof

  , s_message_done
  };


/* True while `state` is at or before s_headers_done, i.e. not a body state.
 * The argument is parenthesized so that compound expressions (for example a
 * conditional expression) are compared as a whole.
 */
#define PARSING_HEADER(state) ((state) <= s_headers_done)

314
/* Sub-states used to recognise specific header names and values.
 * Values are assigned sequentially starting from 0.
 */
enum header_states {
  h_general = 0,
  h_C,
  h_CO,
  h_CON,

  /* a header name is being compared against a known name */
  h_matching_connection,
  h_matching_proxy_connection,
  h_matching_content_length,
  h_matching_transfer_encoding,
  h_matching_upgrade,

  /* a known header name was matched */
  h_connection,
  h_content_length,
  h_transfer_encoding,
  h_upgrade,

  /* a header value is being compared against a known value */
  h_matching_transfer_encoding_chunked,
  h_matching_connection_keep_alive,
  h_matching_connection_close,

  /* a known header value was matched */
  h_transfer_encoding_chunked,
  h_connection_keep_alive,
  h_connection_close
};

/* States for walking the host part of a URL (userinfo, host, IPv6 literal,
 * port). Values are assigned sequentially starting from 1.
 */
enum http_host_state {
  s_http_host_dead = 1,
  s_http_userinfo_start,
  s_http_userinfo,
  s_http_host_start,
  s_http_host_v6_start,
  s_http_host,
  s_http_host_v6,
  s_http_host_v6_end,
  s_http_host_port_start,
  s_http_host_port
};

/* Macros for character classes; depends on strict-mode.
 *
 * Every macro argument is parenthesized so that compound expressions can be
 * passed safely. Arguments may be evaluated more than once, so they must not
 * have side effects.
 */
#define CR                  '\r'
#define LF                  '\n'
/* Sets bit 0x20, which maps ASCII upper-case letters to lower case */
#define LOWER(c)            ((unsigned char)((c) | 0x20))
#define IS_ALPHA(c)         (LOWER(c) >= 'a' && LOWER(c) <= 'z')
#define IS_NUM(c)           ((c) >= '0' && (c) <= '9')
#define IS_ALPHANUM(c)      (IS_ALPHA(c) || IS_NUM(c))
#define IS_HEX(c)           (IS_NUM(c) || (LOWER(c) >= 'a' && LOWER(c) <= 'f'))
#define IS_MARK(c)          ((c) == '-' || (c) == '_' || (c) == '.' || \
  (c) == '!' || (c) == '~' || (c) == '*' || (c) == '\'' || (c) == '(' || \
  (c) == ')')
#define IS_USERINFO_CHAR(c) (IS_ALPHANUM(c) || IS_MARK(c) || (c) == '%' || \
  (c) == ';' || (c) == ':' || (c) == '&' || (c) == '=' || (c) == '+' || \
  (c) == '$' || (c) == ',')

#if HTTP_PARSER_STRICT
#define TOKEN(c)            (tokens[(unsigned char)(c)])
#define IS_URL_CHAR(c)      (BIT_AT(normal_url_char, (unsigned char)(c)))
#define IS_HOST_CHAR(c)     (IS_ALPHANUM(c) || (c) == '.' || (c) == '-')
#else
/* Non-strict mode additionally treats ' ' as a token character, bytes with
 * the high bit set as URL characters and '_' as a host character.
 */
#define TOKEN(c)            (((c) == ' ') ? ' ' : tokens[(unsigned char)(c)])
#define IS_URL_CHAR(c)                                                         \
  (BIT_AT(normal_url_char, (unsigned char)(c)) || ((c) & 0x80))
#define IS_HOST_CHAR(c)                                                        \
  (IS_ALPHANUM(c) || (c) == '.' || (c) == '-' || (c) == '_')
#endif

382
/* State in which a new message starts, chosen from the parser type. */
#define start_state (parser->type == HTTP_REQUEST ? s_start_req : s_start_res)


#if HTTP_PARSER_STRICT

/* Strict mode: when `cond` holds, record HPE_STRICT and jump to the `error`
 * label of the enclosing function.
 */
# define STRICT_CHECK(cond)                                          \
do {                                                                 \
  if (cond) {                                                        \
    SET_ERRNO(HPE_STRICT);                                           \
    goto error;                                                      \
  }                                                                  \
} while (0)

/* Strict mode: only continue with a new message on a keep-alive connection;
 * otherwise move to s_dead.
 */
# define NEW_MESSAGE() (http_should_keep_alive(parser) ? start_state : s_dead)

#else

/* Non-strict mode: the check expands to nothing (`cond` is not evaluated)
 * and a new message may always start.
 */
# define STRICT_CHECK(cond)
# define NEW_MESSAGE() start_state

#endif

400
401/* Map errno values to strings for human-readable output */
402#define HTTP_STRERROR_GEN(n, s) { "HPE_" #n, s },403static struct {404const char *name;405const char *description;406} http_strerror_tab[] = {407HTTP_ERRNO_MAP(HTTP_STRERROR_GEN)408};409#undef HTTP_STRERROR_GEN410
411int http_message_needs_eof(const http_parser *parser);412
413/* Our URL parser.
414*
415* This is designed to be shared by http_parser_execute() for URL validation,
416* hence it has a state transition + byte-for-byte interface. In addition, it
417* is meant to be embedded in http_parser_parse_url(), which does the dirty
418* work of turning state transitions URL components for its API.
419*
420* This function should only be invoked with non-space characters. It is
421* assumed that the caller cares about (and can detect) the transition between
422* URL and non-URL states by looking for these.
423*/
424static enum state425parse_url_char(enum state s, const char ch)426{
427if (ch == ' ' || ch == '\r' || ch == '\n') {428return s_dead;429}430
431#if HTTP_PARSER_STRICT432if (ch == '\t' || ch == '\f') {433return s_dead;434}435#endif436
437switch (s) {438case s_req_spaces_before_url:439/* Proxied requests are followed by scheme of an absolute URI (alpha).440* All methods except CONNECT are followed by '/' or '*'.
441*/
442
443if (ch == '/' || ch == '*') {444return s_req_path;445}446
447if (IS_ALPHA(ch)) {448return s_req_schema;449}450
451break;452
453case s_req_schema:454if (IS_ALPHA(ch)) {455return s;456}457
458if (ch == ':') {459return s_req_schema_slash;460}461
462break;463
464case s_req_schema_slash:465if (ch == '/') {466return s_req_schema_slash_slash;467}468
469break;470
471case s_req_schema_slash_slash:472if (ch == '/') {473return s_req_server_start;474}475
476break;477
478case s_req_server_with_at:479if (ch == '@') {480return s_dead;481}482
483/* FALLTHROUGH */484case s_req_server_start:485case s_req_server:486if (ch == '/') {487return s_req_path;488}489
490if (ch == '?') {491return s_req_query_string_start;492}493
494if (ch == '@') {495return s_req_server_with_at;496}497
498if (IS_USERINFO_CHAR(ch) || ch == '[' || ch == ']') {499return s_req_server;500}501
502break;503
504case s_req_path:505if (IS_URL_CHAR(ch)) {506return s;507}508
509switch (ch) {510case '?':511return s_req_query_string_start;512
513case '#':514return s_req_fragment_start;515}516
517break;518
519case s_req_query_string_start:520case s_req_query_string:521if (IS_URL_CHAR(ch)) {522return s_req_query_string;523}524
525switch (ch) {526case '?':527/* allow extra '?' in query string */528return s_req_query_string;529
530case '#':531return s_req_fragment_start;532}533
534break;535
536case s_req_fragment_start:537if (IS_URL_CHAR(ch)) {538return s_req_fragment;539}540
541switch (ch) {542case '?':543return s_req_fragment;544
545case '#':546return s;547}548
549break;550
551case s_req_fragment:552if (IS_URL_CHAR(ch)) {553return s;554}555
556switch (ch) {557case '?':558case '#':559return s;560}561
562break;563
564default:565break;566}567
568/* We should never fall out of the switch above unless there's an error */569return s_dead;570}
571
572size_t http_parser_execute (http_parser *parser,573const http_parser_settings *settings,574const char *data,575size_t len)576{
577char c, ch;578int8_t unhex_val;579const char *p = data;580const char *header_field_mark = 0;581const char *header_value_mark = 0;582const char *url_mark = 0;583const char *body_mark = 0;584
585/* We're in an error state. Don't bother doing anything. */586if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {587return 0;588}589
590if (len == 0) {591switch (parser->state) {592case s_body_identity_eof:593/* Use of CALLBACK_NOTIFY() here would erroneously return 1 byte read if594* we got paused.
595*/
596CALLBACK_NOTIFY_NOADVANCE(message_complete);597return 0;598
599case s_dead:600case s_start_req_or_res:601case s_start_res:602case s_start_req:603return 0;604
605default:606SET_ERRNO(HPE_INVALID_EOF_STATE);607return 1;608}609}610
611
612if (parser->state == s_header_field)613header_field_mark = data;614if (parser->state == s_header_value)615header_value_mark = data;616switch (parser->state) {617case s_req_path:618case s_req_schema:619case s_req_schema_slash:620case s_req_schema_slash_slash:621case s_req_server_start:622case s_req_server:623case s_req_server_with_at:624case s_req_query_string_start:625case s_req_query_string:626case s_req_fragment_start:627case s_req_fragment:628url_mark = data;629break;630}631
632for (p=data; p != data + len; p++) {633ch = *p;634
635if (PARSING_HEADER(parser->state)) {636++parser->nread;637/* Don't allow the total size of the HTTP headers (including the status638* line) to exceed HTTP_MAX_HEADER_SIZE. This check is here to protect
639* embedders against denial-of-service attacks where the attacker feeds
640* us a never-ending header that the embedder keeps buffering.
641*
642* This check is arguably the responsibility of embedders but we're doing
643* it on the embedder's behalf because most won't bother and this way we
644* make the web a little safer. HTTP_MAX_HEADER_SIZE is still far bigger
645* than any reasonable request or response so this should never affect
646* day-to-day operation.
647*/
648if (parser->nread > HTTP_MAX_HEADER_SIZE) {649SET_ERRNO(HPE_HEADER_OVERFLOW);650goto error;651}652}653
654reexecute_byte:655switch (parser->state) {656
657case s_dead:658/* this state is used after a 'Connection: close' message659* the parser will error out if it reads another message
660*/
661if (ch == CR || ch == LF)662break;663
664SET_ERRNO(HPE_CLOSED_CONNECTION);665goto error;666
667case s_start_req_or_res:668{669if (ch == CR || ch == LF)670break;671parser->flags = 0;672parser->content_length = ULLONG_MAX;673
674if (ch == 'H') {675parser->state = s_res_or_resp_H;676
677CALLBACK_NOTIFY(message_begin);678} else {679parser->type = HTTP_REQUEST;680parser->state = s_start_req;681goto reexecute_byte;682}683
684break;685}686
687case s_res_or_resp_H:688if (ch == 'T') {689parser->type = HTTP_RESPONSE;690parser->state = s_res_HT;691} else {692if (ch != 'E') {693SET_ERRNO(HPE_INVALID_CONSTANT);694goto error;695}696
697parser->type = HTTP_REQUEST;698parser->method = HTTP_HEAD;699parser->index = 2;700parser->state = s_req_method;701}702break;703
704case s_start_res:705{706parser->flags = 0;707parser->content_length = ULLONG_MAX;708
709switch (ch) {710case 'H':711parser->state = s_res_H;712break;713
714case CR:715case LF:716break;717
718default:719SET_ERRNO(HPE_INVALID_CONSTANT);720goto error;721}722
723CALLBACK_NOTIFY(message_begin);724break;725}726
727case s_res_H:728STRICT_CHECK(ch != 'T');729parser->state = s_res_HT;730break;731
732case s_res_HT:733STRICT_CHECK(ch != 'T');734parser->state = s_res_HTT;735break;736
737case s_res_HTT:738STRICT_CHECK(ch != 'P');739parser->state = s_res_HTTP;740break;741
742case s_res_HTTP:743STRICT_CHECK(ch != '/');744parser->state = s_res_first_http_major;745break;746
747case s_res_first_http_major:748if (ch < '0' || ch > '9') {749SET_ERRNO(HPE_INVALID_VERSION);750goto error;751}752
753parser->http_major = ch - '0';754parser->state = s_res_http_major;755break;756
757/* major HTTP version or dot */758case s_res_http_major:759{760if (ch == '.') {761parser->state = s_res_first_http_minor;762break;763}764
765if (!IS_NUM(ch)) {766SET_ERRNO(HPE_INVALID_VERSION);767goto error;768}769
770parser->http_major *= 10;771parser->http_major += ch - '0';772
773if (parser->http_major > 999) {774SET_ERRNO(HPE_INVALID_VERSION);775goto error;776}777
778break;779}780
781/* first digit of minor HTTP version */782case s_res_first_http_minor:783if (!IS_NUM(ch)) {784SET_ERRNO(HPE_INVALID_VERSION);785goto error;786}787
788parser->http_minor = ch - '0';789parser->state = s_res_http_minor;790break;791
792/* minor HTTP version or end of request line */793case s_res_http_minor:794{795if (ch == ' ') {796parser->state = s_res_first_status_code;797break;798}799
800if (!IS_NUM(ch)) {801SET_ERRNO(HPE_INVALID_VERSION);802goto error;803}804
805parser->http_minor *= 10;806parser->http_minor += ch - '0';807
808if (parser->http_minor > 999) {809SET_ERRNO(HPE_INVALID_VERSION);810goto error;811}812
813break;814}815
816case s_res_first_status_code:817{818if (!IS_NUM(ch)) {819if (ch == ' ') {820break;821}822
823SET_ERRNO(HPE_INVALID_STATUS);824goto error;825}826parser->status_code = ch - '0';827parser->state = s_res_status_code;828break;829}830
831case s_res_status_code:832{833if (!IS_NUM(ch)) {834switch (ch) {835case ' ':836parser->state = s_res_status;837break;838case CR:839parser->state = s_res_line_almost_done;840break;841case LF:842parser->state = s_header_field_start;843break;844default:845SET_ERRNO(HPE_INVALID_STATUS);846goto error;847}848break;849}850
851parser->status_code *= 10;852parser->status_code += ch - '0';853
854if (parser->status_code > 999) {855SET_ERRNO(HPE_INVALID_STATUS);856goto error;857}858
859break;860}861
862case s_res_status:863/* the human readable status. e.g. "NOT FOUND"864* we are not humans so just ignore this */
865if (ch == CR) {866parser->state = s_res_line_almost_done;867break;868}869
870if (ch == LF) {871parser->state = s_header_field_start;872break;873}874break;875
876case s_res_line_almost_done:877STRICT_CHECK(ch != LF);878parser->state = s_header_field_start;879CALLBACK_NOTIFY(status_complete);880break;881
882case s_start_req:883{884if (ch == CR || ch == LF)885break;886parser->flags = 0;887parser->content_length = ULLONG_MAX;888
889if (!IS_ALPHA(ch)) {890SET_ERRNO(HPE_INVALID_METHOD);891goto error;892}893
894parser->method = (enum http_method) 0;895parser->index = 1;896switch (ch) {897case 'C': parser->method = HTTP_CONNECT; /* or COPY, CHECKOUT */ break;898case 'D': parser->method = HTTP_DELETE; break;899case 'G': parser->method = HTTP_GET; break;900case 'H': parser->method = HTTP_HEAD; break;901case 'L': parser->method = HTTP_LOCK; break;902case 'M': parser->method = HTTP_MKCOL; /* or MOVE, MKACTIVITY, MERGE, M-SEARCH */ break;903case 'N': parser->method = HTTP_NOTIFY; break;904case 'O': parser->method = HTTP_OPTIONS; break;905case 'P': parser->method = HTTP_POST;906/* or PROPFIND|PROPPATCH|PUT|PATCH|PURGE */907break;908case 'R': parser->method = HTTP_REPORT; break;909case 'S': parser->method = HTTP_SUBSCRIBE; /* or SEARCH */ break;910case 'T': parser->method = HTTP_TRACE; break;911case 'U': parser->method = HTTP_UNLOCK; /* or UNSUBSCRIBE */ break;912default:913SET_ERRNO(HPE_INVALID_METHOD);914goto error;915}916parser->state = s_req_method;917
918CALLBACK_NOTIFY(message_begin);919
920break;921}922
923case s_req_method:924{925const char *matcher;926if (ch == '\0') {927SET_ERRNO(HPE_INVALID_METHOD);928goto error;929}930
931matcher = method_strings[parser->method];932if (ch == ' ' && matcher[parser->index] == '\0') {933parser->state = s_req_spaces_before_url;934} else if (ch == matcher[parser->index]) {935; /* nada */936} else if (parser->method == HTTP_CONNECT) {937if (parser->index == 1 && ch == 'H') {938parser->method = HTTP_CHECKOUT;939} else if (parser->index == 2 && ch == 'P') {940parser->method = HTTP_COPY;941} else {942SET_ERRNO(HPE_INVALID_METHOD);943goto error;944}945} else if (parser->method == HTTP_MKCOL) {946if (parser->index == 1 && ch == 'O') {947parser->method = HTTP_MOVE;948} else if (parser->index == 1 && ch == 'E') {949parser->method = HTTP_MERGE;950} else if (parser->index == 1 && ch == '-') {951parser->method = HTTP_MSEARCH;952} else if (parser->index == 2 && ch == 'A') {953parser->method = HTTP_MKACTIVITY;954} else {955SET_ERRNO(HPE_INVALID_METHOD);956goto error;957}958} else if (parser->method == HTTP_SUBSCRIBE) {959if (parser->index == 1 && ch == 'E') {960parser->method = HTTP_SEARCH;961} else {962SET_ERRNO(HPE_INVALID_METHOD);963goto error;964}965} else if (parser->index == 1 && parser->method == HTTP_POST) {966if (ch == 'R') {967parser->method = HTTP_PROPFIND; /* or HTTP_PROPPATCH */968} else if (ch == 'U') {969parser->method = HTTP_PUT; /* or HTTP_PURGE */970} else if (ch == 'A') {971parser->method = HTTP_PATCH;972} else {973SET_ERRNO(HPE_INVALID_METHOD);974goto error;975}976} else if (parser->index == 2) {977if (parser->method == HTTP_PUT) {978if (ch == 'R') {979parser->method = HTTP_PURGE;980} else {981SET_ERRNO(HPE_INVALID_METHOD);982goto error;983}984} else if (parser->method == HTTP_UNLOCK) {985if (ch == 'S') {986parser->method = HTTP_UNSUBSCRIBE;987} else {988SET_ERRNO(HPE_INVALID_METHOD);989goto error;990}991} else {992SET_ERRNO(HPE_INVALID_METHOD);993goto error;994}995} else if (parser->index == 4 && parser->method == HTTP_PROPFIND && ch == 'P') {996parser->method = HTTP_PROPPATCH;997} else {998SET_ERRNO(HPE_INVALID_METHOD);999goto error;1000}1001
1002++parser->index;1003break;1004}1005
1006case s_req_spaces_before_url:1007{1008if (ch == ' ') break;1009
1010MARK(url);1011if (parser->method == HTTP_CONNECT) {1012parser->state = s_req_server_start;1013}1014
1015parser->state = parse_url_char((enum state)parser->state, ch);1016if (parser->state == s_dead) {1017SET_ERRNO(HPE_INVALID_URL);1018goto error;1019}1020
1021break;1022}1023
1024case s_req_schema:1025case s_req_schema_slash:1026case s_req_schema_slash_slash:1027case s_req_server_start:1028{1029switch (ch) {1030/* No whitespace allowed here */1031case ' ':1032case CR:1033case LF:1034SET_ERRNO(HPE_INVALID_URL);1035goto error;1036default:1037parser->state = parse_url_char((enum state)parser->state, ch);1038if (parser->state == s_dead) {1039SET_ERRNO(HPE_INVALID_URL);1040goto error;1041}1042}1043
1044break;1045}1046
1047case s_req_server:1048case s_req_server_with_at:1049case s_req_path:1050case s_req_query_string_start:1051case s_req_query_string:1052case s_req_fragment_start:1053case s_req_fragment:1054{1055switch (ch) {1056case ' ':1057parser->state = s_req_http_start;1058CALLBACK_DATA(url);1059break;1060case CR:1061case LF:1062parser->http_major = 0;1063parser->http_minor = 9;1064parser->state = (ch == CR) ?1065s_req_line_almost_done :1066s_header_field_start;1067CALLBACK_DATA(url);1068break;1069default:1070parser->state = parse_url_char((enum state)parser->state, ch);1071if (parser->state == s_dead) {1072SET_ERRNO(HPE_INVALID_URL);1073goto error;1074}1075}1076break;1077}1078
1079case s_req_http_start:1080switch (ch) {1081case 'H':1082parser->state = s_req_http_H;1083break;1084case ' ':1085break;1086default:1087SET_ERRNO(HPE_INVALID_CONSTANT);1088goto error;1089}1090break;1091
1092case s_req_http_H:1093STRICT_CHECK(ch != 'T');1094parser->state = s_req_http_HT;1095break;1096
1097case s_req_http_HT:1098STRICT_CHECK(ch != 'T');1099parser->state = s_req_http_HTT;1100break;1101
1102case s_req_http_HTT:1103STRICT_CHECK(ch != 'P');1104parser->state = s_req_http_HTTP;1105break;1106
1107case s_req_http_HTTP:1108STRICT_CHECK(ch != '/');1109parser->state = s_req_first_http_major;1110break;1111
1112/* first digit of major HTTP version */1113case s_req_first_http_major:1114if (ch < '1' || ch > '9') {1115SET_ERRNO(HPE_INVALID_VERSION);1116goto error;1117}1118
1119parser->http_major = ch - '0';1120parser->state = s_req_http_major;1121break;1122
1123/* major HTTP version or dot */1124case s_req_http_major:1125{1126if (ch == '.') {1127parser->state = s_req_first_http_minor;1128break;1129}1130
1131if (!IS_NUM(ch)) {1132SET_ERRNO(HPE_INVALID_VERSION);1133goto error;1134}1135
1136parser->http_major *= 10;1137parser->http_major += ch - '0';1138
1139if (parser->http_major > 999) {1140SET_ERRNO(HPE_INVALID_VERSION);1141goto error;1142}1143
1144break;1145}1146
1147/* first digit of minor HTTP version */1148case s_req_first_http_minor:1149if (!IS_NUM(ch)) {1150SET_ERRNO(HPE_INVALID_VERSION);1151goto error;1152}1153
1154parser->http_minor = ch - '0';1155parser->state = s_req_http_minor;1156break;1157
1158/* minor HTTP version or end of request line */1159case s_req_http_minor:1160{1161if (ch == CR) {1162parser->state = s_req_line_almost_done;1163break;1164}1165
1166if (ch == LF) {1167parser->state = s_header_field_start;1168break;1169}1170
1171/* XXX allow spaces after digit? */1172
1173if (!IS_NUM(ch)) {1174SET_ERRNO(HPE_INVALID_VERSION);1175goto error;1176}1177
1178parser->http_minor *= 10;1179parser->http_minor += ch - '0';1180
1181if (parser->http_minor > 999) {1182SET_ERRNO(HPE_INVALID_VERSION);1183goto error;1184}1185
1186break;1187}1188
1189/* end of request line */1190case s_req_line_almost_done:1191{1192if (ch != LF) {1193SET_ERRNO(HPE_LF_EXPECTED);1194goto error;1195}1196
1197parser->state = s_header_field_start;1198break;1199}1200
1201case s_header_field_start:1202{1203if (ch == CR) {1204parser->state = s_headers_almost_done;1205break;1206}1207
1208if (ch == LF) {1209/* they might be just sending \n instead of \r\n so this would be1210* the second \n to denote the end of headers*/
1211parser->state = s_headers_almost_done;1212goto reexecute_byte;1213}1214
1215c = TOKEN(ch);1216
1217if (!c) {1218SET_ERRNO(HPE_INVALID_HEADER_TOKEN);1219goto error;1220}1221
1222MARK(header_field);1223
1224parser->index = 0;1225parser->state = s_header_field;1226
1227switch (c) {1228case 'c':1229parser->header_state = h_C;1230break;1231
1232case 'p':1233parser->header_state = h_matching_proxy_connection;1234break;1235
1236case 't':1237parser->header_state = h_matching_transfer_encoding;1238break;1239
1240case 'u':1241parser->header_state = h_matching_upgrade;1242break;1243
1244default:1245parser->header_state = h_general;1246break;1247}1248break;1249}1250
1251case s_header_field:1252{1253c = TOKEN(ch);1254
1255if (c) {1256switch (parser->header_state) {1257case h_general:1258break;1259
1260case h_C:1261parser->index++;1262parser->header_state = (c == 'o' ? h_CO : h_general);1263break;1264
1265case h_CO:1266parser->index++;1267parser->header_state = (c == 'n' ? h_CON : h_general);1268break;1269
1270case h_CON:1271parser->index++;1272switch (c) {1273case 'n':1274parser->header_state = h_matching_connection;1275break;1276case 't':1277parser->header_state = h_matching_content_length;1278break;1279default:1280parser->header_state = h_general;1281break;1282}1283break;1284
1285/* connection */1286
1287case h_matching_connection:1288parser->index++;1289if (parser->index > sizeof(CONNECTION)-11290|| c != CONNECTION[parser->index]) {1291parser->header_state = h_general;1292} else if (parser->index == sizeof(CONNECTION)-2) {1293parser->header_state = h_connection;1294}1295break;1296
1297/* proxy-connection */1298
1299case h_matching_proxy_connection:1300parser->index++;1301if (parser->index > sizeof(PROXY_CONNECTION)-11302|| c != PROXY_CONNECTION[parser->index]) {1303parser->header_state = h_general;1304} else if (parser->index == sizeof(PROXY_CONNECTION)-2) {1305parser->header_state = h_connection;1306}1307break;1308
1309/* content-length */1310
1311case h_matching_content_length:1312parser->index++;1313if (parser->index > sizeof(CONTENT_LENGTH)-11314|| c != CONTENT_LENGTH[parser->index]) {1315parser->header_state = h_general;1316} else if (parser->index == sizeof(CONTENT_LENGTH)-2) {1317parser->header_state = h_content_length;1318}1319break;1320
1321/* transfer-encoding */1322
1323case h_matching_transfer_encoding:1324parser->index++;1325if (parser->index > sizeof(TRANSFER_ENCODING)-11326|| c != TRANSFER_ENCODING[parser->index]) {1327parser->header_state = h_general;1328} else if (parser->index == sizeof(TRANSFER_ENCODING)-2) {1329parser->header_state = h_transfer_encoding;1330}1331break;1332
1333/* upgrade */1334
1335case h_matching_upgrade:1336parser->index++;1337if (parser->index > sizeof(UPGRADE)-11338|| c != UPGRADE[parser->index]) {1339parser->header_state = h_general;1340} else if (parser->index == sizeof(UPGRADE)-2) {1341parser->header_state = h_upgrade;1342}1343break;1344
1345case h_connection:1346case h_content_length:1347case h_transfer_encoding:1348case h_upgrade:1349if (ch != ' ') parser->header_state = h_general;1350break;1351
1352default:1353assert(0 && "Unknown header_state");1354break;1355}1356break;1357}1358
1359if (ch == ':') {1360parser->state = s_header_value_start;1361CALLBACK_DATA(header_field);1362break;1363}1364
1365if (ch == CR) {1366parser->state = s_header_almost_done;1367CALLBACK_DATA(header_field);1368break;1369}1370
1371if (ch == LF) {1372parser->state = s_header_field_start;1373CALLBACK_DATA(header_field);1374break;1375}1376
1377SET_ERRNO(HPE_INVALID_HEADER_TOKEN);1378goto error;1379}1380
1381case s_header_value_start:1382{1383if (ch == ' ' || ch == '\t') break;1384
1385MARK(header_value);1386
1387parser->state = s_header_value;1388parser->index = 0;1389
1390if (ch == CR) {1391parser->header_state = h_general;1392parser->state = s_header_almost_done;1393CALLBACK_DATA(header_value);1394break;1395}1396
1397if (ch == LF) {1398parser->state = s_header_field_start;1399CALLBACK_DATA(header_value);1400break;1401}1402
1403c = LOWER(ch);1404
1405switch (parser->header_state) {1406case h_upgrade:1407parser->flags |= F_UPGRADE;1408parser->header_state = h_general;1409break;1410
1411case h_transfer_encoding:1412/* looking for 'Transfer-Encoding: chunked' */1413if ('c' == c) {1414parser->header_state = h_matching_transfer_encoding_chunked;1415} else {1416parser->header_state = h_general;1417}1418break;1419
1420case h_content_length:1421if (!IS_NUM(ch)) {1422SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);1423goto error;1424}1425
1426parser->content_length = ch - '0';1427break;1428
1429case h_connection:1430/* looking for 'Connection: keep-alive' */1431if (c == 'k') {1432parser->header_state = h_matching_connection_keep_alive;1433/* looking for 'Connection: close' */1434} else if (c == 'c') {1435parser->header_state = h_matching_connection_close;1436} else {1437parser->header_state = h_general;1438}1439break;1440
1441default:1442parser->header_state = h_general;1443break;1444}1445break;1446}1447
1448case s_header_value:1449{1450
1451if (ch == CR) {1452parser->state = s_header_almost_done;1453CALLBACK_DATA(header_value);1454break;1455}1456
1457if (ch == LF) {1458parser->state = s_header_almost_done;1459CALLBACK_DATA_NOADVANCE(header_value);1460goto reexecute_byte;1461}1462
1463c = LOWER(ch);1464
1465switch (parser->header_state) {1466case h_general:1467break;1468
1469case h_connection:1470case h_transfer_encoding:1471assert(0 && "Shouldn't get here.");1472break;1473
1474case h_content_length:1475{1476uint64_t t;1477
1478if (ch == ' ') break;1479
1480if (!IS_NUM(ch)) {1481SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);1482goto error;1483}1484
1485t = parser->content_length;1486t *= 10;1487t += ch - '0';1488
1489/* Overflow? */1490if (t < parser->content_length || t == ULLONG_MAX) {1491SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);1492goto error;1493}1494
1495parser->content_length = t;1496break;1497}1498
1499/* Transfer-Encoding: chunked */1500case h_matching_transfer_encoding_chunked:1501parser->index++;1502if (parser->index > sizeof(CHUNKED)-11503|| c != CHUNKED[parser->index]) {1504parser->header_state = h_general;1505} else if (parser->index == sizeof(CHUNKED)-2) {1506parser->header_state = h_transfer_encoding_chunked;1507}1508break;1509
1510/* looking for 'Connection: keep-alive' */1511case h_matching_connection_keep_alive:1512parser->index++;1513if (parser->index > sizeof(KEEP_ALIVE)-11514|| c != KEEP_ALIVE[parser->index]) {1515parser->header_state = h_general;1516} else if (parser->index == sizeof(KEEP_ALIVE)-2) {1517parser->header_state = h_connection_keep_alive;1518}1519break;1520
1521/* looking for 'Connection: close' */1522case h_matching_connection_close:1523parser->index++;1524if (parser->index > sizeof(CLOSE)-1 || c != CLOSE[parser->index]) {1525parser->header_state = h_general;1526} else if (parser->index == sizeof(CLOSE)-2) {1527parser->header_state = h_connection_close;1528}1529break;1530
1531case h_transfer_encoding_chunked:1532case h_connection_keep_alive:1533case h_connection_close:1534if (ch != ' ') parser->header_state = h_general;1535break;1536
1537default:1538parser->state = s_header_value;1539parser->header_state = h_general;1540break;1541}1542break;1543}1544
1545case s_header_almost_done:1546{1547STRICT_CHECK(ch != LF);1548
1549parser->state = s_header_value_lws;1550
1551switch (parser->header_state) {1552case h_connection_keep_alive:1553parser->flags |= F_CONNECTION_KEEP_ALIVE;1554break;1555case h_connection_close:1556parser->flags |= F_CONNECTION_CLOSE;1557break;1558case h_transfer_encoding_chunked:1559parser->flags |= F_CHUNKED;1560break;1561default:1562break;1563}1564
1565break;1566}1567
1568case s_header_value_lws:1569{1570if (ch == ' ' || ch == '\t')1571parser->state = s_header_value_start;1572else1573{1574parser->state = s_header_field_start;1575goto reexecute_byte;1576}1577break;1578}1579
1580case s_headers_almost_done:1581{1582STRICT_CHECK(ch != LF);1583
1584if (parser->flags & F_TRAILING) {1585/* End of a chunked request */1586parser->state = NEW_MESSAGE();1587CALLBACK_NOTIFY(message_complete);1588break;1589}1590
1591parser->state = s_headers_done;1592
1593/* Set this here so that on_headers_complete() callbacks can see it */1594parser->upgrade =1595(parser->flags & F_UPGRADE || parser->method == HTTP_CONNECT);1596
1597/* Here we call the headers_complete callback. This is somewhat1598* different than other callbacks because if the user returns 1, we
1599* will interpret that as saying that this message has no body. This
1600* is needed for the annoying case of receiving a response to a HEAD
1601* request.
1602*
1603* We'd like to use CALLBACK_NOTIFY_NOADVANCE() here but we cannot, so
1604* we have to simulate it by handling a change in errno below.
1605*/
1606if (settings->on_headers_complete) {1607switch (settings->on_headers_complete(parser)) {1608case 0:1609break;1610
1611case 1:1612parser->flags |= F_SKIPBODY;1613break;1614
1615default:1616SET_ERRNO(HPE_CB_headers_complete);1617return p - data; /* Error */1618}1619}1620
1621if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {1622return p - data;1623}1624
1625goto reexecute_byte;1626}1627
1628case s_headers_done:1629{1630STRICT_CHECK(ch != LF);1631
1632parser->nread = 0;1633
1634/* Exit, the rest of the connect is in a different protocol. */1635if (parser->upgrade) {1636parser->state = NEW_MESSAGE();1637CALLBACK_NOTIFY(message_complete);1638return (p - data) + 1;1639}1640
1641if (parser->flags & F_SKIPBODY) {1642parser->state = NEW_MESSAGE();1643CALLBACK_NOTIFY(message_complete);1644} else if (parser->flags & F_CHUNKED) {1645/* chunked encoding - ignore Content-Length header */1646parser->state = s_chunk_size_start;1647} else {1648if (parser->content_length == 0) {1649/* Content-Length header given but zero: Content-Length: 0\r\n */1650parser->state = NEW_MESSAGE();1651CALLBACK_NOTIFY(message_complete);1652} else if (parser->content_length != ULLONG_MAX) {1653/* Content-Length header given and non-zero */1654parser->state = s_body_identity;1655} else {1656if (parser->type == HTTP_REQUEST ||1657!http_message_needs_eof(parser)) {1658/* Assume content-length 0 - read the next */1659parser->state = NEW_MESSAGE();1660CALLBACK_NOTIFY(message_complete);1661} else {1662/* Read body until EOF */1663parser->state = s_body_identity_eof;1664}1665}1666}1667
1668break;1669}1670
1671case s_body_identity:1672{1673uint64_t to_read = MIN(parser->content_length,1674(uint64_t) ((data + len) - p));1675
1676assert(parser->content_length != 01677&& parser->content_length != ULLONG_MAX);1678
1679/* The difference between advancing content_length and p is because1680* the latter will automaticaly advance on the next loop iteration.
1681* Further, if content_length ends up at 0, we want to see the last
1682* byte again for our message complete callback.
1683*/
1684MARK(body);1685parser->content_length -= to_read;1686p += to_read - 1;1687
1688if (parser->content_length == 0) {1689parser->state = s_message_done;1690
1691/* Mimic CALLBACK_DATA_NOADVANCE() but with one extra byte.1692*
1693* The alternative to doing this is to wait for the next byte to
1694* trigger the data callback, just as in every other case. The
1695* problem with this is that this makes it difficult for the test
1696* harness to distinguish between complete-on-EOF and
1697* complete-on-length. It's not clear that this distinction is
1698* important for applications, but let's keep it for now.
1699*/
1700CALLBACK_DATA_(body, p - body_mark + 1, p - data);1701goto reexecute_byte;1702}1703
1704break;1705}1706
1707/* read until EOF */1708case s_body_identity_eof:1709MARK(body);1710p = data + len - 1;1711
1712break;1713
1714case s_message_done:1715parser->state = NEW_MESSAGE();1716CALLBACK_NOTIFY(message_complete);1717break;1718
1719case s_chunk_size_start:1720{1721assert(parser->nread == 1);1722assert(parser->flags & F_CHUNKED);1723
1724unhex_val = unhex[(unsigned char)ch];1725if (unhex_val == -1) {1726SET_ERRNO(HPE_INVALID_CHUNK_SIZE);1727goto error;1728}1729
1730parser->content_length = unhex_val;1731parser->state = s_chunk_size;1732break;1733}1734
1735case s_chunk_size:1736{1737uint64_t t;1738
1739assert(parser->flags & F_CHUNKED);1740
1741if (ch == CR) {1742parser->state = s_chunk_size_almost_done;1743break;1744}1745
1746unhex_val = unhex[(unsigned char)ch];1747
1748if (unhex_val == -1) {1749if (ch == ';' || ch == ' ') {1750parser->state = s_chunk_parameters;1751break;1752}1753
1754SET_ERRNO(HPE_INVALID_CHUNK_SIZE);1755goto error;1756}1757
1758t = parser->content_length;1759t *= 16;1760t += unhex_val;1761
1762/* Overflow? */1763if (t < parser->content_length || t == ULLONG_MAX) {1764SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);1765goto error;1766}1767
1768parser->content_length = t;1769break;1770}1771
1772case s_chunk_parameters:1773{1774assert(parser->flags & F_CHUNKED);1775/* just ignore this shit. TODO check for overflow */1776if (ch == CR) {1777parser->state = s_chunk_size_almost_done;1778break;1779}1780break;1781}1782
1783case s_chunk_size_almost_done:1784{1785assert(parser->flags & F_CHUNKED);1786STRICT_CHECK(ch != LF);1787
1788parser->nread = 0;1789
1790if (parser->content_length == 0) {1791parser->flags |= F_TRAILING;1792parser->state = s_header_field_start;1793} else {1794parser->state = s_chunk_data;1795}1796break;1797}1798
1799case s_chunk_data:1800{1801uint64_t to_read = MIN(parser->content_length,1802(uint64_t) ((data + len) - p));1803
1804assert(parser->flags & F_CHUNKED);1805assert(parser->content_length != 01806&& parser->content_length != ULLONG_MAX);1807
1808/* See the explanation in s_body_identity for why the content1809* length and data pointers are managed this way.
1810*/
1811MARK(body);1812parser->content_length -= to_read;1813p += to_read - 1;1814
1815if (parser->content_length == 0) {1816parser->state = s_chunk_data_almost_done;1817}1818
1819break;1820}1821
1822case s_chunk_data_almost_done:1823assert(parser->flags & F_CHUNKED);1824assert(parser->content_length == 0);1825STRICT_CHECK(ch != CR);1826parser->state = s_chunk_data_done;1827CALLBACK_DATA(body);1828break;1829
1830case s_chunk_data_done:1831assert(parser->flags & F_CHUNKED);1832STRICT_CHECK(ch != LF);1833parser->nread = 0;1834parser->state = s_chunk_size_start;1835break;1836
1837default:1838assert(0 && "unhandled state");1839SET_ERRNO(HPE_INVALID_INTERNAL_STATE);1840goto error;1841}1842}1843
1844/* Run callbacks for any marks that we have leftover after we ran out of1845* bytes. There should be at most one of these set, so it's OK to invoke
1846* them in series (unset marks will not result in callbacks).
1847*
1848* We use the NOADVANCE() variety of callbacks here because 'p' has already
1849* overflowed 'data' and this allows us to correct for the off-by-one that
1850* we'd otherwise have (since CALLBACK_DATA() is meant to be run with a 'p'
1851* value that's in-bounds).
1852*/
1853
1854assert(((header_field_mark ? 1 : 0) +1855(header_value_mark ? 1 : 0) +1856(url_mark ? 1 : 0) +1857(body_mark ? 1 : 0)) <= 1);1858
1859CALLBACK_DATA_NOADVANCE(header_field);1860CALLBACK_DATA_NOADVANCE(header_value);1861CALLBACK_DATA_NOADVANCE(url);1862CALLBACK_DATA_NOADVANCE(body);1863
1864return len;1865
1866error:1867if (HTTP_PARSER_ERRNO(parser) == HPE_OK) {1868SET_ERRNO(HPE_UNKNOWN);1869}1870
1871return (p - data);1872}
1873
1874
1875/* Does the parser need to see an EOF to find the end of the message? */
1876int
1877http_message_needs_eof (const http_parser *parser)1878{
1879if (parser->type == HTTP_REQUEST) {1880return 0;1881}1882
1883/* See RFC 2616 section 4.4 */1884if (parser->status_code / 100 == 1 || /* 1xx e.g. Continue */1885parser->status_code == 204 || /* No Content */1886parser->status_code == 304 || /* Not Modified */1887parser->flags & F_SKIPBODY) { /* response to a HEAD request */1888return 0;1889}1890
1891if ((parser->flags & F_CHUNKED) || parser->content_length != ULLONG_MAX) {1892return 0;1893}1894
1895return 1;1896}
1897
1898
1899int
1900http_should_keep_alive (const http_parser *parser)1901{
1902if (parser->http_major > 0 && parser->http_minor > 0) {1903/* HTTP/1.1 */1904if (parser->flags & F_CONNECTION_CLOSE) {1905return 0;1906}1907} else {1908/* HTTP/1.0 or earlier */1909if (!(parser->flags & F_CONNECTION_KEEP_ALIVE)) {1910return 0;1911}1912}1913
1914return !http_message_needs_eof(parser);1915}
1916
1917
1918const char *1919http_method_str (enum http_method m)1920{
1921return ELEM_AT(method_strings, m, "<unknown>");1922}
1923
1924
1925void
1926http_parser_init (http_parser *parser, enum http_parser_type t)1927{
1928void *data = parser->data; /* preserve application data */1929memset(parser, 0, sizeof(*parser));1930parser->data = data;1931parser->type = t;1932parser->state = (t == HTTP_REQUEST ? s_start_req : (t == HTTP_RESPONSE ? s_start_res : s_start_req_or_res));1933parser->http_errno = HPE_OK;1934}
1935
1936const char *1937http_errno_name(enum http_errno err) {1938assert(err < (sizeof(http_strerror_tab)/sizeof(http_strerror_tab[0])));1939return http_strerror_tab[err].name;1940}
1941
1942const char *1943http_errno_description(enum http_errno err) {1944assert(err < (sizeof(http_strerror_tab)/sizeof(http_strerror_tab[0])));1945return http_strerror_tab[err].description;1946}
1947
1948static enum http_host_state1949http_parse_host_char(enum http_host_state s, const char ch) {1950switch(s) {1951case s_http_userinfo:1952case s_http_userinfo_start:1953if (ch == '@') {1954return s_http_host_start;1955}1956
1957if (IS_USERINFO_CHAR(ch)) {1958return s_http_userinfo;1959}1960break;1961
1962case s_http_host_start:1963if (ch == '[') {1964return s_http_host_v6_start;1965}1966
1967if (IS_HOST_CHAR(ch)) {1968return s_http_host;1969}1970
1971break;1972
1973case s_http_host:1974if (IS_HOST_CHAR(ch)) {1975return s_http_host;1976}1977
1978/* FALLTHROUGH */1979case s_http_host_v6_end:1980if (ch == ':') {1981return s_http_host_port_start;1982}1983
1984break;1985
1986case s_http_host_v6:1987if (ch == ']') {1988return s_http_host_v6_end;1989}1990
1991/* FALLTHROUGH */1992case s_http_host_v6_start:1993if (IS_HEX(ch) || ch == ':' || ch == '.') {1994return s_http_host_v6;1995}1996
1997break;1998
1999case s_http_host_port:2000case s_http_host_port_start:2001if (IS_NUM(ch)) {2002return s_http_host_port;2003}2004
2005break;2006
2007default:2008break;2009}2010return s_http_host_dead;2011}
2012
2013static int2014http_parse_host(const char * buf, struct http_parser_url *u, int found_at) {2015enum http_host_state s;2016
2017const char *p;2018size_t buflen = u->field_data[UF_HOST].off + u->field_data[UF_HOST].len;2019
2020u->field_data[UF_HOST].len = 0;2021
2022s = found_at ? s_http_userinfo_start : s_http_host_start;2023
2024for (p = buf + u->field_data[UF_HOST].off; p < buf + buflen; p++) {2025enum http_host_state new_s = http_parse_host_char(s, *p);2026
2027if (new_s == s_http_host_dead) {2028return 1;2029}2030
2031switch(new_s) {2032case s_http_host:2033if (s != s_http_host) {2034u->field_data[UF_HOST].off = p - buf;2035}2036u->field_data[UF_HOST].len++;2037break;2038
2039case s_http_host_v6:2040if (s != s_http_host_v6) {2041u->field_data[UF_HOST].off = p - buf;2042}2043u->field_data[UF_HOST].len++;2044break;2045
2046case s_http_host_port:2047if (s != s_http_host_port) {2048u->field_data[UF_PORT].off = p - buf;2049u->field_data[UF_PORT].len = 0;2050u->field_set |= (1 << UF_PORT);2051}2052u->field_data[UF_PORT].len++;2053break;2054
2055case s_http_userinfo:2056if (s != s_http_userinfo) {2057u->field_data[UF_USERINFO].off = p - buf ;2058u->field_data[UF_USERINFO].len = 0;2059u->field_set |= (1 << UF_USERINFO);2060}2061u->field_data[UF_USERINFO].len++;2062break;2063
2064default:2065break;2066}2067s = new_s;2068}2069
2070/* Make sure we don't end somewhere unexpected */2071switch (s) {2072case s_http_host_start:2073case s_http_host_v6_start:2074case s_http_host_v6:2075case s_http_host_port_start:2076case s_http_userinfo:2077case s_http_userinfo_start:2078return 1;2079default:2080break;2081}2082
2083return 0;2084}
2085
2086int
2087http_parser_parse_url(const char *buf, size_t buflen, int is_connect,2088struct http_parser_url *u)2089{
2090enum state s;2091const char *p;2092enum http_parser_url_fields uf, old_uf;2093int found_at = 0;2094
2095u->port = u->field_set = 0;2096s = is_connect ? s_req_server_start : s_req_spaces_before_url;2097uf = old_uf = UF_MAX;2098
2099for (p = buf; p < buf + buflen; p++) {2100s = parse_url_char(s, *p);2101
2102/* Figure out the next field that we're operating on */2103switch (s) {2104case s_dead:2105return 1;2106
2107/* Skip delimeters */2108case s_req_schema_slash:2109case s_req_schema_slash_slash:2110case s_req_server_start:2111case s_req_query_string_start:2112case s_req_fragment_start:2113continue;2114
2115case s_req_schema:2116uf = UF_SCHEMA;2117break;2118
2119case s_req_server_with_at:2120found_at = 1;2121
2122#ifndef __has_attribute2123# define __has_attribute(x) 02124#endif2125#if __has_attribute(fallthrough)2126__attribute__((fallthrough));2127#endif2128/* FALLTROUGH */2129case s_req_server:2130uf = UF_HOST;2131break;2132
2133case s_req_path:2134uf = UF_PATH;2135break;2136
2137case s_req_query_string:2138uf = UF_QUERY;2139break;2140
2141case s_req_fragment:2142uf = UF_FRAGMENT;2143break;2144
2145default:2146assert(!"Unexpected state");2147return 1;2148}2149
2150/* Nothing's changed; soldier on */2151if (uf == old_uf) {2152u->field_data[uf].len++;2153continue;2154}2155
2156u->field_data[uf].off = p - buf;2157u->field_data[uf].len = 1;2158
2159u->field_set |= (1 << uf);2160old_uf = uf;2161}2162
2163/* host must be present if there is a schema */2164/* parsing http:///toto will fail */2165if ((u->field_set & ((1 << UF_SCHEMA) | (1 << UF_HOST))) != 0) {2166if (http_parse_host(buf, u, found_at) != 0) {2167return 1;2168}2169}2170
2171/* CONNECT requests can only contain "hostname:port" */2172if (is_connect && u->field_set != ((1 << UF_HOST)|(1 << UF_PORT))) {2173return 1;2174}2175
2176if (u->field_set & (1 << UF_PORT)) {2177/* Don't bother with endp; we've already validated the string */2178unsigned long v = strtoul(buf + u->field_data[UF_PORT].off, NULL, 10);2179
2180/* Ports have a max value of 2^16 */2181if (v > 0xffff) {2182return 1;2183}2184
2185u->port = (uint16_t) v;2186}2187
2188return 0;2189}
2190
2191void
2192http_parser_pause(http_parser *parser, int paused) {2193/* Users should only be pausing/unpausing a parser that is not in an error2194* state. In non-debug builds, there's not much that we can do about this
2195* other than ignore it.
2196*/
2197if (HTTP_PARSER_ERRNO(parser) == HPE_OK ||2198HTTP_PARSER_ERRNO(parser) == HPE_PAUSED) {2199SET_ERRNO((paused) ? HPE_PAUSED : HPE_OK);2200} else {2201assert(0 && "Attempting to pause parser in error state");2202}2203}
2204
2205int
2206http_body_is_final(const struct http_parser *parser) {2207return parser->state == s_message_done;2208}
2209
2210unsigned long2211http_parser_version(void) {2212return HTTP_PARSER_VERSION_MAJOR * 0x10000 |2213HTTP_PARSER_VERSION_MINOR * 0x00100 |2214HTTP_PARSER_VERSION_PATCH * 0x00001;2215}
2216