Class: Pitchfork::HttpParser
- Inherits:
-
Object
- Object
- Pitchfork::HttpParser
- Defined in:
- lib/pitchfork/http_parser.rb,
ext/pitchfork_http/pitchfork_http.c
Constant Summary collapse
- DEFAULTS =
default parameters we merge into the request env for Rack handlers
{ "rack.errors" => $stderr, "rack.multiprocess" => true, "rack.multithread" => false, "rack.run_once" => false, "rack.version" => [1, 2], "rack.hijack?" => true, "SCRIPT_NAME" => "", # this is not in the Rack spec, but some apps may rely on it "SERVER_SOFTWARE" => "Pitchfork #{Pitchfork::Const::UNICORN_VERSION}" }
- NULL_IO =
StringIO.new.binmode
- HTTP_RESPONSE_START =
:stopdoc:
[ 'HTTP', '/1.1 ' ]
- EMPTY_ARRAY =
[].freeze
- CHUNK_MAX =
The maximum size of a single chunk when using chunked transfer encoding. This is only a theoretical maximum used to detect errors in clients; it is highly unlikely to encounter clients that send more than several kilobytes at once.
OFFT2NUM(UH_OFF_T_MAX)
- LENGTH_MAX =
The maximum size of the body as specified by Content-Length. This is only a theoretical maximum; the actual limit is subject to the limits of the file system used for Dir.tmpdir.
OFFT2NUM(UH_OFF_T_MAX)
- @@input_class =
Pitchfork::TeeInput
- @@check_client_connection =
false
Class Method Summary collapse
- .check_client_connection ⇒ Object
- .check_client_connection=(bool) ⇒ Object
- .input_class ⇒ Object
- .input_class=(klass) ⇒ Object
-
.is_chunked?(v) ⇒ Boolean
called by ext/pitchfork_http/pitchfork_http.rl via rb_funcall.
Instance Method Summary collapse
-
#add_parse(buffer) ⇒ nil
adds the contents of
buffer to the internal buffer and attempts to continue parsing. -
#body_eof? ⇒ Boolean
Detects if we’re done filtering the body or not.
- #buf ⇒ Object
-
#call ⇒ Object
for rack.hijack, we respond to this method so no extra allocation of a proc object.
-
#check_client_connection(socket) ⇒ Object
:nodoc:.
-
#clear ⇒ Object
Resets the parser to its initial state so that you can reuse it rather than making new ones.
-
#content_length ⇒ nil, Integer
Returns the number of bytes left to run through HttpParser#filter_body.
- #env ⇒ Object
-
#filter_body(dst, src) ⇒ Object
Takes a String of
src, will modify data if dechunking is done. - #headers(env, buf) ⇒ Object
-
#headers? ⇒ Boolean
This should be used to detect if a request has headers (and if the response will have headers as well).
- #hijacked! ⇒ Object
- #hijacked? ⇒ Boolean
-
#new ⇒ Object
constructor
Creates a new parser.
-
#keepalive? ⇒ Boolean
This should be used to detect if a request can really handle keepalives and pipelining.
-
#next? ⇒ Boolean
Exactly like HttpParser#keepalive?, except it will reset the internal parser state on next parse if it returns true.
-
#parse ⇒ nil
Parses the data held in the internal buffer (see #buf), filling in the env Hash (see #env). Returns the env Hash if parsing is finished, nil otherwise. When returning the env Hash, it may modify the buffer to point to where body processing should begin.
-
#read(socket) ⇒ Object
Does the majority of the IO processing.
-
#response_start_sent ⇒ Object
ignored by Ruby anyways.
- #response_start_sent=(boolean) ⇒ Object
-
#trailers(req, data) ⇒ nil
This is an alias for HttpParser#headers.
-
#write_http_header(socket) ⇒ Object
:nodoc:.
Constructor Details
#new ⇒ Object
Creates a new parser.
3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 |
# File 'ext/pitchfork_http/pitchfork_http.c', line 3817 static VALUE HttpParser_init(VALUE self) { struct http_parser *hp = data_get(self); http_parser_init(hp); RB_OBJ_WRITE(self, &hp->buf, rb_str_new(NULL, 0)); RB_OBJ_WRITE(self, &hp->env, rb_hash_new_capa(32)); // Even the simplest request will have 10 keys return self; } |
Class Method Details
.check_client_connection ⇒ Object
38 39 40 |
# File 'lib/pitchfork/http_parser.rb', line 38

# Class-level reader for the @@check_client_connection setting
# shared by all parsers.
def self.check_client_connection
  @@check_client_connection
end
.check_client_connection=(bool) ⇒ Object
42 43 44 |
# File 'lib/pitchfork/http_parser.rb', line 42

# Class-level writer for the @@check_client_connection setting.
# +bool+ is stored as given; no coercion is applied.
def self.check_client_connection=(bool)
  @@check_client_connection = bool
end
.input_class ⇒ Object
30 31 32 |
# File 'lib/pitchfork/http_parser.rb', line 30

# Class-level reader for @@input_class, the class whose instances are
# stored in env['rack.input'] for requests that carry a body.
def self.input_class
  @@input_class
end
.input_class=(klass) ⇒ Object
34 35 36 |
# File 'lib/pitchfork/http_parser.rb', line 34

# Class-level writer for @@input_class. +klass+ is stored as given.
def self.input_class=(klass)
  @@input_class = klass
end
.is_chunked?(v) ⇒ Boolean
called by ext/pitchfork_http/pitchfork_http.rl via rb_funcall
144 145 146 147 148 149 150 151 152 153 154 155 156 157 |
# File 'lib/pitchfork/http_parser.rb', line 144

# Decides whether the comma-separated transfer-coding list +v+ ends with
# "chunked" (compared case-insensitively, ignoring surrounding whitespace).
#
# Returns true when "chunked" is the final coding and appears only once,
# false when "chunked" does not appear at all.
# Raises Pitchfork::HttpParserError (with an empty backtrace) when
# "chunked" is repeated or is present but not last.
def self.is_chunked?(v) # :nodoc:
  codings = v.split(',').map { |coding| coding.strip.downcase }
  final = codings.pop
  chunked_earlier = codings.include?('chunked')

  if final == 'chunked'
    raise Pitchfork::HttpParserError, 'double chunked', [] if chunked_earlier
    true
  else
    raise Pitchfork::HttpParserError, 'chunked not last', [] if chunked_earlier
    false
  end
end
Instance Method Details
#add_parse(buffer) ⇒ nil
adds the contents of buffer to the internal buffer and attempts to continue parsing. Returns the env Hash on success or nil if more data is needed.
Raises HttpParserError if there are parsing errors.
3935 3936 3937 3938 3939 3940 3941 3942 3943 |
# File 'ext/pitchfork_http/pitchfork_http.c', line 3935 static VALUE HttpParser_add_parse(VALUE self, VALUE buffer) { struct http_parser *hp = data_get(self); Check_Type(buffer, T_STRING); rb_str_buf_append(hp->buf, buffer); return HttpParser_parse(self); } |
#body_eof? ⇒ Boolean
Detects if we’re done filtering the body or not. This can be used to detect when to stop calling HttpParser#filter_body.
3978 3979 3980 3981 3982 3983 3984 3985 3986 |
# File 'ext/pitchfork_http/pitchfork_http.c', line 3978 static VALUE HttpParser_body_eof(VALUE self) { struct http_parser *hp = data_get(self); if (HP_FL_TEST(hp, CHUNKED)) return chunked_eof(hp) ? Qtrue : Qfalse; return hp->len.content == 0 ? Qtrue : Qfalse; } |
#buf ⇒ Object
4039 4040 4041 4042 |
# File 'ext/pitchfork_http/pitchfork_http.c', line 4039 static VALUE HttpParser_buf(VALUE self) { return data_get(self)->buf; } |
#call ⇒ Object
for rack.hijack, we respond to this method so no extra allocation of a proc object
104 105 106 107 |
# File 'lib/pitchfork/http_parser.rb', line 104

# Entry point for rack.hijack: the parser itself is stored as
# env['rack.hijack'], so responding to #call avoids allocating a proc.
# Marks the parser as hijacked, exposes the socket stored under
# 'pitchfork.socket' as 'rack.hijack_io', and returns that socket.
def call
  hijacked!
  rack_env = env
  rack_env['rack.hijack_io'] = rack_env['pitchfork.socket']
end
#check_client_connection(socket) ⇒ Object
:nodoc:
114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 |
# File 'lib/pitchfork/http_parser.rb', line 114

# Tries to detect a client that has already gone away before the
# application is invoked.
#
# For a TCPSocket the TCP_INFO socket option is queried and its first
# byte is inspected as the connection state; Errno::EPIPE is raised for
# the listed closing/closed states. For any other socket, or when
# TCP_INFO cannot be read, the start of the HTTP response is written
# instead (see #write_http_header).
def check_client_connection(socket) # :nodoc:
  return write_http_header(socket) unless TCPSocket === socket

  begin
    tcp_info = socket.getsockopt(Socket::IPPROTO_TCP, Socket::TCP_INFO)
  rescue IOError, SystemCallError
    return write_http_header(socket)
  end

  state = tcp_info.data.unpack1("C")
  case state
  when 6, 7, 8, 9, 11 # TIME_WAIT, CLOSE, CLOSE_WAIT, LAST_ACK, CLOSING
    raise Errno::EPIPE, "client closed connection", EMPTY_ARRAY
  end
end
#clear ⇒ Object
Resets the parser to its initial state so that you can reuse it rather than making new ones.
3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 |
# File 'ext/pitchfork_http/pitchfork_http.c', line 3835 static VALUE HttpParser_clear(VALUE self) { struct http_parser *hp = data_get(self); /* we can't safely reuse .buf and .env if hijacked */ if (HP_FL_TEST(hp, HIJACK)) return HttpParser_init(self); http_parser_init(hp); rb_hash_clear(hp->env); return self; } |
#content_length ⇒ nil, Integer
Returns the number of bytes left to run through HttpParser#filter_body. This will initially be the value of the “Content-Length” HTTP header after header parsing is complete and will decrease in value as HttpParser#filter_body is called for each chunk. This should return zero for requests with no body.
This will return nil on “Transfer-Encoding: chunked” requests.
3877 3878 3879 3880 3881 3882 |
# File 'ext/pitchfork_http/pitchfork_http.c', line 3877 static VALUE HttpParser_content_length(VALUE self) { struct http_parser *hp = data_get(self); return HP_FL_TEST(hp, CHUNKED) ? Qnil : OFFT2NUM(hp->len.content); } |
#env ⇒ Object
4044 4045 4046 4047 |
# File 'ext/pitchfork_http/pitchfork_http.c', line 4044 static VALUE HttpParser_env(VALUE self) { return data_get(self)->env; } |
#filter_body(dst, src) ⇒ Object
Takes a String src and copies its body data into dst, dechunking it if the request uses chunked transfer encoding; src is modified in the process. Returns nil if there is more data left to process. Returns src if body processing is complete. When returning src, it may modify src so the start of the string points to where the body ended so that trailer processing can begin.
Raises HttpParserError if there are dechunking errors. Basically this is a glorified memcpy(3) that copies src into dst while filtering it through the dechunker.
4072 4073 4074 4075 4076 4077 4078 4079 4080 4081 4082 4083 4084 4085 4086 4087 4088 4089 4090 4091 4092 4093 4094 4095 4096 4097 4098 4099 4100 4101 4102 4103 4104 4105 4106 4107 4108 4109 4110 4111 4112 4113 4114 4115 4116 4117 4118 4119 4120 4121 4122 4123 4124 4125 4126 4127 4128 4129 4130 4131 4132 4133 4134 |
# File 'ext/pitchfork_http/pitchfork_http.c', line 4072 static VALUE HttpParser_filter_body(VALUE self, VALUE dst, VALUE src) { struct http_parser *hp = data_get(self); char *srcptr; long srclen; srcptr = RSTRING_PTR(src); srclen = RSTRING_LEN(src); StringValue(dst); if (HP_FL_TEST(hp, CHUNKED)) { if (!chunked_eof(hp)) { rb_str_modify(dst); rb_str_resize(dst, srclen); /* we can never copy more than srclen bytes */ hp->s.dest_offset = 0; RB_OBJ_WRITE(self, &hp->cont, dst); RB_OBJ_WRITE(self, &hp->buf, src); http_parser_execute(self, hp, srcptr, srclen); if (hp->cs == http_parser_error) parser_raise(eHttpParserError, "Invalid HTTP format, parsing fails."); assert(hp->s.dest_offset <= hp->offset && "destination buffer overflow"); advance_str(src, hp->offset); rb_str_set_len(dst, hp->s.dest_offset); if (RSTRING_LEN(dst) == 0 && chunked_eof(hp)) { assert(hp->len.chunk == 0 && "chunk at EOF but more to parse"); } else { src = Qnil; } } } else { /* no need to enter the Ragel machine for unchunked transfers */ assert(hp->len.content >= 0 && "negative Content-Length"); if (hp->len.content > 0) { long nr = MIN(srclen, hp->len.content); rb_str_modify(dst); rb_str_resize(dst, nr); /* * using rb_str_replace() to avoid memcpy() doesn't help in * most cases because a GC-aware programmer will pass an explicit * buffer to env["rack.input"].read and reuse the buffer in a loop. * This causes copy-on-write behavior to be triggered anyways * when the +src+ buffer is modified (when reading off the socket). */ RB_OBJ_WRITE(self, &hp->buf, src); memcpy(RSTRING_PTR(dst), srcptr, nr); hp->len.content -= nr; if (hp->len.content == 0) { HP_FL_SET(hp, REQEOF); hp->cs = http_parser_first_final; } advance_str(src, nr); src = Qnil; } } hp->offset = 0; /* for trailer parsing */ return src; } |
#headers(env, buf) ⇒ Object
3956 3957 3958 3959 3960 3961 3962 3963 3964 |
# File 'ext/pitchfork_http/pitchfork_http.c', line 3956 static VALUE HttpParser_headers(VALUE self, VALUE env, VALUE buf) { struct http_parser *hp = data_get(self); RB_OBJ_WRITE(self, &hp->buf, buf); RB_OBJ_WRITE(self, &hp->env, env); return HttpParser_parse(self); } |
#headers? ⇒ Boolean
This should be used to detect if a request has headers (and if the response will have headers as well). HTTP/0.9 requests should return false, all subsequent HTTP versions will return true
4032 4033 4034 4035 4036 4037 |
# File 'ext/pitchfork_http/pitchfork_http.c', line 4032 static VALUE HttpParser_has_headers(VALUE self) { struct http_parser *hp = data_get(self); return HP_FL_TEST(hp, HASHEADER) ? Qtrue : Qfalse; } |
#hijacked! ⇒ Object
4049 4050 4051 4052 4053 4054 4055 4056 |
# File 'ext/pitchfork_http/pitchfork_http.c', line 4049 static VALUE HttpParser_hijacked_bang(VALUE self) { struct http_parser *hp = data_get(self); HP_FL_SET(hp, HIJACK); return self; } |
#hijacked? ⇒ Boolean
109 110 111 |
# File 'lib/pitchfork/http_parser.rb', line 109

# True once 'rack.hijack_io' has been stored in the env (see #call).
def hijacked?
  env.key?('rack.hijack_io')
end
#keepalive? ⇒ Boolean
This should be used to detect if a request can really handle keepalives and pipelining. Currently, the rules are:
-
MUST be a GET or HEAD request
-
MUST be HTTP/1.1
or HTTP/1.0 with “Connection: keep-alive” -
MUST NOT have “Connection: close” set
3999 4000 4001 4002 4003 4004 |
# File 'ext/pitchfork_http/pitchfork_http.c', line 3999 static VALUE HttpParser_keepalive(VALUE self) { struct http_parser *hp = data_get(self); return HP_FL_ALL(hp, KEEPALIVE) ? Qtrue : Qfalse; } |
#next? ⇒ Boolean
Exactly like HttpParser#keepalive?, except it will reset the internal parser state on next parse if it returns true.
4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 |
# File 'ext/pitchfork_http/pitchfork_http.c', line 4013 static VALUE HttpParser_next(VALUE self) { struct http_parser *hp = data_get(self); if (HP_FL_ALL(hp, KEEPALIVE)) { HP_FL_SET(hp, TO_CLEAR); return Qtrue; } return Qfalse; } |
#parse ⇒ nil
Parses the data held in the internal buffer (see #buf), filling in the env Hash (see #env). Returns the env Hash if parsing is finished, nil otherwise. When returning the env Hash, it may modify the buffer to point to where body processing should begin.
Raises HttpParserError if there are parsing errors.
3896 3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922 |
/* File 'ext/pitchfork_http/pitchfork_http.c', line 3896 */

/*
 * Runs the parser over the internal buffer (hp->buf).
 *
 * Returns hp->env once the parser reaches its final state or the start
 * of a chunked body, and Qnil when more data is needed.
 * Raises e413 when the parse offset exceeds MAX_HEADER_LEN and
 * eHttpParserError when the parser enters its error state.
 */
static VALUE HttpParser_parse(VALUE self)
{
  struct http_parser *hp = data_get(self);
  VALUE data = hp->buf;

  /* TO_CLEAR is set by HttpParser_next(): reset before parsing again */
  if (HP_FL_TEST(hp, TO_CLEAR))
    HttpParser_clear(self);

  http_parser_execute(self, hp, RSTRING_PTR(data), RSTRING_LEN(data));

  /* the size limit is checked before any state checks */
  if (hp->offset > MAX_HEADER_LEN)
    parser_raise(e413, "HTTP header is too large");

  if (hp->cs == http_parser_first_final ||
      hp->cs == http_parser_en_ChunkedBody) {
    /*
     * drop the consumed bytes from the front of the buffer so it starts
     * where body processing should begin
     * NOTE(review): the "+ 1" presumably skips the byte at hp->offset
     * itself; confirm against http_parser_execute()
     */
    advance_str(data, hp->offset + 1);
    hp->offset = 0;
    /* finishing while in the trailer means the whole request is done */
    if (HP_FL_TEST(hp, INTRAILER))
      HP_FL_SET(hp, REQEOF);

    return hp->env;
  }

  if (hp->cs == http_parser_error)
    parser_raise(eHttpParserError, "Invalid HTTP format, parsing fails.");

  /* not finished and not failed: the caller must supply more data */
  return Qnil;
}
#read(socket) ⇒ Object
Does the majority of the IO processing. It has been written in Ruby using about 8 different IO processing strategies.
It is currently carefully constructed to make sure that it gets the best possible performance for the common case: GET requests that are fully complete after a single read(2)
Anyone who thinks they can make it faster is more than welcome to take a crack at it.
Returns an environment hash suitable for Rack if successful. This does minimal exception trapping and it is up to the caller to handle any socket errors (e.g. user aborted upload).
61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 |
# File 'lib/pitchfork/http_parser.rb', line 61

# Reads and parses one request from +socket+ and returns the env Hash,
# populated with REMOTE_ADDR, rack.input, the hijack entries and DEFAULTS.
# Exceptions raised by the socket or the parser propagate to the caller.
def read(socket)
  rack_env = env

  # From https://www.ietf.org/rfc/rfc3875:
  # "Script authors should be aware that the REMOTE_ADDR and
  #  REMOTE_HOST meta-variables (see sections 4.1.8 and 4.1.9)
  #  may not identify the ultimate source of the request.  They
  #  identify the client for the immediate request to the server;
  #  that client may be a proxy, gateway, or other intermediary
  #  acting on behalf of the actual source client."
  peer = socket.remote_address
  rack_env['REMOTE_ADDR'] = peer.unix? ? "127.0.0.1" : peer.ip_address

  # short circuit the common case with small GET requests first
  socket.readpartial(16384, buf)
  if parse.nil?
    # Parser is not done, queue up more data to read and continue parsing
    # an Exception thrown from the parser will throw us out of the loop
    until add_parse(socket.readpartial(16384))
      # keep reading until the parser reports completion
    end
  end

  check_client_connection(socket) if @@check_client_connection

  # a zero content length means no body: share the empty NULL_IO
  rack_env['rack.input'] =
    if 0 == content_length
      NULL_IO
    else
      @@input_class.new(socket, self)
    end

  # for Rack hijacking in Rack 1.5 and later
  rack_env['pitchfork.socket'] = socket
  rack_env['rack.hijack'] = self

  rack_env.delete('PATH_INFO') if PATH_INFO_REQUIRES_LEADING_SLASH && rack_env['PATH_INFO'] == '*'

  rack_env.merge!(DEFAULTS)
end
#response_start_sent ⇒ Object
ignored by Ruby anyways
4148 4149 4150 4151 4152 4153 |
# File 'ext/pitchfork_http/pitchfork_http.c', line 4148 static VALUE HttpParser_rssget(VALUE self) { struct http_parser *hp = data_get(self); return HP_FL_TEST(hp, RESSTART) ? Qtrue : Qfalse; } |
#response_start_sent=(boolean) ⇒ Object
4136 4137 4138 4139 4140 4141 4142 4143 4144 4145 4146 |
# File 'ext/pitchfork_http/pitchfork_http.c', line 4136 static VALUE HttpParser_rssset(VALUE self, VALUE boolean) { struct http_parser *hp = data_get(self); if (RTEST(boolean)) HP_FL_SET(hp, RESSTART); else HP_FL_UNSET(hp, RESSTART); return boolean; /* ignored by Ruby anyways */ } |
#trailers(req, data) ⇒ nil
This is an alias for HttpParser#headers
3956 3957 3958 3959 3960 3961 3962 3963 3964 |
# File 'ext/pitchfork_http/pitchfork_http.c', line 3956 static VALUE HttpParser_headers(VALUE self, VALUE env, VALUE buf) { struct http_parser *hp = data_get(self); RB_OBJ_WRITE(self, &hp->buf, buf); RB_OBJ_WRITE(self, &hp->env, env); return HttpParser_parse(self); } |
#write_http_header(socket) ⇒ Object
:nodoc:
136 137 138 139 140 141 |
# File 'lib/pitchfork/http_parser.rb', line 136

# Writes the fragments of HTTP_RESPONSE_START to +socket+ and records
# that the response start was sent. Does nothing (returns nil) when the
# request has no headers.
def write_http_header(socket) # :nodoc:
  return unless headers?

  self.response_start_sent = true
  HTTP_RESPONSE_START.each do |fragment|
    socket.write(fragment)
  end
end