Class: Pitchfork::HttpParser

Inherits:
Object
  • Object
show all
Defined in:
lib/pitchfork/http_parser.rb,
ext/pitchfork_http/pitchfork_http.c

Constant Summary collapse

DEFAULTS =

default parameters we merge into the request env for Rack handlers

{
  "rack.errors" => $stderr,
  "rack.multiprocess" => true,
  "rack.multithread" => false,
  "rack.run_once" => false,
  "rack.version" => [1, 2],
  "rack.hijack?" => true,
  "SCRIPT_NAME" => "",

  # this is not in the Rack spec, but some apps may rely on it
  "SERVER_SOFTWARE" => "Pitchfork #{Pitchfork::Const::UNICORN_VERSION}"
}
NULL_IO =
StringIO.new.binmode
HTTP_RESPONSE_START =

:stopdoc:

[ 'HTTP', '/1.1 ' ]
EMPTY_ARRAY =
[].freeze
CHUNK_MAX =

The maximum size of a single chunk when using chunked transfer encoding. This is only a theoretical maximum used to detect errors in clients; it is highly unlikely to encounter clients that send more than several kilobytes at once.

OFFT2NUM(UH_OFF_T_MAX)
LENGTH_MAX =

The maximum size of the body as specified by Content-Length. This is only a theoretical maximum, the actual limit is subject to the limits of the file system used for Dir.tmpdir.

OFFT2NUM(UH_OFF_T_MAX)
@@input_class =
Pitchfork::TeeInput
@@check_client_connection =
false

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#newObject

Creates a new parser.



3817
3818
3819
3820
3821
3822
3823
3824
3825
3826
# File 'ext/pitchfork_http/pitchfork_http.c', line 3817

/*
 * Sets up a parser: resets the parser state, then installs a fresh empty
 * String as the input buffer and a fresh Hash as the request env.
 * Returns self.
 */
static VALUE HttpParser_init(VALUE self)
{
  struct http_parser *parser = data_get(self);
  VALUE fresh;

  http_parser_init(parser);

  fresh = rb_str_new(NULL, 0);
  RB_OBJ_WRITE(self, &parser->buf, fresh);

  /* pre-size the env: even the simplest request will have 10 keys */
  fresh = rb_hash_new_capa(32);
  RB_OBJ_WRITE(self, &parser->env, fresh);

  return self;
}

Class Method Details

.check_client_connectionObject



38
39
40
# File 'lib/pitchfork/http_parser.rb', line 38

# Reader for the class-wide @@check_client_connection setting.
# When the setting is truthy, #read calls check_client_connection(socket)
# after the request has been parsed.
def self.check_client_connection
  @@check_client_connection
end

.check_client_connection=(bool) ⇒ Object



42
43
44
# File 'lib/pitchfork/http_parser.rb', line 42

# Writer for the class-wide @@check_client_connection setting.
# +bool+ is stored as-is; #read only tests it for truthiness.
def self.check_client_connection=(bool)
  @@check_client_connection = bool
end

.input_classObject



30
31
32
# File 'lib/pitchfork/http_parser.rb', line 30

# Reader for the class-wide @@input_class setting: the class that #read
# instantiates (with the socket and the parser) to build "rack.input"
# for requests whose content length is not zero.
def self.input_class
  @@input_class
end

.input_class=(klass) ⇒ Object



34
35
36
# File 'lib/pitchfork/http_parser.rb', line 34

# Writer for the class-wide @@input_class setting.
# #read calls +klass.new(socket, parser)+, so +klass+ must accept those
# two arguments.
def self.input_class=(klass)
  @@input_class = klass
end

.is_chunked?(v) ⇒ Boolean

called by ext/pitchfork_http/pitchfork_http.rl via rb_funcall

Returns:

  • (Boolean)

Raises:



144
145
146
147
148
149
150
151
152
153
154
155
156
157
# File 'lib/pitchfork/http_parser.rb', line 144

# Decides whether a Transfer-Encoding header value +v+ ends with the
# "chunked" coding.  Codings are comma-separated and compared after
# stripping whitespace and downcasing.
#
# Returns true when "chunked" is the last coding and appears only once,
# false when "chunked" does not appear at all.
#
# Raises Pitchfork::HttpParserError (with an empty backtrace) when
# "chunked" is listed twice or is listed but not last.
def self.is_chunked?(v) # :nodoc:
  codings = v.split(',').map { |coding| coding.strip.downcase }
  last_is_chunked = codings.pop == 'chunked'
  chunked_elsewhere = codings.include?('chunked')

  if last_is_chunked
    raise Pitchfork::HttpParserError, 'double chunked', [] if chunked_elsewhere
    true
  else
    raise Pitchfork::HttpParserError, 'chunked not last', [] if chunked_elsewhere
    false
  end
end

Instance Method Details

#add_parse(buffer) ⇒ nil

adds the contents of buffer to the internal buffer and attempts to continue parsing. Returns the env Hash on success or nil if more data is needed.

Raises HttpParserError if there are parsing errors.

Returns:

  • (nil)


3935
3936
3937
3938
3939
3940
3941
3942
3943
# File 'ext/pitchfork_http/pitchfork_http.c', line 3935

/*
 * Appends +buffer+ to the parser's internal buffer and resumes parsing.
 * +buffer+ must be a String (a TypeError is raised otherwise).
 * The return value is whatever HttpParser_parse returns.
 */
static VALUE HttpParser_add_parse(VALUE self, VALUE buffer)
{
  struct http_parser *parser = data_get(self);

  /* validate before touching the internal buffer */
  Check_Type(buffer, T_STRING);

  rb_str_buf_append(parser->buf, buffer);
  return HttpParser_parse(self);
}

#body_eof?Boolean

Detects if we’re done filtering the body or not. This can be used to detect when to stop calling HttpParser#filter_body.

Returns:

  • (Boolean)


3978
3979
3980
3981
3982
3983
3984
3985
3986
# File 'ext/pitchfork_http/pitchfork_http.c', line 3978

/*
 * Reports whether body filtering is finished.  Chunked requests defer to
 * chunked_eof(); all others are done once no Content-Length bytes remain.
 */
static VALUE HttpParser_body_eof(VALUE self)
{
  struct http_parser *hp = data_get(self);

  if (HP_FL_TEST(hp, CHUNKED)) {
    if (chunked_eof(hp))
      return Qtrue;
    return Qfalse;
  }

  if (hp->len.content == 0)
    return Qtrue;
  return Qfalse;
}

#bufObject



4039
4040
4041
4042
# File 'ext/pitchfork_http/pitchfork_http.c', line 4039

/* Returns the parser's current input buffer object. */
static VALUE HttpParser_buf(VALUE self)
{
  struct http_parser *parser = data_get(self);

  return parser->buf;
}

#callObject

for rack.hijack, we respond to this method so no extra allocation of a proc object



104
105
106
107
# File 'lib/pitchfork/http_parser.rb', line 104

# Implements the rack.hijack callable: the parser itself responds to
# #call so no separate proc object has to be allocated.  Marks the
# parser as hijacked and exposes the client socket as "rack.hijack_io".
# Returns that socket.
def call
  hijacked!
  request_env = env
  request_env['rack.hijack_io'] = request_env['pitchfork.socket']
end

#check_client_connection(socket) ⇒ Object

:nodoc:



114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
# File 'lib/pitchfork/http_parser.rb', line 114

# Checks that the client is still there before the request is dispatched.
#
# For a TCPSocket the TCP_INFO socket option is read and its first byte
# is compared against the closed/closing TCP states; Errno::EPIPE is
# raised when it matches.  If TCP_INFO cannot be read, or the socket is
# not a TCPSocket, the check falls back to write_http_header(socket).
def check_client_connection(socket) # :nodoc:
  return write_http_header(socket) unless TCPSocket === socket

  begin
    tcp_info = socket.getsockopt(Socket::IPPROTO_TCP, Socket::TCP_INFO)
  rescue IOError, SystemCallError
    return write_http_header(socket)
  end

  tcp_state = tcp_info.data.unpack1("C")
  case tcp_state
  when 6, 7, 8, 9, 11 # TIME_WAIT, CLOSE, CLOSE_WAIT, LAST_ACK, CLOSING
    raise Errno::EPIPE, "client closed connection", EMPTY_ARRAY
  end
end

#clearObject

Resets the parser to its initial state so that you can reuse it rather than making new ones.



3835
3836
3837
3838
3839
3840
3841
3842
3843
3844
3845
3846
3847
# File 'ext/pitchfork_http/pitchfork_http.c', line 3835

/*
 * Resets the parser for reuse.  A hijacked parser gets brand new buffer
 * and env objects via HttpParser_init; otherwise the parser state is
 * reset and the existing env Hash is emptied in place.  Returns self.
 */
static VALUE HttpParser_clear(VALUE self)
{
  struct http_parser *hp = data_get(self);

  if (HP_FL_TEST(hp, HIJACK)) {
    /* we can't safely reuse .buf and .env if hijacked */
    return HttpParser_init(self);
  } else {
    http_parser_init(hp);
    rb_hash_clear(hp->env);
  }

  return self;
}

#content_lengthnil, Integer

Returns the number of bytes left to run through HttpParser#filter_body. This will initially be the value of the “Content-Length” HTTP header after header parsing is complete and will decrease in value as HttpParser#filter_body is called for each chunk. This should return zero for requests with no body.

This will return nil on “Transfer-Encoding: chunked” requests.

Returns:

  • (nil, Integer)


3877
3878
3879
3880
3881
3882
# File 'ext/pitchfork_http/pitchfork_http.c', line 3877

/*
 * Returns the number of body bytes still to be consumed as an Integer,
 * or nil for chunked requests.
 */
static VALUE HttpParser_content_length(VALUE self)
{
  struct http_parser *hp = data_get(self);

  if (HP_FL_TEST(hp, CHUNKED))
    return Qnil;

  return OFFT2NUM(hp->len.content);
}

#envObject



4044
4045
4046
4047
# File 'ext/pitchfork_http/pitchfork_http.c', line 4044

/* Returns the parser's current request env object. */
static VALUE HttpParser_env(VALUE self)
{
  struct http_parser *parser = data_get(self);

  return parser->env;
}

#filter_body(dst, src) ⇒ Object

Takes a String of src, will modify data if dechunking is done. Returns nil if there is more data left to process. Returns src if body processing is complete. When returning src, it may modify src so the start of the string points to where the body ended so that trailer processing can begin.

Raises HttpParserError if there are dechunking errors. Basically this is a glorified memcpy(3) that copies src into dst while filtering it through the dechunker.



4072
4073
4074
4075
4076
4077
4078
4079
4080
4081
4082
4083
4084
4085
4086
4087
4088
4089
4090
4091
4092
4093
4094
4095
4096
4097
4098
4099
4100
4101
4102
4103
4104
4105
4106
4107
4108
4109
4110
4111
4112
4113
4114
4115
4116
4117
4118
4119
4120
4121
4122
4123
4124
4125
4126
4127
4128
4129
4130
4131
4132
4133
4134
# File 'ext/pitchfork_http/pitchfork_http.c', line 4072

/*
 * Copies body bytes from +src+ into +dst+, dechunking them when the
 * request uses chunked transfer encoding.
 *
 * Returns nil while more body data is expected, or +src+ once body
 * processing is complete; in that case +src+ has been advanced so it
 * starts where the body ended (so trailer parsing can begin).
 *
 * +src+ must be a String (TypeError otherwise); +dst+ is converted with
 * StringValue().  Raises HttpParserError on dechunking errors.
 */
static VALUE HttpParser_filter_body(VALUE self, VALUE dst, VALUE src)
{
  struct http_parser *hp = data_get(self);
  char *srcptr;
  long srclen;

  /*
   * src is dereferenced through RSTRING_PTR/RSTRING_LEN below, so reject
   * non-Strings up front (add_parse does the same for its buffer)
   * instead of reading through a bogus pointer.
   */
  Check_Type(src, T_STRING);
  StringValue(dst);

  /*
   * Capture the pointer/length only after StringValue(dst): the
   * conversion may call back into Ruby code.
   */
  srcptr = RSTRING_PTR(src);
  srclen = RSTRING_LEN(src);

  if (HP_FL_TEST(hp, CHUNKED)) {
    if (!chunked_eof(hp)) {
      rb_str_modify(dst);
      rb_str_resize(dst, srclen); /* we can never copy more than srclen bytes */

      hp->s.dest_offset = 0;
      RB_OBJ_WRITE(self, &hp->cont, dst);
      RB_OBJ_WRITE(self, &hp->buf, src);
      http_parser_execute(self, hp, srcptr, srclen);
      if (hp->cs == http_parser_error)
        parser_raise(eHttpParserError, "Invalid HTTP format, parsing fails.");

      assert(hp->s.dest_offset <= hp->offset &&
             "destination buffer overflow");
      /* drop the consumed input and trim dst to what was actually written */
      advance_str(src, hp->offset);
      rb_str_set_len(dst, hp->s.dest_offset);

      if (RSTRING_LEN(dst) == 0 && chunked_eof(hp)) {
        assert(hp->len.chunk == 0 && "chunk at EOF but more to parse");
      } else {
        src = Qnil;
      }
    }
  } else {
    /* no need to enter the Ragel machine for unchunked transfers */
    assert(hp->len.content >= 0 && "negative Content-Length");
    if (hp->len.content > 0) {
      long nr = MIN(srclen, hp->len.content);

      rb_str_modify(dst);
      rb_str_resize(dst, nr);
      /*
       * using rb_str_replace() to avoid memcpy() doesn't help in
       * most cases because a GC-aware programmer will pass an explicit
       * buffer to env["rack.input"].read and reuse the buffer in a loop.
       * This causes copy-on-write behavior to be triggered anyways
       * when the +src+ buffer is modified (when reading off the socket).
       */
      RB_OBJ_WRITE(self, &hp->buf, src);
      memcpy(RSTRING_PTR(dst), srcptr, nr);
      hp->len.content -= nr;
      if (hp->len.content == 0) {
        /* whole body consumed: flag end of request and park the state machine */
        HP_FL_SET(hp, REQEOF);
        hp->cs = http_parser_first_final;
      }
      advance_str(src, nr);
      src = Qnil;
    }
  }
  hp->offset = 0; /* for trailer parsing */
  return src;
}

#headers(env, buf) ⇒ Object



3956
3957
3958
3959
3960
3961
3962
3963
3964
# File 'ext/pitchfork_http/pitchfork_http.c', line 3956

/*
 * Installs +buf+ as the parser's input buffer and +env+ as its request
 * env, then runs the parser.  The return value is whatever
 * HttpParser_parse returns.
 */
static VALUE HttpParser_headers(VALUE self, VALUE env, VALUE buf)
{
  struct http_parser *parser = data_get(self);

  /* RB_OBJ_WRITE keeps the GC write barrier intact for both references */
  RB_OBJ_WRITE(self, &parser->buf, buf);
  RB_OBJ_WRITE(self, &parser->env, env);

  return HttpParser_parse(self);
}

#headers?Boolean

This should be used to detect if a request has headers (and if the response will have headers as well). HTTP/0.9 requests should return false, all subsequent HTTP versions will return true

Returns:

  • (Boolean)


4032
4033
4034
4035
4036
4037
# File 'ext/pitchfork_http/pitchfork_http.c', line 4032

/* Returns true when the HASHEADER flag is set on the parser, false otherwise. */
static VALUE HttpParser_has_headers(VALUE self)
{
  struct http_parser *hp = data_get(self);

  if (HP_FL_TEST(hp, HASHEADER))
    return Qtrue;

  return Qfalse;
}

#hijacked!Object



4049
4050
4051
4052
4053
4054
4055
4056
# File 'ext/pitchfork_http/pitchfork_http.c', line 4049

/*
 * Sets the HIJACK flag on the parser, which makes the next clear
 * allocate new buffer/env objects instead of reusing the current ones.
 * Returns self.
 */
static VALUE HttpParser_hijacked_bang(VALUE self)
{
  struct http_parser *hp;

  hp = data_get(self);
  HP_FL_SET(hp, HIJACK);

  return self;
}

#hijacked?Boolean

Returns:

  • (Boolean)


109
110
111
# File 'lib/pitchfork/http_parser.rb', line 109

# True once the request env carries a "rack.hijack_io" entry, i.e. after
# #call has handed the socket over to the application.
def hijacked?
  env.key?('rack.hijack_io')
end

#keepalive?Boolean

This should be used to detect if a request can really handle keepalives and pipelining. Currently, the rules are:

  1. MUST be a GET or HEAD request

  2. MUST be HTTP/1.1 or HTTP/1.0 with “Connection: keep-alive”

  3. MUST NOT have “Connection: close” set

Returns:

  • (Boolean)


3999
4000
4001
4002
4003
4004
# File 'ext/pitchfork_http/pitchfork_http.c', line 3999

/*
 * Returns true only when every flag in the KEEPALIVE mask is set on the
 * parser, false otherwise.
 */
static VALUE HttpParser_keepalive(VALUE self)
{
  struct http_parser *hp = data_get(self);

  if (HP_FL_ALL(hp, KEEPALIVE))
    return Qtrue;

  return Qfalse;
}

#next?Boolean

Exactly like HttpParser#keepalive?, except it will reset the internal parser state on next parse if it returns true.

Returns:

  • (Boolean)


4013
4014
4015
4016
4017
4018
4019
4020
4021
4022
# File 'ext/pitchfork_http/pitchfork_http.c', line 4013

/*
 * Same test as HttpParser_keepalive, but when it succeeds the TO_CLEAR
 * flag is also set so the next parse resets the parser state first.
 */
static VALUE HttpParser_next(VALUE self)
{
  struct http_parser *hp = data_get(self);
  VALUE reusable = Qfalse;

  if (HP_FL_ALL(hp, KEEPALIVE)) {
    HP_FL_SET(hp, TO_CLEAR);
    reusable = Qtrue;
  }

  return reusable;
}

#parsenil

Takes a Hash and a String of data, parses the String of data filling in the Hash, returning the Hash if parsing is finished, nil otherwise. When returning the env Hash, it may modify data to point to where body processing should begin.

Raises HttpParserError if there are parsing errors.

Returns:

  • (nil)


3896
3897
3898
3899
3900
3901
3902
3903
3904
3905
3906
3907
3908
3909
3910
3911
3912
3913
3914
3915
3916
3917
3918
3919
3920
3921
3922
# File 'ext/pitchfork_http/pitchfork_http.c', line 3896

/*
 * Runs the parser over the bytes currently held in the parser's buffer.
 *
 * Returns the env Hash once the state machine reaches its final state or
 * the start of a chunked body, Qnil when more data is needed.
 *
 * Raises e413 when the parsed offset exceeds MAX_HEADER_LEN and
 * eHttpParserError when the state machine ends in its error state.
 */
static VALUE HttpParser_parse(VALUE self)
{
  struct http_parser *hp = data_get(self);
  VALUE data = hp->buf; /* taken before the optional clear below */

  /* TO_CLEAR is set by HttpParser_next: reset state before parsing again */
  if (HP_FL_TEST(hp, TO_CLEAR))
    HttpParser_clear(self);

  http_parser_execute(self, hp, RSTRING_PTR(data), RSTRING_LEN(data));
  /* the size limit is enforced before any state check */
  if (hp->offset > MAX_HEADER_LEN)
    parser_raise(e413, "HTTP header is too large");

  if (hp->cs == http_parser_first_final ||
      hp->cs == http_parser_en_ChunkedBody) {
    /* drop the consumed bytes so data starts where body processing begins */
    advance_str(data, hp->offset + 1);
    hp->offset = 0;
    /* NOTE(review): INTRAILER presumably means trailers were being parsed,
     * so finishing here ends the request -- confirm against the flag's
     * definition */
    if (HP_FL_TEST(hp, INTRAILER))
      HP_FL_SET(hp, REQEOF);

    return hp->env;
  }

  if (hp->cs == http_parser_error)
    parser_raise(eHttpParserError, "Invalid HTTP format, parsing fails.");

  /* neither finished nor failed: the caller must supply more data */
  return Qnil;
}

#read(socket) ⇒ Object

Does the majority of the IO processing. It has been written in Ruby using about 8 different IO processing strategies.

It is currently carefully constructed to make sure that it gets the best possible performance for the common case: GET requests that are fully complete after a single read(2)

Anyone who thinks they can make it faster is more than welcome to take a crack at it.

Returns an environment hash suitable for Rack if successful. This does minimal exception trapping and it is up to the caller to handle any socket errors (e.g. user aborted upload).



61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
# File 'lib/pitchfork/http_parser.rb', line 61

# Reads and parses one request from +socket+ and returns the request env
# Hash, completed with the Rack entries the application expects.
#
# The first readpartial goes straight into the parser's own buffer so a
# small request that arrives in one read needs no further work; only
# when the parser wants more data are additional reads appended.
#
# Socket errors and parser exceptions are not rescued here; they
# propagate to the caller.
def read(socket)
  e = env

  # From https://www.ietf.org/rfc/rfc3875:
  # "Script authors should be aware that the REMOTE_ADDR and
  #  REMOTE_HOST meta-variables (see sections 4.1.8 and 4.1.9)
  #  may not identify the ultimate source of the request.  They
  #  identify the client for the immediate request to the server;
  #  that client may be a proxy, gateway, or other intermediary
  #  acting on behalf of the actual source client."
  peer = socket.remote_address
  e['REMOTE_ADDR'] = peer.unix? ? "127.0.0.1" : peer.ip_address

  # fast path: a small GET usually parses completely after one read
  socket.readpartial(16384, buf)
  if parse.nil?
    # incomplete request: keep feeding the parser until it returns the
    # env; a parser exception ends the loop by propagating
    nil until add_parse(socket.readpartial(16384))
  end

  check_client_connection(socket) if @@check_client_connection

  e['rack.input'] = if content_length == 0
    NULL_IO
  else
    @@input_class.new(socket, self)
  end

  # for Rack hijacking in Rack 1.5 and later
  e['pitchfork.socket'] = socket
  e['rack.hijack'] = self

  e.delete('PATH_INFO') if PATH_INFO_REQUIRES_LEADING_SLASH && e['PATH_INFO'] == '*'

  e.merge!(DEFAULTS)
end

#response_start_sentObject

ignored by Ruby anyways



4148
4149
4150
4151
4152
4153
# File 'ext/pitchfork_http/pitchfork_http.c', line 4148

/* Returns true when the RESSTART flag is set on the parser, false otherwise. */
static VALUE HttpParser_rssget(VALUE self)
{
  struct http_parser *hp = data_get(self);

  if (HP_FL_TEST(hp, RESSTART))
    return Qtrue;

  return Qfalse;
}

#response_start_sent=(boolean) ⇒ Object



4136
4137
4138
4139
4140
4141
4142
4143
4144
4145
4146
# File 'ext/pitchfork_http/pitchfork_http.c', line 4136

/*
 * Sets the RESSTART flag when +boolean+ is truthy and clears it
 * otherwise.  Returns +boolean+ unchanged.
 */
static VALUE HttpParser_rssset(VALUE self, VALUE boolean)
{
  struct http_parser *hp = data_get(self);
  int enable = RTEST(boolean) ? 1 : 0;

  if (enable) {
    HP_FL_SET(hp, RESSTART);
  } else {
    HP_FL_UNSET(hp, RESSTART);
  }

  /* Ruby discards a setter's return value anyway */
  return boolean;
}

#trailers(req, data) ⇒ nil

This is an alias for HttpParser#headers

Returns:

  • (nil)


3956
3957
3958
3959
3960
3961
3962
3963
3964
# File 'ext/pitchfork_http/pitchfork_http.c', line 3956

/*
 * Installs +buf+ as the parser's input buffer and +env+ as its request
 * env, then runs the parser.  The return value is whatever
 * HttpParser_parse returns.
 */
static VALUE HttpParser_headers(VALUE self, VALUE env, VALUE buf)
{
  struct http_parser *parser = data_get(self);

  /* RB_OBJ_WRITE keeps the GC write barrier intact for both references */
  RB_OBJ_WRITE(self, &parser->buf, buf);
  RB_OBJ_WRITE(self, &parser->env, env);

  return HttpParser_parse(self);
}

#write_http_header(socket) ⇒ Object

:nodoc:



136
137
138
139
140
141
# File 'lib/pitchfork/http_parser.rb', line 136

# Writes the start of the HTTP status line to +socket+, piece by piece
# from HTTP_RESPONSE_START, and records that the response start was
# sent.  Does nothing (returns nil) when the request has no headers.
def write_http_header(socket) # :nodoc:
  return unless headers?

  self.response_start_sent = true
  HTTP_RESPONSE_START.each do |fragment|
    socket.write(fragment)
  end
end