| # Copyright 2011, Google Inc. |
| # All rights reserved. |
| # |
| # Redistribution and use in source and binary forms, with or without |
| # modification, are permitted provided that the following conditions are |
| # met: |
| # |
| # * Redistributions of source code must retain the above copyright |
| # notice, this list of conditions and the following disclaimer. |
| # * Redistributions in binary form must reproduce the above |
| # copyright notice, this list of conditions and the following disclaimer |
| # in the documentation and/or other materials provided with the |
| # distribution. |
| # * Neither the name of Google Inc. nor the names of its |
| # contributors may be used to endorse or promote products derived from |
| # this software without specific prior written permission. |
| # |
| # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| |
| |
| """Utilities for parsing and formatting headers that follow the grammar defined |
| in HTTP RFC http://www.ietf.org/rfc/rfc2616.txt. |
| """ |
| |
| |
| import urlparse |
| |
| |
| _SEPARATORS = '()<>@,;:\\"/[]?={} \t' |
| |
| |
| def _is_char(c): |
| """Returns true iff c is in CHAR as specified in HTTP RFC.""" |
| |
| return ord(c) <= 127 |
| |
| |
| def _is_ctl(c): |
| """Returns true iff c is in CTL as specified in HTTP RFC.""" |
| |
| return ord(c) <= 31 or ord(c) == 127 |
| |
| |
| class ParsingState(object): |
| |
| def __init__(self, data): |
| self.data = data |
| self.head = 0 |
| |
| |
| def peek(state, pos=0): |
| """Peeks the character at pos from the head of data.""" |
| |
| if state.head + pos >= len(state.data): |
| return None |
| |
| return state.data[state.head + pos] |
| |
| |
| def consume(state, amount=1): |
| """Consumes specified amount of bytes from the head and returns the |
| consumed bytes. If there's not enough bytes to consume, returns None. |
| """ |
| |
| if state.head + amount > len(state.data): |
| return None |
| |
| result = state.data[state.head:state.head + amount] |
| state.head = state.head + amount |
| return result |
| |
| |
| def consume_string(state, expected): |
| """Given a parsing state and a expected string, consumes the string from |
| the head. Returns True if consumed successfully. Otherwise, returns |
| False. |
| """ |
| |
| pos = 0 |
| |
| for c in expected: |
| if c != peek(state, pos): |
| return False |
| pos += 1 |
| |
| consume(state, pos) |
| return True |
| |
| |
| def consume_lws(state): |
| """Consumes a LWS from the head. Returns True if any LWS is consumed. |
| Otherwise, returns False. |
| |
| LWS = [CRLF] 1*( SP | HT ) |
| """ |
| |
| original_head = state.head |
| |
| consume_string(state, '\r\n') |
| |
| pos = 0 |
| |
| while True: |
| c = peek(state, pos) |
| if c == ' ' or c == '\t': |
| pos += 1 |
| else: |
| if pos == 0: |
| state.head = original_head |
| return False |
| else: |
| consume(state, pos) |
| return True |
| |
| |
| def consume_lwses(state): |
| """Consumes *LWS from the head.""" |
| |
| while consume_lws(state): |
| pass |
| |
| |
| def consume_token(state): |
| """Consumes a token from the head. Returns the token or None if no token |
| was found. |
| """ |
| |
| pos = 0 |
| |
| while True: |
| c = peek(state, pos) |
| if c is None or c in _SEPARATORS or _is_ctl(c) or not _is_char(c): |
| if pos == 0: |
| return None |
| |
| return consume(state, pos) |
| else: |
| pos += 1 |
| |
| |
| def consume_token_or_quoted_string(state): |
| """Consumes a token or a quoted-string, and returns the token or unquoted |
| string. If no token or quoted-string was found, returns None. |
| """ |
| |
| original_head = state.head |
| |
| if not consume_string(state, '"'): |
| return consume_token(state) |
| |
| result = [] |
| |
| expect_quoted_pair = False |
| |
| while True: |
| if not expect_quoted_pair and consume_lws(state): |
| result.append(' ') |
| continue |
| |
| c = consume(state) |
| if c is None: |
| # quoted-string is not enclosed with double quotation |
| state.head = original_head |
| return None |
| elif expect_quoted_pair: |
| expect_quoted_pair = False |
| if _is_char(c): |
| result.append(c) |
| else: |
| # Non CHAR character found in quoted-pair |
| state.head = original_head |
| return None |
| elif c == '\\': |
| expect_quoted_pair = True |
| elif c == '"': |
| return ''.join(result) |
| elif _is_ctl(c): |
| # Invalid character %r found in qdtext |
| state.head = original_head |
| return None |
| else: |
| result.append(c) |
| |
| |
| def quote_if_necessary(s): |
| """Quotes arbitrary string into quoted-string.""" |
| |
| quote = False |
| if s == '': |
| return '""' |
| |
| result = [] |
| for c in s: |
| if c == '"' or c in _SEPARATORS or _is_ctl(c) or not _is_char(c): |
| quote = True |
| |
| if c == '"' or _is_ctl(c): |
| result.append('\\' + c) |
| else: |
| result.append(c) |
| |
| if quote: |
| return '"' + ''.join(result) + '"' |
| else: |
| return ''.join(result) |
| |
| |
| def parse_uri(uri): |
| """Parse absolute URI then return host, port and resource.""" |
| |
| parsed = urlparse.urlsplit(uri) |
| if parsed.scheme != 'wss' and parsed.scheme != 'ws': |
| # |uri| must be a relative URI. |
| # TODO(toyoshim): Should validate |uri|. |
| return None, None, uri |
| |
| if parsed.hostname is None: |
| return None, None, None |
| |
| port = None |
| try: |
| port = parsed.port |
| except ValueError, e: |
| # port property cause ValueError on invalid null port description like |
| # 'ws://host:/path'. |
| return None, None, None |
| |
| if port is None: |
| if parsed.scheme == 'ws': |
| port = 80 |
| else: |
| port = 443 |
| |
| path = parsed.path |
| if not path: |
| path += '/' |
| if parsed.query: |
| path += '?' + parsed.query |
| if parsed.fragment: |
| path += '#' + parsed.fragment |
| |
| return parsed.hostname, port, path |
| |
| |
| try: |
| urlparse.uses_netloc.index('ws') |
| except ValueError, e: |
| # urlparse in Python2.5.1 doesn't have 'ws' and 'wss' entries. |
| urlparse.uses_netloc.append('ws') |
| urlparse.uses_netloc.append('wss') |
| |
| |
| # vi:sts=4 sw=4 et |