| // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| // Derived from: |
| // mozilla/netwerk/protocol/http/src/nsHttpChunkedDecoder.cpp |
| // The license block is: |
| /* ***** BEGIN LICENSE BLOCK ***** |
| * Version: MPL 1.1/GPL 2.0/LGPL 2.1 |
| * |
| * The contents of this file are subject to the Mozilla Public License Version |
| * 1.1 (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * http://www.mozilla.org/MPL/ |
| * |
| * Software distributed under the License is distributed on an "AS IS" basis, |
| * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License |
| * for the specific language governing rights and limitations under the |
| * License. |
| * |
| * The Original Code is Mozilla. |
| * |
| * The Initial Developer of the Original Code is |
| * Netscape Communications. |
| * Portions created by the Initial Developer are Copyright (C) 2001 |
| * the Initial Developer. All Rights Reserved. |
| * |
| * Contributor(s): |
| * Darin Fisher <darin@netscape.com> (original author) |
| * |
| * Alternatively, the contents of this file may be used under the terms of |
| * either the GNU General Public License Version 2 or later (the "GPL"), or |
| * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), |
| * in which case the provisions of the GPL or the LGPL are applicable instead |
| * of those above. If you wish to allow use of your version of this file only |
| * under the terms of either the GPL or the LGPL, and not to allow others to |
| * use your version of this file under the terms of the MPL, indicate your |
| * decision by deleting the provisions above and replace them with the notice |
| * and other provisions required by the GPL or the LGPL. If you do not delete |
| * the provisions above, a recipient may use your version of this file under |
| * the terms of any one of the MPL, the GPL or the LGPL. |
| * |
| * ***** END LICENSE BLOCK ***** */ |
| |
| #include "net/http/http_chunked_decoder.h" |
| |
| #include <algorithm> |
| |
| #include "base/logging.h" |
| #include "base/strings/string_number_conversions.h" |
| #include "base/strings/string_piece.h" |
| #include "base/strings/string_util.h" |
| #include "net/base/net_errors.h" |
| |
| namespace net { |
| |
| // Absurdly long size to avoid imposing a constraint on chunked encoding |
| // extensions. |
| const size_t HttpChunkedDecoder::kMaxLineBufLen = 16384; |
| |
| HttpChunkedDecoder::HttpChunkedDecoder() |
| : chunk_remaining_(0), |
| chunk_terminator_remaining_(false), |
| reached_last_chunk_(false), |
| reached_eof_(false), |
| bytes_after_eof_(0) { |
| } |
| |
| int HttpChunkedDecoder::FilterBuf(char* buf, int buf_len) { |
| int result = 0; |
| |
| while (buf_len > 0) { |
| if (chunk_remaining_ > 0) { |
| // Since |chunk_remaining_| is positive and |buf_len| an int, the minimum |
| // of the two must be an int. |
| int num = static_cast<int>( |
| std::min(chunk_remaining_, static_cast<int64_t>(buf_len))); |
| |
| buf_len -= num; |
| chunk_remaining_ -= num; |
| |
| result += num; |
| buf += num; |
| |
| // After each chunk's data there should be a CRLF. |
| if (chunk_remaining_ == 0) |
| chunk_terminator_remaining_ = true; |
| continue; |
| } else if (reached_eof_) { |
| bytes_after_eof_ += buf_len; |
| break; // Done! |
| } |
| |
| int bytes_consumed = ScanForChunkRemaining(buf, buf_len); |
| if (bytes_consumed < 0) |
| return bytes_consumed; // Error |
| |
| buf_len -= bytes_consumed; |
| if (buf_len > 0) |
| memmove(buf, buf + bytes_consumed, buf_len); |
| } |
| |
| return result; |
| } |
| |
| int HttpChunkedDecoder::ScanForChunkRemaining(const char* buf, int buf_len) { |
| DCHECK_EQ(0, chunk_remaining_); |
| DCHECK_GT(buf_len, 0); |
| |
| int bytes_consumed = 0; |
| |
| size_t index_of_lf = base::StringPiece(buf, buf_len).find('\n'); |
| if (index_of_lf != base::StringPiece::npos) { |
| buf_len = static_cast<int>(index_of_lf); |
| if (buf_len && buf[buf_len - 1] == '\r') // Eliminate a preceding CR. |
| buf_len--; |
| bytes_consumed = static_cast<int>(index_of_lf) + 1; |
| |
| // Make buf point to the full line buffer to parse. |
| if (!line_buf_.empty()) { |
| line_buf_.append(buf, buf_len); |
| buf = line_buf_.data(); |
| buf_len = static_cast<int>(line_buf_.size()); |
| } |
| |
| if (reached_last_chunk_) { |
| if (buf_len > 0) |
| DVLOG(1) << "ignoring http trailer"; |
| else |
| reached_eof_ = true; |
| } else if (chunk_terminator_remaining_) { |
| if (buf_len > 0) { |
| DLOG(ERROR) << "chunk data not terminated properly"; |
| return ERR_INVALID_CHUNKED_ENCODING; |
| } |
| chunk_terminator_remaining_ = false; |
| } else if (buf_len > 0) { |
| // Ignore any chunk-extensions. |
| size_t index_of_semicolon = base::StringPiece(buf, buf_len).find(';'); |
| if (index_of_semicolon != base::StringPiece::npos) |
| buf_len = static_cast<int>(index_of_semicolon); |
| |
| if (!ParseChunkSize(buf, buf_len, &chunk_remaining_)) { |
| DLOG(ERROR) << "Failed parsing HEX from: " << |
| std::string(buf, buf_len); |
| return ERR_INVALID_CHUNKED_ENCODING; |
| } |
| |
| if (chunk_remaining_ == 0) |
| reached_last_chunk_ = true; |
| } else { |
| DLOG(ERROR) << "missing chunk-size"; |
| return ERR_INVALID_CHUNKED_ENCODING; |
| } |
| line_buf_.clear(); |
| } else { |
| // Save the partial line; wait for more data. |
| bytes_consumed = buf_len; |
| |
| // Ignore a trailing CR |
| if (buf[buf_len - 1] == '\r') |
| buf_len--; |
| |
| if (line_buf_.length() + buf_len > kMaxLineBufLen) { |
| DLOG(ERROR) << "Chunked line length too long"; |
| return ERR_INVALID_CHUNKED_ENCODING; |
| } |
| |
| line_buf_.append(buf, buf_len); |
| } |
| return bytes_consumed; |
| } |
| |
| |
| // While the HTTP 1.1 specification defines chunk-size as 1*HEX |
| // some sites rely on more lenient parsing. |
| // http://www.yahoo.com/, for example, pads chunk-size with trailing spaces |
| // (0x20) to be 7 characters long, such as "819b ". |
| // |
| // A comparison of browsers running on WindowsXP shows that |
| // they will parse the following inputs (egrep syntax): |
| // |
| // Let \X be the character class for a hex digit: [0-9a-fA-F] |
| // |
| // RFC 7230: ^\X+$ |
| // IE7: ^\X+[^\X]*$ |
| // Safari 3.1: ^[\t\r ]*\X+[\t ]*$ |
| // Firefox 3: ^[\t\f\v\r ]*[+]?(0x)?\X+[^\X]*$ |
| // Opera 9.51: ^[\t\f\v ]*[+]?(0x)?\X+[^\X]*$ |
| // |
| // Our strategy is to be as strict as possible, while not breaking |
| // known sites. |
| // |
| // Us: ^\X+[ ]*$ |
| bool HttpChunkedDecoder::ParseChunkSize(const char* start, |
| int len, |
| int64_t* out) { |
| DCHECK_GE(len, 0); |
| |
| // Strip trailing spaces |
| while (len > 0 && start[len - 1] == ' ') |
| len--; |
| |
| // Be more restrictive than HexStringToInt64; |
| // don't allow inputs with leading "-", "+", "0x", "0X" |
| base::StringPiece chunk_size(start, len); |
| if (chunk_size.find_first_not_of("0123456789abcdefABCDEF") |
| != base::StringPiece::npos) { |
| return false; |
| } |
| |
| int64_t parsed_number; |
| bool ok = base::HexStringToInt64(chunk_size, &parsed_number); |
| if (ok && parsed_number >= 0) { |
| *out = parsed_number; |
| return true; |
| } |
| return false; |
| } |
| |
| } // namespace net |