| // Copyright 2014 The Chromium Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| // A streaming validator for UTF-8. Validation is based on the definition in |
| // RFC-3629. In particular, it does not reject the invalid characters rejected |
| // by base::IsStringUTF8(). |
| // |
| // The implementation detects errors on the first possible byte. |
| |
| #ifndef BASE_I18N_STREAMING_UTF8_VALIDATOR_H_ |
| #define BASE_I18N_STREAMING_UTF8_VALIDATOR_H_ |
| |
| #include <string> |
| |
| #include "base/i18n/base_i18n_export.h" |
| #include "base/macros.h" |
| #include "starboard/types.h" |
| |
| namespace base { |
| |
| class BASE_I18N_EXPORT StreamingUtf8Validator { |
| public: |
| // The validator exposes 3 states. It starts in state VALID_ENDPOINT. As it |
| // processes characters it alternates between VALID_ENDPOINT and |
| // VALID_MIDPOINT. If it encounters an invalid byte or UTF-8 sequence the |
| // state changes permanently to INVALID. |
| enum State { |
| VALID_ENDPOINT, |
| VALID_MIDPOINT, |
| INVALID |
| }; |
| |
| StreamingUtf8Validator() : state_(0u) {} |
| // Trivial destructor intentionally omitted. |
| |
| // Validate |size| bytes starting at |data|. If the concatenation of all calls |
| // to AddBytes() since this object was constructed or reset is a valid UTF-8 |
| // string, returns VALID_ENDPOINT. If it could be the prefix of a valid UTF-8 |
| // string, returns VALID_MIDPOINT. If an invalid byte or UTF-8 sequence was |
| // present, returns INVALID. |
| State AddBytes(const char* data, size_t size); |
| |
| // Return the object to a freshly-constructed state so that it can be re-used. |
| void Reset(); |
| |
| // Validate a complete string using the same criteria. Returns true if the |
| // string only contains complete, valid UTF-8 codepoints. |
| static bool Validate(const std::string& string); |
| |
| private: |
| // The current state of the validator. Value 0 is the initial/valid state. |
| // The state is stored as an offset into |kUtf8ValidatorTables|. The special |
| // state |kUtf8InvalidState| is invalid. |
| uint8_t state_; |
| |
| // This type could be made copyable but there is currently no use-case for |
| // it. |
| DISALLOW_COPY_AND_ASSIGN(StreamingUtf8Validator); |
| }; |
| |
| } // namespace base |
| |
| #endif // BASE_I18N_STREAMING_UTF8_VALIDATOR_H_ |