blob: 8405303fd9d95bd5fb8fe3ef0a0bcdb70102cd80 [file] [log] [blame]
// Copyright (c) 2011 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "base/utf_offset_string_conversions.h"
#include <algorithm>
#include "base/memory/scoped_ptr.h"
#include "base/string_piece.h"
#include "base/utf_string_conversion_utils.h"
using base::PrepareForUTF16Or32Output;
using base::PrepareForUTF8Output;
using base::ReadUnicodeCharacter;
using base::WriteUnicodeCharacter;
// Converts |src_len| code units starting at |src| from the source Unicode
// character type to the destination Unicode character type, writing each
// converted character to |output| through WriteUnicodeCharacter().
//
// If |offsets_for_adjustment| is non-NULL, LimitOffset is first applied to
// every offset with |src_len| as its limit, and the offsets are then rewritten
// by |offset_adjuster| when it goes out of scope at the end of this function,
// so that they refer to positions in the converted text.
//
// Returns false if any input sequence could not be read. Each such sequence is
// written to |output| as U+FFFD and conversion continues with the rest of the
// input.
template<typename SrcChar, typename DestStdString>
bool ConvertUnicode(const SrcChar* src,
                    size_t src_len,
                    DestStdString* output,
                    std::vector<size_t>* offsets_for_adjustment) {
  if (offsets_for_adjustment) {
    std::for_each(offsets_for_adjustment->begin(),
                  offsets_for_adjustment->end(),
                  LimitOffset<DestStdString>(src_len));
  }

  bool success = true;
  OffsetAdjuster offset_adjuster(offsets_for_adjustment);

  // ICU requires 32-bit numbers.
  int32 src_len32 = static_cast<int32>(src_len);
  for (int32 i = 0; i < src_len32; i++) {
    uint32 code_point;
    size_t original_i = i;
    size_t chars_written = 0;
    if (ReadUnicodeCharacter(src, src_len32, &i, &code_point)) {
      chars_written = WriteUnicodeCharacter(code_point, output);
    } else {
      chars_written = WriteUnicodeCharacter(0xFFFD, output);
      success = false;
    }

    if (offsets_for_adjustment) {
      // NOTE: ReadUnicodeCharacter() adjusts |i| to point _at_ the last
      // character read, not after it (so that incrementing it in the loop
      // increment will place it at the right location), so we need to account
      // for that in determining the amount that was read.
      const size_t chars_read = i - original_i + 1;

      // A character that occupies exactly one unit on both sides can neither
      // shift a later offset nor contain an interior offset, so recording it
      // would only lengthen the list that OffsetAdjuster::AdjustOffset() walks
      // for every offset. Anything else is recorded, including same-length
      // multi-unit characters, because offsets that point inside them must
      // still be invalidated.
      if (chars_read != 1 || chars_written != 1) {
        offset_adjuster.Add(OffsetAdjuster::Adjustment(original_i,
                                                       chars_read,
                                                       chars_written));
      }
    }
  }
  return success;
}
// Single-offset flavour of the UTF-8 -> UTF-16 conversion for a raw buffer.
// |offset_for_adjustment| may be NULL. When it is not, the value it points at
// is carried through the conversion in a one-element vector and written back
// afterwards. Returns whatever ConvertUnicode() returns.
bool UTF8ToUTF16AndAdjustOffset(const char* src,
                                size_t src_len,
                                string16* output,
                                size_t* offset_for_adjustment) {
  // Stays empty when the caller supplied no offset.
  std::vector<size_t> single_offset;
  if (offset_for_adjustment)
    single_offset.assign(1, *offset_for_adjustment);

  PrepareForUTF16Or32Output(src, src_len, output);
  const bool all_valid = ConvertUnicode(src, src_len, output, &single_offset);

  // The conversion only rewrites elements in place, so the vector is non-empty
  // exactly when an offset was supplied above.
  if (!single_offset.empty())
    *offset_for_adjustment = single_offset.front();
  return all_valid;
}
// Multi-offset flavour of the UTF-8 -> UTF-16 conversion for a raw buffer.
// Passes |output| to PrepareForUTF16Or32Output() and then hands everything,
// including the possibly-NULL |offsets_for_adjustment|, to ConvertUnicode().
// Returns ConvertUnicode()'s result.
bool UTF8ToUTF16AndAdjustOffsets(const char* src,
                                 size_t src_len,
                                 string16* output,
                                 std::vector<size_t>* offsets_for_adjustment) {
  PrepareForUTF16Or32Output(src, src_len, output);
  const bool all_valid =
      ConvertUnicode(src, src_len, output, offsets_for_adjustment);
  return all_valid;
}
// Converts |utf8| to UTF-16 and returns the converted string. The success flag
// of the underlying conversion is not reported. |offset_for_adjustment| may be
// NULL; otherwise the pointed-to offset is replaced with its adjusted value.
string16 UTF8ToUTF16AndAdjustOffset(const base::StringPiece& utf8,
                                    size_t* offset_for_adjustment) {
  string16 utf16;

  // No offset to track: run the conversion against an empty offset list.
  if (!offset_for_adjustment) {
    std::vector<size_t> no_offsets;
    UTF8ToUTF16AndAdjustOffsets(utf8.data(), utf8.length(), &utf16,
                                &no_offsets);
    return utf16;
  }

  // Wrap the caller's offset so the vector-based overload can adjust it, then
  // copy the adjusted value back out.
  std::vector<size_t> single_offset(1, *offset_for_adjustment);
  UTF8ToUTF16AndAdjustOffsets(utf8.data(), utf8.length(), &utf16,
                              &single_offset);
  *offset_for_adjustment = single_offset[0];
  return utf16;
}
// Converts |utf8| to UTF-16 and returns the converted string, forwarding
// |offsets_for_adjustment| (which may be NULL) to the raw-buffer overload.
// That overload's boolean result is not reported to the caller.
string16 UTF8ToUTF16AndAdjustOffsets(
    const base::StringPiece& utf8,
    std::vector<size_t>* offsets_for_adjustment) {
  string16 utf16;
  const char* const bytes = utf8.data();
  const size_t byte_count = utf8.length();
  UTF8ToUTF16AndAdjustOffsets(bytes, byte_count, &utf16,
                              offsets_for_adjustment);
  return utf16;
}
// Converts |utf16| to UTF-8 and returns the converted string.
// |offset_for_adjustment| may be NULL; otherwise the pointed-to offset is
// replaced with the value produced by the vector-based overload.
std::string UTF16ToUTF8AndAdjustOffset(
    const base::StringPiece16& utf16,
    size_t* offset_for_adjustment) {
  // Without an offset the conversion still runs against an (empty) list.
  if (!offset_for_adjustment) {
    std::vector<size_t> no_offsets;
    return UTF16ToUTF8AndAdjustOffsets(utf16, &no_offsets);
  }

  // Carry the single offset through the conversion in a one-element vector.
  std::vector<size_t> single_offset(1, *offset_for_adjustment);
  std::string utf8 = UTF16ToUTF8AndAdjustOffsets(utf16, &single_offset);
  *offset_for_adjustment = single_offset.front();
  return utf8;
}
// Converts |utf16| to UTF-8 and returns the converted string, adjusting
// |offsets_for_adjustment| (which may be NULL) along the way. The success flag
// returned by ConvertUnicode() is not propagated to the caller.
std::string UTF16ToUTF8AndAdjustOffsets(
    const base::StringPiece16& utf16,
    std::vector<size_t>* offsets_for_adjustment) {
  std::string utf8;
  PrepareForUTF8Output(utf16.data(), utf16.length(), &utf8);
  ConvertUnicode(utf16.data(), utf16.length(), &utf8,
                 offsets_for_adjustment);
  return utf8;
}
// Describes how one stretch of the original text was converted.
// ConvertUnicode() in this file creates one of these per character, passing:
//   |original_offset| - index in the source at which the character starts.
//   |original_length| - number of source code units the character occupied.
//   |output_length|   - number of code units written to the output for it.
// The three values are stored unchanged in the members of the same names.
OffsetAdjuster::Adjustment::Adjustment(size_t original_offset,
size_t original_length,
size_t output_length)
: original_offset(original_offset),
original_length(original_length),
output_length(output_length) {
}
// Remembers the pointer to the offsets that should be rewritten. Nothing is
// modified here: adjustments are collected through Add() and applied by the
// destructor, which also checks this pointer for NULL before using it.
OffsetAdjuster::OffsetAdjuster(std::vector<size_t>* offsets_for_adjustment)
: offsets_for_adjustment_(offsets_for_adjustment) {
}
// Applies every recorded adjustment to every tracked offset. This is a no-op
// when no offsets vector was supplied or when nothing was recorded via Add().
OffsetAdjuster::~OffsetAdjuster() {
  if (offsets_for_adjustment_ && !adjustments_.empty()) {
    std::vector<size_t>::iterator offset = offsets_for_adjustment_->begin();
    while (offset != offsets_for_adjustment_->end()) {
      AdjustOffset(offset);
      ++offset;
    }
  }
}
// Appends a copy of |adjustment| to the end of the recorded list.
// AdjustOffset() walks that list front to back and stops at the first entry
// located at or after the offset it is adjusting.
void OffsetAdjuster::Add(const Adjustment& adjustment) {
  adjustments_.insert(adjustments_.end(), adjustment);
}
void OffsetAdjuster::AdjustOffset(std::vector<size_t>::iterator offset) {
if (*offset == string16::npos)
return;
size_t adjustment = 0;
for (std::vector<Adjustment>::const_iterator i = adjustments_.begin();
i != adjustments_.end(); ++i) {
if (*offset == i->original_offset && i->output_length == 0) {
*offset = string16::npos;
return;
}
if (*offset <= i->original_offset)
break;
if (*offset < (i->original_offset + i->original_length)) {
*offset = string16::npos;
return;
}
adjustment += (i->original_length - i->output_length);
}
*offset -= adjustment;
}