blob: e249ddeb8d3e6d511c5662d2298f604c6b0cb68a [file] [log] [blame]
// Copyright 2018 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef MEDIA_LEARNING_IMPL_ONE_HOT_H_
#define MEDIA_LEARNING_IMPL_ONE_HOT_H_
#include <map>
#include <memory>
#include <vector>
#include "base/component_export.h"
#include "base/macros.h"
#include "media/learning/common/labelled_example.h"
#include "media/learning/common/learning_task.h"
#include "media/learning/common/value.h"
#include "media/learning/impl/model.h"
#include "third_party/abseil-cpp/absl/types/optional.h"
namespace media {
namespace learning {
// Converter class that memorizes a mapping from nominal features to numeric
// features with a one-hot encoding.
class COMPONENT_EXPORT(LEARNING_IMPL) OneHotConverter {
public:
// Build a one-hot converter for all nominal features |task|, using the values
// found in |training_data|.
OneHotConverter(const LearningTask& task, const TrainingData& training_data);
OneHotConverter(const OneHotConverter&) = delete;
OneHotConverter& operator=(const OneHotConverter&) = delete;
~OneHotConverter();
// Return the LearningTask that has only nominal features.
const LearningTask& converted_task() const { return converted_task_; }
// Convert |training_data| to be a one-hot model.
TrainingData Convert(const TrainingData& training_data) const;
// Convert |feature_vector| to match the one-hot model.
FeatureVector Convert(const FeatureVector& feature_vector) const;
private:
// Build a converter for original feature |index|.
void ProcessOneFeature(
size_t index,
const LearningTask::ValueDescription& original_description,
const TrainingData& training_data);
// Learning task with the feature descriptions adjusted for the one-hot model.
LearningTask converted_task_;
// [value] == vector index that should be 1 in the one-hot vector.
using ValueVectorIndexMap = std::map<Value, size_t>;
// [original task feature index] = optional converter for it. If the feature
// was kNumeric to begin with, then there will be no converter.
std::vector<absl::optional<ValueVectorIndexMap>> converters_;
};
// Model that uses |Converter| to convert instances before sending them to the
// underlying model.
class COMPONENT_EXPORT(LEARNING_IMPL) ConvertingModel : public Model {
public:
ConvertingModel(std::unique_ptr<OneHotConverter> converter,
std::unique_ptr<Model> model);
ConvertingModel(const ConvertingModel&) = delete;
ConvertingModel& operator=(const ConvertingModel&) = delete;
~ConvertingModel() override;
// Model
TargetHistogram PredictDistribution(const FeatureVector& instance) override;
private:
std::unique_ptr<OneHotConverter> converter_;
std::unique_ptr<Model> model_;
};
} // namespace learning
} // namespace media
#endif // MEDIA_LEARNING_IMPL_ONE_HOT_H_