Replay Parsing
Set of tools for parsing replay data into a more amenable form for machine learning. These are mainly used via their Python API.
- Author
Bryce Ferenczi
- Version
0.1
- Date
2024-05-27
- Copyright
Copyright (c) 2024
-
namespace cvt
Functions
-
template<typename T, typename UnitT>
auto transformUnits(const std::vector<UnitT> &units) noexcept -> py::array_t<T> Converts vector of unit structures to a {n_unit, feature} array.
- Template Parameters:
T – output data type of the array
UnitT – type of unit data
- Parameters:
units – Units to convert to array
- Returns:
2D feature array of dimension {n_unit, feature}
-
template<typename T>
auto transformUnitsByAlliance(const std::vector<Unit> &units) noexcept -> py::dict Unit transformation and return grouped by alliance {self, ally, enemy, neutral}.
- Template Parameters:
T – output datatype of feature array
- Parameters:
units – Vector of units to convert to feature arrays
- Returns:
Dictionary mapping from alliance to feature array
-
template<typename T, std::output_iterator<T> It>
auto expandPlayerRelative(const Image<std::uint8_t> &img, It out) noexcept -> It Converts an image of player relative enums to one-hot encoding, i.e. a mask of each class type.
- Template Parameters:
T – value type of the image returned
It – iterator type to write result to
- Parameters:
img – enum image of player_relative data, assumes SC2 data [1-4] (cvt::Alliance)
out – output iterator to write result to, must be preallocated contiguous data to write image to
- Returns:
output iterator one past the output image data
-
template<typename T, std::output_iterator<T> It>
auto unpackBoolImage(const Image<bool> &image, It out) -> It Unpack bool image to output iterator.
- Template Parameters:
T – value type to unpack to
- Parameters:
image – input image
out – output iterator to write unpacked data into
- Returns:
output iterator following the unpacked image data
-
template<typename T>
auto unpackBoolImage(const Image<bool> &image) noexcept -> std::vector<T> Unpack bool image to flattened std::vector.
- Template Parameters:
T – value type to unpack to
- Parameters:
image – input image
- Returns:
vector of value type of bool image
-
template<typename T, IsSoAType StepDataType>
auto createScalarFeatures(const StepDataType &data, std::size_t timeIdx) -> py::array_t<T> Convert game state of scalars into a feature vector. TODO: Have a dictionary of lambda functions that can normalize each of these features?
- Template Parameters:
T – feature vector arithmetic type
- Parameters:
data – replay data
timeIdx – time index to sample from replay
- Returns:
Feature vector of data
-
template<typename T, typename ReplayDataType>
auto createMinimapFeatures(const ReplayDataType &replay, std::size_t timeIdx, MinimapFeatureFlags includedLayers, bool expandPlayerRel = true) -> py::array_t<T> Create Stacked Features Image from Minimap Data (C, H, W) in the order HeightMap, Visibility, Creep, Alerts, Buildable, Pathable, PlayerRelative.
- Template Parameters:
T – Underlying type of returned image
- Parameters:
replay – Replay data
timeIdx – Time index to sample from
includedLayers – Flags selecting which minimap layers to include in the stacked image
expandPlayerRel – Expand the Player Relative to four 1-hot channels (see cvt::Alliance)
- Returns:
Returns (C,H,W) Image of Type T
-
template<typename B>
struct Caster - #include <replay_parsing.hpp>
Helper Struct to static_cast one type to an arithmetic type.
- Template Parameters:
B – type to cast to
-
struct MinimapFeatureFlags
- #include <replay_parsing.hpp>
Class that maps string description of minimap feature layers to a bit flag.
Public Functions
-
inline constexpr auto getOffset(std::string_view key) const -> std::size_t
Find the index of the layer in the bitset.
- Parameters:
key – Name of the layer to search
- Returns:
Index in the layer array
-
inline void set() noexcept
Set all bits.
-
inline void set(std::string_view key, bool value = true)
Set minimap layer to value.
- Parameters:
key – Name of minimap layer
value – Value to set (default: true)
-
inline auto test(std::string_view key) const -> bool
Get the current value of the minimap layer.
- Parameters:
key – Name of the minimap layer to test
- Returns:
Current activation status
-
inline auto count() const noexcept -> std::size_t
Number of active minimap layers.
- Returns:
Number of active minimap layers
-
inline void reset() noexcept
Set all minimap layers to false.
Public Members
Public Static Attributes
-
static const std::array keys = {"heightMap", "visibility", "creep", "player_relative", "alerts", "buildable", "pathable"}
Minimap feature layers.
-
inline constexpr auto getOffset(std::string_view key) const -> std::size_t
-
template<typename ReplayDataType>
class ReplayParser - #include <replay_parsing.hpp>
Convenience wrapper around ReplayDataSOA to return map of features at each timestep.
- Template Parameters:
ReplayDataType – Type of replay data being parsed
Public Functions
-
inline explicit ReplayParser(const std::filesystem::path &dataPath) noexcept
-
inline void parseReplay(ReplayDataType replayData)
Ingest replay data for sampling.
- Parameters:
replayData – Replay data to parse
-
inline void setPlayerMinimapExpansion(bool flag) noexcept
Set whether to expand player_relative to one hot encoding or keep as enum values.
- Parameters:
flag – true will expand player_relative
-
inline auto getPlayerMinimapExpansion() const noexcept -> bool
Get if the player_relative expansion flag has been set.
- Returns:
if true then player_relative is expanded from enum to one-hot
-
inline void setMinimapFeatures(const std::vector<std::string> &features)
Set the minimap features to stack and emit from the parser. If an empty list is given, all flags are cleared/reset; if the single special key “all” is given, all flags are set.
- Parameters:
features – list of features to set, must be a member of cvt::MinimapFeatureFlags::keys or single key “all”
-
inline auto getMinimapFeatures() const -> py::list
Get the names of the minimap features of the currently set bits. If expandPlayerRelative is set then this will replace […, player_relative, …] with […, self, ally, neutral, enemy, …].
- Returns:
py::list of strings of the currently enabled features
-
inline auto sampleAll(std::size_t index, bool unit_alliance = false) const -> py::dict
Get a python dictionary containing features from that timestep.
- Parameters:
index – index of replay sample
unit_alliance – Whether to group units by alliance in a dictionary (default: false)
- Returns:
Dictionary containing feature data at that point in time
-
inline auto sampleUnits(std::size_t index) const
Sample all unit data from replay at index.
- Parameters:
index – Index from replay to sample from
- Returns:
Unit data transformed into an array [N,D] where N is number of units, D is dimensionality of the feature vector of each unit.
-
inline auto sampleUnitsGroupAlliance(std::size_t index) const
Sample unit data from replay at index, grouped into dictionary of alliances.
- Parameters:
index – Index from replay to sample from
- Returns:
Dictionary of unit data transformed into a [N,D] array grouped by alliance
-
inline auto sampleNeutralUnits(std::size_t index) const
Sample neutral unit data from replay at index.
- Parameters:
index – Index from replay to sample from
- Returns:
Neutral Unit data transformed into a [N,D] array
-
inline auto sampleActions(std::size_t index) const
Sample action from replay at index.
- Parameters:
index – Index from replay to sample from
- Returns:
List of actions made by the player in the native struct form
-
inline auto sampleMinimaps(std::size_t index) const
Sample minimap data from replay at index.
- Parameters:
index – Index from replay to sample from
- Returns:
Minimap data transformed into a tensor [C,H,W] of the layers specified by minimapFeatureFlags
-
inline auto sampleScalars(std::size_t index) const
Sample scalar data from replay at index.
- Parameters:
index – Index from replay to sample from
- Returns:
Scalar data transformed into a vector
-
inline auto size() const noexcept -> std::size_t
Number of timesteps in the replay.
- Returns:
Number of timesteps in the replay
-
inline auto empty() const noexcept -> bool
Check if the replay/parser is empty.
- Returns:
True if empty
-
inline auto data() const noexcept -> const auto&
Read-only reference to the currently loaded data.
- Returns:
Read-only reference to the currently loaded replay data
-
inline auto info() const noexcept -> const ReplayInfo&
Read-only reference to the currently loaded replay header.
- Returns:
Read-only reference to the currently loaded replay header
Private Members
-
UpgradeState upgrade_
Upgrade timing calculator.
-
ReplayDataType replayData_ = {}
Replay data.
-
MinimapFeatureFlags minimapFeatureFlags_ = {}
Minimap feature flags requested.
-
bool expandPlayerRelative_ = {true}
Flag to expand player relative from enum to one-hot in minimap.
-
class UpgradeState
- #include <replay_parsing.hpp>
UpgradeState class loads information stored in yaml and determines the time points of the game when an upgrade is active for the player. This class can be queried with getState with a gameStep to return a one-hot encoding of the currently active upgrades.
Public Functions
-
explicit UpgradeState(std::filesystem::path dataFile)
Create upgrade timing instance using information based on dataFile.
- Parameters:
dataFile – Path to the yaml containing upgrade action data for each game version and race.
-
void setVersion(std::string_view version)
Set the version of the game.
- Parameters:
version – string description of the game version, e.g. 4.9.1.12345
-
void calculateTimes(const std::vector<std::vector<Action>> &playerActions, const std::vector<unsigned int> &gameTime)
Calculate and set the expected research completion times based on the actions and their timepoint.
- Parameters:
playerActions – Vector of actions the player has performed at each timepoint
gameTime – The timepoint in the game each action is associated with
-
template<typename T>
inline auto getState(std::size_t timeIdx) const -> py::array_t<T> Get a one-hot encoding vector of the state of upgrades (0 false, 1 true)
- Template Parameters:
T – datatype of the one-hot encoding
- Parameters:
timeIdx – The timepoint to calculate the state of upgrades
- Returns:
one-hot encoding of currently active upgrades as a py::array
Private Functions
-
auto getValidIds() const -> const std::set<int>&
Get the set of action ids that correspond to upgrade actions based on currentRace_ and gameVersion_.
- Returns:
Set of upgrade actions
-
auto getValidRemap() const -> const std::unordered_map<int, std::array<int, 3>>&
Get remapping of generic tiered “upgrade” action to the distinct tiers.
- Returns:
Mapping from generic tiered action id to actual tier ids.
-
void loadInfo()
Fill out the id2delay_ mapping with the information from dataFile_ based on the currentRace_ and gameVersion_.
Private Members
-
std::filesystem::path dataFile_
Path to yaml file that contains upgrade action data from each game version and faction.
-
std::unordered_map<std::string, std::unordered_map<int, int>> gameVersion2id2delay_ = {}
Mapping from game version to a mapping from action/upgrade id to the time it takes to research in game steps.
-
std::vector<int32_t> upgradeTimes_ = {}
The times when the upgrade is active for the player.
-
std::string gameVersion_ = {}
The current version of the game.
-
explicit UpgradeState(std::filesystem::path dataFile)
-
template<typename T, typename UnitT>