Codechange: Change internal format of encoded strings to improve robustness and allow expansion. (#13499)

This commit is contained in:
Peter Nelson 2025-02-09 12:45:50 +00:00 committed by GitHub
parent 1193852007
commit dccc6185b9
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 241 additions and 94 deletions

View File

@ -915,17 +915,69 @@ static inline size_t SlCalcStdStringLen(const void *ptr)
* just bail out and do not continue trying to replace the tokens.
* @param str the string to fix.
* @param fix_code whether the legacy 0xE028 and 0xE02A codes are also rewritten to SCC_ENCODED.
*/
static void FixSCCEncoded(std::string &str)
void FixSCCEncoded(std::string &str, bool fix_code)
{
for (size_t i = 0; i < str.size(); /* nothing. */) {
size_t len = Utf8EncodedCharLen(str[i]);
if (len == 0 || i + len > str.size()) break;
if (str.empty()) return;
/* We need to convert from old escape-style encoding to record separator encoding.
* Initial `<SCC_ENCODED><STRINGID>` stays the same.
*
* `:<SCC_ENCODED><STRINGID>` becomes `<RS><SCC_ENCODED><STRINGID>`
* `:<HEX>` becomes `<RS><SCC_ENCODED_NUMERIC><HEX>`
* `:"<STRING>"` becomes `<RS><SCC_ENCODED_STRING><STRING>`
*/
std::string result;
auto output = std::back_inserter(result);
bool is_encoded = false; // Set if we determine by the presence of SCC_ENCODED that the string is an encoded string.
bool in_string = false; // Set if we in a string, between double-quotes.
bool need_type = true; // Set if a parameter type needs to be emitted.
for (auto it = std::begin(str); it != std::end(str); /* nothing */) {
size_t len = Utf8EncodedCharLen(*it);
if (len == 0 || it + len > std::end(str)) break;
char32_t c;
Utf8Decode(&c, &str[i]);
if (c == 0xE028 || c == 0xE02A) Utf8Encode(&str[i], SCC_ENCODED);
i += len;
Utf8Decode(&c, &*it);
if (c == SCC_ENCODED || (fix_code && (c == 0xE028 || c == 0xE02A))) {
Utf8Encode(output, SCC_ENCODED);
need_type = false;
is_encoded = true;
it += len;
continue;
}
/* If the first character is not SCC_ENCODED then we don't have to do any conversion. */
if (!is_encoded) return;
if (c == '"') {
in_string = !in_string;
if (in_string && need_type) {
/* Started a new string parameter. */
Utf8Encode(output, SCC_ENCODED_STRING);
need_type = false;
}
it += len;
continue;
}
if (!in_string && c == ':') {
*output = SCC_RECORD_SEPARATOR;
need_type = true;
it += len;
continue;
}
if (need_type) {
/* Started a new numeric parameter. */
Utf8Encode(output, SCC_ENCODED_NUMERIC);
need_type = false;
}
Utf8Encode(output, c);
it += len;
}
str = result;
}
/**
@ -970,7 +1022,7 @@ static void SlStdString(void *ptr, VarType conv)
StringValidationSettings settings = SVS_REPLACE_WITH_QUESTION_MARK;
if ((conv & SLF_ALLOW_CONTROL) != 0) {
settings = settings | SVS_ALLOW_CONTROL_CODE;
if (IsSavegameVersionBefore(SLV_169)) FixSCCEncoded(*str);
if (IsSavegameVersionBefore(SLV_ENCODED_STRING_FORMAT)) FixSCCEncoded(*str, IsSavegameVersionBefore(SLV_169));
}
if ((conv & SLF_ALLOW_NEWLINE) != 0) {
settings = settings | SVS_ALLOW_NEWLINE;

View File

@ -397,6 +397,8 @@ enum SaveLoadVersion : uint16_t {
SLV_INCREASE_HOUSE_LIMIT, ///< 348 PR#12288 Increase house limit to 4096.
SLV_COMPANY_INAUGURATED_PERIOD_V2, ///< 349 PR#13448 Fix savegame storage for company inaugurated year in wallclock mode.
SLV_ENCODED_STRING_FORMAT, ///< 350 PR#13499 Encoded String format changed.
SL_MAX_VERSION, ///< Highest possible saveload version
};

View File

@ -197,16 +197,26 @@ void ScriptText::ParamCheck::Encode(std::back_insert_iterator<std::string> &outp
struct visitor {
std::back_insert_iterator<std::string> &output;
void operator()(const std::string &value) { fmt::format_to(this->output, ":\"{}\"", value); }
void operator()(const SQInteger &value) { fmt::format_to(this->output, ":{:X}", value); }
void operator()(const std::string &value)
{
Utf8Encode(this->output, SCC_ENCODED_STRING);
fmt::format_to(this->output, "{}", value);
}
void operator()(const SQInteger &value)
{
Utf8Encode(this->output, SCC_ENCODED_NUMERIC);
fmt::format_to(this->output, "{:X}", value);
}
void operator()(const ScriptTextRef &value)
{
fmt::format_to(this->output, ":");
Utf8Encode(this->output, SCC_ENCODED);
fmt::format_to(this->output, "{:X}", value->string);
}
};
*output = SCC_RECORD_SEPARATOR;
std::visit(visitor{output}, *this->param);
this->used = true;
}

View File

@ -90,6 +90,24 @@ std::string FormatArrayAsHex(std::span<const uint8_t> data)
return str;
}
/**
 * Check whether a character is one of the control codes used by the encoded string format.
 * @param c Character to test.
 * @returns True iff \c c is SCC_RECORD_SEPARATOR, SCC_ENCODED, SCC_ENCODED_NUMERIC or SCC_ENCODED_STRING.
 */
static bool IsSccEncodedCode(char32_t c)
{
	return c == SCC_RECORD_SEPARATOR
		|| c == SCC_ENCODED
		|| c == SCC_ENCODED_NUMERIC
		|| c == SCC_ENCODED_STRING;
}
/**
* Copies the valid (UTF-8) characters from \c str up to \c last to the \c dst.
@ -140,7 +158,7 @@ static void StrMakeValid(T &dst, const char *str, const char *last, StringValida
continue;
}
if ((IsPrintable(c) && (c < SCC_SPRITE_START || c > SCC_SPRITE_END)) || ((settings & SVS_ALLOW_CONTROL_CODE) != 0 && c == SCC_ENCODED)) {
if ((IsPrintable(c) && (c < SCC_SPRITE_START || c > SCC_SPRITE_END)) || ((settings & SVS_ALLOW_CONTROL_CODE) != 0 && IsSccEncodedCode(c))) {
/* Copy the character back. Even if dst is current the same as str
* (i.e. no characters have been changed) this is quicker than
* moving the pointers ahead by len */

View File

@ -953,6 +953,79 @@ uint ConvertDisplaySpeedToKmhishSpeed(uint speed, VehicleType type)
return GetVelocityUnits(type).c.FromDisplay(speed * 16, true, 10);
}
/**
* Decodes an encoded string during FormatString.
* @param str The buffer of the encoded string.
* @param builder The string builder to write the string to.
* @returns Updated position position in input buffer.
*/
static const char *DecodeEncodedString(const char *str, StringBuilder &builder)
{
ArrayStringParameters<20> sub_args;
char *p;
StringIndexInTab id(std::strtoul(str, &p, 16));
if (*p != SCC_RECORD_SEPARATOR && *p != '\0') {
while (*p != '\0') p++;
builder += "(invalid SCC_ENCODED)";
return p;
}
if (id >= TAB_SIZE_GAMESCRIPT) {
while (*p != '\0') p++;
builder += "(invalid StringID)";
return p;
}
int i = 0;
while (*p != '\0' && i < 20) {
/* The start of parameter. */
const char *s = ++p;
/* Find end of the parameter. */
for (; *p != '\0' && *p != SCC_RECORD_SEPARATOR; ++p) {}
/* Get the parameter type. */
char32_t parameter_type;
size_t len = Utf8Decode(&parameter_type, s);
s += len;
switch (parameter_type) {
case SCC_ENCODED: {
uint64_t param = std::strtoull(s, &p, 16);
if (param >= TAB_SIZE_GAMESCRIPT) {
while (*p != '\0') p++;
builder += "(invalid sub-StringID)";
return p;
}
param = MakeStringID(TEXT_TAB_GAMESCRIPT_START, StringIndexInTab(param));
sub_args.SetParam(i++, param);
break;
}
case SCC_ENCODED_NUMERIC: {
uint64_t param = std::strtoull(s, &p, 16);
sub_args.SetParam(i++, param);
break;
}
case SCC_ENCODED_STRING: {
sub_args.SetParam(i++, std::string(s, p - s));
break;
}
default:
/* Skip unknown parameter. */
i++;
break;
}
}
StringID stringid = MakeStringID(TEXT_TAB_GAMESCRIPT_START, id);
GetStringWithArgs(builder, stringid, sub_args, true);
return p;
}
/**
* Parse most format codes within a string and write the result to a buffer.
* @param builder The string builder to write the final string to.
@ -1018,87 +1091,9 @@ static void FormatString(StringBuilder &builder, const char *str_arg, StringPara
args.SetTypeOfNextParameter(b);
switch (b) {
case SCC_ENCODED: {
ArrayStringParameters<20> sub_args;
char *p;
StringIndexInTab stringid(std::strtoul(str, &p, 16));
if (*p != ':' && *p != '\0') {
while (*p != '\0') p++;
str = p;
builder += "(invalid SCC_ENCODED)";
break;
}
if (stringid >= TAB_SIZE_GAMESCRIPT) {
while (*p != '\0') p++;
str = p;
builder += "(invalid StringID)";
break;
}
int i = 0;
while (*p != '\0' && i < 20) {
uint64_t param;
const char *s = ++p;
/* Find the next value */
bool instring = false;
bool escape = false;
for (;; p++) {
if (*p == '\\') {
escape = true;
continue;
}
if (*p == '"' && escape) {
escape = false;
continue;
}
escape = false;
if (*p == '"') {
instring = !instring;
continue;
}
if (instring) {
continue;
}
if (*p == ':') break;
if (*p == '\0') break;
}
if (*s != '"') {
/* Check if we want to look up another string */
char32_t l;
size_t len = Utf8Decode(&l, s);
bool lookup = (l == SCC_ENCODED);
if (lookup) s += len;
param = std::strtoull(s, &p, 16);
if (lookup) {
if (param >= TAB_SIZE_GAMESCRIPT) {
while (*p != '\0') p++;
str = p;
builder += "(invalid sub-StringID)";
break;
}
param = MakeStringID(TEXT_TAB_GAMESCRIPT_START, StringIndexInTab(param));
}
sub_args.SetParam(i++, param);
} else {
s++; // skip the leading \"
sub_args.SetParam(i++, std::string(s, p - s - 1)); // also skip the trailing \".
}
}
/* If we didn't error out, we can actually print the string. */
if (*str != '\0') {
str = p;
GetStringWithArgs(builder, MakeStringID(TEXT_TAB_GAMESCRIPT_START, stringid), sub_args, true);
}
case SCC_ENCODED:
str = DecodeEncodedString(str, builder);
break;
}
case SCC_NEWGRF_STRINL: {
StringID substr = Utf8Consume(&str);

View File

@ -15,14 +15,19 @@
* by strgen to generate the language files.
*/
enum StringControlCode : uint16_t {
SCC_RECORD_SEPARATOR = 0x1E,
SCC_CONTROL_START = 0xE000,
SCC_CONTROL_END = 0xE1FF,
SCC_SPRITE_START = 0xE200,
SCC_SPRITE_END = SCC_SPRITE_START + 0xFF,
/* This must be the first entry. It's encoded in strings that are saved. */
SCC_ENCODED = SCC_CONTROL_START,
/* All SCC_ENCODED* control codes must have stable ids as they are stored in strings that are saved in savegames. */
SCC_ENCODED = SCC_CONTROL_START, ///< Encoded string marker and sub-string parameter.
SCC_ENCODED_RESERVED, ///< Reserved for future non-GS encoded strings.
SCC_ENCODED_NUMERIC, ///< Encoded numeric parameter.
SCC_ENCODED_STRING, ///< Encoded string parameter.
/* Font selection codes, must be in same order as FontSize enum */
SCC_FIRST_FONT,

View File

@ -12,6 +12,7 @@
#include "../3rdparty/catch2/catch.hpp"
#include "../string_func.h"
#include "../table/control_codes.h"
/**** String compare/equals *****/
@ -408,3 +409,67 @@ TEST_CASE("StrTrimView") {
}
}
extern void FixSCCEncoded(std::string &str, bool fix_code);
/**
 * Apply FixSCCEncoded to a copy of the input, so the conversion can be used as an expression.
 * @param str String to convert; it is not modified.
 * @param fix_code Passed through to FixSCCEncoded.
 * @return The converted copy.
 */
static std::string FixSCCEncodedWrapper(const std::string &str, bool fix_code)
{
	std::string converted(str);
	FixSCCEncoded(converted, fix_code);
	return converted;
}
/**
 * Append a single Unicode character, UTF-8 encoded, to the string being composed.
 * @param output Insert iterator of the string being composed.
 * @param c Character to append.
 */
static void ComposePart(std::back_insert_iterator<std::string> &output, char32_t c)
{
	Utf8Encode(output, c);
}
/**
 * Append the bytes of a string, unchanged, to the string being composed.
 * @param output Insert iterator of the string being composed.
 * @param value Text to append.
 */
static void ComposePart(std::back_insert_iterator<std::string> &output, const std::string &value)
{
	for (const char ch : value) {
		/* Assigning to a back_insert_iterator appends to its container. */
		output = ch;
	}
}
/**
 * Build a string by appending each argument in order via ComposePart.
 * @param args Parts to concatenate; each must be accepted by a ComposePart overload.
 * @return The concatenation of all parts; empty when no parts are given.
 */
template <typename... Args>
static std::string Compose(Args &&... args)
{
	std::string composed;
	auto sink = std::back_inserter(composed);
	/* Comma fold: one ComposePart call per argument, left to right. */
	(ComposePart(sink, args), ...);
	return composed;
}
/*
 * Checks FixSCCEncoded, which rewrites strings from the old colon/double-quote parameter
 * encoding to the record separator encoding.
 *
 * In the inputs the escape takes exactly four hex digits, so "\uE0280" is U+E028 followed
 * by the digit '0'. U+E000 is SCC_ENCODED; U+E028 is a legacy code that is only rewritten
 * when fix_code is true.
 */
TEST_CASE("FixSCCEncoded")
{
/* Test conversion of empty string: it stays empty. */
CHECK(FixSCCEncodedWrapper("", false) == "");
/* Test conversion of old code to new code (fix_code enabled). */
CHECK(FixSCCEncodedWrapper("\uE0280", true) == Compose(SCC_ENCODED, "0"));
/* Test conversion of two old codes to new codes; the colon becomes a record separator. */
CHECK(FixSCCEncodedWrapper("\uE0280:\uE0281", true) == Compose(SCC_ENCODED, "0", SCC_RECORD_SEPARATOR, SCC_ENCODED, "1"));
/* Test conversion with no parameter: only the marker and string index remain. */
CHECK(FixSCCEncodedWrapper("\uE0001", false) == Compose(SCC_ENCODED, "1"));
/* Test conversion with one numeric parameter: it gains an SCC_ENCODED_NUMERIC type code. */
CHECK(FixSCCEncodedWrapper("\uE00022:1", false) == Compose(SCC_ENCODED, "22", SCC_RECORD_SEPARATOR, SCC_ENCODED_NUMERIC, "1"));
/* Test conversion with two numeric parameters. */
CHECK(FixSCCEncodedWrapper("\uE0003:12:2", false) == Compose(SCC_ENCODED, "3", SCC_RECORD_SEPARATOR, SCC_ENCODED_NUMERIC, "12", SCC_RECORD_SEPARATOR, SCC_ENCODED_NUMERIC, "2"));
/* Test conversion with one string parameter: quotes are dropped and SCC_ENCODED_STRING is emitted. */
CHECK(FixSCCEncodedWrapper("\uE0004:\"Foo\"", false) == Compose(SCC_ENCODED, "4", SCC_RECORD_SEPARATOR, SCC_ENCODED_STRING, "Foo"));
/* Test conversion with two string parameters. */
CHECK(FixSCCEncodedWrapper("\uE00055:\"Foo\":\"Bar\"", false) == Compose(SCC_ENCODED, "55", SCC_RECORD_SEPARATOR, SCC_ENCODED_STRING, "Foo", SCC_RECORD_SEPARATOR, SCC_ENCODED_STRING, "Bar"));
/* Test conversion with two string parameters surrounding a numeric parameter. */
CHECK(FixSCCEncodedWrapper("\uE0006:\"Foo\":7CA:\"Bar\"", false) == Compose(SCC_ENCODED, "6", SCC_RECORD_SEPARATOR, SCC_ENCODED_STRING, "Foo", SCC_RECORD_SEPARATOR, SCC_ENCODED_NUMERIC, "7CA", SCC_RECORD_SEPARATOR, SCC_ENCODED_STRING, "Bar"));
/* Test conversion with one sub-string and two string parameters; the sub-string keeps SCC_ENCODED as its type code. */
CHECK(FixSCCEncodedWrapper("\uE000777:\uE0008888:\"Foo\":\"BarBaz\"", false) == Compose(SCC_ENCODED, "777", SCC_RECORD_SEPARATOR, SCC_ENCODED, "8888", SCC_RECORD_SEPARATOR, SCC_ENCODED_STRING, "Foo", SCC_RECORD_SEPARATOR, SCC_ENCODED_STRING, "BarBaz"));
}