diff --git a/src/saveload/saveload.cpp b/src/saveload/saveload.cpp index bf8c27982a..7cd4d987c5 100644 --- a/src/saveload/saveload.cpp +++ b/src/saveload/saveload.cpp @@ -915,17 +915,69 @@ static inline size_t SlCalcStdStringLen(const void *ptr) * just bail out and do not continue trying to replace the tokens. * @param str the string to fix. */ -static void FixSCCEncoded(std::string &str) +void FixSCCEncoded(std::string &str, bool fix_code) { - for (size_t i = 0; i < str.size(); /* nothing. */) { - size_t len = Utf8EncodedCharLen(str[i]); - if (len == 0 || i + len > str.size()) break; + if (str.empty()) return; + + /* We need to convert from old escape-style encoding to record separator encoding. + * Initial `` stays the same. + * + * `:` becomes `` + * `:` becomes `` + * `:""` becomes `` + */ + std::string result; + auto output = std::back_inserter(result); + + bool is_encoded = false; // Set if we determine by the presence of SCC_ENCODED that the string is an encoded string. + bool in_string = false; // Set if we in a string, between double-quotes. + bool need_type = true; // Set if a parameter type needs to be emitted. + + for (auto it = std::begin(str); it != std::end(str); /* nothing */) { + size_t len = Utf8EncodedCharLen(*it); + if (len == 0 || it + len > std::end(str)) break; char32_t c; - Utf8Decode(&c, &str[i]); - if (c == 0xE028 || c == 0xE02A) Utf8Encode(&str[i], SCC_ENCODED); - i += len; + Utf8Decode(&c, &*it); + if (c == SCC_ENCODED || (fix_code && (c == 0xE028 || c == 0xE02A))) { + Utf8Encode(output, SCC_ENCODED); + need_type = false; + is_encoded = true; + it += len; + continue; + } + + /* If the first character is not SCC_ENCODED then we don't have to do any conversion. */ + if (!is_encoded) return; + + if (c == '"') { + in_string = !in_string; + if (in_string && need_type) { + /* Started a new string parameter. */ + Utf8Encode(output, SCC_ENCODED_STRING); + need_type = false; + } + it += len; + continue; + } + + if (!in_string && c == ':') { + *output = SCC_RECORD_SEPARATOR; + need_type = true; + it += len; + continue; + } + if (need_type) { + /* Started a new numeric parameter. */ + Utf8Encode(output, SCC_ENCODED_NUMERIC); + need_type = false; + } + + Utf8Encode(output, c); + it += len; } + + str = result; } /** @@ -970,7 +1022,7 @@ static void SlStdString(void *ptr, VarType conv) StringValidationSettings settings = SVS_REPLACE_WITH_QUESTION_MARK; if ((conv & SLF_ALLOW_CONTROL) != 0) { settings = settings | SVS_ALLOW_CONTROL_CODE; - if (IsSavegameVersionBefore(SLV_169)) FixSCCEncoded(*str); + if (IsSavegameVersionBefore(SLV_ENCODED_STRING_FORMAT)) FixSCCEncoded(*str, IsSavegameVersionBefore(SLV_169)); } if ((conv & SLF_ALLOW_NEWLINE) != 0) { settings = settings | SVS_ALLOW_NEWLINE; diff --git a/src/saveload/saveload.h b/src/saveload/saveload.h index a68439ec09..3ab4a0af4e 100644 --- a/src/saveload/saveload.h +++ b/src/saveload/saveload.h @@ -397,6 +397,8 @@ enum SaveLoadVersion : uint16_t { SLV_INCREASE_HOUSE_LIMIT, ///< 348 PR#12288 Increase house limit to 4096. SLV_COMPANY_INAUGURATED_PERIOD_V2, ///< 349 PR#13448 Fix savegame storage for company inaugurated year in wallclock mode. + SLV_ENCODED_STRING_FORMAT, ///< 350 PR#13499 Encoded String format changed. + SL_MAX_VERSION, ///< Highest possible saveload version }; diff --git a/src/script/api/script_text.cpp b/src/script/api/script_text.cpp index 0d923034a0..544578a21e 100644 --- a/src/script/api/script_text.cpp +++ b/src/script/api/script_text.cpp @@ -197,16 +197,26 @@ void ScriptText::ParamCheck::Encode(std::back_insert_iterator &outp struct visitor { std::back_insert_iterator &output; - void operator()(const std::string &value) { fmt::format_to(this->output, ":\"{}\"", value); } - void operator()(const SQInteger &value) { fmt::format_to(this->output, ":{:X}", value); } + void operator()(const std::string &value) + { + Utf8Encode(this->output, SCC_ENCODED_STRING); + fmt::format_to(this->output, "{}", value); + } + + void operator()(const SQInteger &value) + { + Utf8Encode(this->output, SCC_ENCODED_NUMERIC); + fmt::format_to(this->output, "{:X}", value); + } + void operator()(const ScriptTextRef &value) { - fmt::format_to(this->output, ":"); Utf8Encode(this->output, SCC_ENCODED); fmt::format_to(this->output, "{:X}", value->string); } }; + *output = SCC_RECORD_SEPARATOR; std::visit(visitor{output}, *this->param); this->used = true; } diff --git a/src/string.cpp b/src/string.cpp index eb8bb0632b..9803cc9b5a 100644 --- a/src/string.cpp +++ b/src/string.cpp @@ -90,6 +90,24 @@ std::string FormatArrayAsHex(std::span data) return str; } +/** + * Test if a character is (only) part of an encoded string. + * @param c Character to test. + * @returns True iff the character is an encoded string control code. + */ +static bool IsSccEncodedCode(char32_t c) +{ + switch (c) { + case SCC_RECORD_SEPARATOR: + case SCC_ENCODED: + case SCC_ENCODED_NUMERIC: + case SCC_ENCODED_STRING: + return true; + + default: + return false; + } +} /** * Copies the valid (UTF-8) characters from \c str up to \c last to the \c dst. @@ -140,7 +158,7 @@ static void StrMakeValid(T &dst, const char *str, const char *last, StringValida continue; } - if ((IsPrintable(c) && (c < SCC_SPRITE_START || c > SCC_SPRITE_END)) || ((settings & SVS_ALLOW_CONTROL_CODE) != 0 && c == SCC_ENCODED)) { + if ((IsPrintable(c) && (c < SCC_SPRITE_START || c > SCC_SPRITE_END)) || ((settings & SVS_ALLOW_CONTROL_CODE) != 0 && IsSccEncodedCode(c))) { /* Copy the character back. Even if dst is current the same as str * (i.e. no characters have been changed) this is quicker than * moving the pointers ahead by len */ diff --git a/src/strings.cpp b/src/strings.cpp index 18785e1596..9255b9d618 100644 --- a/src/strings.cpp +++ b/src/strings.cpp @@ -953,6 +953,79 @@ uint ConvertDisplaySpeedToKmhishSpeed(uint speed, VehicleType type) return GetVelocityUnits(type).c.FromDisplay(speed * 16, true, 10); } +/** + * Decodes an encoded string during FormatString. + * @param str The buffer of the encoded string. + * @param builder The string builder to write the string to. + * @returns Updated position position in input buffer. + */ +static const char *DecodeEncodedString(const char *str, StringBuilder &builder) +{ + ArrayStringParameters<20> sub_args; + + char *p; + StringIndexInTab id(std::strtoul(str, &p, 16)); + if (*p != SCC_RECORD_SEPARATOR && *p != '\0') { + while (*p != '\0') p++; + builder += "(invalid SCC_ENCODED)"; + return p; + } + if (id >= TAB_SIZE_GAMESCRIPT) { + while (*p != '\0') p++; + builder += "(invalid StringID)"; + return p; + } + + int i = 0; + while (*p != '\0' && i < 20) { + /* The start of parameter. */ + const char *s = ++p; + + /* Find end of the parameter. */ + for (; *p != '\0' && *p != SCC_RECORD_SEPARATOR; ++p) {} + + /* Get the parameter type. */ + char32_t parameter_type; + size_t len = Utf8Decode(¶meter_type, s); + s += len; + + switch (parameter_type) { + case SCC_ENCODED: { + uint64_t param = std::strtoull(s, &p, 16); + if (param >= TAB_SIZE_GAMESCRIPT) { + while (*p != '\0') p++; + builder += "(invalid sub-StringID)"; + return p; + } + param = MakeStringID(TEXT_TAB_GAMESCRIPT_START, StringIndexInTab(param)); + sub_args.SetParam(i++, param); + break; + } + + case SCC_ENCODED_NUMERIC: { + uint64_t param = std::strtoull(s, &p, 16); + sub_args.SetParam(i++, param); + break; + } + + case SCC_ENCODED_STRING: { + sub_args.SetParam(i++, std::string(s, p - s)); + break; + } + + default: + /* Skip unknown parameter. */ + i++; + break; + } + } + + StringID stringid = MakeStringID(TEXT_TAB_GAMESCRIPT_START, id); + GetStringWithArgs(builder, stringid, sub_args, true); + + return p; +} + /** * Parse most format codes within a string and write the result to a buffer. * @param builder The string builder to write the final string to. @@ -1018,87 +1091,9 @@ static void FormatString(StringBuilder &builder, const char *str_arg, StringPara args.SetTypeOfNextParameter(b); switch (b) { - case SCC_ENCODED: { - ArrayStringParameters<20> sub_args; - - char *p; - StringIndexInTab stringid(std::strtoul(str, &p, 16)); - if (*p != ':' && *p != '\0') { - while (*p != '\0') p++; - str = p; - builder += "(invalid SCC_ENCODED)"; - break; - } - if (stringid >= TAB_SIZE_GAMESCRIPT) { - while (*p != '\0') p++; - str = p; - builder += "(invalid StringID)"; - break; - } - - int i = 0; - while (*p != '\0' && i < 20) { - uint64_t param; - const char *s = ++p; - - /* Find the next value */ - bool instring = false; - bool escape = false; - for (;; p++) { - if (*p == '\\') { - escape = true; - continue; - } - if (*p == '"' && escape) { - escape = false; - continue; - } - escape = false; - - if (*p == '"') { - instring = !instring; - continue; - } - if (instring) { - continue; - } - - if (*p == ':') break; - if (*p == '\0') break; - } - - if (*s != '"') { - /* Check if we want to look up another string */ - char32_t l; - size_t len = Utf8Decode(&l, s); - bool lookup = (l == SCC_ENCODED); - if (lookup) s += len; - - param = std::strtoull(s, &p, 16); - - if (lookup) { - if (param >= TAB_SIZE_GAMESCRIPT) { - while (*p != '\0') p++; - str = p; - builder += "(invalid sub-StringID)"; - break; - } - param = MakeStringID(TEXT_TAB_GAMESCRIPT_START, StringIndexInTab(param)); - } - - sub_args.SetParam(i++, param); - } else { - s++; // skip the leading \" - sub_args.SetParam(i++, std::string(s, p - s - 1)); // also skip the trailing \". - } - } - /* If we didn't error out, we can actually print the string. */ - if (*str != '\0') { - str = p; - GetStringWithArgs(builder, MakeStringID(TEXT_TAB_GAMESCRIPT_START, stringid), sub_args, true); - } + case SCC_ENCODED: + str = DecodeEncodedString(str, builder); break; - } case SCC_NEWGRF_STRINL: { StringID substr = Utf8Consume(&str); diff --git a/src/table/control_codes.h b/src/table/control_codes.h index d98c4a4c87..5e2e4ea9c6 100644 --- a/src/table/control_codes.h +++ b/src/table/control_codes.h @@ -15,14 +15,19 @@ * by strgen to generate the language files. */ enum StringControlCode : uint16_t { + SCC_RECORD_SEPARATOR = 0x1E, + SCC_CONTROL_START = 0xE000, SCC_CONTROL_END = 0xE1FF, SCC_SPRITE_START = 0xE200, SCC_SPRITE_END = SCC_SPRITE_START + 0xFF, - /* This must be the first entry. It's encoded in strings that are saved. */ - SCC_ENCODED = SCC_CONTROL_START, + /* All SCC_ENCODED* control codes must have stable ids are they are stored in strings that are saved in savegames. */ + SCC_ENCODED = SCC_CONTROL_START, ///< Encoded string marker and sub-string parameter. + SCC_ENCODED_RESERVED, ///< Reserved for future non-GS encoded strings. + SCC_ENCODED_NUMERIC, ///< Encoded numeric parameter. + SCC_ENCODED_STRING, ///< Encoded string parameter. /* Font selection codes, must be in same order as FontSize enum */ SCC_FIRST_FONT, diff --git a/src/tests/string_func.cpp b/src/tests/string_func.cpp index 6086cad075..a88c0d19c9 100644 --- a/src/tests/string_func.cpp +++ b/src/tests/string_func.cpp @@ -12,6 +12,7 @@ #include "../3rdparty/catch2/catch.hpp" #include "../string_func.h" +#include "../table/control_codes.h" /**** String compare/equals *****/ @@ -408,3 +409,67 @@ TEST_CASE("StrTrimView") { } } +extern void FixSCCEncoded(std::string &str, bool fix_code); + +/* Helper to call FixSCCEncoded and return the result in a new string. */ +static std::string FixSCCEncodedWrapper(const std::string &str, bool fix_code) +{ + std::string result = str; + FixSCCEncoded(result, fix_code); + return result; +} + +/* Helper to compose a string part from a unicode character */ +static void ComposePart(std::back_insert_iterator &output, char32_t c) +{ + Utf8Encode(output, c); +} + +/* Helper to compose a string part from a string. */ +static void ComposePart(std::back_insert_iterator &output, const std::string &value) +{ + for (const auto &c : value) *output = c; +} + +/* Helper to compose a string from unicde or string parts. */ +template +static std::string Compose(Args &&... args) +{ + std::string result; + auto output = std::back_inserter(result); + (ComposePart(output, args), ...); + return result; +} + +TEST_CASE("FixSCCEncoded") +{ + /* Test conversion of empty string. */ + CHECK(FixSCCEncodedWrapper("", false) == ""); + + /* Test conversion of old code to new code. */ + CHECK(FixSCCEncodedWrapper("\uE0280", true) == Compose(SCC_ENCODED, "0")); + + /* Test conversion of two old codes to new codes. */ + CHECK(FixSCCEncodedWrapper("\uE0280:\uE0281", true) == Compose(SCC_ENCODED, "0", SCC_RECORD_SEPARATOR, SCC_ENCODED, "1")); + + /* Test conversion with no parameter. */ + CHECK(FixSCCEncodedWrapper("\uE0001", false) == Compose(SCC_ENCODED, "1")); + + /* Test conversion with one numeric parameter. */ + CHECK(FixSCCEncodedWrapper("\uE00022:1", false) == Compose(SCC_ENCODED, "22", SCC_RECORD_SEPARATOR, SCC_ENCODED_NUMERIC, "1")); + + /* Test conversion with two numeric parameters. */ + CHECK(FixSCCEncodedWrapper("\uE0003:12:2", false) == Compose(SCC_ENCODED, "3", SCC_RECORD_SEPARATOR, SCC_ENCODED_NUMERIC, "12", SCC_RECORD_SEPARATOR, SCC_ENCODED_NUMERIC, "2")); + + /* Test conversion with one string parameter. */ + CHECK(FixSCCEncodedWrapper("\uE0004:\"Foo\"", false) == Compose(SCC_ENCODED, "4", SCC_RECORD_SEPARATOR, SCC_ENCODED_STRING, "Foo")); + + /* Test conversion with two string parameters. */ + CHECK(FixSCCEncodedWrapper("\uE00055:\"Foo\":\"Bar\"", false) == Compose(SCC_ENCODED, "55", SCC_RECORD_SEPARATOR, SCC_ENCODED_STRING, "Foo", SCC_RECORD_SEPARATOR, SCC_ENCODED_STRING, "Bar")); + + /* Test conversion with two string parameters surrounding a numeric parameter. */ + CHECK(FixSCCEncodedWrapper("\uE0006:\"Foo\":7CA:\"Bar\"", false) == Compose(SCC_ENCODED, "6", SCC_RECORD_SEPARATOR, SCC_ENCODED_STRING, "Foo", SCC_RECORD_SEPARATOR, SCC_ENCODED_NUMERIC, "7CA", SCC_RECORD_SEPARATOR, SCC_ENCODED_STRING, "Bar")); + + /* Test conversion with one sub-string and two string parameters. */ + CHECK(FixSCCEncodedWrapper("\uE000777:\uE0008888:\"Foo\":\"BarBaz\"", false) == Compose(SCC_ENCODED, "777", SCC_RECORD_SEPARATOR, SCC_ENCODED, "8888", SCC_RECORD_SEPARATOR, SCC_ENCODED_STRING, "Foo", SCC_RECORD_SEPARATOR, SCC_ENCODED_STRING, "BarBaz")); +}