mirror of
https://github.com/OpenTTD/OpenTTD.git
synced 2025-06-19 19:49:30 +01:00
Codechange: Change internal format of encoded strings to improve robustness and allow expansion. (#13499)
This commit is contained in:
parent
1193852007
commit
dccc6185b9
@ -915,17 +915,69 @@ static inline size_t SlCalcStdStringLen(const void *ptr)
|
||||
* just bail out and do not continue trying to replace the tokens.
|
||||
* @param str the string to fix.
|
||||
*/
|
||||
static void FixSCCEncoded(std::string &str)
|
||||
void FixSCCEncoded(std::string &str, bool fix_code)
|
||||
{
|
||||
for (size_t i = 0; i < str.size(); /* nothing. */) {
|
||||
size_t len = Utf8EncodedCharLen(str[i]);
|
||||
if (len == 0 || i + len > str.size()) break;
|
||||
if (str.empty()) return;
|
||||
|
||||
/* We need to convert from old escape-style encoding to record separator encoding.
|
||||
* Initial `<SCC_ENCODED><STRINGID>` stays the same.
|
||||
*
|
||||
* `:<SCC_ENCODED><STRINGID>` becomes `<RS><SCC_ENCODED><STRINGID>`
|
||||
* `:<HEX>` becomes `<RS><SCC_ENCODED_NUMERIC><HEX>`
|
||||
* `:"<STRING>"` becomes `<RS><SCC_ENCODED_STRING><STRING>`
|
||||
*/
|
||||
std::string result;
|
||||
auto output = std::back_inserter(result);
|
||||
|
||||
bool is_encoded = false; // Set if we determine by the presence of SCC_ENCODED that the string is an encoded string.
|
||||
bool in_string = false; // Set if we are in a string, between double-quotes.
|
||||
bool need_type = true; // Set if a parameter type needs to be emitted.
|
||||
|
||||
for (auto it = std::begin(str); it != std::end(str); /* nothing */) {
|
||||
size_t len = Utf8EncodedCharLen(*it);
|
||||
if (len == 0 || it + len > std::end(str)) break;
|
||||
|
||||
char32_t c;
|
||||
Utf8Decode(&c, &str[i]);
|
||||
if (c == 0xE028 || c == 0xE02A) Utf8Encode(&str[i], SCC_ENCODED);
|
||||
i += len;
|
||||
Utf8Decode(&c, &*it);
|
||||
if (c == SCC_ENCODED || (fix_code && (c == 0xE028 || c == 0xE02A))) {
|
||||
Utf8Encode(output, SCC_ENCODED);
|
||||
need_type = false;
|
||||
is_encoded = true;
|
||||
it += len;
|
||||
continue;
|
||||
}
|
||||
|
||||
/* If the first character is not SCC_ENCODED then we don't have to do any conversion. */
|
||||
if (!is_encoded) return;
|
||||
|
||||
if (c == '"') {
|
||||
in_string = !in_string;
|
||||
if (in_string && need_type) {
|
||||
/* Started a new string parameter. */
|
||||
Utf8Encode(output, SCC_ENCODED_STRING);
|
||||
need_type = false;
|
||||
}
|
||||
it += len;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!in_string && c == ':') {
|
||||
*output = SCC_RECORD_SEPARATOR;
|
||||
need_type = true;
|
||||
it += len;
|
||||
continue;
|
||||
}
|
||||
if (need_type) {
|
||||
/* Started a new numeric parameter. */
|
||||
Utf8Encode(output, SCC_ENCODED_NUMERIC);
|
||||
need_type = false;
|
||||
}
|
||||
|
||||
Utf8Encode(output, c);
|
||||
it += len;
|
||||
}
|
||||
|
||||
str = result;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -970,7 +1022,7 @@ static void SlStdString(void *ptr, VarType conv)
|
||||
StringValidationSettings settings = SVS_REPLACE_WITH_QUESTION_MARK;
|
||||
if ((conv & SLF_ALLOW_CONTROL) != 0) {
|
||||
settings = settings | SVS_ALLOW_CONTROL_CODE;
|
||||
if (IsSavegameVersionBefore(SLV_169)) FixSCCEncoded(*str);
|
||||
if (IsSavegameVersionBefore(SLV_ENCODED_STRING_FORMAT)) FixSCCEncoded(*str, IsSavegameVersionBefore(SLV_169));
|
||||
}
|
||||
if ((conv & SLF_ALLOW_NEWLINE) != 0) {
|
||||
settings = settings | SVS_ALLOW_NEWLINE;
|
||||
|
@ -397,6 +397,8 @@ enum SaveLoadVersion : uint16_t {
|
||||
SLV_INCREASE_HOUSE_LIMIT, ///< 348 PR#12288 Increase house limit to 4096.
|
||||
SLV_COMPANY_INAUGURATED_PERIOD_V2, ///< 349 PR#13448 Fix savegame storage for company inaugurated year in wallclock mode.
|
||||
|
||||
SLV_ENCODED_STRING_FORMAT, ///< 350 PR#13499 Encoded String format changed.
|
||||
|
||||
SL_MAX_VERSION, ///< Highest possible saveload version
|
||||
};
|
||||
|
||||
|
@ -197,16 +197,26 @@ void ScriptText::ParamCheck::Encode(std::back_insert_iterator<std::string> &outp
|
||||
struct visitor {
|
||||
std::back_insert_iterator<std::string> &output;
|
||||
|
||||
void operator()(const std::string &value) { fmt::format_to(this->output, ":\"{}\"", value); }
|
||||
void operator()(const SQInteger &value) { fmt::format_to(this->output, ":{:X}", value); }
|
||||
void operator()(const std::string &value)
|
||||
{
|
||||
Utf8Encode(this->output, SCC_ENCODED_STRING);
|
||||
fmt::format_to(this->output, "{}", value);
|
||||
}
|
||||
|
||||
void operator()(const SQInteger &value)
|
||||
{
|
||||
Utf8Encode(this->output, SCC_ENCODED_NUMERIC);
|
||||
fmt::format_to(this->output, "{:X}", value);
|
||||
}
|
||||
|
||||
void operator()(const ScriptTextRef &value)
|
||||
{
|
||||
fmt::format_to(this->output, ":");
|
||||
Utf8Encode(this->output, SCC_ENCODED);
|
||||
fmt::format_to(this->output, "{:X}", value->string);
|
||||
}
|
||||
};
|
||||
|
||||
*output = SCC_RECORD_SEPARATOR;
|
||||
std::visit(visitor{output}, *this->param);
|
||||
this->used = true;
|
||||
}
|
||||
|
@ -90,6 +90,24 @@ std::string FormatArrayAsHex(std::span<const uint8_t> data)
|
||||
return str;
|
||||
}
|
||||
|
||||
/**
|
||||
* Test if a character is (only) part of an encoded string.
|
||||
* @param c Character to test.
|
||||
* @returns True iff the character is an encoded string control code.
|
||||
*/
|
||||
static bool IsSccEncodedCode(char32_t c)
|
||||
{
|
||||
switch (c) {
|
||||
case SCC_RECORD_SEPARATOR:
|
||||
case SCC_ENCODED:
|
||||
case SCC_ENCODED_NUMERIC:
|
||||
case SCC_ENCODED_STRING:
|
||||
return true;
|
||||
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Copies the valid (UTF-8) characters from \c str up to \c last to the \c dst.
|
||||
@ -140,7 +158,7 @@ static void StrMakeValid(T &dst, const char *str, const char *last, StringValida
|
||||
continue;
|
||||
}
|
||||
|
||||
if ((IsPrintable(c) && (c < SCC_SPRITE_START || c > SCC_SPRITE_END)) || ((settings & SVS_ALLOW_CONTROL_CODE) != 0 && c == SCC_ENCODED)) {
|
||||
if ((IsPrintable(c) && (c < SCC_SPRITE_START || c > SCC_SPRITE_END)) || ((settings & SVS_ALLOW_CONTROL_CODE) != 0 && IsSccEncodedCode(c))) {
|
||||
/* Copy the character back. Even if dst is currently the same as str
|
||||
* (i.e. no characters have been changed) this is quicker than
|
||||
* moving the pointers ahead by len */
|
||||
|
155
src/strings.cpp
155
src/strings.cpp
@ -953,6 +953,79 @@ uint ConvertDisplaySpeedToKmhishSpeed(uint speed, VehicleType type)
|
||||
return GetVelocityUnits(type).c.FromDisplay(speed * 16, true, 10);
|
||||
}
|
||||
|
||||
/**
|
||||
* Decodes an encoded string during FormatString.
|
||||
* @param str The buffer of the encoded string.
|
||||
* @param builder The string builder to write the string to.
|
||||
* @returns Updated position in input buffer.
|
||||
*/
|
||||
static const char *DecodeEncodedString(const char *str, StringBuilder &builder)
|
||||
{
|
||||
ArrayStringParameters<20> sub_args;
|
||||
|
||||
char *p;
|
||||
StringIndexInTab id(std::strtoul(str, &p, 16));
|
||||
if (*p != SCC_RECORD_SEPARATOR && *p != '\0') {
|
||||
while (*p != '\0') p++;
|
||||
builder += "(invalid SCC_ENCODED)";
|
||||
return p;
|
||||
}
|
||||
if (id >= TAB_SIZE_GAMESCRIPT) {
|
||||
while (*p != '\0') p++;
|
||||
builder += "(invalid StringID)";
|
||||
return p;
|
||||
}
|
||||
|
||||
int i = 0;
|
||||
while (*p != '\0' && i < 20) {
|
||||
/* The start of parameter. */
|
||||
const char *s = ++p;
|
||||
|
||||
/* Find end of the parameter. */
|
||||
for (; *p != '\0' && *p != SCC_RECORD_SEPARATOR; ++p) {}
|
||||
|
||||
/* Get the parameter type. */
|
||||
char32_t parameter_type;
|
||||
size_t len = Utf8Decode(&parameter_type, s);
|
||||
s += len;
|
||||
|
||||
switch (parameter_type) {
|
||||
case SCC_ENCODED: {
|
||||
uint64_t param = std::strtoull(s, &p, 16);
|
||||
if (param >= TAB_SIZE_GAMESCRIPT) {
|
||||
while (*p != '\0') p++;
|
||||
builder += "(invalid sub-StringID)";
|
||||
return p;
|
||||
}
|
||||
param = MakeStringID(TEXT_TAB_GAMESCRIPT_START, StringIndexInTab(param));
|
||||
sub_args.SetParam(i++, param);
|
||||
break;
|
||||
}
|
||||
|
||||
case SCC_ENCODED_NUMERIC: {
|
||||
uint64_t param = std::strtoull(s, &p, 16);
|
||||
sub_args.SetParam(i++, param);
|
||||
break;
|
||||
}
|
||||
|
||||
case SCC_ENCODED_STRING: {
|
||||
sub_args.SetParam(i++, std::string(s, p - s));
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
/* Skip unknown parameter. */
|
||||
i++;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
StringID stringid = MakeStringID(TEXT_TAB_GAMESCRIPT_START, id);
|
||||
GetStringWithArgs(builder, stringid, sub_args, true);
|
||||
|
||||
return p;
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse most format codes within a string and write the result to a buffer.
|
||||
* @param builder The string builder to write the final string to.
|
||||
@ -1018,87 +1091,9 @@ static void FormatString(StringBuilder &builder, const char *str_arg, StringPara
|
||||
|
||||
args.SetTypeOfNextParameter(b);
|
||||
switch (b) {
|
||||
case SCC_ENCODED: {
|
||||
ArrayStringParameters<20> sub_args;
|
||||
|
||||
char *p;
|
||||
StringIndexInTab stringid(std::strtoul(str, &p, 16));
|
||||
if (*p != ':' && *p != '\0') {
|
||||
while (*p != '\0') p++;
|
||||
str = p;
|
||||
builder += "(invalid SCC_ENCODED)";
|
||||
break;
|
||||
}
|
||||
if (stringid >= TAB_SIZE_GAMESCRIPT) {
|
||||
while (*p != '\0') p++;
|
||||
str = p;
|
||||
builder += "(invalid StringID)";
|
||||
break;
|
||||
}
|
||||
|
||||
int i = 0;
|
||||
while (*p != '\0' && i < 20) {
|
||||
uint64_t param;
|
||||
const char *s = ++p;
|
||||
|
||||
/* Find the next value */
|
||||
bool instring = false;
|
||||
bool escape = false;
|
||||
for (;; p++) {
|
||||
if (*p == '\\') {
|
||||
escape = true;
|
||||
continue;
|
||||
}
|
||||
if (*p == '"' && escape) {
|
||||
escape = false;
|
||||
continue;
|
||||
}
|
||||
escape = false;
|
||||
|
||||
if (*p == '"') {
|
||||
instring = !instring;
|
||||
continue;
|
||||
}
|
||||
if (instring) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (*p == ':') break;
|
||||
if (*p == '\0') break;
|
||||
}
|
||||
|
||||
if (*s != '"') {
|
||||
/* Check if we want to look up another string */
|
||||
char32_t l;
|
||||
size_t len = Utf8Decode(&l, s);
|
||||
bool lookup = (l == SCC_ENCODED);
|
||||
if (lookup) s += len;
|
||||
|
||||
param = std::strtoull(s, &p, 16);
|
||||
|
||||
if (lookup) {
|
||||
if (param >= TAB_SIZE_GAMESCRIPT) {
|
||||
while (*p != '\0') p++;
|
||||
str = p;
|
||||
builder += "(invalid sub-StringID)";
|
||||
break;
|
||||
}
|
||||
param = MakeStringID(TEXT_TAB_GAMESCRIPT_START, StringIndexInTab(param));
|
||||
}
|
||||
|
||||
sub_args.SetParam(i++, param);
|
||||
} else {
|
||||
s++; // skip the leading \"
|
||||
sub_args.SetParam(i++, std::string(s, p - s - 1)); // also skip the trailing \".
|
||||
}
|
||||
}
|
||||
/* If we didn't error out, we can actually print the string. */
|
||||
if (*str != '\0') {
|
||||
str = p;
|
||||
GetStringWithArgs(builder, MakeStringID(TEXT_TAB_GAMESCRIPT_START, stringid), sub_args, true);
|
||||
}
|
||||
case SCC_ENCODED:
|
||||
str = DecodeEncodedString(str, builder);
|
||||
break;
|
||||
}
|
||||
|
||||
case SCC_NEWGRF_STRINL: {
|
||||
StringID substr = Utf8Consume(&str);
|
||||
|
@ -15,14 +15,19 @@
|
||||
* by strgen to generate the language files.
|
||||
*/
|
||||
enum StringControlCode : uint16_t {
|
||||
SCC_RECORD_SEPARATOR = 0x1E,
|
||||
|
||||
SCC_CONTROL_START = 0xE000,
|
||||
SCC_CONTROL_END = 0xE1FF,
|
||||
|
||||
SCC_SPRITE_START = 0xE200,
|
||||
SCC_SPRITE_END = SCC_SPRITE_START + 0xFF,
|
||||
|
||||
/* This must be the first entry. It's encoded in strings that are saved. */
|
||||
SCC_ENCODED = SCC_CONTROL_START,
|
||||
/* All SCC_ENCODED* control codes must have stable ids as they are stored in strings that are saved in savegames. */
|
||||
SCC_ENCODED = SCC_CONTROL_START, ///< Encoded string marker and sub-string parameter.
|
||||
SCC_ENCODED_RESERVED, ///< Reserved for future non-GS encoded strings.
|
||||
SCC_ENCODED_NUMERIC, ///< Encoded numeric parameter.
|
||||
SCC_ENCODED_STRING, ///< Encoded string parameter.
|
||||
|
||||
/* Font selection codes, must be in same order as FontSize enum */
|
||||
SCC_FIRST_FONT,
|
||||
|
@ -12,6 +12,7 @@
|
||||
#include "../3rdparty/catch2/catch.hpp"
|
||||
|
||||
#include "../string_func.h"
|
||||
#include "../table/control_codes.h"
|
||||
|
||||
/**** String compare/equals *****/
|
||||
|
||||
@ -408,3 +409,67 @@ TEST_CASE("StrTrimView") {
|
||||
}
|
||||
}
|
||||
|
||||
extern void FixSCCEncoded(std::string &str, bool fix_code);
|
||||
|
||||
/* Helper to call FixSCCEncoded and return the result in a new string. */
|
||||
static std::string FixSCCEncodedWrapper(const std::string &str, bool fix_code)
|
||||
{
|
||||
std::string result = str;
|
||||
FixSCCEncoded(result, fix_code);
|
||||
return result;
|
||||
}
|
||||
|
||||
/* Helper to compose a string part from a unicode character */
|
||||
static void ComposePart(std::back_insert_iterator<std::string> &output, char32_t c)
|
||||
{
|
||||
Utf8Encode(output, c);
|
||||
}
|
||||
|
||||
/* Helper to compose a string part from a string. */
|
||||
static void ComposePart(std::back_insert_iterator<std::string> &output, const std::string &value)
|
||||
{
|
||||
for (const auto &c : value) *output = c;
|
||||
}
|
||||
|
||||
/* Helper to compose a string from unicode or string parts. */
|
||||
template <typename... Args>
|
||||
static std::string Compose(Args &&... args)
|
||||
{
|
||||
std::string result;
|
||||
auto output = std::back_inserter(result);
|
||||
(ComposePart(output, args), ...);
|
||||
return result;
|
||||
}
|
||||
|
||||
TEST_CASE("FixSCCEncoded")
|
||||
{
|
||||
/* Test conversion of empty string. */
|
||||
CHECK(FixSCCEncodedWrapper("", false) == "");
|
||||
|
||||
/* Test conversion of old code to new code. */
|
||||
CHECK(FixSCCEncodedWrapper("\uE0280", true) == Compose(SCC_ENCODED, "0"));
|
||||
|
||||
/* Test conversion of two old codes to new codes. */
|
||||
CHECK(FixSCCEncodedWrapper("\uE0280:\uE0281", true) == Compose(SCC_ENCODED, "0", SCC_RECORD_SEPARATOR, SCC_ENCODED, "1"));
|
||||
|
||||
/* Test conversion with no parameter. */
|
||||
CHECK(FixSCCEncodedWrapper("\uE0001", false) == Compose(SCC_ENCODED, "1"));
|
||||
|
||||
/* Test conversion with one numeric parameter. */
|
||||
CHECK(FixSCCEncodedWrapper("\uE00022:1", false) == Compose(SCC_ENCODED, "22", SCC_RECORD_SEPARATOR, SCC_ENCODED_NUMERIC, "1"));
|
||||
|
||||
/* Test conversion with two numeric parameters. */
|
||||
CHECK(FixSCCEncodedWrapper("\uE0003:12:2", false) == Compose(SCC_ENCODED, "3", SCC_RECORD_SEPARATOR, SCC_ENCODED_NUMERIC, "12", SCC_RECORD_SEPARATOR, SCC_ENCODED_NUMERIC, "2"));
|
||||
|
||||
/* Test conversion with one string parameter. */
|
||||
CHECK(FixSCCEncodedWrapper("\uE0004:\"Foo\"", false) == Compose(SCC_ENCODED, "4", SCC_RECORD_SEPARATOR, SCC_ENCODED_STRING, "Foo"));
|
||||
|
||||
/* Test conversion with two string parameters. */
|
||||
CHECK(FixSCCEncodedWrapper("\uE00055:\"Foo\":\"Bar\"", false) == Compose(SCC_ENCODED, "55", SCC_RECORD_SEPARATOR, SCC_ENCODED_STRING, "Foo", SCC_RECORD_SEPARATOR, SCC_ENCODED_STRING, "Bar"));
|
||||
|
||||
/* Test conversion with two string parameters surrounding a numeric parameter. */
|
||||
CHECK(FixSCCEncodedWrapper("\uE0006:\"Foo\":7CA:\"Bar\"", false) == Compose(SCC_ENCODED, "6", SCC_RECORD_SEPARATOR, SCC_ENCODED_STRING, "Foo", SCC_RECORD_SEPARATOR, SCC_ENCODED_NUMERIC, "7CA", SCC_RECORD_SEPARATOR, SCC_ENCODED_STRING, "Bar"));
|
||||
|
||||
/* Test conversion with one sub-string and two string parameters. */
|
||||
CHECK(FixSCCEncodedWrapper("\uE000777:\uE0008888:\"Foo\":\"BarBaz\"", false) == Compose(SCC_ENCODED, "777", SCC_RECORD_SEPARATOR, SCC_ENCODED, "8888", SCC_RECORD_SEPARATOR, SCC_ENCODED_STRING, "Foo", SCC_RECORD_SEPARATOR, SCC_ENCODED_STRING, "BarBaz"));
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user