From cd46b267f5715c0900b409d7bcb97f6bc9a43d9e Mon Sep 17 00:00:00 2001 From: Lioncash Date: Sun, 22 Jul 2018 15:33:07 -0400 Subject: [PATCH 1/3] string_util: Remove unnecessary std::string instance in TabsToSpaces() We can just use the variant of std::string's replace() function that can replace an occurrence with N copies of the same character, eliminating the need to allocate a std::string containing a buffer of spaces. --- src/common/string_util.cpp | 13 ++++++------- src/common/string_util.h | 2 +- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/src/common/string_util.cpp b/src/common/string_util.cpp index f3ad3d68a4..2099eebb8b 100644 --- a/src/common/string_util.cpp +++ b/src/common/string_util.cpp @@ -168,15 +168,14 @@ void SplitString(const std::string& str, const char delim, std::vector& output); From 26a157cd31676f419420078e7d99df90d9fbc910 Mon Sep 17 00:00:00 2001 From: Lioncash Date: Sun, 22 Jul 2018 15:36:30 -0400 Subject: [PATCH 2/3] string_util: Use emplace_back() in SplitString() instead of push_back() This is equivalent to doing: push_back(std::string("")); which is likely not to cause issues, assuming a decent std::string implementation with small-string optimizations implemented in its design, however it's still a little unnecessary to copy that buffer regardless. Instead, we can use emplace_back() to directly construct the empty string within the std::vector instance, eliminating any possible overhead from the copy. 
--- src/common/string_util.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/common/string_util.cpp b/src/common/string_util.cpp index 2099eebb8b..6737655a54 100644 --- a/src/common/string_util.cpp +++ b/src/common/string_util.cpp @@ -162,8 +162,9 @@ void SplitString(const std::string& str, const char delim, std::vector Date: Sun, 22 Jul 2018 15:47:37 -0400 Subject: [PATCH 3/3] string_util: Get rid of separate resize() in CPToUTF16(), UTF16ToUTF8(), CodeToUTF8() and UTF8ToUTF16() There's no need to perform the resize separately here, since the constructor allows presizing the buffer. Also move the empty string check before the construction of the string to make the early out more straightforward. --- src/common/string_util.cpp | 42 ++++++++++++++++++++------------------ 1 file changed, 22 insertions(+), 20 deletions(-) diff --git a/src/common/string_util.cpp b/src/common/string_util.cpp index 6737655a54..1f0456aee9 100644 --- a/src/common/string_util.cpp +++ b/src/common/string_util.cpp @@ -220,31 +220,37 @@ std::u16string UTF8ToUTF16(const std::string& input) { } static std::wstring CPToUTF16(u32 code_page, const std::string& input) { - auto const size = + const auto size = MultiByteToWideChar(code_page, 0, input.data(), static_cast(input.size()), nullptr, 0); - std::wstring output; - output.resize(size); + if (size == 0) { + return L""; + } - if (size == 0 || - size != MultiByteToWideChar(code_page, 0, input.data(), static_cast(input.size()), - &output[0], static_cast(output.size()))) + std::wstring output(size, L'\0'); + + if (size != MultiByteToWideChar(code_page, 0, input.data(), static_cast(input.size()), + &output[0], static_cast(output.size()))) { output.clear(); + } return output; } std::string UTF16ToUTF8(const std::wstring& input) { - auto const size = WideCharToMultiByte(CP_UTF8, 0, input.data(), static_cast(input.size()), + const auto size = WideCharToMultiByte(CP_UTF8, 0, input.data(), static_cast(input.size()), nullptr, 
0, nullptr, nullptr); + if (size == 0) { + return ""; + } - std::string output; - output.resize(size); + std::string output(size, '\0'); - if (size == 0 || - size != WideCharToMultiByte(CP_UTF8, 0, input.data(), static_cast<int>(input.size()), - &output[0], static_cast<int>(output.size()), nullptr, nullptr)) + if (size != WideCharToMultiByte(CP_UTF8, 0, input.data(), static_cast<int>(input.size()), + &output[0], static_cast<int>(output.size()), nullptr, + nullptr)) { output.clear(); + } return output; } @@ -265,8 +271,6 @@ std::string CP1252ToUTF8(const std::string& input) { template <typename T> static std::string CodeToUTF8(const char* fromcode, const std::basic_string<T>& input) { - std::string result; - iconv_t const conv_desc = iconv_open("UTF-8", fromcode); if ((iconv_t)(-1) == conv_desc) { LOG_ERROR(Common, "Iconv initialization failure [{}]: {}", fromcode, strerror(errno)); @@ -278,8 +282,7 @@ static std::string CodeToUTF8(const char* fromcode, const std::basic_string<T>& // Multiply by 4, which is the max number of bytes to encode a codepoint const size_t out_buffer_size = 4 * in_bytes; - std::string out_buffer; - out_buffer.resize(out_buffer_size); + std::string out_buffer(out_buffer_size, '\0'); auto src_buffer = &input[0]; size_t src_bytes = in_bytes; @@ -304,6 +307,7 @@ static std::string CodeToUTF8(const char* fromcode, const std::basic_string<T>& } } + std::string result; out_buffer.resize(out_buffer_size - dst_bytes); out_buffer.swap(result); @@ -313,8 +317,6 @@ static std::string CodeToUTF8(const char* fromcode, const std::basic_string<T>& } std::u16string UTF8ToUTF16(const std::string& input) { - std::u16string result; - iconv_t const conv_desc = iconv_open("UTF-16LE", "UTF-8"); if ((iconv_t)(-1) == conv_desc) { LOG_ERROR(Common, "Iconv initialization failure [UTF-8]: {}", strerror(errno)); @@ -326,8 +328,7 @@ std::u16string UTF8ToUTF16(const std::string& input) { // Multiply by 4, which is the max number of bytes to encode a codepoint const size_t out_buffer_size = 4 * sizeof(char16_t) * 
in_bytes; - std::u16string out_buffer; - out_buffer.resize(out_buffer_size); + std::u16string out_buffer(out_buffer_size, char16_t{}); char* src_buffer = const_cast<char*>(&input[0]); size_t src_bytes = in_bytes; @@ -352,6 +353,7 @@ std::u16string UTF8ToUTF16(const std::string& input) { } } + std::u16string result; out_buffer.resize(out_buffer_size - dst_bytes); out_buffer.swap(result);