Introduce fsyspath subclass of std::filesystem::path.
Our std::strings are UTF-8 encoded, so conversion from std::string to std::filesystem::path must use UTF-8 decoding. The native Windows std::filesystem::path constructor and assignment operator accepting std::string use "native narrow encoding," which mangles path strings containing UTF-8 encoded non-ASCII characters. fsyspath's std::string constructor and assignment operator explicitly engage std::filesystem::u8path() to handle encoding. u8path() is deprecated in C++20, but once we adapt fsyspath's conversion to C++20 conventions, consuming code need not be modified. (cherry picked from commit e399b02e3306a249cb161f07cac578d3f2617bab)master
parent
e6d0138a6a
commit
e829828dcd
|
|
@ -119,6 +119,7 @@ set(llcommon_HEADER_FILES
|
|||
commoncontrol.h
|
||||
ctype_workaround.h
|
||||
fix_macros.h
|
||||
fsyspath.h
|
||||
function_types.h
|
||||
indra_constants.h
|
||||
lazyeventapi.h
|
||||
|
|
|
|||
|
|
@ -0,0 +1,74 @@
|
|||
/**
|
||||
* @file fsyspath.h
|
||||
* @author Nat Goodspeed
|
||||
* @date 2024-04-03
|
||||
* @brief Adapt our UTF-8 std::strings for std::filesystem::path
|
||||
*
|
||||
* $LicenseInfo:firstyear=2024&license=viewerlgpl$
|
||||
* Copyright (c) 2024, Linden Research, Inc.
|
||||
* $/LicenseInfo$
|
||||
*/
|
||||
|
||||
#if ! defined(LL_FSYSPATH_H)
|
||||
#define LL_FSYSPATH_H
|
||||
|
||||
#include <filesystem>
|
||||
|
||||
// While std::filesystem::path can be directly constructed from std::string on
|
||||
// both Posix and Windows, that's not what we want on Windows. Per
|
||||
// https://en.cppreference.com/w/cpp/filesystem/path/path:
|
||||
|
||||
// ... the method of conversion to the native character set depends on the
|
||||
// character type used by source.
|
||||
//
|
||||
// * If the source character type is char, the encoding of the source is
|
||||
// assumed to be the native narrow encoding (so no conversion takes place on
|
||||
// POSIX systems).
|
||||
// * If the source character type is char8_t, conversion from UTF-8 to native
|
||||
// filesystem encoding is used. (since C++20)
|
||||
// * If the source character type is wchar_t, the input is assumed to be the
|
||||
// native wide encoding (so no conversion takes places on Windows).
|
||||
|
||||
// The trouble is that on Windows, from std::string ("source character type is
|
||||
// char"), the "native narrow encoding" isn't UTF-8, so file paths containing
|
||||
// non-ASCII characters get mangled.
|
||||
//
|
||||
// Once we're building with C++20, we could pass a UTF-8 std::string through a
|
||||
// vector<char8_t> to engage std::filesystem::path's own UTF-8 conversion. But
|
||||
// sigh, as of 2024-04-03 we're not yet there.
|
||||
//
|
||||
// Anyway, encapsulating the important UTF-8 conversions in our own subclass
|
||||
// allows us to migrate forward to C++20 conventions without changing
|
||||
// referencing code.
|
||||
|
||||
class fsyspath: public std::filesystem::path
|
||||
{
|
||||
using super = std::filesystem::path;
|
||||
|
||||
public:
|
||||
// default
|
||||
fsyspath() {}
|
||||
// construct from UTF-8 encoded std::string
|
||||
fsyspath(const std::string& path): super(std::filesystem::u8path(path)) {}
|
||||
// construct from UTF-8 encoded const char*
|
||||
fsyspath(const char* path): super(std::filesystem::u8path(path)) {}
|
||||
// construct from existing path
|
||||
fsyspath(const super& path): super(path) {}
|
||||
|
||||
fsyspath& operator=(const super& p) { super::operator=(p); return *this; }
|
||||
fsyspath& operator=(const std::string& p)
|
||||
{
|
||||
super::operator=(std::filesystem::u8path(p));
|
||||
return *this;
|
||||
}
|
||||
fsyspath& operator=(const char* p)
|
||||
{
|
||||
super::operator=(std::filesystem::u8path(p));
|
||||
return *this;
|
||||
}
|
||||
|
||||
// shadow base-class string() method with UTF-8 aware method
|
||||
std::string string() const { return super::u8string(); }
|
||||
};
|
||||
|
||||
#endif /* ! defined(LL_FSYSPATH_H) */
|
||||
Loading…
Reference in New Issue