diff --git a/CMakeLists.txt b/CMakeLists.txt
index c8c3f3f6..bff6015e 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -217,6 +217,24 @@ else()
set(LOCALEDIR_DEFINITION "${CMAKE_INSTALL_FULL_LOCALEDIR}")
endif()
+# When used with MSVC, CMake can merge .manifest files with
+# linker-generated manifests and embed the result in an executable.
+# However, when paired with MinGW-w64, CMake (3.30) ignores .manifest
+# files. Embedding a manifest with a resource file works with both
+# toolchains. It's also the way to do it with Autotools.
+#
+# With MSVC, we need to disable the default manifest; attempting to add
+# two manifest entries would break the build. The flag /MANIFEST:NO
+# goes to the linker and it also affects behavior of CMake itself: it
+# looks at what flags are being passed to the linker and when CMake sees
+# the /MANIFEST:NO option, other manifest-related linker flags are
+# no longer added (see the file Source/cmcmd.cxx in CMake).
+#
+# See: https://gitlab.kitware.com/cmake/cmake/-/issues/23066
+if(MSVC)
+ string(APPEND CMAKE_EXE_LINKER_FLAGS " /MANIFEST:NO")
+endif()
+
# Definitions common to all targets:
add_compile_definitions(
# Package info:
diff --git a/src/Makefile.am b/src/Makefile.am
index 10613234..9e08af5e 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -35,4 +35,6 @@ EXTRA_DIST = \
common/tuklib_physmem.c \
common/tuklib_physmem.h \
common/tuklib_progname.c \
- common/tuklib_progname.h
+ common/tuklib_progname.h \
+ common/w32_application.manifest \
+ common/w32_application.manifest.comments.txt
diff --git a/src/common/common_w32res.rc b/src/common/common_w32res.rc
index 0242fcba..8114ee31 100644
--- a/src/common/common_w32res.rc
+++ b/src/common/common_w32res.rc
@@ -50,3 +50,8 @@ BEGIN
VALUE "Translation", 0x409, 1200
END
END
+
+/* Manifest: only when MY_TYPE is VFT_APP, never on Cygwin/MSYS2 (both define __CYGWIN__). */
+#if MY_TYPE == VFT_APP && !defined(__CYGWIN__)
+CREATEPROCESS_MANIFEST_RESOURCE_ID RT_MANIFEST "w32_application.manifest"
+#endif
diff --git a/src/common/w32_application.manifest b/src/common/w32_application.manifest
new file mode 100644
index 00000000..2f875087
--- /dev/null
+++ b/src/common/w32_application.manifest
@@ -0,0 +1,28 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ true
+ UTF-8
+
+
+
diff --git a/src/common/w32_application.manifest.comments.txt b/src/common/w32_application.manifest.comments.txt
new file mode 100644
index 00000000..ad0835cc
--- /dev/null
+++ b/src/common/w32_application.manifest.comments.txt
@@ -0,0 +1,178 @@
+
+Windows application manifest for UTF-8 and long paths
+=====================================================
+
+The .manifest file is embedded as is in the executables, thus
+the comments are here in a separate file. These comments were
+written in context of XZ Utils but might be useful when porting
+other command line tools from POSIX environments to Windows.
+
+ NOTE: On Cygwin and MSYS2, command line arguments and file
+ system access aren't tied to a Windows code page. Cygwin
+ and MSYS2 include a default application manifest. Replacing
+ it doesn't seem useful and might even be harmful if Cygwin
+ and MSYS2 some day change their default manifest.
+
+
+UTF-8 code page
+---------------
+
+On Windows, command line applications can use main() or wmain().
+With the Windows-specific wmain(), argv contains UTF-16 code units
+which is the native encoding on Windows. With main(), argv uses the
+system active code page by default. It typically is a legacy code
+page like Windows-1252.
+
+ NOTE: On POSIX, argv for main() is constructed by the calling
+ process. On Windows, argv is constructed by a new process
+ itself: a program receives the command line as a single string,
+ and the startup code splits it into individual arguments,
+ including quote removal and wildcard expansion. Then main() or
+ wmain() is called.
+
+This application manifest forces the process code page to UTF-8
+when the application runs on Windows 10 version 1903 or later.
+This is useful for programs that use main():
+
+ * UTF-8 allows such programs to access files whose names contain
+ characters that don't exist in the current legacy code page.
+ However, filenames on Windows may contain unpaired surrogates
+ (invalid UTF-16). Such files cannot be accessed even with the
+ UTF-8 code page.
+
+ * UTF-8 avoids a security issue in command line argument handling:
+ If a command line contains Unicode characters (for example,
+ filenames) that don't exist in the current legacy code page,
+ the characters are converted to similar-looking characters
+ with best-fit mapping. Some best-fit mappings result in ASCII
+ characters that change the meaning of the command line, which
+ can be exploited with malicious filenames. For example:
+
+ - Double quote (") breaks quoting and makes argument
+ injection possible.
+
+ - Question mark (?) is a wildcard character which may
+ expand to one or more filenames.
+
+ - Forward slash (/) makes a directory traversal attack
+ possible. This character can appear in a dangerous way
+ even from a wildcard expansion; a look-alike character
+ doesn't need to be passed directly on the command line.
+
+ UTF-8 avoids best-fit mappings. However, it's still not
+ perfect. Unpaired surrogates (invalid UTF-16) on the command
+ line (including those from wildcard expansion) are converted
+ to the replacement character U+FFFD. Thus, filenames with
+ different unpaired surrogates appear identical when converted
+ to the UTF-8 code page and aren't distinguishable from
+ filenames that contain the actual replacement character U+FFFD.
+
+If different programs use different code pages, compatibility issues
+are possible. For example, if one program produces a list of
+filenames and another program reads it, both programs should use
+the same code page because the code page affects filenames in the
+char-based file system APIs.
+
+If building with a MinGW-w64 toolchain, it is strongly recommended
+to use UCRT instead of the old MSVCRT. For example, with the UTF-8
+code page, MSVCRT doesn't convert non-ASCII characters correctly
+when writing to console with printf(). With UCRT it works.
+
+
+Long path names
+---------------
+
+The manifest enables support for path names longer than 259
+characters if the feature has been enabled in the Windows registry.
+Omit the longPathAware element from the manifest if the application
+isn't compatible with it. For example, uses of MAX_PATH might be
+a sign of incompatibility.
+
+Documentation of the registry setting:
+https://learn.microsoft.com/en-us/windows/win32/fileio/maximum-file-path-limitation?tabs=registry#enable-long-paths-in-windows-10-version-1607-and-later
+
+
+Summary of the manifest contents
+--------------------------------
+
+See also Microsoft's documentation:
+https://learn.microsoft.com/en-us/windows/win32/sbscs/application-manifests
+
+assemblyIdentity (omitted)
+
+ This is documented as mandatory but not all apps in the real world
+ have it, and of those that do, not all put an up-to-date version
+ number there. Things seem to work correctly without
+ assemblyIdentity, so let's keep this simpler and omit it.
+
+compatibility
+
+ Declare the application compatible with different Windows versions.
+ Without this, Windows versions newer than Vista will run the
+ application using Vista as the Operating System Context.
+
+trustInfo
+
+ Declare the application as UAC-compliant. This avoids file system
+ and registry virtualization that Windows otherwise does with 32-bit
+ executables to make some ancient applications work. UAC-compliancy
+ also stops Windows from using heuristics based on the filename
+ (like setup.exe) to guess when elevated privileges might be
+ needed which would then bring up the UAC prompt.
+
+longPathAware
+
+ Declare the application as long path aware. This way many file
+ system operations aren't limited by MAX_PATH (260 characters
+ including the terminating null character) if the feature has
+ also been enabled in the Windows registry.
+
+activeCodePage
+
+ Force the process code page to UTF-8 on Windows 10 version 1903
+ and later. For example:
+
+ - main() gets the command line arguments in UTF-8 instead of
+ in a legacy code page.
+
+ - File system APIs that take char-based strings use UTF-8
+ instead of a legacy code page.
+
+ - Text written to the console via stdio.h's stdout or stderr
+ (like calling printf()) is expected to be in UTF-8.
+
+
+CMake notes
+-----------
+
+As of CMake 3.30, one can add a .manifest file as a source file but
+it only works with MSVC; it's ignored with MinGW-w64 toolchains.
+Embedding the manifest with a resource file works with all
+toolchains. However, then the default manifest needs to be
+disabled with MSVC in CMakeLists.txt to avoid duplicate
+manifests which would break the build.
+
+w32_application.manifest.rc:
+
+ #include <winuser.h>
+ CREATEPROCESS_MANIFEST_RESOURCE_ID RT_MANIFEST "w32_application.manifest"
+
+Or the same thing without the #include:
+
+ 1 24 "w32_application.manifest"
+
+CMakeLists.txt:
+
+ if(MSVC)
+ set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} /MANIFEST:NO")
+ endif()
+
+ add_executable(foo foo.c)
+
+ # WIN32 isn't set on Cygwin or MSYS2, thus if(WIN32) is correct here.
+ if(WIN32)
+ target_sources(foo PRIVATE w32_application.manifest.rc)
+ set_source_files_properties(w32_application.manifest.rc PROPERTIES
+ OBJECT_DEPENDS w32_application.manifest
+ )
+ endif()