Add native threading support on Windows.

Now liblzma only uses "mythread" functions and types which are defined in mythread.h matching the desired threading method. Before Windows Vista, there is no direct equivalent to pthread condition variables. Since this package doesn't use pthread_cond_broadcast(), pre-Vista threading can still be kept quite simple. The pre-Vista code doesn't use anything that wasn't already available in Windows 95, so the binaries should run even on Windows 95 if someone happens to care.
2026-01-05 04:08:43 +00:00 · 2013-09-17 11:52:28 +03:00 · 2013-09-17 11:52:28 +03:00 · 6b44b4a775
commit 6b44b4a775
parent ae0ab74a88
7 changed files with 574 additions and 216 deletions
--- a/39
+++ b/39
@ -307,16 +307,37 @@ XZ Utils Installation
                the amount of RAM on the operating system you use. See
                src/common/tuklib_physmem.c for details.

-    --disable-threads
-                Disable threading support. This makes some things
-                thread-unsafe, meaning that if multithreaded application
-                calls liblzma functions from more than one thread,
-                something bad may happen.
+    --enable-threads=METHOD
+                Threading support is enabled by default so normally there
+                is no need to specify this option.

-                Use this option if threading support causes you trouble,
-                or if you know that you will use liblzma only from
-                single-threaded applications and want to avoid dependency
-                on libpthread.
+                Supported values for METHOD:
+
+                        yes     Autodetect the threading method. If none
+                                is found, configure will give an error.
+
+                        posix   Use POSIX pthreads. This is the default
+                                except on Windows outside Cygwin.
+
+                        win95   Use Windows 95 compatible threads. This
+                                is compatible with Windows XP and later
+                                too. This is the default for 32-bit x86
+                                Windows builds. The `win95' threading is
+                                incompatible with --enable-small.
+
+                        vista   Use Windows Vista compatible threads. The
+                                resulting binaries won't run on Windows XP
+                                or older. This is the default for Windows
+                                excluding 32-bit x86 builds (that is, on
+                                x86-64 the default is `vista').
+
+                        no      Disable threading support. This is the
+                                same as using --disable-threads.
+                                NOTE: If combined with --enable-small, the
+                                resulting liblzma won't be thread safe,
+                                that is, if a multi-threaded application
+                                calls any liblzma functions from more than
+                                one thread, something bad may happen.

    --enable-symbol-versions
                Use symbol versioning for liblzma. This is enabled by
--- a/configure.ac
+++ b/configure.ac
@ -328,15 +328,48 @@ AM_CONDITIONAL(COND_SMALL, test "x$enable_small" = xyes)
 #############

 AC_MSG_CHECKING([if threading support is wanted])
-AC_ARG_ENABLE([threads], AC_HELP_STRING([--disable-threads],
-		[Disable threading support.
-		This makes some things thread-unsafe.]),
+AC_ARG_ENABLE([threads], AC_HELP_STRING([--enable-threads=METHOD],
+		[Supported METHODS are `yes', `no', `posix', `win95', and
+		`vista'. The default is `yes'. Using `no' together with
+		--enable-small makes liblzma thread unsafe.]),
 	[], [enable_threads=yes])
-if test "x$enable_threads" != xyes && test "x$enable_threads" != xno; then
-	AC_MSG_RESULT([])
-	AC_MSG_ERROR([--enable-threads accepts only \`yes' or \`no'])
+
+if test "x$enable_threads" = xyes; then
+	case $host_os in
+		mingw*)
+			case $host_cpu in
+				i?86)   enable_threads=win95 ;;
+				*)      enable_threads=vista ;;
+			esac
+			;;
+		*)
+			enable_threads=posix
+			;;
+	esac
 fi
-AC_MSG_RESULT([$enable_threads])
+
+case $enable_threads in
+	posix | win95 | vista)
+		AC_MSG_RESULT([yes, $enable_threads])
+		;;
+	no)
+		AC_MSG_RESULT([no])
+		;;
+	*)
+		AC_MSG_RESULT([])
+		AC_MSG_ERROR([--enable-threads only accepts
+			\`yes', \`no', \`posix', \`win95', or \`vista'])
+		;;
+esac
+
+# The Win95 threading lacks thread-safe one-time initialization function.
+# It's better to disallow it instead of allowing threaded but thread-unsafe
+# build.
+if test "x$enable_small$enable_threads" = xyeswin95; then
+	AC_MSG_ERROR([--enable-threads=win95 and --enable-small cannot be
+		used at the same time])
+fi
+
 # We use the actual result a little later.


@ -455,27 +488,49 @@ AM_PROG_CC_C_O
 AM_PROG_AS
 AC_USE_SYSTEM_EXTENSIONS

-if test "x$enable_threads" = xyes; then
-	echo
-	echo "Threading support:"
-	AX_PTHREAD
-	LIBS="$LIBS $PTHREAD_LIBS"
-	AM_CFLAGS="$AM_CFLAGS $PTHREAD_CFLAGS"
+case $enable_threads in
+	posix)
+		echo
+		echo "POSIX threading support:"
+		AX_PTHREAD([:]) dnl We don't need the HAVE_PTHREAD macro.
+		LIBS="$LIBS $PTHREAD_LIBS"
+		AM_CFLAGS="$AM_CFLAGS $PTHREAD_CFLAGS"

-	dnl NOTE: PTHREAD_CC is ignored. It would be useful on AIX, but
-	dnl it's tricky to get it right together with AC_PROG_CC_C99.
-	dnl Thus, this is handled by telling the user in INSTALL to set
-	dnl the correct CC manually.
+		dnl NOTE: PTHREAD_CC is ignored. It would be useful on AIX,
+		dnl but it's tricky to get it right together with
+		dnl AC_PROG_CC_C99. Thus, this is handled by telling the
+		dnl user in INSTALL to set the correct CC manually.

-	# These are nice to have but not mandatory.
-	OLD_CFLAGS=$CFLAGS
-	CFLAGS="$CFLAGS $PTHREAD_CFLAGS"
-	AC_SEARCH_LIBS([clock_gettime], [rt])
-	AC_CHECK_FUNCS([clock_gettime pthread_condattr_setclock])
-	AC_CHECK_DECLS([CLOCK_MONOTONIC], [], [], [[#include <time.h>]])
-	CFLAGS=$OLD_CFLAGS
-fi
-AM_CONDITIONAL([COND_THREADS], [test "x$ax_pthread_ok" = xyes])
+		AC_DEFINE([MYTHREAD_POSIX], [1],
+			[Define to 1 when using POSIX threads (pthreads).])
+
+		# These are nice to have but not mandatory.
+		#
+		# FIXME: xz uses clock_gettime if it is available and can do
+		# it even when threading is disabled. Moving this outside
+		# of pthread detection may be undesirable because then
+		# liblzma may get linked against librt even when librt isn't
+		# needed by liblzma.
+		OLD_CFLAGS=$CFLAGS
+		CFLAGS="$CFLAGS $PTHREAD_CFLAGS"
+		AC_SEARCH_LIBS([clock_gettime], [rt])
+		AC_CHECK_FUNCS([clock_gettime pthread_condattr_setclock])
+		AC_CHECK_DECLS([CLOCK_MONOTONIC], [], [], [[#include <time.h>]])
+		CFLAGS=$OLD_CFLAGS
+		;;
+	win95)
+		AC_DEFINE([MYTHREAD_WIN95], [1], [Define to 1 when using
+			Windows 95 (and thus XP) compatible threads.
+			This avoids use of features that were added in
+			Windows Vista.])
+		;;
+	vista)
+		AC_DEFINE([MYTHREAD_VISTA], [1], [Define to 1 when using
+			Windows Vista compatible threads. This uses
+			features that are not available on Windows XP.])
+		;;
+esac
+AM_CONDITIONAL([COND_THREADS], [test "x$enable_threads" != xno])

 echo
 echo "Initializing Libtool:"
@ -748,3 +803,10 @@ if test x$tuklib_cv_cpucores_method = xunknown; then
 	echo "WARNING:"
 	echo "No supported method to detect the number of CPU cores."
 fi
+
+if test "x$enable_threads$enable_small" = xnoyes; then
+	echo
+	echo "NOTE:"
+	echo "liblzma will be thread unsafe due the combination"
+	echo "of --disable-threads --enable-small."
+fi
--- a/src/common/mythread.h
+++ b/src/common/mythread.h
@ -15,8 +15,84 @@

 #include "sysdefs.h"

+// If any type of threading is enabled, #define MYTHREAD_ENABLED.
+#if defined(MYTHREAD_POSIX) || defined(MYTHREAD_WIN95) \
+		|| defined(MYTHREAD_VISTA)
+#	define MYTHREAD_ENABLED 1
+#endif

-#ifdef HAVE_PTHREAD
+
+#ifdef MYTHREAD_ENABLED
+
+////////////////////////////////////////
+// Shared betewen all threading types //
+////////////////////////////////////////
+
+// Locks a mutex for a duration of a block.
+//
+// Perform mythread_mutex_lock(&mutex) in the beginning of a block
+// and mythread_mutex_unlock(&mutex) at the end of the block. "break"
+// may be used to unlock the mutex and jump out of the block.
+// mythread_sync blocks may be nested.
+//
+// Example:
+//
+//     mythread_sync(mutex) {
+//         foo();
+//         if (some_error)
+//             break; // Skips bar()
+//         bar();
+//     }
+//
+// At least GCC optimizes the loops completely away so it doesn't slow
+// things down at all compared to plain mythread_mutex_lock(&mutex)
+// and mythread_mutex_unlock(&mutex) calls.
+//
+#define mythread_sync(mutex) mythread_sync_helper1(mutex, __LINE__)
+#define mythread_sync_helper1(mutex, line) mythread_sync_helper2(mutex, line)
+#define mythread_sync_helper2(mutex, line) \
+	for (unsigned int mythread_i_ ## line = 0; \
+			mythread_i_ ## line \
+				? (mythread_mutex_unlock(&(mutex)), 0) \
+				: (mythread_mutex_lock(&(mutex)), 1); \
+			mythread_i_ ## line = 1) \
+		for (unsigned int mythread_j_ ## line = 0; \
+				!mythread_j_ ## line; \
+				mythread_j_ ## line = 1)
+#endif
+
+
+#if !defined(MYTHREAD_ENABLED)
+
+//////////////////
+// No threading //
+//////////////////
+
+// Calls the given function once. This isn't thread safe.
+#define mythread_once(func) \
+do { \
+	static bool once_ = false; \
+	if (!once_) { \
+		func(); \
+		once_ = true; \
+	} \
+} while (0)
+
+
+#if !(defined(_WIN32) && !defined(__CYGWIN__))
+// Use sigprocmask() to set the signal mask in single-threaded programs.
+static inline void
+mythread_sigmask(int how, const sigset_t *restrict set,
+		sigset_t *restrict oset)
+{
+	int ret = sigprocmask(how, set, oset);
+	assert(ret == 0);
+	(void)ret;
+}
+#endif
+
+
+#elif defined(MYTHREAD_POSIX)

 ////////////////////
 // Using pthreads //
@ -26,83 +102,117 @@
 #include <pthread.h>
 #include <signal.h>
 #include <time.h>
+#include <errno.h>

+#define MYTHREAD_RET_TYPE void *
+#define MYTHREAD_RET_VALUE NULL

-#ifdef __VMS
-// Do nothing on OpenVMS. It doesn't have pthread_sigmask().
-#define mythread_sigmask(how, set, oset) do { } while (0)
-#else
-/// \brief      Set the process signal mask
-///
-/// If threads are disabled, sigprocmask() is used instead
-/// of pthread_sigmask().
-#define mythread_sigmask(how, set, oset) \
-	pthread_sigmask(how, set, oset)
-#endif
-
-/// \brief      Call the given function once
-///
-/// If threads are disabled, a thread-unsafe version is used.
-#define mythread_once(func) \
-do { \
-	static pthread_once_t once_ = PTHREAD_ONCE_INIT; \
-	pthread_once(&once_, &func); \
-} while (0)
-
-
-/// \brief      Lock a mutex for a duration of a block
-///
-/// Perform pthread_mutex_lock(&mutex) in the beginning of a block
-/// and pthread_mutex_unlock(&mutex) at the end of the block. "break"
-/// may be used to unlock the mutex and jump out of the block.
-/// mythread_sync blocks may be nested.
-///
-/// Example:
-///
-///     mythread_sync(mutex) {
-///         foo();
-///         if (some_error)
-///             break; // Skips bar()
-///         bar();
-///     }
-///
-/// At least GCC optimizes the loops completely away so it doesn't slow
-/// things down at all compared to plain pthread_mutex_lock(&mutex)
-/// and pthread_mutex_unlock(&mutex) calls.
-///
-#define mythread_sync(mutex) mythread_sync_helper1(mutex, __LINE__)
-#define mythread_sync_helper1(mutex, line) mythread_sync_helper2(mutex, line)
-#define mythread_sync_helper2(mutex, line) \
-	for (unsigned int mythread_i_ ## line = 0; \
-			mythread_i_ ## line \
-				? (pthread_mutex_unlock(&(mutex)), 0) \
-				: (pthread_mutex_lock(&(mutex)), 1); \
-			mythread_i_ ## line = 1) \
-		for (unsigned int mythread_j_ ## line = 0; \
-				!mythread_j_ ## line; \
-				mythread_j_ ## line = 1)
-
+typedef pthread_t mythread;
+typedef pthread_mutex_t mythread_mutex;

 typedef struct {
-	/// Condition variable
 	pthread_cond_t cond;
-
 #ifdef HAVE_CLOCK_GETTIME
-	/// Clock ID (CLOCK_REALTIME or CLOCK_MONOTONIC) associated with
-	/// the condition variable
+	// Clock ID (CLOCK_REALTIME or CLOCK_MONOTONIC) associated with
+	// the condition variable.
 	clockid_t clk_id;
 #endif
-
 } mythread_cond;

+typedef struct timespec mythread_condtime;

-/// \brief      Initialize a condition variable to use CLOCK_MONOTONIC
-///
-/// Using CLOCK_MONOTONIC instead of the default CLOCK_REALTIME makes the
-/// timeout in pthread_cond_timedwait() work correctly also if system time
-/// is suddenly changed. Unfortunately CLOCK_MONOTONIC isn't available
-/// everywhere while the default CLOCK_REALTIME is, so the default is
-/// used if CLOCK_MONOTONIC isn't available.
+
+// Calls the given function once in a thread-safe way.
+#define mythread_once(func) \
+	do { \
+		static pthread_once_t once_ = PTHREAD_ONCE_INIT; \
+		pthread_once(&once_, &func); \
+	} while (0)
+
+
+// Use pthread_sigmask() to set the signal mask in multi-threaded programs.
+// Do nothing on OpenVMS since it lacks pthread_sigmask().
+static inline void
+mythread_sigmask(int how, const sigset_t *restrict set,
+		sigset_t *restrict oset)
+{
+#ifdef __VMS
+	(void)how;
+	(void)set;
+	(void)oset;
+#else
+	int ret = pthread_sigmask(how, set, oset);
+	assert(ret == 0);
+	(void)ret;
+#endif
+}
+
+
+// Creates a new thread with all signals blocked. Returns zero on success
+// and non-zero on error.
+static inline int
+mythread_create(mythread *thread, void *(*func)(void *arg), void *arg)
+{
+	sigset_t old;
+	sigset_t all;
+	sigfillset(&all);
+
+	mythread_sigmask(SIG_SETMASK, &all, &old);
+	const int ret = pthread_create(thread, NULL, func, arg);
+	mythread_sigmask(SIG_SETMASK, &old, NULL);
+
+	return ret;
+}
+
+// Joins a thread. Returns zero on success and non-zero on error.
+static inline int
+mythread_join(mythread thread)
+{
+	return pthread_join(thread, NULL);
+}
+
+
+// Initiatlizes a mutex. Returns zero on success and non-zero on error.
+static inline int
+mythread_mutex_init(mythread_mutex *mutex)
+{
+	return pthread_mutex_init(mutex, NULL);
+}
+
+static inline void
+mythread_mutex_destroy(mythread_mutex *mutex)
+{
+	int ret = pthread_mutex_destroy(mutex);
+	assert(ret == 0);
+	(void)ret;
+}
+
+static inline void
+mythread_mutex_lock(mythread_mutex *mutex)
+{
+	int ret = pthread_mutex_lock(mutex);
+	assert(ret == 0);
+	(void)ret;
+}
+
+static inline void
+mythread_mutex_unlock(mythread_mutex *mutex)
+{
+	int ret = pthread_mutex_unlock(mutex);
+	assert(ret == 0);
+	(void)ret;
+}
+
+
+// Initializes a condition variable.
+//
+// Using CLOCK_MONOTONIC instead of the default CLOCK_REALTIME makes the
+// timeout in pthread_cond_timedwait() work correctly also if system time
+// is suddenly changed. Unfortunately CLOCK_MONOTONIC isn't available
+// everywhere while the default CLOCK_REALTIME is, so the default is
+// used if CLOCK_MONOTONIC isn't available.
+//
+// If clock_gettime() isn't available at all, gettimeofday() will be used.
 static inline int
 mythread_cond_init(mythread_cond *mycond)
 {
@ -131,6 +241,8 @@ mythread_cond_init(mythread_cond *mycond)
 	}

 	// If anything above fails, fall back to the default CLOCK_REALTIME.
+	// POSIX requires that all implementations of clock_gettime() must
+	// support at least CLOCK_REALTIME.
 #	endif

 	mycond->clk_id = CLOCK_REALTIME;
@ -139,89 +251,268 @@ mythread_cond_init(mythread_cond *mycond)
 	return pthread_cond_init(&mycond->cond, NULL);
 }

-
-/// \brief      Convert relative time to absolute time for use with timed wait
-///
-/// The current time of the clock associated with the condition variable
-/// is added to the relative time in *ts.
 static inline void
-mythread_cond_abstime(const mythread_cond *mycond, struct timespec *ts)
+mythread_cond_destroy(mythread_cond *cond)
 {
+	int ret = pthread_cond_destroy(&cond->cond);
+	assert(ret == 0);
+	(void)ret;
+}
+
+static inline void
+mythread_cond_signal(mythread_cond *cond)
+{
+	int ret = pthread_cond_signal(&cond->cond);
+	assert(ret == 0);
+	(void)ret;
+}
+
+static inline void
+mythread_cond_wait(mythread_cond *cond, mythread_mutex *mutex)
+{
+	int ret = pthread_cond_wait(&cond->cond, mutex);
+	assert(ret == 0);
+	(void)ret;
+}
+
+// Waits on a condition or until a timeout expires. If the timeout expires,
+// non-zero is returned, otherwise zero is returned.
+static inline int
+mythread_cond_timedwait(mythread_cond *cond, mythread_mutex *mutex,
+		const mythread_condtime *condtime)
+{
+	int ret = pthread_cond_timedwait(&cond->cond, mutex, condtime);
+	assert(ret == 0 || ret == ETIMEDOUT);
+	return ret;
+}
+
+// Sets condtime to the absolute time that is timeout_ms milliseconds
+// in the future. The type of the clock to use is taken from cond.
+static inline void
+mythread_condtime_set(mythread_condtime *condtime, const mythread_cond *cond,
+		uint32_t timeout_ms)
+{
+	condtime->tv_sec = timeout_ms / 1000;
+	condtime->tv_nsec = (timeout_ms % 1000) * 1000000;
+
 #ifdef HAVE_CLOCK_GETTIME
 	struct timespec now;
-	clock_gettime(mycond->clk_id, &now);
+	int ret = clock_gettime(cond->clk_id, &now);
+	assert(ret == 0);
+	(void)ret;

-	ts->tv_sec += now.tv_sec;
-	ts->tv_nsec += now.tv_nsec;
+	condtime->tv_sec += now.tv_sec;
+	condtime->tv_nsec += now.tv_nsec;
 #else
-	(void)mycond;
+	(void)cond;

 	struct timeval now;
 	gettimeofday(&now, NULL);

-	ts->tv_sec += now.tv_sec;
-	ts->tv_nsec += now.tv_usec * 1000L;
+	condtime->tv_sec += now.tv_sec;
+	condtime->tv_nsec += now.tv_usec * 1000L;
 #endif

 	// tv_nsec must stay in the range [0, 999_999_999].
-	if (ts->tv_nsec >= 1000000000L) {
-		ts->tv_nsec -= 1000000000L;
-		++ts->tv_sec;
+	if (condtime->tv_nsec >= 1000000000L) {
+		condtime->tv_nsec -= 1000000000L;
+		++condtime->tv_sec;
 	}
-
-	return;
 }


-#define mythread_cond_wait(mycondptr, mutexptr) \
-	pthread_cond_wait(&(mycondptr)->cond, mutexptr)
+#elif defined(MYTHREAD_WIN95) || defined(MYTHREAD_VISTA)

-#define mythread_cond_timedwait(mycondptr, mutexptr, abstimeptr) \
-	pthread_cond_timedwait(&(mycondptr)->cond, mutexptr, abstimeptr)
+/////////////////////
+// Windows threads //
+/////////////////////

-#define mythread_cond_signal(mycondptr) \
-	pthread_cond_signal(&(mycondptr)->cond)
+#define WIN32_LEAN_AND_MEAN
+#ifdef MYTHREAD_VISTA
+#	undef _WIN32_WINNT
+#	define _WIN32_WINNT 0x0600
+#endif
+#include <windows.h>
+#include <process.h>

-#define mythread_cond_broadcast(mycondptr) \
-	pthread_cond_broadcast(&(mycondptr)->cond)
+#define MYTHREAD_RET_TYPE unsigned int __stdcall
+#define MYTHREAD_RET_VALUE 0

-#define mythread_cond_destroy(mycondptr) \
-	pthread_cond_destroy(&(mycondptr)->cond)
+typedef HANDLE mythread;
+typedef CRITICAL_SECTION mythread_mutex;
+
+#ifdef MYTHREAD_WIN95
+typedef HANDLE mythread_cond;
+#else
+typedef CONDITION_VARIABLE mythread_cond;
+#endif
+
+typedef struct {
+	// Tick count (milliseconds) in the beginning of the timeout.
+	// NOTE: This is 32 bits so it wraps around after 49.7 days.
+	// Multi-day timeouts may not work as expected.
+	DWORD start;
+
+	// Length of the timeout in milliseconds. The timeout expires
+	// when the current tick count minus "start" is equal or greater
+	// than "timeout".
+	DWORD timeout;
+} mythread_condtime;
+
+
+// mythread_once() is only available with Vista threads.
+#ifdef MYTHREAD_VISTA
+#define mythread_once(func) \
+	do { \
+		static INIT_ONCE once_ = INIT_ONCE_STATIC_INIT; \
+		BOOL pending_; \
+		if (!InitOnceBeginInitialize(&once_, 0, &pending_, NULL)) \
+			abort(); \
+		if (pending_) \
+			func(); \
+		if (!InitOnceComplete(&once, 0, NULL)) \
+			abort(); \
+	} while (0)
+#endif
+
+
+// mythread_sigmask() isn't available on Windows. Even a dummy version would
+// make no sense because the other POSIX signal functions are missing anyway.


-/// \brief      Create a thread with all signals blocked
 static inline int
-mythread_create(pthread_t *thread, void *(*func)(void *arg), void *arg)
+mythread_create(mythread *thread,
+		unsigned int (__stdcall *func)(void *arg), void *arg)
 {
-	sigset_t old;
-	sigset_t all;
-	sigfillset(&all);
+	uintptr_t ret = _beginthreadex(NULL, 0, func, arg, 0, NULL);
+	if (ret == 0)
+		return -1;

-	pthread_sigmask(SIG_SETMASK, &all, &old);
-	const int ret = pthread_create(thread, NULL, func, arg);
-	pthread_sigmask(SIG_SETMASK, &old, NULL);
+	*thread = (HANDLE)ret;
+	return 0;
+}
+
+static inline int
+mythread_join(mythread thread)
+{
+	int ret = 0;
+
+	if (WaitForSingleObject(thread, INFINITE) != WAIT_OBJECT_0)
+		ret = -1;
+
+	if (!CloseHandle(thread))
+		ret = -1;

 	return ret;
 }

+
+static inline int
+mythread_mutex_init(mythread_mutex *mutex)
+{
+	InitializeCriticalSection(mutex);
+	return 0;
+}
+
+static inline void
+mythread_mutex_destroy(mythread_mutex *mutex)
+{
+	DeleteCriticalSection(mutex);
+}
+
+static inline void
+mythread_mutex_lock(mythread_mutex *mutex)
+{
+	EnterCriticalSection(mutex);
+}
+
+static inline void
+mythread_mutex_unlock(mythread_mutex *mutex)
+{
+	LeaveCriticalSection(mutex);
+}
+
+
+static inline int
+mythread_cond_init(mythread_cond *cond)
+{
+#ifdef MYTHREAD_WIN95
+	*cond = CreateEvent(NULL, FALSE, FALSE, NULL);
+	return *cond == NULL ? -1 : 0;
 #else
+	InitializeConditionVariable(cond);
+	return 0;
+#endif
+}

-//////////////////
-// No threading //
-//////////////////
+static inline void
+mythread_cond_destroy(mythread_cond *cond)
+{
+#ifdef MYTHREAD_WIN95
+	CloseHandle(*cond);
+#else
+	(void)cond;
+#endif
+}

-#define mythread_sigmask(how, set, oset) \
-	sigprocmask(how, set, oset)
+static inline void
+mythread_cond_signal(mythread_cond *cond)
+{
+#ifdef MYTHREAD_WIN95
+	SetEvent(*cond);
+#else
+	WakeConditionVariable(cond);
+#endif
+}

+static inline void
+mythread_cond_wait(mythread_cond *cond, mythread_mutex *mutex)
+{
+#ifdef MYTHREAD_WIN95
+	LeaveCriticalSection(mutex);
+	WaitForSingleObject(*cond, INFINITE);
+	EnterCriticalSection(mutex);
+#else
+	BOOL ret = SleepConditionVariableCS(cond, mutex, INFINITE);
+	assert(ret);
+	(void)ret;
+#endif
+}

-#define mythread_once(func) \
-do { \
-	static bool once_ = false; \
-	if (!once_) { \
-		func(); \
-		once_ = true; \
-	} \
-} while (0)
+static inline int
+mythread_cond_timedwait(mythread_cond *cond, mythread_mutex *mutex,
+		const mythread_condtime *condtime)
+{
+#ifdef MYTHREAD_WIN95
+	LeaveCriticalSection(mutex);
+#endif
+
+	DWORD elapsed = GetTickCount() - condtime->start;
+	DWORD timeout = elapsed >= condtime->timeout
+			? 0 : condtime->timeout - elapsed;
+
+#ifdef MYTHREAD_WIN95
+	DWORD ret = WaitForSingleObject(*cond, timeout);
+	assert(ret == WAIT_OBJECT_0 || ret == WAIT_TIMEOUT);
+
+	EnterCriticalSection(mutex);
+
+	return ret == WAIT_TIMEOUT;
+#else
+	BOOL ret = SleepConditionVariableCS(cond, mutex, timeout);
+	assert(ret || GetLastError() == ERROR_TIMEOUT);
+	return !ret;
+#endif
+}
+
+static inline void
+mythread_condtime_set(mythread_condtime *condtime, const mythread_cond *cond,
+		uint32_t timeout)
+{
+	(void)cond;
+	condtime->start = GetTickCount();
+	condtime->timeout = timeout;
+}

 #endif

--- a/src/liblzma/common/stream_encoder_mt.c
+++ b/src/liblzma/common/stream_encoder_mt.c
@ -87,12 +87,12 @@ struct worker_thread_s {
 	/// Next structure in the stack of free worker threads.
 	worker_thread *next;

-	pthread_mutex_t mutex;
-	pthread_cond_t cond;
+	mythread_mutex mutex;
+	mythread_cond cond;

 	/// The ID of this thread is used to join the thread
 	/// when it's not needed anymore.
-	pthread_t thread_id;
+	mythread thread_id;
 };


@ -133,12 +133,9 @@ struct lzma_coder_s {
 	lzma_outq outq;


-	/// True if wait_max is used.
-	bool has_timeout;
-
 	/// Maximum wait time if cannot use all the input and cannot
-	/// fill the output buffer.
-	struct timespec wait_max;
+	/// fill the output buffer. This is in milliseconds.
+	uint32_t timeout;


 	/// Error code from a worker thread
@ -174,7 +171,7 @@ struct lzma_coder_s {
 	uint64_t progress_out;


-	pthread_mutex_t mutex;
+	mythread_mutex mutex;
 	mythread_cond cond;
 };

@ -253,7 +250,7 @@ worker_encode(worker_thread *thr, worker_state state)

 			while (in_size == thr->in_size
 					&& thr->state == THR_RUN)
-				pthread_cond_wait(&thr->cond, &thr->mutex);
+				mythread_cond_wait(&thr->cond, &thr->mutex);

 			state = thr->state;
 			in_size = thr->in_size;
@ -305,7 +302,7 @@ worker_encode(worker_thread *thr, worker_state state)
 		// First wait that we have gotten all the input.
 		mythread_sync(thr->mutex) {
 			while (thr->state == THR_RUN)
-				pthread_cond_wait(&thr->cond, &thr->mutex);
+				mythread_cond_wait(&thr->cond, &thr->mutex);

 			state = thr->state;
 			in_size = thr->in_size;
@ -344,7 +341,7 @@ worker_encode(worker_thread *thr, worker_state state)
 }


-static void *
+static MYTHREAD_RET_TYPE
 worker_start(void *thr_ptr)
 {
 	worker_thread *thr = thr_ptr;
@ -358,14 +355,14 @@ worker_start(void *thr_ptr)
 				// requested to stop, just set the state.
 				if (thr->state == THR_STOP) {
 					thr->state = THR_IDLE;
-					pthread_cond_signal(&thr->cond);
+					mythread_cond_signal(&thr->cond);
 				}

 				state = thr->state;
 				if (state != THR_IDLE)
 					break;

-				pthread_cond_wait(&thr->cond, &thr->mutex);
+				mythread_cond_wait(&thr->cond, &thr->mutex);
 			}
 		}

@ -384,7 +381,7 @@ worker_start(void *thr_ptr)
 		mythread_sync(thr->mutex) {
 			if (thr->state != THR_EXIT) {
 				thr->state = THR_IDLE;
-				pthread_cond_signal(&thr->cond);
+				mythread_cond_signal(&thr->cond);
 			}
 		}

@ -409,12 +406,12 @@ worker_start(void *thr_ptr)
 	}

 	// Exiting, free the resources.
-	pthread_mutex_destroy(&thr->mutex);
-	pthread_cond_destroy(&thr->cond);
+	mythread_mutex_destroy(&thr->mutex);
+	mythread_cond_destroy(&thr->cond);

 	lzma_next_end(&thr->block_encoder, thr->allocator);
 	lzma_free(thr->in, thr->allocator);
-	return NULL;
+	return MYTHREAD_RET_VALUE;
 }


@ -426,7 +423,7 @@ threads_stop(lzma_coder *coder, bool wait_for_threads)
 	for (uint32_t i = 0; i < coder->threads_initialized; ++i) {
 		mythread_sync(coder->threads[i].mutex) {
 			coder->threads[i].state = THR_STOP;
-			pthread_cond_signal(&coder->threads[i].cond);
+			mythread_cond_signal(&coder->threads[i].cond);
 		}
 	}

@ -437,7 +434,7 @@ threads_stop(lzma_coder *coder, bool wait_for_threads)
 	for (uint32_t i = 0; i < coder->threads_initialized; ++i) {
 		mythread_sync(coder->threads[i].mutex) {
 			while (coder->threads[i].state != THR_IDLE)
-				pthread_cond_wait(&coder->threads[i].cond,
+				mythread_cond_wait(&coder->threads[i].cond,
 						&coder->threads[i].mutex);
 		}
 	}
@ -454,12 +451,12 @@ threads_end(lzma_coder *coder, const lzma_allocator *allocator)
 	for (uint32_t i = 0; i < coder->threads_initialized; ++i) {
 		mythread_sync(coder->threads[i].mutex) {
 			coder->threads[i].state = THR_EXIT;
-			pthread_cond_signal(&coder->threads[i].cond);
+			mythread_cond_signal(&coder->threads[i].cond);
 		}
 	}

 	for (uint32_t i = 0; i < coder->threads_initialized; ++i) {
-		int ret = pthread_join(coder->threads[i].thread_id, NULL);
+		int ret = mythread_join(coder->threads[i].thread_id);
 		assert(ret == 0);
 		(void)ret;
 	}
@ -479,10 +476,10 @@ initialize_new_thread(lzma_coder *coder, const lzma_allocator *allocator)
 	if (thr->in == NULL)
 		return LZMA_MEM_ERROR;

-	if (pthread_mutex_init(&thr->mutex, NULL))
+	if (mythread_mutex_init(&thr->mutex))
 		goto error_mutex;

-	if (pthread_cond_init(&thr->cond, NULL))
+	if (mythread_cond_init(&thr->cond))
 		goto error_cond;

 	thr->state = THR_IDLE;
@ -501,10 +498,10 @@ initialize_new_thread(lzma_coder *coder, const lzma_allocator *allocator)
 	return LZMA_OK;

 error_thread:
-	pthread_cond_destroy(&thr->cond);
+	mythread_cond_destroy(&thr->cond);

 error_cond:
-	pthread_mutex_destroy(&thr->mutex);
+	mythread_mutex_destroy(&thr->mutex);

 error_mutex:
 	lzma_free(thr->in, allocator);
@ -543,7 +540,7 @@ get_thread(lzma_coder *coder, const lzma_allocator *allocator)
 		coder->thr->state = THR_RUN;
 		coder->thr->in_size = 0;
 		coder->thr->outbuf = lzma_outq_get_buf(&coder->outq);
-		pthread_cond_signal(&coder->thr->cond);
+		mythread_cond_signal(&coder->thr->cond);
 	}

 	return LZMA_OK;
@ -594,7 +591,7 @@ stream_encode_in(lzma_coder *coder, const lzma_allocator *allocator,
 				if (finish)
 					coder->thr->state = THR_FINISH;

-				pthread_cond_signal(&coder->thr->cond);
+				mythread_cond_signal(&coder->thr->cond);
 			}
 		}

@ -619,21 +616,20 @@ stream_encode_in(lzma_coder *coder, const lzma_allocator *allocator,
 /// Wait until more input can be consumed, more output can be read, or
 /// an optional timeout is reached.
 static bool
-wait_for_work(lzma_coder *coder, struct timespec *wait_abs,
+wait_for_work(lzma_coder *coder, mythread_condtime *wait_abs,
 		bool *has_blocked, bool has_input)
 {
-	if (coder->has_timeout && !*has_blocked) {
+	if (coder->timeout != 0 && !*has_blocked) {
 		// Every time when stream_encode_mt() is called via
-		// lzma_code(), *has_block starts as false. We set it
+		// lzma_code(), *has_blocked starts as false. We set it
 		// to true here and calculate the absolute time when
 		// we must return if there's nothing to do.
 		//
 		// The idea of *has_blocked is to avoid unneeded calls
-		// to mythread_cond_abstime(), which may do a syscall
+		// to mythread_condtime_set(), which may do a syscall
 		// depending on the operating system.
 		*has_blocked = true;
-		*wait_abs = coder->wait_max;
-		mythread_cond_abstime(&coder->cond, wait_abs);
+		mythread_condtime_set(wait_abs, &coder->cond, coder->timeout);
 	}

 	bool timed_out = false;
@ -651,7 +647,7 @@ wait_for_work(lzma_coder *coder, struct timespec *wait_abs,
 				&& !lzma_outq_is_readable(&coder->outq)
 				&& coder->thread_error == LZMA_OK
 				&& !timed_out) {
-			if (coder->has_timeout)
+			if (coder->timeout != 0)
 				timed_out = mythread_cond_timedwait(
 						&coder->cond, &coder->mutex,
 						wait_abs) != 0;
@ -692,7 +688,7 @@ stream_encode_mt(lzma_coder *coder, const lzma_allocator *allocator,

 		// These are for wait_for_work().
 		bool has_blocked = false;
-		struct timespec wait_abs;
+		mythread_condtime wait_abs;

 		while (true) {
 			mythread_sync(coder->mutex) {
@ -828,7 +824,7 @@ stream_encoder_mt_end(lzma_coder *coder, const lzma_allocator *allocator)
 	lzma_index_end(coder->index, allocator);

 	mythread_cond_destroy(&coder->cond);
-	pthread_mutex_destroy(&coder->mutex);
+	mythread_mutex_destroy(&coder->mutex);

 	lzma_free(coder, allocator);
 	return;
@ -949,14 +945,14 @@ stream_encoder_mt_init(lzma_next_coder *next, const lzma_allocator *allocator,
 		// the error handling has to be done here because
 		// stream_encoder_mt_end() doesn't know if they have
 		// already been initialized or not.
-		if (pthread_mutex_init(&next->coder->mutex, NULL)) {
+		if (mythread_mutex_init(&next->coder->mutex)) {
 			lzma_free(next->coder, allocator);
 			next->coder = NULL;
 			return LZMA_MEM_ERROR;
 		}

 		if (mythread_cond_init(&next->coder->cond)) {
-			pthread_mutex_destroy(&next->coder->mutex);
+			mythread_mutex_destroy(&next->coder->mutex);
 			lzma_free(next->coder, allocator);
 			next->coder = NULL;
 			return LZMA_MEM_ERROR;
@ -1011,14 +1007,7 @@ stream_encoder_mt_init(lzma_next_coder *next, const lzma_allocator *allocator,
 			outbuf_size_max, options->threads));

 	// Timeout
-	if (options->timeout > 0) {
-		next->coder->wait_max.tv_sec = options->timeout / 1000;
-		next->coder->wait_max.tv_nsec
-				= (options->timeout % 1000) * 1000000L;
-		next->coder->has_timeout = true;
-	} else {
-		next->coder->has_timeout = false;
-	}
+	next->coder->timeout = options->timeout;

 	// Free the old filter chain and copy the new one.
 	for (size_t i = 0; next->coder->filters[i].id != LZMA_VLI_UNKNOWN; ++i)
--- a/src/xz/coder.c
+++ b/src/xz/coder.c
@ -51,7 +51,7 @@ static lzma_check check;
 /// This becomes false if the --check=CHECK option is used.
 static bool check_default = true;

-#ifdef HAVE_PTHREAD
+#ifdef MYTHREAD_ENABLED
 static lzma_mt mt_options = {
 	.flags = 0,
 	.timeout = 300,
@ -200,7 +200,7 @@ coder_set_compression_settings(void)
 	const uint64_t memory_limit = hardware_memlimit_get(opt_mode);
 	uint64_t memory_usage;
 	if (opt_mode == MODE_COMPRESS) {
-#ifdef HAVE_PTHREAD
+#ifdef MYTHREAD_ENABLED
 		if (opt_format == FORMAT_XZ && hardware_threads_get() > 1) {
 			mt_options.threads = hardware_threads_get();
 			mt_options.block_size = opt_block_size;
@ -245,7 +245,7 @@ coder_set_compression_settings(void)

 	assert(opt_mode == MODE_COMPRESS);

-#ifdef HAVE_PTHREAD
+#ifdef MYTHREAD_ENABLED
 	if (opt_format == FORMAT_XZ && mt_options.threads > 1) {
 		// Try to reduce the number of threads before
 		// adjusting the compression settings down.
@ -408,7 +408,7 @@ coder_init(file_pair *pair)
 			break;

 		case FORMAT_XZ:
-#ifdef HAVE_PTHREAD
+#ifdef MYTHREAD_ENABLED
 			if (hardware_threads_get() > 1)
 				ret = lzma_stream_encoder_mt(
 						&strm, &mt_options);
--- a/windows/README-Windows.txt
+++ b/windows/README-Windows.txt
@ -29,7 +29,7 @@ Package contents
    There is one directory for each type of binaries:

        bin_i486        32-bit x86 (i486 and up), Windows 95 and later
-        bin_x86-64      64-bit x86-64, Windows XP and later
+        bin_x86-64      64-bit x86-64, Windows Vista and later

    Each of the above directories have the following files:

--- a/windows/build.bash
+++ b/windows/build.bash
@ -69,11 +69,10 @@ buildit()
 	# Clean up if it was already configured.
 	[ -f Makefile ] && make distclean

-	# Build the size-optimized binaries. Note that I don't want to
-	# provide size-optimized liblzma (shared nor static), because
-	# that isn't thread-safe now, and depending on bunch of things,
-	# maybe it will never be on Windows (pthreads-win32 helps but
-	# static liblzma might bit a bit tricky with it).
+	# Build the size-optimized binaries. Providing size-optimized liblzma
+	# could be considered but I don't know if it should only use -Os or
+	# should it also use --enable-small and if it should support
+	# threading. So I don't include a size-optimized liblzma for now.
 	./configure \
 		--prefix= \
 		--disable-nls \
@ -90,16 +89,11 @@ buildit()

 	make distclean

-	# Build the normal speed-optimized binaries. Note that while
-	# --disable-threads has been documented to make some things
-	# thread-unsafe, it's not actually true with this combination
-	# of configure flags in XZ Utils 5.0.x. Things can (and probably
-	# will) change after 5.0.x, and this script will be updated too.
+	# Build the normal speed-optimized binaries.
 	./configure \
 		--prefix= \
 		--disable-nls \
 		--disable-scripts \
-		--disable-threads \
 		--build="$BUILD" \
 		CFLAGS="$CFLAGS -O2"
 	make -C src/liblzma
@ -132,8 +126,9 @@ txtcp()
 }

 # FIXME: Make sure that we don't get i686 or i586 code from the runtime.
-# Actually i586 would be fine, but i686 probably not if the idea is to
-# support even Win95.
+# Or if we do, update the strings here to match the generated code.
+# i686 has cmov which can help like maybe 1 % in performance but things
+# like SSE don't help, so i486 isn't horrible for performance.
 #
 # FIXME: Using i486 in the configure triplet may be wrong.
 if [ -d "$MINGW_W32_DIR" ]; then
@ -153,7 +148,7 @@ elif [ -d "$MINGW_DIR" ]; then
 fi

 if [ -d "$MINGW_W64_DIR" ]; then
-	# 64-bit x86, WinXP or later, using MinGW-w64
+	# x86-64, Windows Vista or later, using MinGW-w64
 	PATH=$MINGW_W64_DIR/bin:$MINGW_W64_DIR/x86_64-w64-mingw32/bin:$PATH \
 			buildit \
 			pkg/bin_x86-64 \