From 2a22de439ec63da1927b640eda309296a1e8dce5 Mon Sep 17 00:00:00 2001 From: Lasse Collin Date: Mon, 13 May 2019 20:05:17 +0300 Subject: [PATCH] liblzma: Avoid memcpy(NULL, foo, 0) because it is undefined behavior. I should have always known this but I didn't. Here is an example as a reminder to myself: int mycopy(void *dest, void *src, size_t n) { memcpy(dest, src, n); return dest == NULL; } In the example, a compiler may assume that dest != NULL because passing NULL to memcpy() would be undefined behavior. Testing with GCC 8.2.1, mycopy(NULL, NULL, 0) returns 1 with -O0 and -O1. With -O2 the return value is 0 because the compiler infers that dest cannot be NULL because it was already used with memcpy() and thus the test for NULL gets optimized out. In liblzma, if a null-pointer was passed to memcpy(), there were no checks for NULL *after* the memcpy() call, so I cautiously suspect that it shouldn't have caused bad behavior in practice, but it's hard to be sure, and the problematic cases had to be fixed anyway. Thanks to Jeffrey Walton. --- src/liblzma/common/common.c | 6 +++++- src/liblzma/lz/lz_decoder.c | 12 +++++++++--- src/liblzma/simple/simple_coder.c | 10 +++++++++- 3 files changed, 23 insertions(+), 5 deletions(-) diff --git a/src/liblzma/common/common.c b/src/liblzma/common/common.c index 1399b928..18453ae0 100644 --- a/src/liblzma/common/common.c +++ b/src/liblzma/common/common.c @@ -99,7 +99,11 @@ lzma_bufcpy(const uint8_t *restrict in, size_t *restrict in_pos, const size_t out_avail = out_size - *out_pos; const size_t copy_size = my_min(in_avail, out_avail); - memcpy(out + *out_pos, in + *in_pos, copy_size); + // Call memcpy() only if there is something to copy. If there is + // nothing to copy, in or out might be NULL and then the memcpy() + // call would trigger undefined behavior. + if (copy_size > 0) + memcpy(out + *out_pos, in + *in_pos, copy_size); *in_pos += copy_size; *out_pos += copy_size; diff --git a/src/liblzma/lz/lz_decoder.c b/src/liblzma/lz/lz_decoder.c index bb21d0d0..6c9024e2 100644 --- a/src/liblzma/lz/lz_decoder.c +++ b/src/liblzma/lz/lz_decoder.c @@ -91,11 +91,17 @@ decode_buffer(lzma_coder *coder, in, in_pos, in_size); // Copy the decoded data from the dictionary to the out[] - // buffer. + // buffer. Do it conditionally because out can be NULL + // (in which case copy_size is always 0). Calling memcpy() + // with a null-pointer is undefined even if the third + // argument is 0. const size_t copy_size = coder->dict.pos - dict_start; assert(copy_size <= out_size - *out_pos); - memcpy(out + *out_pos, coder->dict.buf + dict_start, - copy_size); + + if (copy_size > 0) + memcpy(out + *out_pos, coder->dict.buf + dict_start, + copy_size); + *out_pos += copy_size; // Reset the dictionary if so requested by coder->lz.code(). diff --git a/src/liblzma/simple/simple_coder.c b/src/liblzma/simple/simple_coder.c index 13ebabc7..4f499bef 100644 --- a/src/liblzma/simple/simple_coder.c +++ b/src/liblzma/simple/simple_coder.c @@ -118,7 +118,15 @@ simple_code(void *coder_ptr, const lzma_allocator *allocator, // coder->pos and coder->size yet. This way the coder can be // restarted if the next filter in the chain returns e.g. // LZMA_MEM_ERROR. - memcpy(out + *out_pos, coder->buffer + coder->pos, buf_avail); + // + // Do the memcpy() conditionally because out can be NULL + // (in which case buf_avail is always 0). Calling memcpy() + // with a null-pointer is undefined even if the third + // argument is 0. + if (buf_avail > 0) + memcpy(out + *out_pos, coder->buffer + coder->pos, + buf_avail); + *out_pos += buf_avail; // Copy/Encode/Decode more data to out[].