From 0bc9eab243dee3be764b3530433a7fcdc3f7c6a1 Mon Sep 17 00:00:00 2001 From: Lasse Collin Date: Sun, 24 Jan 2010 23:50:54 +0200 Subject: [PATCH] Add initial version of xz --list. This is a bit rough but should be useful for basic things. Ideas (with detailed examples) about the output format are welcome. The output of --robot --list is not necessarily stable yet, although I don't currently have any plans about changing it. The man page hasn't been updated yet. --- src/xz/Makefile.am | 1 + src/xz/list.c | 1072 ++++++++++++++++++++++++++++---------------- src/xz/list.h | 18 + src/xz/main.c | 19 +- src/xz/private.h | 1 + 5 files changed, 710 insertions(+), 401 deletions(-) create mode 100644 src/xz/list.h diff --git a/src/xz/Makefile.am b/src/xz/Makefile.am index 08ac236f..49307c09 100644 --- a/src/xz/Makefile.am +++ b/src/xz/Makefile.am @@ -16,6 +16,7 @@ xz_SOURCES = \ file_io.h \ hardware.c \ hardware.h \ + list.c \ main.c \ main.h \ message.c \ diff --git a/src/xz/list.c b/src/xz/list.c index ba298e43..bb793e02 100644 --- a/src/xz/list.c +++ b/src/xz/list.c @@ -1,7 +1,7 @@ /////////////////////////////////////////////////////////////////////////////// // /// \file list.c -/// \brief Listing information about .lzma files +/// \brief Listing information about .xz files // // Author: Lasse Collin // @@ -11,460 +11,742 @@ /////////////////////////////////////////////////////////////////////////////// #include "private.h" +#include "tuklib_integer.h" -/* - -1. Check the file type: native, alone, unknown - -Alone: -1. Show info about header. Don't look for concatenated parts. - -Native: -1. Check that Stream Header is valid. -2. Seek to the end of the file. -3. Skip padding. -4. Reverse decode Stream Footer. -5. Seek Backward Size bytes. -6. - -*/ +/// Totals that are displayed if there was more than one file. +/// The "files" counter is also used in print_info_adv() to show +/// the file number. +static struct { + uint64_t files; + uint64_t streams; + uint64_t blocks; + uint64_t compressed_size; + uint64_t uncompressed_size; + uint32_t checks; +} totals = { 0, 0, 0, 0, 0, 0 }; -static void -unsupported_file(file_handle *handle) +/// \brief Parse the Index(es) from the given .xz file +/// +/// \param idx If decoding is successful, *idx will be set to point +/// to lzma_index containing the decoded information. +/// On error, *idx is not modified. +/// \param pair Input file +/// +/// \return On success, false is returned. On error, true is returned. +/// +// TODO: This function is pretty big. liblzma should have a function that +// takes a callback function to parse the Index(es) from a .xz file to make +// it easy for applications. +static bool +parse_indexes(lzma_index **idx, file_pair *pair) { - errmsg(V_ERROR, "%s: Unsupported file type", handle->name); - set_exit_status(ERROR); - (void)io_close(handle); - return; -} - - -/// Primitive escaping function, that escapes only ASCII control characters. -static void -print_escaped(const uint8_t *str) -{ - while (*str != '\0') { - if (*str <= 0x1F || *str == 0x7F) - printf("\\x%02X", *str); - else - putchar(*str); - - ++str; + if (pair->src_st.st_size <= 0) { + message_error(_("%s: File is empty"), pair->src_name); + return true; } - return; -} + if (pair->src_st.st_size < 2 * LZMA_STREAM_HEADER_SIZE) { + message_error(_("%s: Too small to be a valid .xz file"), + pair->src_name); + return true; + } + io_buf buf; + lzma_stream_flags header_flags; + lzma_stream_flags footer_flags; + lzma_ret ret; -static void -list_native(file_handle *handle) -{ + // lzma_stream for the Index decoder lzma_stream strm = LZMA_STREAM_INIT; - lzma_stream_flags flags; - lzma_ret ret = lzma_stream_header_decoder(&strm, &flags); + // All Indexes decoded so far + lzma_index *combined_index = NULL; + + // The Index currently being decoded + lzma_index *this_index = NULL; + + // Current position in the file. We parse the file backwards so + // initialize it to point to the end of the file. + off_t pos = pair->src_st.st_size; + + // Each loop iteration decodes one Index. + do { + // Check that there is enough data left to contain at least + // the Stream Header and Stream Footer. This check cannot + // fail in the first pass of this loop. + if (pos < 2 * LZMA_STREAM_HEADER_SIZE) { + message_error("%s: %s", pair->src_name, + message_strm(LZMA_DATA_ERROR)); + goto error; + } + + pos -= LZMA_STREAM_HEADER_SIZE; + lzma_vli stream_padding = 0; + + // Locate the Stream Footer. There may be Stream Padding which + // we must skip when reading backwards. + while (true) { + if (pos < LZMA_STREAM_HEADER_SIZE) { + message_error("%s: %s", pair->src_name, + message_strm( + LZMA_DATA_ERROR)); + goto error; + } + + if (io_pread(pair, &buf, + LZMA_STREAM_HEADER_SIZE, pos)) + goto error; + + // Stream Padding is always a multiple of four bytes. + int i = 2; + if (buf.u32[i] != 0) + break; + + // To avoid calling io_pread() for every four bytes + // of Stream Padding, take advantage that we read + // 12 bytes (LZMA_STREAM_HEADER_SIZE) already and + // check them too before calling io_pread() again. + do { + stream_padding += 4; + pos -= 4; + --i; + } while (i >= 0 && buf.u32[i] == 0); + } + + // Decode the Stream Footer. + ret = lzma_stream_footer_decode(&footer_flags, buf.u8); + if (ret != LZMA_OK) { + message_error("%s: %s", pair->src_name, + message_strm(ret)); + goto error; + } + + // Check that the size of the Index field looks sane. + lzma_vli index_size = footer_flags.backward_size; + if ((lzma_vli)(pos) < index_size + LZMA_STREAM_HEADER_SIZE) { + message_error("%s: %s", pair->src_name, + message_strm(LZMA_DATA_ERROR)); + goto error; + } + + // Set pos to the beginning of the Index. + pos -= index_size; + + // See how much memory we can use for decoding this Index. + uint64_t memlimit = hardware_memlimit_get(); + uint64_t memused = 0; + if (combined_index != NULL) { + memused = lzma_index_memused(combined_index); + if (memused > memlimit) + message_bug(); + + memlimit -= memused; + } + + // Decode the Index. + ret = lzma_index_decoder(&strm, &this_index, memlimit); + if (ret != LZMA_OK) { + message_error("%s: %s", pair->src_name, + message_strm(ret)); + goto error; + } + + do { + // Don't give the decoder more input than the + // Index size. + strm.avail_in = MIN(IO_BUFFER_SIZE, index_size); + if (io_pread(pair, &buf, strm.avail_in, pos)) + goto error; + + pos += strm.avail_in; + index_size -= strm.avail_in; + + strm.next_in = buf.u8; + ret = lzma_code(&strm, LZMA_RUN); + + } while (ret == LZMA_OK); + + // If the decoding seems to be successful, check also that + // the Index decoder consumed as much input as indicated + // by the Backward Size field. + if (ret == LZMA_STREAM_END) + if (index_size != 0 || strm.avail_in != 0) + ret = LZMA_DATA_ERROR; + + if (ret != LZMA_STREAM_END) { + // LZMA_BUFFER_ERROR means that the Index decoder + // would have liked more input than what the Index + // size should be according to Stream Footer. + // The message for LZMA_DATA_ERROR makes more + // sense in that case. + if (ret == LZMA_BUF_ERROR) + ret = LZMA_DATA_ERROR; + + message_error("%s: %s", pair->src_name, + message_strm(ret)); + + // If the error was too low memory usage limit, + // show also how much memory would have been needed. + if (ret == LZMA_MEMLIMIT_ERROR) { + uint64_t needed = lzma_memusage(&strm); + if (UINT64_MAX - needed < memused) + needed = UINT64_MAX; + else + needed += memused; + + message_mem_needed(V_ERROR, needed); + } + + goto error; + } + + // Decode the Stream Header and check that its Stream Flags + // match the Stream Footer. + pos -= footer_flags.backward_size + LZMA_STREAM_HEADER_SIZE; + if ((lzma_vli)(pos) < lzma_index_total_size(this_index)) { + message_error("%s: %s", pair->src_name, + message_strm(LZMA_DATA_ERROR)); + goto error; + } + + pos -= lzma_index_total_size(this_index); + if (io_pread(pair, &buf, LZMA_STREAM_HEADER_SIZE, pos)) + goto error; + + ret = lzma_stream_header_decode(&header_flags, buf.u8); + if (ret != LZMA_OK) { + message_error("%s: %s", pair->src_name, + message_strm(ret)); + goto error; + } + + ret = lzma_stream_flags_compare(&header_flags, &footer_flags); + if (ret != LZMA_OK) { + message_error("%s: %s", pair->src_name, + message_strm(ret)); + goto error; + } + + // Store the decoded Stream Flags into this_index. This is + // needed so that we can print which Check is used in each + // Stream. + ret = lzma_index_stream_flags(this_index, &footer_flags); + if (ret != LZMA_OK) + message_bug(); + + // Store also the size of the Stream Padding field. It is + // needed to show the offsets of the Streams correctly. + ret = lzma_index_stream_padding(this_index, stream_padding); + if (ret != LZMA_OK) + message_bug(); + + if (combined_index != NULL) { + // Append the earlier decoded Indexes + // after this_index. + ret = lzma_index_cat( + this_index, combined_index, NULL); + if (ret != LZMA_OK) { + message_error("%s: %s", pair->src_name, + message_strm(ret)); + goto error; + } + } + + combined_index = this_index; + this_index = NULL; + + } while (pos > 0); + + lzma_end(&strm); + + // All OK. Make combined_index available to the caller. + *idx = combined_index; + return false; + +error: + // Something went wrong, free the allocated memory. + lzma_end(&strm); + lzma_index_end(combined_index, NULL); + lzma_index_end(this_index, NULL); + return true; +} + + +/// \brief Get the compression ratio +/// +/// This has slightly different format than that is used by in message.c. +static const char * +get_ratio(uint64_t compressed_size, uint64_t uncompressed_size) +{ + if (uncompressed_size == 0) + return "---"; + + const double ratio = (double)(compressed_size) + / (double)(uncompressed_size); + if (ratio > 9.999) + return "---"; + + static char buf[6]; + snprintf(buf, sizeof(buf), "%.3f", ratio); + return buf; +} + + +static const char check_names[LZMA_CHECK_ID_MAX + 1][12] = { + "None", + "CRC32", + "Unknown-2", + "Unknown-3", + "CRC64", + "Unknown-5", + "Unknown-6", + "Unknown-7", + "Unknown-8", + "Unknown-9", + "SHA-256", + "Unknown-11", + "Unknown-12", + "Unknown-13", + "Unknown-14", + "Unknown-15", +}; + + +/// \brief Get a comma-separated list of Check names +/// +/// \param checks Bit mask of Checks to print +/// \param space_after_comma +/// It's better to not use spaces in table-like listings, +/// but in more verbose formats a space after a comma +/// is good for readability. +static const char * +get_check_names(uint32_t checks, bool space_after_comma) +{ + assert(checks != 0); + + static char buf[sizeof(check_names)]; + char *pos = buf; + size_t left = sizeof(buf); + + const char *sep = space_after_comma ? ", " : ","; + bool comma = false; + + for (size_t i = 0; i <= LZMA_CHECK_ID_MAX; ++i) { + if (checks & (UINT32_C(1) << i)) { + my_snprintf(&pos, &left, "%s%s", + comma ? sep : "", check_names[i]); + comma = true; + } + } + + return buf; +} + + +/// \brief Read the Check value from the .xz file and print it +/// +/// Since this requires a seek, listing all Check values for all Blocks can +/// be slow. +/// +/// \param pair Input file +/// \param iter Location of the Block whose Check value should +/// be printed. +/// +/// \return False on success, true on I/O error. +static bool +print_check_value(file_pair *pair, const lzma_index_iter *iter) +{ + // Don't read anything from the file if there is no integrity Check. + if (iter->stream.flags->check == LZMA_CHECK_NONE) { + printf("---"); + return false; + } + + // Locate and read the Check field. + const uint32_t size = lzma_check_size(iter->stream.flags->check); + const off_t offset = iter->block.compressed_file_offset + + iter->block.total_size - size; + io_buf buf; + if (io_pread(pair, &buf, size, offset)) + return true; + + // CRC32 and CRC64 are in little endian. Guess that all the future + // 32-bit and 64-bit Check values are little endian too. It shouldn't + // be a too big problem if this guess is wrong. + if (size == 4) { + printf("%08" PRIx32, conv32le(buf.u32[0])); + } else if (size == 8) { + printf("%016" PRIx64, conv64le(buf.u64[0])); + } else { + for (size_t i = 0; i < size; ++i) + printf("%02x", buf.u8[i]); + } + + return false; } static void -list_alone(const listing_handle *handle) +print_info_basic(const lzma_index *idx, file_pair *pair) { - if (handle->buffer[0] > (4 * 5 + 4) * 9 + 8) { - unsupported_file(handle); - return; + static bool headings_displayed = false; + if (!headings_displayed) { + headings_displayed = true; + // TRANSLATORS: These are column titles. From Strms (Streams) + // to Ratio, the columns are right aligned. Check and Filename + // are left aligned. If you need longer words, it's OK to + // use two lines here. Test with xz --list. + puts(_("Strms Blocks Compressed Uncompressed Ratio " + "Check Filename")); } - const unsigned int pb = handle->buffer[0] / (9 * 5); - handle->buffer[0] -= pb * 9 * 5; - const unsigned int lp = handle->buffer[0] / 9; - const unsigned int lc = handle->buffer[0] - lp * 9; + printf("%5s %7s %11s %11s %5s %-7s %s\n", + uint64_to_str(lzma_index_stream_count(idx), 0), + uint64_to_str(lzma_index_block_count(idx), 1), + uint64_to_nicestr(lzma_index_file_size(idx), + NICESTR_B, NICESTR_TIB, false, 2), + uint64_to_nicestr(lzma_index_uncompressed_size(idx), + NICESTR_B, NICESTR_TIB, false, 3), + get_ratio(lzma_index_file_size(idx), + lzma_index_uncompressed_size(idx)), + get_check_names(lzma_index_checks(idx), false), + pair->src_name); - uint32_t dict = 0; - for (size_t i = 1; i < 5; ++i) { - dict <<= 8; - dict |= header[i]; + return; +} + + +static void +print_adv_helper(uint64_t stream_count, uint64_t block_count, + uint64_t compressed_size, uint64_t uncompressed_size, + uint32_t checks) +{ + printf(_(" Stream count: %s\n"), + uint64_to_str(stream_count, 0)); + printf(_(" Block count: %s\n"), + uint64_to_str(block_count, 0)); + printf(_(" Compressed size: %s\n"), + uint64_to_nicestr(compressed_size, + NICESTR_B, NICESTR_TIB, true, 0)); + printf(_(" Uncompressed size: %s\n"), + uint64_to_nicestr(uncompressed_size, + NICESTR_B, NICESTR_TIB, true, 0)); + printf(_(" Ratio: %s\n"), + get_ratio(compressed_size, uncompressed_size)); + printf(_(" Check: %s\n"), + get_check_names(checks, true)); + return; +} + + +static void +print_info_adv(const lzma_index *idx, file_pair *pair) +{ + // Print an empty line between files. + static bool first_filename_printed = false; + if (!first_filename_printed) + first_filename_printed = true; + else + putchar('\n'); + + // Print the filename and overall information. + printf("%s (%" PRIu64 "):\n", pair->src_name, totals.files); + print_adv_helper(lzma_index_stream_count(idx), + lzma_index_block_count(idx), + lzma_index_file_size(idx), + lzma_index_uncompressed_size(idx), + lzma_index_checks(idx)); + + // TODO: The rest of this function needs some work. Currently + // the offsets are not printed, which could be useful even when + // printed in a less accurate format. On the other hand, maybe + // this should print the information with exact byte values, + // or maybe there should be at least an option to do that. + // + // We could also display some other info. E.g. it could be useful + // to quickly see how big is the biggest Block (uncompressed size) + // and if all Blocks have Compressed Size and Uncompressed Size + // fields present, which can be used e.g. for multithreaded + // decompression. + + // Avoid printing Stream and Block lists when they wouldn't be useful. + bool show_blocks = false; + if (lzma_index_stream_count(idx) > 1) { + puts(_(" Streams:")); + puts(_(" Number Blocks Compressed " + "Uncompressed Ratio Check")); + + lzma_index_iter iter; + lzma_index_iter_init(&iter, idx); + while (!lzma_index_iter_next(&iter, LZMA_INDEX_ITER_STREAM)) { + if (iter.stream.block_count > 1) + show_blocks = true; + + printf(" %8s %10s %11s %11s %5s %s\n", + uint64_to_str(iter.stream.number, 0), + uint64_to_str(iter.stream.block_count, 1), + uint64_to_nicestr( + iter.stream.compressed_size, + NICESTR_B, NICESTR_TIB, false, 2), + uint64_to_nicestr( + iter.stream.uncompressed_size, + NICESTR_B, NICESTR_TIB, false, 3), + get_ratio(iter.stream.compressed_size, + iter.stream.uncompressed_size), + check_names[iter.stream.flags->check]); + } } - if (dict > LZMA_DICTIONARY_SIZE_MAX) { - unsupported_file(handle); - return; + if (show_blocks || lzma_index_block_count(idx) + > lzma_index_stream_count(idx) + || message_verbosity_get() >= V_DEBUG) { + puts(_(" Blocks:")); + // FIXME: Number in Stream/file, which one is better? + puts(_(" Stream Number Compressed " + "Uncompressed Ratio Check")); + + lzma_index_iter iter; + lzma_index_iter_init(&iter, idx); + while (!lzma_index_iter_next(&iter, LZMA_INDEX_ITER_BLOCK)) { + printf(" %8s %10s %11s %11s %5s %-7s", + uint64_to_str(iter.stream.number, 0), + uint64_to_str(iter.block.number_in_stream, 1), + uint64_to_nicestr(iter.block.total_size, + NICESTR_B, NICESTR_TIB, false, 2), + uint64_to_nicestr( + iter.block.uncompressed_size, + NICESTR_B, NICESTR_TIB, false, 3), + get_ratio(iter.block.total_size, + iter.block.uncompressed_size), + check_names[iter.stream.flags->check]); + + if (message_verbosity_get() >= V_DEBUG) + if (print_check_value(pair, &iter)) + return; + + putchar('\n'); + } } +} - uint64_t uncompressed_size = 0; - for (size_t i = 5; i < 13; ++i) { - uncompressed_size <<= 8; - uncompressed_size |= header[i]; - } - // Reject files with uncompressed size of 256 GiB or more. It's - // an arbitrary limit trying to avoid at least some false positives. - if (uncompressed_size != UINT64_MAX - && uncompressed_size >= (UINT64_C(1) << 38)) { - unsupported_file(handle); - return; - } +static void +print_info_robot(const lzma_index *idx, file_pair *pair) +{ + printf("file\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64 + "\t%s\t%s\t%s\n", + lzma_index_stream_count(idx), + lzma_index_block_count(idx), + lzma_index_file_size(idx), + lzma_index_uncompressed_size(idx), + get_ratio(lzma_index_file_size(idx), + lzma_index_uncompressed_size(idx)), + get_check_names(lzma_index_checks(idx), false), + pair->src_name); - if (verbosity < V_WARNING) { - printf("name="); - print_escaped(handle->name); - printf("\nformat=alone\n"); + if (message_verbosity_get() >= V_VERBOSE) { + lzma_index_iter iter; + lzma_index_iter_init(&iter, idx); - if (uncompressed_size == UINT64_MAX) - printf("uncompressed_size=unknown\n"); - else - printf("uncompressed_size=%" PRIu64 "\n", - uncompressed_size); + while (!lzma_index_iter_next(&iter, LZMA_INDEX_ITER_STREAM)) + printf("stream\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64 + "\t%" PRIu64 "\t%" PRIu64 + "\t%s\t%" PRIu64 "\t%s\n", + iter.stream.number, + iter.stream.compressed_offset, + iter.stream.uncompressed_offset, + iter.stream.compressed_size, + iter.stream.uncompressed_size, + get_ratio(iter.stream.compressed_size, + iter.stream.uncompressed_size), + iter.stream.padding, + check_names[iter.stream.flags->check]); - printf("dict=%" PRIu32 "\n", dict); + lzma_index_iter_rewind(&iter); + while (!lzma_index_iter_next(&iter, LZMA_INDEX_ITER_BLOCK)) { + printf("block\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64 + "\t%" PRIu64 "\t%" PRIu64 + "\t%" PRIu64 "\t%" PRIu64 "\t%s\t%s", + iter.stream.number, + iter.block.number_in_stream, + iter.block.number_in_file, + iter.block.compressed_file_offset, + iter.block.uncompressed_file_offset, + iter.block.total_size, + iter.block.uncompressed_size, + get_ratio(iter.block.total_size, + iter.block.uncompressed_size), + check_names[iter.stream.flags->check]); - printf("lc=%u\nlp=%u\npb=%u\n\n", lc, lp, pb); + if (message_verbosity_get() >= V_DEBUG) { + putchar('\t'); + if (print_check_value(pair, &iter)) + return; + } - } else { - printf("File name: "); - print_escaped(handle->name); - printf("\nFile format: LZMA_Alone\n") - - printf("Uncompressed size: "); - if (uncompressed_size == UINT64_MAX) - printf("unknown\n"); - else - printf("%," PRIu64 " bytes (%" PRIu64 " MiB)\n", - uncompressed_size, - (uncompressed_size + 1024 * 512) - / (1024 * 1024)); - - printf("Dictionary size: %," PRIu32 " bytes " - "(%" PRIu32 " MiB)\n", - dict, (dict + 1024 * 512) / (1024 * 1024)); - - printf("Literal context bits (lc): %u\n", lc); - printf("Literal position bits (lc): %u\n", lp); - printf("Position bits (pb): %u\n", pb); + putchar('\n'); + } } return; } - - -typedef struct { - const char *filename; - struct stat st; - int fd; - - lzma_stream strm; - lzma_stream_flags stream_flags; - lzma_info *info; - - lzma_vli backward_size; - lzma_vli uncompressed_size; - - size_t buffer_size; - uint8_t buffer[IO_BUFFER_SIZE]; -} listing_handle; - - -static bool -listing_pread(listing_handle *handle, uint64_t offset) +static void +update_totals(const lzma_index *idx) { - if (offset >= (uint64_t)(handle->st.st_size)) { - errmsg(V_ERROR, "%s: Trying to read past the end of " - "the file.", handle->filename); - return true; - } - -#ifdef HAVE_PREAD - const ssize_t ret = pread(handle->fd, handle->buffer, IO_BUFFER_SIZE, - (off_t)(offset)); -#else - // Use lseek() + read() since we don't have pread(). We don't care - // to which offset the reading position is left. - if (lseek(handle->fd, (off_t)(offset), SEEK_SET) == -1) { - errmsg(V_ERROR, "%s: %s", handle->filename, strerror(errno)); - return true; - } - - const ssize_t ret = read(handle->fd, handle->buffer, IO_BUFFER_SIZE); -#endif - - if (ret == -1) { - errmsg(V_ERROR, "%s: %s", handle->filename, strerror(errno)); - return true; - } - - if (ret == 0) { - errmsg(V_ERROR, "%s: Trying to read past the end of " - "the file.", handle->filename); - return true; - } - - handle->buffer_size = (size_t)(ret); - return false; + // TODO: Integer overflow checks + ++totals.files; + totals.streams += lzma_index_stream_count(idx); + totals.blocks += lzma_index_block_count(idx); + totals.compressed_size += lzma_index_file_size(idx); + totals.uncompressed_size += lzma_index_uncompressed_size(idx); + totals.checks |= lzma_index_checks(idx); + return; } - -static bool -parse_stream_header(listing_handle *handle) -{ - if (listing_pread(handle, 0)) - return true; - - // TODO Got enough input? - - lzma_ret ret = lzma_stream_header_decoder( - &handle->strm, &handle->stream_flags); - if (ret != LZMA_OK) { - errmsg(V_ERROR, "%s: %s", handle->name, str_strm_error(ret)); - return true; - } - - handle->strm.next_in = handle->buffer; - handle->strm.avail_in = handle->buffer_size; - ret = lzma_code(&handle->strm, LZMA_RUN); - if (ret != LZMA_STREAM_END) { - assert(ret != LZMA_OK); - errmsg(V_ERROR, "%s: %s", handle->name, str_strm_error(ret)); - return true; - } - - return false; -} - - -static bool -parse_stream_tail(listing_handle *handle) -{ - uint64_t offset = (uint64_t)(handle->st.st_size); - - // Skip padding - do { - if (offset == 0) { - errmsg(V_ERROR, "%s: %s", handle->name, - str_strm_error(LZMA_DATA_ERROR)); - return true; - } - - if (offset < IO_BUFFER_SIZE) - offset = 0; - else - offset -= IO_BUFFER_SIZE; - - if (listing_pread(handle, offset)) - return true; - - while (handle->buffer_size > 0 - && handle->buffer[handle->buffer_size - 1] - == '\0') - --handle->buffer_size; - - } while (handle->buffer_size == 0); - - if (handle->buffer_size < LZMA_STREAM_TAIL_SIZE) { - // TODO - } - - lzma_stream_flags stream_flags; - lzma_ret ret = lzma_stream_tail_decoder(&handle->strm, &stream_flags); - if (ret != LZMA_OK) { - errmsg(V_ERROR, "%s: %s", handle->name, str_strm_error(ret)); - return true; - } - - handle->strm.next_in = handle->buffer + handle->buffer_size - - LZMA_STREAM_TAIL_SIZE; - handle->strm.avail_in = LZMA_STREAM_TAIL_SIZE; - handle->buffer_size -= LZMA_STREAM_TAIL_SIZE; - ret = lzma_code(&handle->strm, LZMA_RUN); - if (ret != LZMA_OK) { - assert(ret != LZMA_OK); - errmsg(V_ERROR, "%s: %s", handle->name, str_strm_error(ret)); - return true; - } - - if (!lzma_stream_flags_is_equal(handle->stream_flags, stream_flags)) { - // TODO - // Possibly corrupt, possibly concatenated file. - } - - handle->backward_size = 0; - ret = lzma_vli_reverse_decode(&handle->backward_size, handle->buffer, - &handle->buffer_size); - if (ret != LZMA_OK) { - // It may be LZMA_BUF_ERROR too, but it doesn't make sense - // as an error message displayed to the user. - errmsg(V_ERROR, "%s: %s", handle->name, - str_strm_error(LZMA_DATA_ERROR)); - return true; - } - - if (!stream_flags.is_multi) { - handle->uncompressed_size = 0; - size_t tmp = handle->buffer_size; - ret = lzma_vli_reverse_decode(&handle->uncompressed_size, - handle->buffer, &tmp); - if (ret != LZMA_OK) - handle->uncompressed_size = LZMA_VLI_UNKNOWN; - } - - // Calculate the Header Metadata Block start offset. - - - return false; -} - - - static void -list_native(listing_handle *handle) +print_totals_basic(void) { - lzma_memory_limiter *limiter - = lzma_memory_limiter_create(opt_memory); - if (limiter == NULL) { - errmsg(V_ERROR, - } - lzma_info *info = + // Print a separator line. + char line[80]; + memset(line, '-', sizeof(line)); + line[sizeof(line) - 1] = '\0'; + puts(line); + // Print the totals except the file count, which needs + // special handling. + printf("%5s %7s %11s %11s %5s %-7s ", + uint64_to_str(totals.streams, 0), + uint64_to_str(totals.blocks, 1), + uint64_to_nicestr(totals.compressed_size, + NICESTR_B, NICESTR_TIB, false, 2), + uint64_to_nicestr(totals.uncompressed_size, + NICESTR_B, NICESTR_TIB, false, 3), + get_ratio(totals.compressed_size, + totals.uncompressed_size), + get_check_names(totals.checks, false)); - // Parse Stream Header + // Since we print totals only when there are at least two files, + // the English message will always use "%s files". But some other + // languages need different forms for different plurals so we + // have to translate this string still. // - // Single-Block Stream: - // - Parse Block Header - // - Parse Stream Footer - // - If Backward Size doesn't match, error out - // - // Multi-Block Stream: - // - Parse Header Metadata Block, if any - // - Parse Footer Metadata Block - // - Parse Stream Footer - // - If Footer Metadata Block doesn't match the Stream, error out - // - // In other words, we don't support concatened files. - if (parse_stream_header(handle)) - return; + // TRANSLATORS: This simply indicates the number of files shown + // by --list even though the format string uses %s. + printf(N_("%s file", "%s files\n", + totals.files <= ULONG_MAX ? totals.files + : (totals.files % 1000000) + 1000000), + uint64_to_str(totals.files, 0)); - if (parse_block_header(handle)) - return; - - if (handle->stream_flags.is_multi) { - if (handle->block_options.is_metadata) { - if (parse_metadata(handle) - return; - } - - if (my_seek(handle, - - } else { - if (handle->block_options.is_metadata) { - FILE_IS_CORRUPT(); - return; - } - - if (parse_stream_footer(handle)) - return; - - // If Uncompressed Size isn't present in Block Header, - // it must be present in Stream Footer. - if (handle->block_options.uncompressed_size - == LZMA_VLI_UNKNOWN - && handle->stream_flags.uncompressed_size - == LZMA_VLI_UNKNOWN) { - FILE_IS_CORRUPT(); - return; - } - - // Construct a single-Record Index. - lzma_index *index = malloc(sizeof(lzma_index)); - if (index == NULL) { - out_of_memory(); - return; - } - - // Pohdintaa: - // Jos Block coder hoitaisi Uncompressed ja Backward Sizet, - // voisi index->total_sizeksi laittaa suoraan Backward Sizen. - index->total_size = - - if () { - - } - } - - - if (handle->block_options.is_metadata) { - if (!handle->stream_flags.is_multi) { - FILE_IS_CORRUPT(); - return; - } - - if (parse_metadata(handle)) - return; - - } + return; } +static void +print_totals_adv(void) +{ + putchar('\n'); + puts(_("Totals:")); + printf(_(" Number of files: %s\n"), + uint64_to_str(totals.files, 0)); + print_adv_helper(totals.streams, totals.blocks, + totals.compressed_size, totals.uncompressed_size, + totals.checks); + + return; +} + + +static void +print_totals_robot(void) +{ + printf("totals\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64 + "\t%s\t%s\t%" PRIu64 "\n", + totals.streams, + totals.blocks, + totals.compressed_size, + totals.uncompressed_size, + get_ratio(totals.compressed_size, + totals.uncompressed_size), + get_check_names(totals.checks, false), + totals.files); + + return; +} + extern void -list(const char *filename) +list_totals(void) { + if (opt_robot) { + // Always print totals in --robot mode. It can be convenient + // in some cases and doesn't complicate usage of the + // single-file case much. + print_totals_robot(); + + } else if (totals.files > 1) { + // For non-robot mode, totals are printed only if there + // is more than one file. + if (message_verbosity_get() <= V_WARNING) + print_totals_basic(); + else + print_totals_adv(); + } + + return; +} + + +extern void +list_file(const char *filename) +{ + if (opt_format != FORMAT_XZ && opt_format != FORMAT_AUTO) + message_fatal(_("--list works only on .xz files " + "(--format=xz or --format=auto)")); + if (strcmp(filename, "-") == 0) { - errmsg(V_ERROR, "%s: --list does not support reading from " - "standard input", filename); + message_error(_("--list does not support reading from " + "standard input")); return; } if (is_empty_filename(filename)) return; - listing_handle handle; - handle.filename = filename; - - handle.fd = open(filename, O_RDONLY | O_NOCTTY); - if (handle.fd == -1) { - errmsg(V_ERROR, "%s: %s", filename, strerror(errno)); + // Set opt_stdout so that io_open() won't create a new file. + // Disable also sparse mode so that it doesn't remove O_APPEND + // from stdout. + opt_stdout = true; + io_no_sparse(); + file_pair *pair = io_open(filename); + if (pair == NULL) return; + + lzma_index *idx; + if (!parse_indexes(&idx, pair)) { + // Update the totals that are displayed after all + // the individual files have been listed. + update_totals(idx); + + // We have three main modes: + // - --robot, which has submodes if --verbose is specified + // once or twice + // - Normal --list without --verbose + // - --list with one or two --verbose + if (opt_robot) + print_info_robot(idx, pair); + else if (message_verbosity_get() <= V_WARNING) + print_info_basic(idx, pair); + else + print_info_adv(idx, pair); + + lzma_index_end(idx, NULL); } - if (fstat(handle.fd, &handle.st)) { - errmsg(V_ERROR, "%s: %s", filename, strerror(errno)); - goto out; - } - - if (!S_ISREG(handle.st.st_mode)) { - errmsg(V_WARNING, _("%s: Not a regular file, skipping"), - filename); - goto out; - } - - if (handle.st.st_size <= 0) { - errmsg(V_ERROR, _("%s: File is empty"), filename); - goto out; - } - - if (listing_pread(&handle, 0)) - goto out; - - if (handle.buffer[0] == 0xFF) { - if (opt_header == HEADER_ALONE) { - errmsg(V_ERROR, "%s: FIXME", filename); // FIXME - goto out; - } - - list_native(&handle); - } else { - if (opt_header != HEADER_AUTO && opt_header != HEADER_ALONE) { - errmsg(V_ERROR, "%s: FIXME", filename); // FIXME - goto out; - } - - list_alone(&handle); - } - -out: - (void)close(fd); + io_close(pair, false); return; } diff --git a/src/xz/list.h b/src/xz/list.h new file mode 100644 index 00000000..a4c6ec7d --- /dev/null +++ b/src/xz/list.h @@ -0,0 +1,18 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file list.h +/// \brief List information about .xz files +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +/// \brief List information about the given .xz file +extern void list_file(const char *filename); + + +/// \brief Show the totals after all files have been listed +extern void list_totals(void); diff --git a/src/xz/main.c b/src/xz/main.c index 7445e98a..a2681f23 100644 --- a/src/xz/main.c +++ b/src/xz/main.c @@ -153,10 +153,7 @@ main(int argc, char **argv) args_info args; args_parse(&args, argc, argv); - if (opt_mode == MODE_LIST) - message_fatal("--list is not implemented yet."); - - if (opt_robot) + if (opt_mode != MODE_LIST && opt_robot) message_fatal(_("Compression and decompression with --robot " "are not supported yet.")); @@ -184,6 +181,11 @@ main(int argc, char **argv) // line arguments. signals_init(); + // coder_run() handles compression, decopmression, and testing. + // list_file() is for --list. + void (*run)(const char *filename) = opt_mode == MODE_LIST + ? &list_file : &coder_run; + // Process the files given on the command line. Note that if no names // were given, parse_args() gave us a fake "-" filename. for (size_t i = 0; i < args.arg_count && !user_abort; ++i) { @@ -218,7 +220,7 @@ main(int argc, char **argv) } // Do the actual compression or uncompression. - coder_run(args.arg_names[i]); + run(args.arg_names[i]); } // If --files or --files0 was used, process the filenames from the @@ -234,13 +236,18 @@ main(int argc, char **argv) // read_name() doesn't return empty names. assert(name[0] != '\0'); - coder_run(name); + run(name); } if (args.files_name != stdin_filename) (void)fclose(args.files_file); } + // All files have now been handled. If in --list mode, display + // the totals before exiting. + if (opt_mode == MODE_LIST) + list_totals(); + // If we have got a signal, raise it to kill the program instead // of calling tuklib_exit(). signals_exit(); diff --git a/src/xz/private.h b/src/xz/private.h index 8d9ce978..b5434357 100644 --- a/src/xz/private.h +++ b/src/xz/private.h @@ -48,3 +48,4 @@ #include "signals.h" #include "suffix.h" #include "util.h" +#include "list.h"