17fa27ce4SDimitry Andric //===-- ZipFile.cpp -------------------------------------------------------===//
27fa27ce4SDimitry Andric //
37fa27ce4SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
47fa27ce4SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
57fa27ce4SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
67fa27ce4SDimitry Andric //
77fa27ce4SDimitry Andric //===----------------------------------------------------------------------===//
87fa27ce4SDimitry Andric
97fa27ce4SDimitry Andric #include "lldb/Utility/ZipFile.h"
107fa27ce4SDimitry Andric #include "lldb/Utility/DataBuffer.h"
117fa27ce4SDimitry Andric #include "lldb/Utility/FileSpec.h"
127fa27ce4SDimitry Andric #include "llvm/Support/Endian.h"
137fa27ce4SDimitry Andric
147fa27ce4SDimitry Andric using namespace lldb_private;
157fa27ce4SDimitry Andric using namespace llvm::support;
167fa27ce4SDimitry Andric
177fa27ce4SDimitry Andric namespace {
187fa27ce4SDimitry Andric
197fa27ce4SDimitry Andric // Zip headers.
207fa27ce4SDimitry Andric // https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT
217fa27ce4SDimitry Andric
227fa27ce4SDimitry Andric // The end of central directory record.
237fa27ce4SDimitry Andric struct EocdRecord {
247fa27ce4SDimitry Andric static constexpr char kSignature[] = {0x50, 0x4b, 0x05, 0x06};
257fa27ce4SDimitry Andric char signature[sizeof(kSignature)];
267fa27ce4SDimitry Andric unaligned_uint16_t disks;
277fa27ce4SDimitry Andric unaligned_uint16_t cd_start_disk;
287fa27ce4SDimitry Andric unaligned_uint16_t cds_on_this_disk;
297fa27ce4SDimitry Andric unaligned_uint16_t cd_records;
307fa27ce4SDimitry Andric unaligned_uint32_t cd_size;
317fa27ce4SDimitry Andric unaligned_uint32_t cd_offset;
327fa27ce4SDimitry Andric unaligned_uint16_t comment_length;
337fa27ce4SDimitry Andric };
347fa27ce4SDimitry Andric
357fa27ce4SDimitry Andric // Logical find limit for the end of central directory record.
367fa27ce4SDimitry Andric const size_t kEocdRecordFindLimit =
377fa27ce4SDimitry Andric sizeof(EocdRecord) +
387fa27ce4SDimitry Andric std::numeric_limits<decltype(EocdRecord::comment_length)>::max();
397fa27ce4SDimitry Andric
407fa27ce4SDimitry Andric // Central directory record.
417fa27ce4SDimitry Andric struct CdRecord {
427fa27ce4SDimitry Andric static constexpr char kSignature[] = {0x50, 0x4b, 0x01, 0x02};
437fa27ce4SDimitry Andric char signature[sizeof(kSignature)];
447fa27ce4SDimitry Andric unaligned_uint16_t version_made_by;
457fa27ce4SDimitry Andric unaligned_uint16_t version_needed_to_extract;
467fa27ce4SDimitry Andric unaligned_uint16_t general_purpose_bit_flag;
477fa27ce4SDimitry Andric unaligned_uint16_t compression_method;
487fa27ce4SDimitry Andric unaligned_uint16_t last_modification_time;
497fa27ce4SDimitry Andric unaligned_uint16_t last_modification_date;
507fa27ce4SDimitry Andric unaligned_uint32_t crc32;
517fa27ce4SDimitry Andric unaligned_uint32_t compressed_size;
527fa27ce4SDimitry Andric unaligned_uint32_t uncompressed_size;
537fa27ce4SDimitry Andric unaligned_uint16_t file_name_length;
547fa27ce4SDimitry Andric unaligned_uint16_t extra_field_length;
557fa27ce4SDimitry Andric unaligned_uint16_t comment_length;
567fa27ce4SDimitry Andric unaligned_uint16_t file_start_disk;
577fa27ce4SDimitry Andric unaligned_uint16_t internal_file_attributes;
587fa27ce4SDimitry Andric unaligned_uint32_t external_file_attributes;
597fa27ce4SDimitry Andric unaligned_uint32_t local_file_header_offset;
607fa27ce4SDimitry Andric };
617fa27ce4SDimitry Andric // Immediately after CdRecord,
627fa27ce4SDimitry Andric // - file name (file_name_length)
637fa27ce4SDimitry Andric // - extra field (extra_field_length)
647fa27ce4SDimitry Andric // - comment (comment_length)
657fa27ce4SDimitry Andric
667fa27ce4SDimitry Andric // Local file header.
677fa27ce4SDimitry Andric struct LocalFileHeader {
687fa27ce4SDimitry Andric static constexpr char kSignature[] = {0x50, 0x4b, 0x03, 0x04};
697fa27ce4SDimitry Andric char signature[sizeof(kSignature)];
707fa27ce4SDimitry Andric unaligned_uint16_t version_needed_to_extract;
717fa27ce4SDimitry Andric unaligned_uint16_t general_purpose_bit_flag;
727fa27ce4SDimitry Andric unaligned_uint16_t compression_method;
737fa27ce4SDimitry Andric unaligned_uint16_t last_modification_time;
747fa27ce4SDimitry Andric unaligned_uint16_t last_modification_date;
757fa27ce4SDimitry Andric unaligned_uint32_t crc32;
767fa27ce4SDimitry Andric unaligned_uint32_t compressed_size;
777fa27ce4SDimitry Andric unaligned_uint32_t uncompressed_size;
787fa27ce4SDimitry Andric unaligned_uint16_t file_name_length;
797fa27ce4SDimitry Andric unaligned_uint16_t extra_field_length;
807fa27ce4SDimitry Andric };
817fa27ce4SDimitry Andric // Immediately after LocalFileHeader,
827fa27ce4SDimitry Andric // - file name (file_name_length)
837fa27ce4SDimitry Andric // - extra field (extra_field_length)
847fa27ce4SDimitry Andric // - file data (should be compressed_size == uncompressed_size, page aligned)
857fa27ce4SDimitry Andric
FindEocdRecord(lldb::DataBufferSP zip_data)867fa27ce4SDimitry Andric const EocdRecord *FindEocdRecord(lldb::DataBufferSP zip_data) {
877fa27ce4SDimitry Andric // Find backward the end of central directory record from the end of the zip
887fa27ce4SDimitry Andric // file to the find limit.
897fa27ce4SDimitry Andric const uint8_t *zip_data_end = zip_data->GetBytes() + zip_data->GetByteSize();
907fa27ce4SDimitry Andric const uint8_t *find_limit = zip_data_end - kEocdRecordFindLimit;
917fa27ce4SDimitry Andric const uint8_t *p = zip_data_end - sizeof(EocdRecord);
927fa27ce4SDimitry Andric for (; p >= zip_data->GetBytes() && p >= find_limit; p--) {
937fa27ce4SDimitry Andric auto eocd = reinterpret_cast<const EocdRecord *>(p);
947fa27ce4SDimitry Andric if (::memcmp(eocd->signature, EocdRecord::kSignature,
957fa27ce4SDimitry Andric sizeof(EocdRecord::kSignature)) == 0) {
967fa27ce4SDimitry Andric // Found the end of central directory. Sanity check the values.
977fa27ce4SDimitry Andric if (eocd->cd_records * sizeof(CdRecord) > eocd->cd_size ||
987fa27ce4SDimitry Andric zip_data->GetBytes() + eocd->cd_offset + eocd->cd_size > p)
997fa27ce4SDimitry Andric return nullptr;
1007fa27ce4SDimitry Andric
1017fa27ce4SDimitry Andric // This is a valid end of central directory record.
1027fa27ce4SDimitry Andric return eocd;
1037fa27ce4SDimitry Andric }
1047fa27ce4SDimitry Andric }
1057fa27ce4SDimitry Andric return nullptr;
1067fa27ce4SDimitry Andric }
1077fa27ce4SDimitry Andric
GetFile(lldb::DataBufferSP zip_data,uint32_t local_file_header_offset,lldb::offset_t & file_offset,lldb::offset_t & file_size)1087fa27ce4SDimitry Andric bool GetFile(lldb::DataBufferSP zip_data, uint32_t local_file_header_offset,
1097fa27ce4SDimitry Andric lldb::offset_t &file_offset, lldb::offset_t &file_size) {
1107fa27ce4SDimitry Andric auto local_file_header = reinterpret_cast<const LocalFileHeader *>(
1117fa27ce4SDimitry Andric zip_data->GetBytes() + local_file_header_offset);
1127fa27ce4SDimitry Andric // The signature should match.
1137fa27ce4SDimitry Andric if (::memcmp(local_file_header->signature, LocalFileHeader::kSignature,
1147fa27ce4SDimitry Andric sizeof(LocalFileHeader::kSignature)) != 0)
1157fa27ce4SDimitry Andric return false;
1167fa27ce4SDimitry Andric
1177fa27ce4SDimitry Andric auto file_data = reinterpret_cast<const uint8_t *>(local_file_header + 1) +
1187fa27ce4SDimitry Andric local_file_header->file_name_length +
1197fa27ce4SDimitry Andric local_file_header->extra_field_length;
1207fa27ce4SDimitry Andric // File should be uncompressed.
1217fa27ce4SDimitry Andric if (local_file_header->compressed_size !=
1227fa27ce4SDimitry Andric local_file_header->uncompressed_size)
1237fa27ce4SDimitry Andric return false;
1247fa27ce4SDimitry Andric
1257fa27ce4SDimitry Andric // This file is valid. Return the file offset and size.
1267fa27ce4SDimitry Andric file_offset = file_data - zip_data->GetBytes();
1277fa27ce4SDimitry Andric file_size = local_file_header->uncompressed_size;
1287fa27ce4SDimitry Andric return true;
1297fa27ce4SDimitry Andric }
1307fa27ce4SDimitry Andric
FindFile(lldb::DataBufferSP zip_data,const EocdRecord * eocd,const llvm::StringRef file_path,lldb::offset_t & file_offset,lldb::offset_t & file_size)1317fa27ce4SDimitry Andric bool FindFile(lldb::DataBufferSP zip_data, const EocdRecord *eocd,
1327fa27ce4SDimitry Andric const llvm::StringRef file_path, lldb::offset_t &file_offset,
1337fa27ce4SDimitry Andric lldb::offset_t &file_size) {
1347fa27ce4SDimitry Andric // Find the file from the central directory records.
1357fa27ce4SDimitry Andric auto cd = reinterpret_cast<const CdRecord *>(zip_data->GetBytes() +
1367fa27ce4SDimitry Andric eocd->cd_offset);
1377fa27ce4SDimitry Andric size_t cd_records = eocd->cd_records;
1387fa27ce4SDimitry Andric for (size_t i = 0; i < cd_records; i++) {
1397fa27ce4SDimitry Andric // The signature should match.
1407fa27ce4SDimitry Andric if (::memcmp(cd->signature, CdRecord::kSignature,
1417fa27ce4SDimitry Andric sizeof(CdRecord::kSignature)) != 0)
1427fa27ce4SDimitry Andric return false;
1437fa27ce4SDimitry Andric
1447fa27ce4SDimitry Andric // Sanity check the file name values.
1457fa27ce4SDimitry Andric auto file_name = reinterpret_cast<const char *>(cd + 1);
1467fa27ce4SDimitry Andric size_t file_name_length = cd->file_name_length;
1477fa27ce4SDimitry Andric if (file_name + file_name_length >= reinterpret_cast<const char *>(eocd) ||
1487fa27ce4SDimitry Andric file_name_length == 0)
1497fa27ce4SDimitry Andric return false;
1507fa27ce4SDimitry Andric
1517fa27ce4SDimitry Andric // Compare the file name.
1527fa27ce4SDimitry Andric if (file_path == llvm::StringRef(file_name, file_name_length)) {
1537fa27ce4SDimitry Andric // Found the file.
1547fa27ce4SDimitry Andric return GetFile(zip_data, cd->local_file_header_offset, file_offset,
1557fa27ce4SDimitry Andric file_size);
1567fa27ce4SDimitry Andric } else {
1577fa27ce4SDimitry Andric // Skip to the next central directory record.
1587fa27ce4SDimitry Andric cd = reinterpret_cast<const CdRecord *>(
1597fa27ce4SDimitry Andric reinterpret_cast<const char *>(cd) + sizeof(CdRecord) +
1607fa27ce4SDimitry Andric cd->file_name_length + cd->extra_field_length + cd->comment_length);
1617fa27ce4SDimitry Andric // Sanity check the pointer.
1627fa27ce4SDimitry Andric if (reinterpret_cast<const char *>(cd) >=
1637fa27ce4SDimitry Andric reinterpret_cast<const char *>(eocd))
1647fa27ce4SDimitry Andric return false;
1657fa27ce4SDimitry Andric }
1667fa27ce4SDimitry Andric }
1677fa27ce4SDimitry Andric
1687fa27ce4SDimitry Andric return false;
1697fa27ce4SDimitry Andric }
1707fa27ce4SDimitry Andric
1717fa27ce4SDimitry Andric } // end anonymous namespace
1727fa27ce4SDimitry Andric
Find(lldb::DataBufferSP zip_data,const llvm::StringRef file_path,lldb::offset_t & file_offset,lldb::offset_t & file_size)1737fa27ce4SDimitry Andric bool ZipFile::Find(lldb::DataBufferSP zip_data, const llvm::StringRef file_path,
1747fa27ce4SDimitry Andric lldb::offset_t &file_offset, lldb::offset_t &file_size) {
1757fa27ce4SDimitry Andric const EocdRecord *eocd = FindEocdRecord(zip_data);
1767fa27ce4SDimitry Andric if (!eocd)
1777fa27ce4SDimitry Andric return false;
1787fa27ce4SDimitry Andric
1797fa27ce4SDimitry Andric return FindFile(zip_data, eocd, file_path, file_offset, file_size);
1807fa27ce4SDimitry Andric }
181