diff --git a/examples/pdb2hpp.rs b/examples/pdb2hpp.rs index 0235d45..3e146bb 100644 --- a/examples/pdb2hpp.rs +++ b/examples/pdb2hpp.rs @@ -7,8 +7,8 @@ use pdb::FallibleIterator; type TypeSet = BTreeSet; -pub fn type_name<'p>( - type_finder: &pdb::TypeFinder<'p>, +pub fn type_name( + type_finder: &pdb::TypeFinder<'_>, type_index: pdb::TypeIndex, needed_types: &mut TypeSet, ) -> pdb::Result { @@ -359,8 +359,8 @@ impl<'p> Method<'p> { } } -fn argument_list<'p>( - type_finder: &pdb::TypeFinder<'p>, +fn argument_list( + type_finder: &pdb::TypeFinder<'_>, type_index: pdb::TypeIndex, needed_types: &mut TypeSet, ) -> pdb::Result> { diff --git a/examples/pdb_symbols.rs b/examples/pdb_symbols.rs index bf6d3eb..0d75027 100644 --- a/examples/pdb_symbols.rs +++ b/examples/pdb_symbols.rs @@ -3,7 +3,7 @@ use std::env; use pdb2 as pdb; use getopts::Options; -use pdb::{FallibleIterator, PdbInternalSectionOffset}; +use pdb::{FallibleIterator, PdbInternalSectionOffset, RawString}; fn print_usage(program: &str, opts: Options) { let brief = format!("Usage: {} input.pdb", program); @@ -28,6 +28,15 @@ fn print_symbol(symbol: &pdb::Symbol<'_>) -> pdb::Result<()> { pdb::SymbolData::Procedure(data) => { print_row(data.offset, "function", data.name); } + pdb::SymbolData::ManagedProcedure(data) => { + match data.name { + None => print_row(data.offset, "function", RawString::from(&b""[..])), + Some(name) => print_row(data.offset, "function", name), + } + } + pdb::SymbolData::ManagedSlot(data) => { + print_row(data.offset, "data", data.name); + } _ => { // ignore everything else } diff --git a/src/common.rs b/src/common.rs index 893a857..937b5af 100644 --- a/src/common.rs +++ b/src/common.rs @@ -14,7 +14,7 @@ use std::result; use std::slice; use scroll::ctx::TryFromCtx; -use scroll::{self, Endian, Pread, LE}; +use scroll::{Endian, Pread, LE}; use crate::tpi::constants; @@ -581,7 +581,7 @@ pub trait ItemIndex: /// [`ModuleInfo`](crate::ModuleInfo). Note that this comparison needs to be done /// case-insensitively as the name in the DBI stream and name table are known to not /// have matching cases. - /// 4. Resolve the [`Local`](crate::Local) index into a global one using + /// 4. Resolve the [`Local`] index into a global one using /// [`CrossModuleExports`](crate::CrossModuleExports). /// /// Cross module references are specially formatted indexes with the most significant bit set to @@ -605,6 +605,14 @@ impl_pread!(TypeIndex); impl ItemIndex for TypeIndex {} +/// COM+ metadata token for managed procedures (`CV_tkn_t`). +#[derive(Clone, Copy, Default, Eq, Hash, Ord, PartialEq, PartialOrd)] +pub struct COMToken(pub u32); + +impl_convert!(COMToken, u32); +impl_hex_fmt!(COMToken); +impl_pread!(COMToken); + /// Index of an [`Id`](crate::Id) in [`IdInformation`](crate::IdInformation) stream. /// /// If this index is a [cross module reference](ItemIndex::is_cross_module), it must be resolved diff --git a/src/dbi.rs b/src/dbi.rs index 86379cb..2993cb6 100644 --- a/src/dbi.rs +++ b/src/dbi.rs @@ -88,6 +88,17 @@ impl<'s> DebugInformation<'s> { } } + /// Returns whether or not this PDB has been marked as stripped. Stripped PDBs do not contain + /// type information, line number information, or per-object CV symbols. + /// + /// This flag is set when a PDB is written with [/PDBSTRIPPED] by MSVC. + /// + /// [/PDBSTRIPPED]: https://learn.microsoft.com/en-us/cpp/build/reference/pdbstripped-strip-private-symbols?view=msvc-170 + pub fn is_stripped(&self) -> bool { + // flags.fStripped + (self.header.flags & 0x2) != 0 + } + /// Returns an iterator that can traverse the modules list in sequential order. pub fn modules(&self) -> Result> { let mut buf = self.stream.parse_buffer(); diff --git a/src/modi/c13.rs b/src/modi/c13.rs index 645d0b0..223bb99 100644 --- a/src/modi/c13.rs +++ b/src/modi/c13.rs @@ -6,8 +6,8 @@ use scroll::{ctx::TryFromCtx, Endian, Pread}; use crate::common::*; use crate::modi::{ - constants, CrossModuleExport, CrossModuleRef, FileChecksum, FileIndex, FileInfo, LineInfo, - LineInfoKind, ModuleRef, + constants, CrossModuleExport, CrossModuleRef, FileChecksum, FileInfo, LineInfo, LineInfoKind, + ModuleRef, }; use crate::symbol::{BinaryAnnotation, BinaryAnnotationsIter, InlineSiteSymbol}; use crate::FallibleIterator; @@ -345,6 +345,7 @@ enum LineEntry { /// Declares a source line number. Number(LineNumberEntry), /// Declares a debugging marker. + #[allow(dead_code)] // reason = "the inner `LineMarkerEntry` is not (yet) accessed" Marker(LineMarkerEntry), } @@ -1418,8 +1419,6 @@ impl<'a> LineProgram<'a> { mod tests { use super::*; - use std::mem; - use crate::symbol::BinaryAnnotations; #[test] diff --git a/src/modi/mod.rs b/src/modi/mod.rs index cdb2372..c0e2dc7 100644 --- a/src/modi/mod.rs +++ b/src/modi/mod.rs @@ -49,7 +49,7 @@ impl<'s> ModuleInfo<'s> { } fn lines_data(&self, size: usize) -> &[u8] { - let start = self.symbols_size as usize; + let start = self.symbols_size; &self.stream[start..start + size] } diff --git a/src/msf/mod.rs b/src/msf/mod.rs index 92b6db3..045fb6d 100644 --- a/src/msf/mod.rs +++ b/src/msf/mod.rs @@ -299,7 +299,7 @@ mod big { let _ = stream_table.take((stream_count - stream_number - 1) as usize * 4)?; // skip the preceding streams' page numbers - let _ = stream_table.take((page_numbers_to_skip as usize) * 4)?; + let _ = stream_table.take(page_numbers_to_skip * 4)?; // we're now at the list of pages for our stream // accumulate them into a PageList @@ -344,8 +344,201 @@ mod big { } mod small { - pub const MAGIC: &[u8] = b"Microsoft C/C++ program database 2.00\r\n\x1a\x4a\x47"; - // TODO: implement SmallMSF + use super::*; + + pub const MAGIC: &[u8; 44] = b"Microsoft C/C++ program database 2.00\r\n\x1a\x4a\x47\x00\x00"; + + /// The PDB header as stored on disk. + /// + /// See the Microsoft code for reference: + #[repr(C)] + #[derive(Debug, Copy, Clone)] + struct RawHeader { + magic: [u8; 44], + page_size: u32, + free_page_map: u16, + pages_used: u16, + directory_size: u32, + _reserved: u32, + } + + impl<'t> TryFromCtx<'t, Endian> for RawHeader { + type Error = scroll::Error; + + fn try_from_ctx( + this: &'t [u8], + le: Endian, + ) -> std::result::Result<(Self, usize), Self::Error> { + let mut offset = 0; + let data = Self { + magic: { + let mut tmp = [0; 44]; + this.gread_inout_with(&mut offset, &mut tmp, le)?; + tmp + }, + page_size: this.gread_with(&mut offset, le)?, + free_page_map: this.gread_with(&mut offset, le)?, + pages_used: this.gread_with(&mut offset, le)?, + directory_size: this.gread_with(&mut offset, le)?, + _reserved: this.gread_with(&mut offset, le)?, + }; + Ok((data, offset)) + } + } + + #[derive(Debug)] + pub struct SmallMSF<'s, S> { + header: Header, + source: S, + stream_table: StreamTable<'s>, + } + + impl<'s, S: Source<'s>> SmallMSF<'s, S> { + pub fn new(mut source: S, header_view: Box>) -> Result> { + let mut buf = ParseBuffer::from(header_view.as_slice()); + + let header: RawHeader = buf.parse()?; + + if &header.magic != MAGIC { + return Err(Error::UnrecognizedFileFormat); + } + + // TODO: check if this is correct for small MSF + if header.page_size.count_ones() != 1 + || header.page_size < 0x100 + || header.page_size > (128 * 0x10000) + { + return Err(Error::InvalidPageSize(header.page_size as _)); + } + + let header_object = Header { + page_size: header.page_size as _, + maximum_valid_page_number: header.pages_used as _, + }; + + // build the stream table page list + let mut stream_table_page_list = PageList::new(header_object.page_size); + let mut i = 0; + + while i < header.directory_size { + let n = buf.parse_u16()? as u32; + stream_table_page_list.push(header_object.validate_page_number(n)?); + i += header.page_size; + } + + // truncate the stream table page list to the correct size + stream_table_page_list.truncate(header.directory_size as _); + + let stream_table_view = view(&mut source, &stream_table_page_list)?; + + Ok(SmallMSF { + header: header_object, + source, + stream_table: StreamTable::Available { stream_table_view }, + }) + } + + fn look_up_stream(&mut self, stream_number: u32) -> Result { + // ensure the stream table is available + let StreamTable::Available { + ref stream_table_view, + } = self.stream_table else { + unreachable!() + }; + + let stream_table_slice = stream_table_view.as_slice(); + let mut stream_table = ParseBuffer::from(stream_table_slice); + + // the stream table is structured as: + // stream_count: u16 + // reserved: u16 + // for _ in 0..stream_count: + // size of stream in bytes: u32 (0xffffffff indicating "stream does not exist") + // reserved: u32 + // stream 0: PageNumber: u16 + // stream 1: PageNumber: u16, PageNumber: u16 + // stream 2: PageNumber: u16, PageNumber: u16, PageNumber; u16, PageNumber: u16, PageNumber: u16 + // stream 3: PageNumber: u16, PageNumber: u16, PageNumber; u16, PageNumber: u16 + // (number of pages determined by number of bytes) + + let stream_count = stream_table.parse_u16()? as u32; + let _reserved = stream_table.parse_u16()?; + + // check if we've already outworn our welcome + if stream_number >= stream_count { + return Err(Error::StreamNotFound(stream_number)); + } + + // we now have {stream_count} u32s describing the length of each stream + + // walk over the streams before the requested stream + // we need to pay attention to how big each one is, since their page numbers come + // before our page numbers in the stream table + let mut page_numbers_to_skip: usize = 0; + + for _ in 0..stream_number { + let bytes = stream_table.parse_u32()?; + let _reserved = stream_table.parse_u32()?; + + if bytes == u32::max_value() { + // stream is not present, ergo nothing to skip + } else { + page_numbers_to_skip += self.header.pages_needed_to_store(bytes as usize); + } + } + + // read our stream's size + let bytes_in_stream = stream_table.parse_u32()?; + let _reserved = stream_table.parse_u32()?; + + if bytes_in_stream == u32::max_value() { + return Err(Error::StreamNotFound(stream_number)); + } + + let pages_in_stream = self.header.pages_needed_to_store(bytes_in_stream as usize); + + // skip the remaining streams' byte counts + let _ = stream_table.take((stream_count - stream_number - 1) as usize * 8)?; + + // skip the preceding streams' page numbers + let _ = stream_table.take(page_numbers_to_skip * 2)?; + + // we're now at the list of pages for our stream + // accumulate them into a PageList + let mut page_list = PageList::new(self.header.page_size); + + for _ in 0..pages_in_stream { + let page_number = stream_table.parse_u16()? as u32; + page_list.push(self.header.validate_page_number(page_number)?); + } + + // truncate to the size of the stream + page_list.truncate(bytes_in_stream as usize); + + // done! + Ok(page_list) + } + } + + impl<'s, S: Source<'s>> Msf<'s, S> for SmallMSF<'s, S> { + fn get(&mut self, stream_number: u32, limit: Option) -> Result> { + // look up the stream + let mut page_list = self.look_up_stream(stream_number)?; + + // apply any limits we have + if let Some(limit) = limit { + page_list.truncate(limit); + } + + // now that we know where this stream lives, we can view it + let view = view(&mut self.source, &page_list)?; + + // pack it into a Stream + let stream = Stream { source_view: view }; + + Ok(stream) + } + } } /// Represents a single Stream within the multi-stream file. @@ -414,8 +607,9 @@ pub fn open_msf<'s, S: Source<'s> + Send + 's>( } if header_matches(header_view.as_slice(), small::MAGIC) { - // sorry - return Err(Error::UnimplementedFeature("small MSF file format")); + // claimed! + let smallmsf = small::SmallMSF::new(source, header_view)?; + return Ok(Box::new(smallmsf)); } Err(Error::UnrecognizedFileFormat) diff --git a/src/msf/page_list.rs b/src/msf/page_list.rs index 88570b6..e5eace9 100644 --- a/src/msf/page_list.rs +++ b/src/msf/page_list.rs @@ -99,7 +99,6 @@ impl PageList { #[cfg(test)] mod tests { use crate::msf::page_list::*; - use crate::source::SourceSlice; #[test] fn test_push() { diff --git a/src/omap.rs b/src/omap.rs index e0c1d57..442dfa8 100644 --- a/src/omap.rs +++ b/src/omap.rs @@ -77,7 +77,7 @@ impl fmt::Debug for OMAPRecord { impl PartialOrd for OMAPRecord { #[inline] fn partial_cmp(&self, other: &Self) -> Option { - self.source_address().partial_cmp(&other.source_address()) + Some(self.cmp(other)) } } @@ -577,8 +577,6 @@ impl PdbInternalSectionOffset { mod tests { use super::*; - use std::mem; - #[test] fn test_omap_record() { assert_eq!(mem::size_of::(), 8); diff --git a/src/symbol/constants.rs b/src/symbol/constants.rs index dd8f9ac..9a02051 100644 --- a/src/symbol/constants.rs +++ b/src/symbol/constants.rs @@ -483,7 +483,7 @@ impl<'a> TryFromCtx<'a, Endian> for CPUType { } /// These values correspond to the CV_CFL_LANG enumeration, and are documented -/// [on MSDN](https://msdn.microsoft.com/en-us/library/bw3aekw6.aspx). +/// [on MSDN](https://learn.microsoft.com/en-us/visualstudio/debugger/debug-interface-access/cv-cfl-lang?view=vs-2022). #[non_exhaustive] #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub enum SourceLanguage { @@ -525,7 +525,18 @@ pub enum SourceLanguage { MSIL = 0x0f, /// Application language is High Level Shader Language. HLSL = 0x10, - + /// Application language is Objective-C. + ObjC = 0x11, + /// Application language is Objective-C++. + ObjCXX = 0x12, + /// Application language is Swift. + Swift = 0x13, + /// Application is a module generated by the aliasobj tool. + AliasObj = 0x14, + /// Application language is Rust. + Rust = 0x15, + /// Application language is Go. + Go = 0x16, /// The DMD compiler emits 'D' for the CV source language. Microsoft doesn't /// have an enumerator for it yet. D = 0x44, @@ -533,26 +544,34 @@ pub enum SourceLanguage { impl fmt::Display for SourceLanguage { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - Self::C => write!(f, "C"), - Self::Cpp => write!(f, "Cpp"), - Self::Fortran => write!(f, "Fortran"), - Self::Masm => write!(f, "Masm"), - Self::Pascal => write!(f, "Pascal"), - Self::Basic => write!(f, "Basic"), - Self::Cobol => write!(f, "Cobol"), - Self::Link => write!(f, "Link"), - Self::Cvtres => write!(f, "Cvtres"), - Self::Cvtpgd => write!(f, "Cvtpgd"), - Self::CSharp => write!(f, "CSharp"), - Self::VB => write!(f, "VB"), - Self::ILAsm => write!(f, "ILAsm"), - Self::Java => write!(f, "Java"), - Self::JScript => write!(f, "JScript"), - Self::MSIL => write!(f, "MSIL"), - Self::HLSL => write!(f, "HLSL"), - Self::D => write!(f, "D"), - } + let str_repr = match self { + Self::C => "C", + Self::Cpp => "Cpp", + Self::Fortran => "Fortran", + Self::Masm => "Masm", + Self::Pascal => "Pascal", + Self::Basic => "Basic", + Self::Cobol => "Cobol", + Self::Link => "Link", + Self::Cvtres => "Cvtres", + Self::Cvtpgd => "Cvtpgd", + Self::CSharp => "CSharp", + Self::VB => "VB", + Self::ILAsm => "ILAsm", + Self::Java => "Java", + Self::JScript => "JScript", + Self::MSIL => "MSIL", + Self::HLSL => "HLSL", + Self::ObjC => "ObjC", + Self::ObjCXX => "ObjCXX", + Self::Swift => "Swift", + Self::AliasObj => "AliasObj", + Self::Rust => "Rust", + Self::Go => "Go", + Self::D => "D", + }; + + write!(f, "{str_repr}") } } @@ -576,6 +595,12 @@ impl From for SourceLanguage { 0x0e => Self::JScript, 0x0f => Self::MSIL, 0x10 => Self::HLSL, + 0x11 => Self::ObjC, + 0x12 => Self::ObjCXX, + 0x13 => Self::Swift, + 0x14 => Self::AliasObj, + 0x15 => Self::Rust, + 0x16 => Self::Go, 0x44 => Self::D, _ => Self::Masm, // There is no unknown, so we just force to Masm as the default. } diff --git a/src/symbol/mod.rs b/src/symbol/mod.rs index 98d6cf7..e78dfd0 100644 --- a/src/symbol/mod.rs +++ b/src/symbol/mod.rs @@ -12,6 +12,7 @@ use scroll::{ctx::TryFromCtx, Endian, Pread, LE}; use crate::common::*; use crate::msf::*; use crate::FallibleIterator; +use crate::SectionCharacteristics; mod annotations; mod constants; @@ -185,6 +186,8 @@ pub enum SymbolData<'t> { Public(PublicSymbol<'t>), /// A procedure, such as a function or method. Procedure(ProcedureSymbol<'t>), + /// A managed procedure, such as a function or method. + ManagedProcedure(ManagedProcedureSymbol<'t>), /// A thread local variable. ThreadStorage(ThreadStorageSymbol<'t>), /// Flags used to compile a module. @@ -197,12 +200,16 @@ pub enum SymbolData<'t> { DataReference(DataReferenceSymbol<'t>), /// Reference to an annotation. AnnotationReference(AnnotationReferenceSymbol<'t>), + /// Reference to a managed procedure. + TokenReference(TokenReferenceSymbol<'t>), /// Trampoline thunk. Trampoline(TrampolineSymbol), /// An exported symbol. Export(ExportSymbol<'t>), /// A local symbol in optimized code. Local(LocalSymbol<'t>), + /// A managed local variable slot. + ManagedSlot(ManagedSlotSymbol<'t>), /// Reference to build information. BuildInfo(BuildInfoSymbol), /// The callsite of an inlined function. @@ -221,6 +228,14 @@ pub enum SymbolData<'t> { Thunk(ThunkSymbol<'t>), /// A block of separated code. SeparatedCode(SeparatedCodeSymbol), + /// OEM information. + OEM(OemSymbol<'t>), + /// Environment block split off from S_COMPILE2. + EnvBlock(EnvBlockSymbol<'t>), + /// A COFF section in a PE executable. + Section(SectionSymbol<'t>), + /// A COFF group. + CoffGroup(CoffGroupSymbol<'t>), } impl<'t> SymbolData<'t> { @@ -236,15 +251,18 @@ impl<'t> SymbolData<'t> { Self::Data(data) => Some(data.name), Self::Public(data) => Some(data.name), Self::Procedure(data) => Some(data.name), + Self::ManagedProcedure(data) => data.name, Self::ThreadStorage(data) => Some(data.name), Self::CompileFlags(_) => None, Self::UsingNamespace(data) => Some(data.name), Self::ProcedureReference(data) => data.name, Self::DataReference(data) => data.name, Self::AnnotationReference(data) => Some(data.name), + Self::TokenReference(data) => Some(data.name), Self::Trampoline(_) => None, Self::Export(data) => Some(data.name), Self::Local(data) => Some(data.name), + Self::ManagedSlot(data) => Some(data.name), Self::InlineSite(_) => None, Self::BuildInfo(_) => None, Self::InlineSiteEnd => None, @@ -254,6 +272,10 @@ impl<'t> SymbolData<'t> { Self::RegisterRelative(data) => Some(data.name), Self::Thunk(data) => Some(data.name), Self::SeparatedCode(_) => None, + Self::OEM(_) => None, + Self::EnvBlock(_) => None, + Self::Section(data) => Some(data.name), + Self::CoffGroup(data) => Some(data.name), } } } @@ -283,6 +305,7 @@ impl<'t> TryFromCtx<'t> for SymbolData<'t> { S_PUB32 | S_PUB32_ST => SymbolData::Public(buf.parse_with(kind)?), S_LPROC32 | S_LPROC32_ST | S_GPROC32 | S_GPROC32_ST | S_LPROC32_ID | S_GPROC32_ID | S_LPROC32_DPC | S_LPROC32_DPC_ID => SymbolData::Procedure(buf.parse_with(kind)?), + S_LMANPROC | S_GMANPROC => SymbolData::ManagedProcedure(buf.parse_with(kind)?), S_LTHREAD32 | S_LTHREAD32_ST | S_GTHREAD32 | S_GTHREAD32_ST => { SymbolData::ThreadStorage(buf.parse_with(kind)?) } @@ -296,8 +319,10 @@ impl<'t> TryFromCtx<'t> for SymbolData<'t> { S_TRAMPOLINE => Self::Trampoline(buf.parse_with(kind)?), S_DATAREF | S_DATAREF_ST => SymbolData::DataReference(buf.parse_with(kind)?), S_ANNOTATIONREF => SymbolData::AnnotationReference(buf.parse_with(kind)?), + S_TOKENREF => SymbolData::TokenReference(buf.parse_with(kind)?), S_EXPORT => SymbolData::Export(buf.parse_with(kind)?), S_LOCAL => SymbolData::Local(buf.parse_with(kind)?), + S_MANSLOT | S_MANSLOT_ST => SymbolData::ManagedSlot(buf.parse_with(kind)?), S_BUILDINFO => SymbolData::BuildInfo(buf.parse_with(kind)?), S_INLINESITE | S_INLINESITE2 => SymbolData::InlineSite(buf.parse_with(kind)?), S_INLINESITE_END => SymbolData::InlineSiteEnd, @@ -307,6 +332,10 @@ impl<'t> TryFromCtx<'t> for SymbolData<'t> { S_REGREL32 => SymbolData::RegisterRelative(buf.parse_with(kind)?), S_THUNK32 | S_THUNK32_ST => SymbolData::Thunk(buf.parse_with(kind)?), S_SEPCODE => SymbolData::SeparatedCode(buf.parse_with(kind)?), + S_OEM => SymbolData::OEM(buf.parse_with(kind)?), + S_ENVBLOCK => SymbolData::EnvBlock(buf.parse_with(kind)?), + S_SECTION => SymbolData::Section(buf.parse_with(kind)?), + S_COFFGROUP => SymbolData::CoffGroup(buf.parse_with(kind)?), other => return Err(Error::UnimplementedSymbolKind(other)), }; @@ -493,12 +522,19 @@ impl<'t> TryFromCtx<'t, SymbolKind> for ProcedureReferenceSymbol<'t> { fn try_from_ctx(this: &'t [u8], kind: SymbolKind) -> Result<(Self, usize)> { let mut buf = ParseBuffer::from(this); + let global = matches!(kind, S_PROCREF | S_PROCREF_ST); + let sum_name = buf.parse()?; + let symbol_index = buf.parse()?; + // 1-based module index in the input - presumably 0 means invalid / not present + let module = buf.parse::()?.checked_sub(1).map(usize::from); + let name = parse_optional_name(&mut buf, kind)?; + let symbol = ProcedureReferenceSymbol { - global: matches!(kind, S_PROCREF | S_PROCREF_ST), - sum_name: buf.parse()?, - symbol_index: buf.parse()?, - module: buf.parse::()?.checked_sub(1).map(usize::from), - name: parse_optional_name(&mut buf, kind)?, + global, + sum_name, + symbol_index, + module, + name, }; Ok((symbol, buf.pos())) @@ -529,11 +565,17 @@ impl<'t> TryFromCtx<'t, SymbolKind> for DataReferenceSymbol<'t> { fn try_from_ctx(this: &'t [u8], kind: SymbolKind) -> Result<(Self, usize)> { let mut buf = ParseBuffer::from(this); + let sum_name = buf.parse()?; + let symbol_index = buf.parse()?; + // 1-based module index in the input - presumably 0 means invalid / not present + let module = buf.parse::()?.checked_sub(1).map(usize::from); + let name = parse_optional_name(&mut buf, kind)?; + let symbol = DataReferenceSymbol { - sum_name: buf.parse()?, - symbol_index: buf.parse()?, - module: buf.parse::()?.checked_sub(1).map(usize::from), - name: parse_optional_name(&mut buf, kind)?, + sum_name, + symbol_index, + module, + name, }; Ok((symbol, buf.pos())) @@ -564,11 +606,58 @@ impl<'t> TryFromCtx<'t, SymbolKind> for AnnotationReferenceSymbol<'t> { fn try_from_ctx(this: &'t [u8], kind: SymbolKind) -> Result<(Self, usize)> { let mut buf = ParseBuffer::from(this); + let sum_name = buf.parse()?; + let symbol_index = buf.parse()?; + // 1-based module index in the input - presumably 0 means invalid / not present + let module = buf.parse::()?.checked_sub(1).map(usize::from); + let name = parse_symbol_name(&mut buf, kind)?; + let symbol = AnnotationReferenceSymbol { - sum_name: buf.parse()?, - symbol_index: buf.parse()?, - module: buf.parse::()?.checked_sub(1).map(usize::from), - name: parse_symbol_name(&mut buf, kind)?, + sum_name, + symbol_index, + module, + name, + }; + + Ok((symbol, buf.pos())) + } +} + +/// Reference to a managed procedure symbol (`S_LMANPROC` or `S_GMANPROC`). +/// +/// Symbol kind `S_TOKENREF`. +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub struct TokenReferenceSymbol<'t> { + /// SUC of the name. + pub sum_name: u32, + /// Symbol index of the referenced [`ManagedProcedureSymbol`]. + /// + /// Note that this symbol might be located in a different module. + pub symbol_index: SymbolIndex, + /// Index of the module in [`DebugInformation::modules`](crate::DebugInformation::modules) + /// containing the actual symbol. + pub module: Option, + /// Name of the procedure reference. + pub name: RawString<'t>, +} + +impl<'t> TryFromCtx<'t, SymbolKind> for TokenReferenceSymbol<'t> { + type Error = Error; + + fn try_from_ctx(this: &'t [u8], kind: SymbolKind) -> Result<(Self, usize)> { + let mut buf = ParseBuffer::from(this); + + let sum_name = buf.parse()?; + let symbol_index = buf.parse()?; + // 1-based module index in the input - presumably 0 means invalid / not present + let module = buf.parse::()?.checked_sub(1).map(usize::from); + let name = parse_symbol_name(&mut buf, kind)?; + + let symbol = TokenReferenceSymbol { + sum_name, + symbol_index, + module, + name, }; Ok((symbol, buf.pos())) @@ -840,6 +929,66 @@ impl<'t> TryFromCtx<'t, SymbolKind> for ProcedureSymbol<'t> { } } +/// A managed procedure, such as a function or method. +/// +/// Symbol kinds: +/// - `S_GMANPROC`, `S_GMANPROCIA64` for global procedures +/// - `S_LMANPROC`, `S_LMANPROCIA64` for local procedures +/// +/// `S_GMANPROCIA64` and `S_LMANPROCIA64` are only mentioned, there is no available source. +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub struct ManagedProcedureSymbol<'t> { + /// Whether this is a global or local procedure. + pub global: bool, + /// The parent scope that this procedure is nested in. + pub parent: Option, + /// The end symbol of this procedure. + pub end: SymbolIndex, + /// The next procedure symbol. + pub next: Option, + /// The length of the code block covered by this procedure. + pub len: u32, + /// Start offset of the procedure's body code, which marks the end of the prologue. + pub dbg_start_offset: u32, + /// End offset of the procedure's body code, which marks the start of the epilogue. + pub dbg_end_offset: u32, + /// COM+ metadata token + pub token: COMToken, + /// Code offset of the start of this procedure. + pub offset: PdbInternalSectionOffset, + /// Detailed flags of this procedure. + pub flags: ProcedureFlags, + /// Register return value is in (may not be used for all archs). + pub return_register: u16, + /// Optional name of the procedure. + pub name: Option>, +} + +impl<'t> TryFromCtx<'t, SymbolKind> for ManagedProcedureSymbol<'t> { + type Error = Error; + + fn try_from_ctx(this: &'t [u8], kind: SymbolKind) -> Result<(Self, usize)> { + let mut buf = ParseBuffer::from(this); + + let symbol = ManagedProcedureSymbol { + global: matches!(kind, S_GMANPROC), + parent: parse_optional_index(&mut buf)?, + end: buf.parse()?, + next: parse_optional_index(&mut buf)?, + len: buf.parse()?, + dbg_start_offset: buf.parse()?, + dbg_end_offset: buf.parse()?, + token: buf.parse()?, + offset: buf.parse()?, + flags: buf.parse()?, + return_register: buf.parse()?, + name: parse_optional_name(&mut buf, kind)?, + }; + + Ok((symbol, buf.pos())) + } +} + /// The callsite of an inlined function. /// /// Symbol kind `S_INLINESITE`, or `S_INLINESITE2`. @@ -1169,6 +1318,41 @@ impl<'t> TryFromCtx<'t, SymbolKind> for LocalSymbol<'t> { } } +/// A managed local variable slot. +/// +/// Symbol kind `S_MANSLOT`. +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub struct ManagedSlotSymbol<'t> { + /// Slot index. + pub slot: u32, + /// Type index or metadata token. + pub type_index: TypeIndex, + /// First code address where var is live. + pub offset: PdbInternalSectionOffset, + /// Local variable flags. + pub flags: LocalVariableFlags, + /// Length-prefixed name of the variable. + pub name: RawString<'t>, +} + +impl<'t> TryFromCtx<'t, SymbolKind> for ManagedSlotSymbol<'t> { + type Error = Error; + + fn try_from_ctx(this: &'t [u8], kind: SymbolKind) -> Result<(Self, usize)> { + let mut buf = ParseBuffer::from(this); + + let symbol = ManagedSlotSymbol { + slot: buf.parse()?, + type_index: buf.parse()?, + offset: buf.parse()?, + flags: buf.parse()?, + name: parse_symbol_name(&mut buf, kind)?, + }; + + Ok((symbol, buf.pos())) + } +} + // https://github.com/Microsoft/microsoft-pdb/blob/082c5290e5aff028ae84e43affa8be717aa7af73/include/cvinfo.h#L4456 /// Flags of an [`ExportSymbol`]. #[non_exhaustive] @@ -1499,6 +1683,143 @@ impl<'t> TryFromCtx<'t, SymbolKind> for SeparatedCodeSymbol { } } +/// An OEM symbol. +/// +/// Symbol kind `S_OEM`. +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub struct OemSymbol<'t> { + /// OEM's identifier (16B GUID). + pub id_oem: RawString<'t>, + /// Type index. + pub type_index: TypeIndex, + /// User data with forced 4B-alignment. + /// + /// An array of variable size, currently only the first 4B are parsed. + pub rgl: u32, +} + +impl<'t> TryFromCtx<'t, SymbolKind> for OemSymbol<'t> { + type Error = Error; + + fn try_from_ctx(this: &'t [u8], _kind: SymbolKind) -> Result<(Self, usize)> { + let mut buf = ParseBuffer::from(this); + + let symbol = OemSymbol { + id_oem: buf.parse_cstring()?, + type_index: buf.parse()?, + rgl: buf.parse()?, + }; + + Ok((symbol, buf.pos())) + } +} + +/// Environment block split off from `S_COMPILE2`. +/// +/// Symbol kind `S_ENVBLOCK`. +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct EnvBlockSymbol<'t> { + /// EC flag (previously called `rev`). + pub edit_and_continue: bool, + /// Sequence of zero-terminated command strings. + pub rgsz: Vec>, +} + +impl<'t> TryFromCtx<'t, SymbolKind> for EnvBlockSymbol<'t> { + type Error = Error; + + fn try_from_ctx(this: &'t [u8], kind: SymbolKind) -> Result<(Self, usize)> { + let mut buf = ParseBuffer::from(this); + let flags: u8 = buf.parse()?; + + let mut strings: Vec> = Vec::new(); + + while !buf.is_empty() { + strings.push(parse_symbol_name(&mut buf, kind)?); + } + + let symbol = EnvBlockSymbol { + edit_and_continue: flags & 1 != 0, + rgsz: strings, + }; + + Ok((symbol, buf.pos())) + } +} + +/// A COFF section in a PE executable. +/// +/// Symbol kind `S_SECTION`. +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct SectionSymbol<'t> { + /// Section number. + pub isec: u16, + /// Alignment of this section (power of 2). + pub align: u8, + /// Reserved. Must be zero. + pub reserved: u8, + /// Section's RVA. + pub rva: u32, + /// Section's CB. + pub cb: u32, + /// Section characteristics. + pub characteristics: SectionCharacteristics, + /// Section name. + pub name: RawString<'t>, +} + +impl<'t> TryFromCtx<'t, SymbolKind> for SectionSymbol<'t> { + type Error = Error; + + fn try_from_ctx(this: &'t [u8], kind: SymbolKind) -> Result<(Self, usize)> { + let mut buf = ParseBuffer::from(this); + + let symbol = SectionSymbol { + isec: buf.parse()?, + align: buf.parse()?, + reserved: buf.parse()?, + rva: buf.parse()?, + cb: buf.parse()?, + characteristics: buf.parse()?, + name: parse_symbol_name(&mut buf, kind)?, + }; + + Ok((symbol, buf.pos())) + } +} + +/// A COFF section in a PE executable. +/// +/// Symbol kind `S_COFFGROUP`. +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct CoffGroupSymbol<'t> { + /// COFF group's CB. + pub cb: u32, + /// COFF group characteristics. + pub characteristics: u32, + /// Symbol offset. + pub offset: PdbInternalSectionOffset, + /// COFF group name. + pub name: RawString<'t>, +} + +impl<'t> TryFromCtx<'t, SymbolKind> for CoffGroupSymbol<'t> { + type Error = Error; + + fn try_from_ctx(this: &'t [u8], kind: SymbolKind) -> Result<(Self, usize)> { + let mut buf = ParseBuffer::from(this); + + let symbol = CoffGroupSymbol { + cb: buf.parse()?, + characteristics: buf.parse()?, + offset: buf.parse()?, + name: parse_symbol_name(&mut buf, kind)?, + }; + + Ok((symbol, buf.pos())) + } +} + /// PDB symbol tables contain names, locations, and metadata about functions, global/static data, /// constants, data types, and more. /// @@ -2168,6 +2489,32 @@ mod tests { ); } + #[test] + fn kind_1137() { + // 0x1137 is S_COFFGROUP + let data = &[ + 55, 17, 160, 17, 0, 0, 64, 0, 0, 192, 0, 0, 0, 0, 3, 0, 46, 100, 97, 116, 97, 0, + ]; + + let symbol = Symbol { + data, + index: SymbolIndex(0), + }; + assert_eq!(symbol.raw_kind(), 0x1137); + assert_eq!( + symbol.parse().expect("parse"), + SymbolData::CoffGroup(CoffGroupSymbol { + cb: 4512, + characteristics: 0xc000_0040, + offset: PdbInternalSectionOffset { + section: 0x3, + offset: 0 + }, + name: ".data".into(), + }) + ); + } + #[test] fn kind_113c() { let data = &[ diff --git a/src/tpi/data.rs b/src/tpi/data.rs index ed7d987..261dc58 100644 --- a/src/tpi/data.rs +++ b/src/tpi/data.rs @@ -25,6 +25,8 @@ pub enum TypeData<'t> { Nested(NestedType<'t>), BaseClass(BaseClassType), VirtualBaseClass(VirtualBaseClassType), + VirtualFunctionTable(VirtualFunctionTableType<'t>), + VirtualTableShape(VirtualTableShapeType), VirtualFunctionTablePointer(VirtualFunctionTablePointerType), Procedure(ProcedureType), Pointer(PointerType), @@ -33,6 +35,7 @@ pub enum TypeData<'t> { Enumerate(EnumerateType<'t>), Array(ArrayType), Union(UnionType<'t>), + Alias(AliasType<'t>), Bitfield(BitfieldType), FieldList(FieldList<'t>), ArgumentList(ArgumentList), @@ -50,7 +53,8 @@ impl<'t> TypeData<'t> { | Self::Nested(NestedType { ref name, .. }) | Self::Enumeration(EnumerationType { ref name, .. }) | Self::Enumerate(EnumerateType { ref name, .. }) - | Self::Union(UnionType { ref name, .. }) => name, + | Self::Union(UnionType { ref name, .. }) + | Self::Alias(AliasType { ref name, .. }) => name, _ => return None, }; @@ -166,7 +170,7 @@ pub(crate) fn parse_type_data<'t>(buf: &mut ParseBuffer<'t>) -> Result(buf: &mut ParseBuffer<'t>) -> Result Ok(TypeData::Alias(AliasType { + underlying_type: buf.parse()?, + name: parse_string(leaf, buf)?, + })), + // https://github.com/Microsoft/microsoft-pdb/blob/082c5290e5aff028ae84e43affa8be717aa7af73/include/cvinfo.h#L2164-L2170 LF_BITFIELD => Ok(TypeData::Bitfield(BitfieldType { underlying_type: buf.parse()?, @@ -345,14 +355,46 @@ pub(crate) fn parse_type_data<'t>(buf: &mut ParseBuffer<'t>) -> Result { - // TODO - Err(Error::UnimplementedTypeKind(leaf)) + let mut vtshape = VirtualTableShapeType { + descriptors: vec![], + }; + let count = buf.parse_u16()? as usize; + // These are packed 4-bit values + for _ in 0..((count + 1) / 2) { + let desc: u8 = buf.parse()?; + + vtshape + .descriptors + .push(VirtualTableShapeDescriptor::from_u4(desc & 0xF)); + if vtshape.descriptors.len() < count { + vtshape + .descriptors + .push(VirtualTableShapeDescriptor::from_u4(desc >> 4)); + } + } + Ok(TypeData::VirtualTableShape(vtshape)) } // https://github.com/Microsoft/microsoft-pdb/blob/082c5290e5aff028ae84e43affa8be717aa7af73/include/cvinfo.h#L1825-L1837 LF_VFTABLE => { - // TODO - Err(Error::UnimplementedTypeKind(leaf)) + let mut vftable = VirtualFunctionTableType { + owner: buf.parse()?, + base: buf.parse()?, + object_offset: parse_unsigned(buf)? as u32, + names: vec![], + }; + + let names_length = parse_unsigned(buf)? as usize; + + let mut len = 0; + while len < names_length { + let s = buf.parse_cstring()?; + len += s.len() + 1; + vftable.names.push(s); + } + assert_eq!(len, names_length); + + Ok(TypeData::VirtualFunctionTable(vftable)) } // https://github.com/Microsoft/microsoft-pdb/blob/082c5290e5aff028ae84e43affa8be717aa7af73/include/cvinfo.h#L2521-L2528 @@ -627,6 +669,31 @@ impl FieldAttributes { matches!(self.method_properties(), 0x04 | 0x06) } + #[inline] + pub fn is_pseudo(self) -> bool { + self.0 & 0x0020 != 0 + } + + #[inline] + pub fn noinherit(self) -> bool { + self.0 & 0x0040 != 0 + } + + #[inline] + pub fn noconstruct(self) -> bool { + self.0 & 0x0080 != 0 + } + + #[inline] + pub fn is_compgenx(self) -> bool { + self.0 & 0x0100 != 0 + } + + #[inline] + pub fn sealed(self) -> bool { + self.0 & 0x0200 != 0 + } + // TODO } @@ -826,6 +893,51 @@ impl PointerAttributes { } } +/* +typedef enum CV_VTS_desc_e { + CV_VTS_near = 0x00, + CV_VTS_far = 0x01, + CV_VTS_thin = 0x02, + CV_VTS_outer = 0x03, + CV_VTS_meta = 0x04, + CV_VTS_near32 = 0x05, + CV_VTS_far32 = 0x06, + CV_VTS_unused = 0x07 +} CV_VTS_desc_e; + */ +#[allow(unused)] +#[repr(u8)] +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +pub enum VirtualTableShapeDescriptor { + Near = 0x00, + Far = 0x01, + Thin = 0x02, + Outer = 0x03, + Meta = 0x04, + Near32 = 0x05, + Far32 = 0x06, + Unused = 0x07, +} + +impl VirtualTableShapeDescriptor { + // Convert a 4-bit "u4" value in a u8 to a VirtualTableShapeDescriptor + // This is used in the VirtualTableShapeType parsing and is not public + // so we can safely assume that the value is in the correct range. + pub(crate) fn from_u4(val: u8) -> Self { + match val { + 0x00 => VirtualTableShapeDescriptor::Near, + 0x01 => VirtualTableShapeDescriptor::Far, + 0x02 => VirtualTableShapeDescriptor::Thin, + 0x03 => VirtualTableShapeDescriptor::Outer, + 0x04 => VirtualTableShapeDescriptor::Meta, + 0x05 => VirtualTableShapeDescriptor::Near32, + 0x06 => VirtualTableShapeDescriptor::Far32, + 0x07 => VirtualTableShapeDescriptor::Unused, + _ => unreachable!(), + } + } +} + /// The information parsed from a type record with kind /// `LF_CLASS`, `LF_CLASS_ST`, `LF_STRUCTURE`, `LF_STRUCTURE_ST` or `LF_INTERFACE`. // https://github.com/Microsoft/microsoft-pdb/blob/082c5290e5aff028ae84e43affa8be717aa7af73/include/cvinfo.h#L1631 @@ -947,6 +1059,21 @@ pub struct VirtualFunctionTablePointerType { pub table: TypeIndex, } +/// The information parsed from a type record with kind `LF_VTSHAPE`. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct VirtualTableShapeType { + pub descriptors: Vec, +} + +/// The information parsed from a type record with kind `LF_VFTABLE`. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct VirtualFunctionTableType<'t> { + pub owner: TypeIndex, + pub base: TypeIndex, + pub object_offset: u32, + pub names: Vec>, +} + /// The information parsed from a type record with kind `LF_PROCEDURE`. #[derive(Debug, Copy, Clone, PartialEq, Eq)] pub struct ProcedureType { @@ -1023,6 +1150,13 @@ pub struct UnionType<'t> { pub unique_name: Option>, } +/// The information parsed from a type record with kind `LF_ALIAS` or `LF_ALIAS_ST`. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct AliasType<'t> { + pub underlying_type: TypeIndex, + pub name: RawString<'t>, +} + /// The information parsed from a type record with kind `LF_BITFIELD`. #[derive(Debug, Copy, Clone, PartialEq, Eq)] pub struct BitfieldType { diff --git a/src/tpi/mod.rs b/src/tpi/mod.rs index d79019b..25afd49 100644 --- a/src/tpi/mod.rs +++ b/src/tpi/mod.rs @@ -268,8 +268,8 @@ where /// populated by calling [`ItemFinder::update`] while iterating. There are two typedefs for easier /// use: /// -/// - [`TypeFinder`] for finding [`Type`]s in a [`TypeInformation`](crate::TypeInformation) (TPI stream). -/// - [`IdFinder`] for finding [`Id`]s in a [`IdInformation`](crate::IdInformation) (IPI stream). +/// - [`TypeFinder`] for finding [`Type`]s in a [`TypeInformation`] (TPI stream). +/// - [`IdFinder`] for finding [`Id`]s in a [`IdInformation`] (IPI stream). /// /// `ItemFinder` allocates all the memory it needs when it is first created. The footprint is /// directly proportional to the total number of types; see [`ItemInformation::len`]. @@ -434,8 +434,7 @@ where } } -/// An iterator over items in [`TypeInformation`](crate::TypeInformation) or -/// [`IdInformation`](crate::IdInformation). +/// An iterator over items in [`TypeInformation`] or [`IdInformation`]. /// /// The TPI and IPI streams are represented internally as a series of records, each of which have a /// length, a kind, and a type-specific field layout. Iteration performance is therefore similar to diff --git a/src/tpi/primitive.rs b/src/tpi/primitive.rs index da45e56..4ab68ac 100644 --- a/src/tpi/primitive.rs +++ b/src/tpi/primitive.rs @@ -60,6 +60,9 @@ pub enum PrimitiveKind { /// "Really a 32-bit char" RChar32, + /// UTF-8 character + Char8, + /// Signed 8-bit integer I8, @@ -81,13 +84,13 @@ pub enum PrimitiveKind { /// Signed 32-bit integer Long, - /// Unsigned 32-bit inteer + /// Unsigned 32-bit integer ULong, /// Signed 32-bit integer I32, - /// Unsigned 32-bit inteer + /// Unsigned 32-bit integer U32, /// Signed 64-bit integer @@ -225,6 +228,7 @@ pub fn type_data_for_primitive(index: TypeIndex) -> Result> { 0x71 => PrimitiveKind::WChar, 0x7a => PrimitiveKind::RChar16, 0x7b => PrimitiveKind::RChar32, + 0x7c => PrimitiveKind::Char8, 0x11 => PrimitiveKind::Short, 0x21 => PrimitiveKind::UShort,