From e90a4432876c645690e51e2f021ef26db381c52e Mon Sep 17 00:00:00 2001 From: LunaStev Date: Sun, 22 Feb 2026 23:14:11 +0900 Subject: [PATCH] feat: enhance C ABI compatibility, type inference, and JSON serialization This commit improves the robustness of the LLVM backend regarding type resolution under opaque pointers, refines C ABI lowering for aggregates, and introduces JSON serialization utilities. Changes: - **LLVM Codegen & ABI**: - **C ABI Refinement**: Updated `abi_c.rs` to pass small "mixed" aggregates as direct values, allowing LLVM to handle register assignment (INTEGER vs. SSE) more effectively for the C calling convention. - **Implicit Main Return**: Implemented implicit `return 0` for the `main` function. If `main` is defined as void-like in Wave, it is now lowered to an `i32` return in LLVM to satisfy standard execution environments. - **Symbol Resolution**: Fixed a bug in `gen_function_call` where extern C functions were being looked up by their Wave name instead of their redirected LLVM symbol name. - **Address Generation**: Introduced `generate_address_and_type_ir` to provide both the memory address and its corresponding LLVM type, simplifying indexing and field access logic. - **Type System & Pointers**: - **Enhanced Inference**: Implemented `infer_wave_type_of_expr` and `basic_ty_to_wave_ty` to improve Wave-to-LLVM type mapping, especially for complex lvalues. - **Legacy Compatibility**: Updated `deref` handling to allow redundant dereferences on already-addressable lvalues (e.g., `deref array[i]`), maintaining compatibility with existing code. - **Parser & Verification**: - **ABI Validation**: Added a verification pass to ensure only `extern(c)` is used, as other ABIs are not yet supported. - **JSON Utilities**: - Added `write_pretty_to` and `write_compact_to` to the `Json` enum. - Implemented full JSON string escaping (handles newlines, quotes, backslashes, and control characters). 
- **CLI & Flags**: - Added support for `-Os` (optimize for size) and `-Ofast` (aggressive optimization) flags in the CLI and LLVM pass builder. These updates stabilize the transition to opaque pointers and provide a more predictable interface for system-level FFI and serialization. Signed-off-by: LunaStev --- front/parser/src/verification.rs | 9 + llvm/src/codegen/abi_c.rs | 15 +- llvm/src/codegen/address.rs | 25 ++- llvm/src/codegen/ir.rs | 38 +++- llvm/src/codegen/mod.rs | 2 +- llvm/src/expression/rvalue/assign.rs | 43 ++++- llvm/src/expression/rvalue/calls.rs | 2 +- llvm/src/expression/rvalue/index.rs | 229 +++---------------------- llvm/src/expression/rvalue/pointers.rs | 147 +++++++++++++--- llvm/src/expression/rvalue/structs.rs | 67 +------- llvm/src/statement/assign.rs | 15 +- llvm/src/statement/control.rs | 14 +- src/cli.rs | 2 +- src/flags.rs | 4 +- utils/src/json.rs | 90 ++++++++++ 15 files changed, 387 insertions(+), 315 deletions(-) diff --git a/front/parser/src/verification.rs b/front/parser/src/verification.rs index 50a59558..3839625d 100644 --- a/front/parser/src/verification.rs +++ b/front/parser/src/verification.rs @@ -247,6 +247,15 @@ fn validate_node( scopes.pop(); } + ASTNode::ExternFunction(ext) => { + if !ext.abi.eq_ignore_ascii_case("c") { + return Err(format!( + "unsupported extern ABI '{}' for function '{}': only extern(c) is currently supported", + ext.abi, ext.name + )); + } + } + _ => {} } diff --git a/llvm/src/codegen/abi_c.rs b/llvm/src/codegen/abi_c.rs index c08a32a9..406ff393 100644 --- a/llvm/src/codegen/abi_c.rs +++ b/llvm/src/codegen/abi_c.rs @@ -38,6 +38,7 @@ pub enum RetLowering<'ctx> { #[derive(Clone)] pub struct ExternCInfo<'ctx> { + pub llvm_name: String, // actual LLVM symbol name pub wave_ret: WaveType, // Wave-level return type (needed when sret => llvm void) pub ret: RetLowering<'ctx>, pub params: Vec>, // per-wave param @@ -168,9 +169,9 @@ fn classify_param<'ctx>( return ParamLowering::Direct(it.as_basic_type_enum()); } 
- // mixed small aggregate: safest is byval (conservative but correct) - let align = td.get_abi_alignment(&t) as u32; - return ParamLowering::ByVal { ty: t.as_any_type_enum(), align }; + // mixed small aggregate: keep as direct aggregate value. + // Let LLVM's C ABI lowering split/register-assign correctly. + return ParamLowering::Direct(t); } // non-aggregate: direct @@ -246,9 +247,9 @@ fn classify_ret<'ctx>( } } - // mixed small aggregate ret: safest sret - let align = td.get_abi_alignment(&t) as u32; - return RetLowering::SRet { ty: t.as_any_type_enum(), align }; + // mixed small aggregate ret: keep direct aggregate value. + // Let LLVM's C ABI lowering pick mixed INTEGER/SSE return registers. + return RetLowering::Direct(t); } RetLowering::Direct(t) @@ -261,6 +262,7 @@ pub fn lower_extern_c<'ctx>( struct_types: &HashMap>, ) -> LoweredExtern<'ctx> { let llvm_name = ext.symbol.as_deref().unwrap_or(ext.name.as_str()).to_string(); + let info_llvm_name = llvm_name.clone(); // wave types -> layout types let wave_param_layout: Vec> = ext.params.iter() @@ -311,6 +313,7 @@ pub fn lower_extern_c<'ctx>( llvm_name, fn_type, info: ExternCInfo { + llvm_name: info_llvm_name, wave_ret: ext.return_type.clone(), ret, params, diff --git a/llvm/src/codegen/address.rs b/llvm/src/codegen/address.rs index fc41ec4c..307b2ab9 100644 --- a/llvm/src/codegen/address.rs +++ b/llvm/src/codegen/address.rs @@ -198,7 +198,10 @@ fn addr_and_ty<'ctx>( ); if !slot_ty.is_pointer_type() { - panic!("Cannot deref non-pointer lvalue: {:?}", inner); + // Legacy compatibility: + // allow redundant `deref` on already-addressable lvalues + // like `deref q.rear` and `deref visited[x]`. 
+ return (slot_ptr, slot_ty); } let pv = load_ptr_from_slot(context, builder, slot_ptr, "deref_target"); @@ -383,3 +386,23 @@ pub fn generate_address_ir<'ctx>( ) .0 } + +pub fn generate_address_and_type_ir<'ctx>( + context: &'ctx Context, + builder: &'ctx Builder<'ctx>, + expr: &Expression, + variables: &mut HashMap>, + module: &'ctx Module<'ctx>, + struct_types: &HashMap>, + struct_field_indices: &HashMap>, +) -> (PointerValue<'ctx>, BasicTypeEnum<'ctx>) { + addr_and_ty( + context, + builder, + expr, + variables, + module, + struct_types, + struct_field_indices, + ) +} diff --git a/llvm/src/codegen/ir.rs b/llvm/src/codegen/ir.rs index 637e548b..6f55e545 100644 --- a/llvm/src/codegen/ir.rs +++ b/llvm/src/codegen/ir.rs @@ -28,6 +28,14 @@ use crate::codegen::abi_c::{ ExternCInfo, lower_extern_c, apply_extern_c_attrs, }; +fn is_implicit_i32_main(name: &str, return_type: &Option) -> bool { + name == "main" && matches!(return_type, None | Some(WaveType::Void)) +} + +fn is_supported_extern_abi(abi: &str) -> bool { + abi.eq_ignore_ascii_case("c") +} + pub unsafe fn generate_ir(ast_nodes: &[ASTNode], opt_flag: &str) -> String { let context: &'static Context = Box::leak(Box::new(Context::create())); let module: &'static _ = Box::leak(Box::new(context.create_module("main"))); @@ -189,11 +197,15 @@ pub unsafe fn generate_ir(ast_nodes: &[ASTNode], opt_flag: &str) -> String { .map(|p| wave_type_to_llvm_type(context, &p.param_type, &struct_types, TypeFlavor::AbiC).into()) .collect(); - let fn_type = match return_type { - None | Some(WaveType::Void) => context.void_type().fn_type(¶m_types, false), - Some(wave_ret_ty) => { - let llvm_ret_type = wave_type_to_llvm_type(context, wave_ret_ty, &struct_types, TypeFlavor::AbiC); - llvm_ret_type.fn_type(¶m_types, false) + let fn_type = if is_implicit_i32_main(name, return_type) { + context.i32_type().fn_type(¶m_types, false) + } else { + match return_type { + None | Some(WaveType::Void) => context.void_type().fn_type(¶m_types, false), + 
Some(wave_ret_ty) => { + let llvm_ret_type = wave_type_to_llvm_type(context, wave_ret_ty, &struct_types, TypeFlavor::AbiC); + llvm_ret_type.fn_type(¶m_types, false) + } } }; @@ -202,6 +214,13 @@ pub unsafe fn generate_ir(ast_nodes: &[ASTNode], opt_flag: &str) -> String { } for ext in &extern_functions { + if !is_supported_extern_abi(&ext.abi) { + panic!( + "unsupported extern ABI '{}' for function '{}': only extern(c) is currently supported", + ext.abi, ext.name + ); + } + let lowered = lower_extern_c(context, td, ext, &struct_types); let f = module.add_function(&lowered.llvm_name, lowered.fn_type, None); @@ -263,13 +282,17 @@ pub unsafe fn generate_ir(ast_nodes: &[ASTNode], opt_flag: &str) -> String { let current_block = builder.get_insert_block().unwrap(); if current_block.get_terminator().is_none() { + let implicit_i32_main = is_implicit_i32_main(&func_node.name, &func_node.return_type); let is_void_like = match &func_node.return_type { None => true, Some(WaveType::Void) => true, _ => false, }; - if is_void_like { + if implicit_i32_main { + let zero = context.i32_type().const_zero(); + builder.build_return(Some(&zero)).unwrap(); + } else if is_void_like { builder.build_return(None).unwrap(); } else { panic!("Non-void function '{}' is missing a return statement", func_node.name); @@ -295,6 +318,7 @@ fn pipeline_from_opt_flag(opt_flag: &str) -> &'static str { "-O3" => "default", "-Os" => "default", "-Oz" => "default", + "-Ofast" => "default", other => panic!("unknown opt flag for LLVM passes: {}", other), } } @@ -522,4 +546,4 @@ fn add_enum_consts_to_globals( next += 1; } -} \ No newline at end of file +} diff --git a/llvm/src/codegen/mod.rs b/llvm/src/codegen/mod.rs index 769c9a65..0f65b7ac 100644 --- a/llvm/src/codegen/mod.rs +++ b/llvm/src/codegen/mod.rs @@ -18,7 +18,7 @@ pub mod legacy; pub mod plan; pub mod abi_c; -pub use address::generate_address_ir; +pub use address::{generate_address_and_type_ir, generate_address_ir}; pub use format::{wave_format_to_c, 
wave_format_to_scanf}; pub use ir::generate_ir; pub use types::{wave_type_to_llvm_type, VariableInfo}; diff --git a/llvm/src/expression/rvalue/assign.rs b/llvm/src/expression/rvalue/assign.rs index 73317430..babf1c91 100644 --- a/llvm/src/expression/rvalue/assign.rs +++ b/llvm/src/expression/rvalue/assign.rs @@ -43,6 +43,27 @@ fn wave_to_basic<'ctx, 'a>(env: &ExprGenEnv<'ctx, 'a>, wt: &WaveType) -> BasicTy wave_type_to_llvm_type(env.context, wt, env.struct_types, TypeFlavor::Value) } +fn basic_to_wave<'ctx, 'a>(env: &ExprGenEnv<'ctx, 'a>, bt: BasicTypeEnum<'ctx>) -> Option { + match bt { + BasicTypeEnum::IntType(it) => { + let bw = it.get_bit_width() as u16; + if bw == 1 { + Some(WaveType::Bool) + } else { + Some(WaveType::Int(bw)) + } + } + BasicTypeEnum::FloatType(ft) => Some(WaveType::Float(ft.get_bit_width() as u16)), + BasicTypeEnum::PointerType(_) => Some(WaveType::Pointer(Box::new(WaveType::Byte))), + BasicTypeEnum::ArrayType(at) => { + let elem = basic_to_wave(env, at.get_element_type())?; + Some(WaveType::Array(Box::new(elem), at.len())) + } + BasicTypeEnum::StructType(st) => Some(WaveType::Struct(resolve_struct_key(env, st))), + _ => None, + } +} + fn wave_type_of_lvalue<'ctx, 'a>(env: &ExprGenEnv<'ctx, 'a>, e: &Expression) -> Option { match e { Expression::Variable(name) => env.variables.get(name).map(|vi| vi.ty.clone()), @@ -54,7 +75,7 @@ fn wave_type_of_lvalue<'ctx, 'a>(env: &ExprGenEnv<'ctx, 'a>, e: &Expression) -> match inner_ty { WaveType::Pointer(t) => Some(*t), WaveType::String => Some(WaveType::Byte), - _ => None, + other => Some(other), } } Expression::IndexAccess { target, .. 
} => { @@ -66,6 +87,26 @@ fn wave_type_of_lvalue<'ctx, 'a>(env: &ExprGenEnv<'ctx, 'a>, e: &Expression) -> _ => None, } } + Expression::FieldAccess { object, field } => { + let object_ty = wave_type_of_lvalue(env, object)?; + let struct_name = match object_ty { + WaveType::Struct(name) => name, + WaveType::Pointer(inner) => match *inner { + WaveType::Struct(name) => name, + _ => return None, + }, + _ => return None, + }; + + let st = *env.struct_types.get(&struct_name)?; + let field_index = env + .struct_field_indices + .get(&struct_name) + .and_then(|m| m.get(field)) + .copied()?; + let field_bt = st.get_field_type_at_index(field_index)?; + basic_to_wave(env, field_bt) + } _ => None, } } diff --git a/llvm/src/expression/rvalue/calls.rs b/llvm/src/expression/rvalue/calls.rs index b99cd940..cca5c248 100644 --- a/llvm/src/expression/rvalue/calls.rs +++ b/llvm/src/expression/rvalue/calls.rs @@ -336,7 +336,7 @@ pub(crate) fn gen_function_call<'ctx, 'a>( if let Some(info) = env.extern_c_info.get(name) { let function = env.module - .get_function(name) + .get_function(&info.llvm_name) .unwrap_or_else(|| panic!("Extern function '{}' not found in module (symbol alias?)", name)); if args.len() != info.params.len() { diff --git a/llvm/src/expression/rvalue/index.rs b/llvm/src/expression/rvalue/index.rs index 73fe220c..27997e9f 100644 --- a/llvm/src/expression/rvalue/index.rs +++ b/llvm/src/expression/rvalue/index.rs @@ -10,216 +10,37 @@ // SPDX-License-Identifier: MPL-2.0 use super::ExprGenEnv; +use inkwell::types::BasicType; use inkwell::values::{BasicValue, BasicValueEnum}; -use parser::ast::{Expression, WaveType}; - -use crate::codegen::types::{wave_type_to_llvm_type, TypeFlavor}; - -fn idx_to_i32<'ctx>( - env: &mut ExprGenEnv<'ctx, '_>, - idx: inkwell::values::IntValue<'ctx>, -) -> inkwell::values::IntValue<'ctx> { - let i32t = env.context.i32_type(); - let w = idx.get_type().get_bit_width(); - if w == 32 { - idx - } else if w < 32 { - env.builder.build_int_s_extend(idx, 
i32t, "idx_sext").unwrap() - } else { - env.builder.build_int_truncate(idx, i32t, "idx_trunc").unwrap() - } -} - -fn wave_type_of_expr<'ctx, 'a>(env: &ExprGenEnv<'ctx, 'a>, e: &Expression) -> Option { - match e { - Expression::Variable(name) => env.variables.get(name).map(|vi| vi.ty.clone()), - Expression::Grouped(inner) => wave_type_of_expr(env, inner), - Expression::AddressOf(inner) => wave_type_of_expr(env, inner).map(|t| WaveType::Pointer(Box::new(t))), - Expression::Deref(inner) => { - match &**inner { - Expression::Variable(name) => { - let vi = env.variables.get(name)?; - match &vi.ty { - WaveType::Pointer(inner_ty) => Some((**inner_ty).clone()), - WaveType::String => Some(WaveType::Byte), - _ => None, - } - } - _ => None, - } - } - _ => None, - } -} +use parser::ast::Expression; +use crate::codegen::generate_address_and_type_ir; pub(crate) fn gen<'ctx, 'a>( env: &mut ExprGenEnv<'ctx, 'a>, target: &Expression, index: &Expression, ) -> BasicValueEnum<'ctx> { - unsafe { - let target_val = env.gen(target, None); - let index_val = env.gen(index, None); - - let index_int = match index_val { - BasicValueEnum::IntValue(i) => idx_to_i32(env, i), - _ => panic!("Index must be an integer"), - }; - - let zero = env.context.i32_type().const_zero(); - - match target_val { - BasicValueEnum::PointerValue(ptr_val) => { - let target_ty = wave_type_of_expr(env, target).unwrap_or_else(|| { - panic!( - "IndexAccess needs static type info under opaque pointers (LLVM15+). 
\ - Cannot infer WaveType for target expr: {:?}", - target - ) - }); - - match target_ty { - WaveType::Array(inner, size) => { - let arr_bt = wave_type_to_llvm_type( - env.context, - &WaveType::Array(inner.clone(), size), - env.struct_types, - TypeFlavor::Value, - ); - - let elem_ty = *inner; - let elem_bt = wave_type_to_llvm_type( - env.context, - &elem_ty, - env.struct_types, - TypeFlavor::Value, - ); - - let gep = env - .builder - .build_in_bounds_gep(arr_bt, ptr_val, &[zero, index_int], "array_index_gep") - .unwrap(); - - if matches!(elem_ty, WaveType::Array(_, _) | WaveType::Struct(_)) { - return gep.as_basic_value_enum(); - } - - return env - .builder - .build_load(elem_bt, gep, "load_array_elem") - .unwrap() - .as_basic_value_enum(); - } - - WaveType::Pointer(inner) => match *inner { - // case 1) ptr -> [array] - WaveType::Array(elem, size) => { - let arr_bt = wave_type_to_llvm_type( - env.context, - &WaveType::Array(elem.clone(), size), - env.struct_types, - TypeFlavor::Value, - ); - - let elem_ty = *elem; - let elem_bt = wave_type_to_llvm_type( - env.context, - &elem_ty, - env.struct_types, - TypeFlavor::Value, - ); - - let gep = env - .builder - .build_in_bounds_gep(arr_bt, ptr_val, &[zero, index_int], "array_index_gep_ptr") - .unwrap(); - - if matches!(elem_ty, WaveType::Array(_, _) | WaveType::Struct(_)) { - return gep.as_basic_value_enum(); - } - - return env - .builder - .build_load(elem_bt, gep, "load_array_elem_ptr") - .unwrap() - .as_basic_value_enum(); - } - - // case 2) ptr -> T - elem_ty => { - let elem_bt = wave_type_to_llvm_type( - env.context, - &elem_ty, - env.struct_types, - TypeFlavor::Value, - ); - - let gep = env - .builder - .build_in_bounds_gep(elem_bt, ptr_val, &[index_int], "ptr_index_gep") - .unwrap(); - - if matches!(elem_ty, WaveType::Array(_, _) | WaveType::Struct(_)) { - return gep.as_basic_value_enum(); - } - - return env - .builder - .build_load(elem_bt, gep, "load_ptr_elem") - .unwrap() - .as_basic_value_enum(); - } - }, - - 
WaveType::String => { - let i8t = env.context.i8_type(); - let gep = env - .builder - .build_in_bounds_gep(i8t, ptr_val, &[index_int], "str_index_gep") - .unwrap(); - - return env - .builder - .build_load(i8t, gep, "load_str_elem") - .unwrap() - .as_basic_value_enum(); - } - - other => { - panic!("Unsupported target type in IndexAccess: {:?}", other); - } - } - } - - BasicValueEnum::ArrayValue(arr_val) => { - // arr_val: [N x T] - let tmp = env - .builder - .build_alloca(arr_val.get_type(), "tmp_arr") - .unwrap(); - - env.builder.build_store(tmp, arr_val).unwrap(); - - let elem_bt = arr_val.get_type().get_element_type(); // ✅ ArrayType element type is OK - - let gep = env - .builder - .build_in_bounds_gep(arr_val.get_type(), tmp, &[zero, index_int], "array_index_gep_tmp") - .unwrap(); - - if elem_bt.is_array_type() || elem_bt.is_struct_type() { - return gep.as_basic_value_enum(); - } - - env.builder - .build_load(elem_bt, gep, "load_array_elem_tmp") - .unwrap() - .as_basic_value_enum() - } - - other => { - panic!("Unsupported target in IndexAccess: {:?}", other) - } - } + let full = Expression::IndexAccess { + target: Box::new(target.clone()), + index: Box::new(index.clone()), + }; + + let (addr, elem_ty) = generate_address_and_type_ir( + env.context, + env.builder, + &full, + env.variables, + env.module, + env.struct_types, + env.struct_field_indices, + ); + + if elem_ty.is_array_type() || elem_ty.is_struct_type() { + return addr.as_basic_value_enum(); } + + env.builder + .build_load(elem_ty, addr, "load_index_elem") + .unwrap() + .as_basic_value_enum() } diff --git a/llvm/src/expression/rvalue/pointers.rs b/llvm/src/expression/rvalue/pointers.rs index 799fbc53..6dc99417 100644 --- a/llvm/src/expression/rvalue/pointers.rs +++ b/llvm/src/expression/rvalue/pointers.rs @@ -10,9 +10,10 @@ // SPDX-License-Identifier: MPL-2.0 use super::ExprGenEnv; -use crate::codegen::generate_address_ir; +use crate::codegen::{generate_address_and_type_ir, generate_address_ir}; use 
crate::codegen::types::{wave_type_to_llvm_type, TypeFlavor}; use crate::statement::variable::{coerce_basic_value, CoercionMode}; +use inkwell::types::AsTypeRef; use inkwell::types::{BasicType, BasicTypeEnum}; use inkwell::values::{BasicValue, BasicValueEnum}; use parser::ast::{Expression, WaveType}; @@ -32,36 +33,140 @@ fn push_deref_into_base(expr: &Expression) -> Expression { } } +fn normalize_struct_name(raw: &str) -> &str { + raw.strip_prefix("struct.").unwrap_or(raw).trim_start_matches('%') +} + +fn resolve_struct_key<'ctx>( + st: inkwell::types::StructType<'ctx>, + struct_types: &std::collections::HashMap>, +) -> Option { + if let Some(raw) = st.get_name().and_then(|n| n.to_str().ok()) { + return Some(normalize_struct_name(raw).to_string()); + } + + let st_ref = st.as_type_ref(); + for (name, ty) in struct_types { + if ty.as_type_ref() == st_ref { + return Some(name.clone()); + } + } + + None +} + +fn basic_ty_to_wave_ty<'ctx>( + ty: BasicTypeEnum<'ctx>, + struct_types: &std::collections::HashMap>, +) -> Option { + match ty { + BasicTypeEnum::IntType(it) => { + let bw = it.get_bit_width() as u16; + if bw == 1 { + Some(WaveType::Bool) + } else { + Some(WaveType::Int(bw)) + } + } + BasicTypeEnum::FloatType(ft) => Some(WaveType::Float(ft.get_bit_width() as u16)), + BasicTypeEnum::PointerType(_) => Some(WaveType::Pointer(Box::new(WaveType::Byte))), + BasicTypeEnum::ArrayType(at) => { + let elem = basic_ty_to_wave_ty(at.get_element_type(), struct_types)?; + Some(WaveType::Array(Box::new(elem), at.len())) + } + BasicTypeEnum::StructType(st) => { + let name = resolve_struct_key(st, struct_types)?; + Some(WaveType::Struct(name)) + } + _ => None, + } +} + +fn infer_wave_type_of_expr<'ctx, 'a>( + env: &mut ExprGenEnv<'ctx, 'a>, + expr: &Expression, +) -> Option { + match expr { + Expression::Variable(name) => env.variables.get(name).map(|vi| vi.ty.clone()), + + Expression::Grouped(inner) => infer_wave_type_of_expr(env, inner), + + Expression::AddressOf(inner) => { + 
let inner_ty = infer_wave_type_of_expr(env, inner)?; + Some(WaveType::Pointer(Box::new(inner_ty))) + } + + Expression::Deref(inner) => { + let inner_ty = infer_wave_type_of_expr(env, inner)?; + match inner_ty { + WaveType::Pointer(t) => Some(*t), + WaveType::String => Some(WaveType::Byte), + _ => None, + } + } + + Expression::IndexAccess { target, .. } => { + let target_ty = infer_wave_type_of_expr(env, target)?; + match target_ty { + WaveType::Array(inner, _) => Some(*inner), + WaveType::Pointer(inner) => match *inner { + WaveType::Array(elem, _) => Some(*elem), + other => Some(other), + }, + WaveType::String => Some(WaveType::Byte), + _ => None, + } + } + + Expression::FieldAccess { object, field } => { + let full = Expression::FieldAccess { + object: Box::new((**object).clone()), + field: field.clone(), + }; + let (_, field_ty) = generate_address_and_type_ir( + env.context, + env.builder, + &full, + env.variables, + env.module, + env.struct_types, + env.struct_field_indices, + ); + basic_ty_to_wave_ty(field_ty, env.struct_types) + } + + _ => None, + } +} + fn infer_deref_load_ty<'ctx, 'a>( - env: &ExprGenEnv<'ctx, 'a>, + env: &mut ExprGenEnv<'ctx, 'a>, inner_expr: &Expression, expected_type: Option>, ) -> BasicTypeEnum<'ctx> { if let Some(t) = expected_type { return t; } - match inner_expr { - Expression::Grouped(inner) => infer_deref_load_ty(env, inner, None), - - Expression::Variable(name) => { - let vi = env - .variables - .get(name) - .unwrap_or_else(|| panic!("deref: variable '{}' not found", name)); - - match &vi.ty { - WaveType::Pointer(inner) => { - wave_type_to_llvm_type(env.context, inner, env.struct_types, TypeFlavor::Value) - } - WaveType::String => env.context.i8_type().as_basic_type_enum(), // *string -> byte - other => panic!("deref: '{}' is not a pointer type: {:?}", name, other), - } - } - _ => panic!( + let inferred = infer_wave_type_of_expr(env, inner_expr).unwrap_or_else(|| { + panic!( "deref needs expected_type in opaque-pointer mode (cannot 
infer load type from {:?})", inner_expr - ), + ) + }); + + match inferred { + WaveType::Pointer(inner) => { + wave_type_to_llvm_type(env.context, &inner, env.struct_types, TypeFlavor::Value) + } + WaveType::String => env.context.i8_type().as_basic_type_enum(), + other => match inner_expr { + // Preserve legacy behavior for lvalues like `deref visited[x]`. + Expression::IndexAccess { .. } | Expression::FieldAccess { .. } | Expression::Grouped(_) => { + wave_type_to_llvm_type(env.context, &other, env.struct_types, TypeFlavor::Value) + } + _ => panic!("deref expects pointer type, got {:?}", other), + }, } } diff --git a/llvm/src/expression/rvalue/structs.rs b/llvm/src/expression/rvalue/structs.rs index 7ad34a4c..6eb7895e 100644 --- a/llvm/src/expression/rvalue/structs.rs +++ b/llvm/src/expression/rvalue/structs.rs @@ -12,44 +12,8 @@ use super::ExprGenEnv; use inkwell::types::{BasicType, BasicTypeEnum}; use inkwell::values::{BasicValue, BasicValueEnum}; -use parser::ast::{Expression, WaveType}; -use crate::codegen::generate_address_ir; - -fn infer_struct_name<'ctx, 'a>(env: &ExprGenEnv<'ctx, 'a>, object: &Expression) -> Option { - match object { - Expression::Grouped(inner) => infer_struct_name(env, inner), - - Expression::Variable(name) => { - let vi = env.variables.get(name)?; - match &vi.ty { - WaveType::Struct(s) => Some(s.clone()), - WaveType::Pointer(inner) => match inner.as_ref() { - WaveType::Struct(s) => Some(s.clone()), - _ => None, - }, - _ => None, - } - } - - Expression::Deref(inner) => { - if let Expression::Variable(name) = inner.as_ref() { - let vi = env.variables.get(name)?; - match &vi.ty { - WaveType::Pointer(inner_ty) => match inner_ty.as_ref() { - WaveType::Struct(s) => Some(s.clone()), - _ => None, - }, - WaveType::String => None, - _ => None, - } - } else { - None - } - } - - _ => None, - } -} +use parser::ast::Expression; +use crate::codegen::generate_address_and_type_ir; pub(crate) fn gen_struct_literal<'ctx, 'a>( env: &mut ExprGenEnv<'ctx, 
'a>, @@ -108,37 +72,12 @@ pub(crate) fn gen_field_access<'ctx, 'a>( object: &Expression, field: &str, ) -> BasicValueEnum<'ctx> { - let struct_name = infer_struct_name(env, object).unwrap_or_else(|| { - panic!( - "Cannot infer struct name for field access '.{}' from object expr: {:?}", - field, object - ) - }); - - let struct_ty = *env - .struct_types - .get(&struct_name) - .unwrap_or_else(|| panic!("Struct type '{}' not found", struct_name)); - - let field_indices = env - .struct_field_indices - .get(&struct_name) - .unwrap_or_else(|| panic!("Field index map for struct '{}' not found", struct_name)); - - let idx = *field_indices - .get(field) - .unwrap_or_else(|| panic!("Field '{}' not found in struct '{}'", field, struct_name)); - - let field_ty: BasicTypeEnum<'ctx> = struct_ty - .get_field_type_at_index(idx) - .unwrap_or_else(|| panic!("No field type at index {} for struct '{}'", idx, struct_name)); - let full = Expression::FieldAccess { object: Box::new(object.clone()), field: field.to_string(), }; - let ptr = generate_address_ir( + let (ptr, field_ty) = generate_address_and_type_ir( env.context, env.builder, &full, diff --git a/llvm/src/statement/assign.rs b/llvm/src/statement/assign.rs index 4e1e175b..7d33b684 100644 --- a/llvm/src/statement/assign.rs +++ b/llvm/src/statement/assign.rs @@ -10,7 +10,7 @@ // SPDX-License-Identifier: MPL-2.0 use crate::expression::rvalue::generate_expression_ir; -use crate::codegen::{generate_address_ir, wave_type_to_llvm_type, VariableInfo}; +use crate::codegen::{generate_address_and_type_ir, generate_address_ir, wave_type_to_llvm_type, VariableInfo}; use inkwell::module::Module; use inkwell::types::{AnyTypeEnum, BasicType, BasicTypeEnum, StructType}; use inkwell::values::{BasicValue, BasicValueEnum}; @@ -51,8 +51,17 @@ pub(super) fn gen_assign_ir<'ctx>( other => panic!("deref target is not a pointer/string: {:?}", other), } } - other => { - panic!("Unsupported deref assignment target: {:?}", other) + _ => { + let (_, ty) = 
generate_address_and_type_ir( + context, + builder, + inner_expr, + variables, + module, + struct_types, + struct_field_indices, + ); + ty } }; diff --git a/llvm/src/statement/control.rs b/llvm/src/statement/control.rs index 4b8294c0..446a8650 100644 --- a/llvm/src/statement/control.rs +++ b/llvm/src/statement/control.rs @@ -365,8 +365,16 @@ pub(super) fn gen_return_ir<'ctx>( panic!("Void function cannot return a value"); } - (Some(_), None) => { - panic!("Non-void function must return a value"); + (Some(ret_ty), None) => { + let is_i32_main = current_function.get_name().to_str().ok() == Some("main") + && matches!(ret_ty, inkwell::types::BasicTypeEnum::IntType(it) if it.get_bit_width() == 32); + + if is_i32_main { + let zero = context.i32_type().const_zero(); + builder.build_return(Some(&zero)).unwrap(); + } else { + panic!("Non-void function must return a value"); + } } (Some(ret_ty), Some(expr)) => { @@ -398,4 +406,4 @@ pub(super) fn gen_return_ir<'ctx>( builder.build_return(Some(&v)).unwrap(); } } -} \ No newline at end of file +} diff --git a/src/cli.rs b/src/cli.rs index 3834afe8..439f0040 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -338,7 +338,7 @@ pub fn print_help() { println!(" {:<18} {}", "--help".color("38,139,235"), "Show help"); println!("\nGlobal options (anywhere):"); - println!(" {:<22} {}", "-O0..-O3/-Oz/-Ofast".color("38,139,235"), "Optimization level"); + println!(" {:<22} {}", "-O0..-O3/-Os/-Oz/-Ofast".color("38,139,235"), "Optimization level"); println!(" {:<22} {}", "--debug-wave=...".color("38,139,235"), "tokens,ast,ir,mc,hex,all (comma ok)"); println!(" {:<22} {}", "--link=".color("38,139,235"), "Link library"); println!(" {:<22} {}", "-L / -L ".color("38,139,235"), "Library search path"); diff --git a/src/flags.rs b/src/flags.rs index 27f19a6f..0f4d8098 100644 --- a/src/flags.rs +++ b/src/flags.rs @@ -54,5 +54,5 @@ pub struct LinkFlags { } pub fn validate_opt_flag(flag: &str) -> bool { - matches!(flag, "-O0" | "-O1" | "-O2" | "-O3" | "-Oz" 
| "-Ofast") -} \ No newline at end of file + matches!(flag, "-O0" | "-O1" | "-O2" | "-O3" | "-Os" | "-Oz" | "-Ofast") +} diff --git a/utils/src/json.rs b/utils/src/json.rs index ccf33931..cad673bd 100644 --- a/utils/src/json.rs +++ b/utils/src/json.rs @@ -9,6 +9,9 @@ // // SPDX-License-Identifier: MPL-2.0 +use std::io; +use std::io::Write; + #[derive(Debug, Clone)] pub enum Json { Null, @@ -235,3 +238,90 @@ impl<'a> Parser<'a> { Ok(out) } } + +impl Json { + pub fn write_pretty_to(&self, mut w: W) -> io::Result<()> { + self.write_into(&mut w, true, 0) + } + + pub fn write_compact_to(&self, mut w: W) -> io::Result<()> { + self.write_into(&mut w, false, 0) + } + + fn write_into(&self, w: &mut W, pretty: bool, indent: usize) -> io::Result<()> { + match self { + Json::Null => write!(w, "null"), + Json::Bool(b) => write!(w, "{}", if *b { "true" } else { "false" }), + + Json::Num(n) => { + if n.is_finite() { + write!(w, "{}", n) + } else { + write!(w, "null") + } + } + + Json::Str(s) => write_json_string(w, s), + + Json::Arr(arr) => { + write!(w, "[")?; + if pretty && !arr.is_empty() { write!(w, "\n")?; } + + for (i, v) in arr.iter().enumerate() { + if pretty { write_indent(w, indent + 2)?; } + v.write_into(w, pretty, indent + 2)?; + + if i + 1 != arr.len() { write!(w, ",")?; } + if pretty { write!(w, "\n")?; } + } + + if pretty && !arr.is_empty() { write_indent(w, indent)?; } + write!(w, "]") + } + + Json::Obj(kv) => { + write!(w, "{{")?; + if pretty && !kv.is_empty() { write!(w, "\n")?; } + + for (i, (k, v)) in kv.iter().enumerate() { + if pretty { write_indent(w, indent + 2)?; } + write_json_string(w, k)?; + if pretty { write!(w, ": ")?; } else { write!(w, ":")?; } + v.write_into(w, pretty, indent + 2)?; + + if i + 1 != kv.len() { write!(w, ",")?; } + if pretty { write!(w, "\n")?; } + } + + if pretty && !kv.is_empty() { write_indent(w, indent)?; } + write!(w, "}}") + } + } + } +} + +fn write_indent(w: &mut W, n: usize) -> io::Result<()> { + for _ in 0..n { write!(w, " 
")?; } + Ok(()) +} + +fn write_json_string(w: &mut W, s: &str) -> io::Result<()> { + write!(w, "\"")?; + for ch in s.chars() { + match ch { + '"' => write!(w, "\\\"")?, + '\\' => write!(w, "\\\\")?, + '\n' => write!(w, "\\n")?, + '\r' => write!(w, "\\r")?, + '\t' => write!(w, "\\t")?, + '\u{08}' => write!(w, "\\b")?, + '\u{0C}' => write!(w, "\\f")?, + c if (c as u32) < 0x20 => { + let v = c as u32; + write!(w, "\\u00{:02x}", v)?; + } + _ => write!(w, "{}", ch)?, + } + } + write!(w, "\"") +} \ No newline at end of file