diff --git a/sast-engine/graph/callgraph/resolution/c_types.go b/sast-engine/graph/callgraph/resolution/c_types.go new file mode 100644 index 00000000..752f17c6 --- /dev/null +++ b/sast-engine/graph/callgraph/resolution/c_types.go @@ -0,0 +1,282 @@ +package resolution + +import ( + "maps" + "sync" + + "github.com/shivasurya/code-pathfinder/sast-engine/graph/callgraph/core" +) + +// declarationSource is the TypeInfo.Source value used for explicit types +// read directly from C/C++ declarations. It distinguishes types that the +// engine knows for certain (Confidence 1.0) from inferred or deduced +// types added in later phases. +const declarationSource = "declaration" + +// CVariableBinding captures the explicit type of a single variable +// declaration inside a C function. Multiple bindings may exist for the +// same name when the variable is reassigned; the latest binding wins +// during lookup. +// +// Example: +// +// int n = 0; // CVariableBinding{VarName:"n", Type: int} +// const char *msg = ""; // CVariableBinding{VarName:"msg", Type: const char*} +// +// Location reuses the package-level resolution.Location so call-site +// reporting and type tracking share one source-location vocabulary. +type CVariableBinding struct { + // VarName is the bare identifier of the declared variable. + VarName string + + // Type is the explicit type drawn from the source declaration. + // For C/C++, the engine sets Confidence=1.0 and Source="declaration" + // on every entry produced from an explicit type; the only exception + // is C++ `auto` (see CppTypeInferenceEngine.ExtractVariableType). + Type *core.TypeInfo + + // Location is the source location of the declaration. + Location Location +} + +// CFunctionScope tracks every variable declared inside one C function. +// Bindings are stored as a slice per name so later phases can audit +// reassignment history; GetVariable always returns the most recent one. +type CFunctionScope struct { + // FunctionFQN is the fully-qualified name of the owning function + // (e.g. "src/net/socket.c::handle_request"). + FunctionFQN string + + // Variables maps a bare variable name to every binding observed + // for it within this function. The latest binding is the last + // element of each slice. + Variables map[string][]*CVariableBinding +} + +// NewCFunctionScope returns an empty scope keyed to the given function +// FQN with its Variables map pre-allocated. +func NewCFunctionScope(functionFQN string) *CFunctionScope { + return &CFunctionScope{ + FunctionFQN: functionFQN, + Variables: make(map[string][]*CVariableBinding), + } +} + +// AddVariable appends binding to the per-name binding history. nil +// bindings are silently dropped so callers can write +// `scope.AddVariable(makeBinding(...))` without nil checks. +func (s *CFunctionScope) AddVariable(binding *CVariableBinding) { + if binding == nil || binding.VarName == "" { + return + } + s.Variables[binding.VarName] = append(s.Variables[binding.VarName], binding) +} + +// GetVariable returns the latest binding for varName, or nil when the +// variable is unknown to this scope. +func (s *CFunctionScope) GetVariable(varName string) *CVariableBinding { + bindings := s.Variables[varName] + if len(bindings) == 0 { + return nil + } + return bindings[len(bindings)-1] +} + +// HasVariable reports whether at least one binding exists for varName. +func (s *CFunctionScope) HasVariable(varName string) bool { + return len(s.Variables[varName]) > 0 +} + +// GetAllBindings returns every binding recorded for varName, in +// insertion order. Callers must not mutate the slice — return value +// is the live storage for performance. +func (s *CFunctionScope) GetAllBindings(varName string) []*CVariableBinding { + return s.Variables[varName] +} + +// CTypeInferenceEngine indexes explicit type information for a parsed +// C codebase: function return types and per-function variable scopes. +// +// The engine performs no inference, no propagation, and no flow +// analysis — every entry mirrors a type that appears verbatim in the +// source. Higher-confidence handlers (PR-07's call-graph builder) layer +// further analysis on top. +// +// Lifecycle: +// +// - Construct once with NewCTypeInferenceEngine(registry). +// - Populate from multiple goroutines during parallel Pass 2 +// extraction (`go test -race` clean). +// - Read-only consumption during call-graph construction. +// +// Embedding: CppTypeInferenceEngine embeds this type by value to inherit +// every method, so consumers can call ExtractReturnType, GetScope, etc. +// uniformly across both languages. +type CTypeInferenceEngine struct { + // Scopes maps function FQN to the variables declared inside it. + Scopes map[string]*CFunctionScope + + // ReturnTypes maps function FQN to its declared return type. void + // returns are intentionally absent — see ExtractReturnType. + ReturnTypes map[string]*core.TypeInfo + + // Registry exposes the C module registry for FQN resolution. The + // engine itself never mutates the registry. + Registry *core.CModuleRegistry + + scopeMutex sync.RWMutex + typeMutex sync.RWMutex +} + +// NewCTypeInferenceEngine returns an engine with allocated maps wired +// to the supplied registry. Passing a nil registry is permitted — +// the engine will simply produce no FQN-aware lookups, but type +// extraction still works (useful for unit tests). +func NewCTypeInferenceEngine(registry *core.CModuleRegistry) *CTypeInferenceEngine { + return &CTypeInferenceEngine{ + Scopes: make(map[string]*CFunctionScope), + ReturnTypes: make(map[string]*core.TypeInfo), + Registry: registry, + } +} + +// ============================================================================= +// Return type management +// ============================================================================= + +// ExtractReturnType records the explicit return type for the function +// identified by fqn. Empty types and the literal "void" are dropped: a +// void return carries no information for type-driven resolution and +// would only pollute downstream lookups. +// +// Safe for concurrent use. +func (e *CTypeInferenceEngine) ExtractReturnType(fqn, returnType string) { + if fqn == "" || returnType == "" || returnType == "void" { + return + } + info := &core.TypeInfo{ + TypeFQN: returnType, + Confidence: 1.0, + Source: declarationSource, + } + e.typeMutex.Lock() + e.ReturnTypes[fqn] = info + e.typeMutex.Unlock() +} + +// AddReturnType stores a precomputed TypeInfo for fqn. Useful when the +// caller has already classified a return type (e.g. through a future +// stdlib registry). Nil typeInfo is ignored. +func (e *CTypeInferenceEngine) AddReturnType(fqn string, typeInfo *core.TypeInfo) { + if fqn == "" || typeInfo == nil { + return + } + e.typeMutex.Lock() + e.ReturnTypes[fqn] = typeInfo + e.typeMutex.Unlock() +} + +// GetReturnType returns the recorded return type for fqn, or nil when +// none was registered (which includes void functions). +func (e *CTypeInferenceEngine) GetReturnType(fqn string) *core.TypeInfo { + e.typeMutex.RLock() + defer e.typeMutex.RUnlock() + return e.ReturnTypes[fqn] +} + +// HasReturnType reports whether a return type has been recorded for fqn. +func (e *CTypeInferenceEngine) HasReturnType(fqn string) bool { + e.typeMutex.RLock() + defer e.typeMutex.RUnlock() + _, ok := e.ReturnTypes[fqn] + return ok +} + +// GetAllReturnTypes returns a snapshot copy of every registered return +// type. The copy keeps the caller insulated from concurrent writes. +func (e *CTypeInferenceEngine) GetAllReturnTypes() map[string]*core.TypeInfo { + e.typeMutex.RLock() + defer e.typeMutex.RUnlock() + out := make(map[string]*core.TypeInfo, len(e.ReturnTypes)) + maps.Copy(out, e.ReturnTypes) + return out +} + +// ============================================================================= +// Scope and variable management +// ============================================================================= + +// ExtractVariableType registers an explicit variable declaration inside +// functionFQN. Empty arguments are silently dropped so callers do not +// need to pre-validate parser output. +// +// Safe for concurrent use. The function lazily creates the scope on +// first sight of functionFQN, so callers do not have to call AddScope +// before the first variable. +func (e *CTypeInferenceEngine) ExtractVariableType(functionFQN, varName, typeStr string, loc Location) { + if functionFQN == "" || varName == "" || typeStr == "" { + return + } + binding := &CVariableBinding{ + VarName: varName, + Type: &core.TypeInfo{ + TypeFQN: typeStr, + Confidence: 1.0, + Source: declarationSource, + }, + Location: loc, + } + e.appendBinding(functionFQN, binding) +} + +// AddScope replaces (or installs) a complete scope for a function. Used +// by tests or by callers that want to batch-build a scope before +// publishing it to the engine. Nil scopes are ignored. +func (e *CTypeInferenceEngine) AddScope(scope *CFunctionScope) { + if scope == nil || scope.FunctionFQN == "" { + return + } + e.scopeMutex.Lock() + e.Scopes[scope.FunctionFQN] = scope + e.scopeMutex.Unlock() +} + +// GetScope returns the scope for functionFQN, or nil if none exists. +func (e *CTypeInferenceEngine) GetScope(functionFQN string) *CFunctionScope { + e.scopeMutex.RLock() + defer e.scopeMutex.RUnlock() + return e.Scopes[functionFQN] +} + +// HasScope reports whether a scope exists for functionFQN. +func (e *CTypeInferenceEngine) HasScope(functionFQN string) bool { + e.scopeMutex.RLock() + defer e.scopeMutex.RUnlock() + _, ok := e.Scopes[functionFQN] + return ok +} + +// GetAllScopes returns a snapshot copy of every registered scope. +func (e *CTypeInferenceEngine) GetAllScopes() map[string]*CFunctionScope { + e.scopeMutex.RLock() + defer e.scopeMutex.RUnlock() + out := make(map[string]*CFunctionScope, len(e.Scopes)) + maps.Copy(out, e.Scopes) + return out +} + +// appendBinding installs binding inside the scope keyed by functionFQN, +// creating the scope on demand. The mutex protects the map mutation +// only — the per-scope slice is appended after re-acquiring the lock, +// so concurrent ExtractVariableType calls on the same function are +// serialised through this single lock. +func (e *CTypeInferenceEngine) appendBinding(functionFQN string, binding *CVariableBinding) { + e.scopeMutex.Lock() + defer e.scopeMutex.Unlock() + scope, ok := e.Scopes[functionFQN] + if !ok { + scope = NewCFunctionScope(functionFQN) + e.Scopes[functionFQN] = scope + } + scope.AddVariable(binding) +} diff --git a/sast-engine/graph/callgraph/resolution/c_types_test.go b/sast-engine/graph/callgraph/resolution/c_types_test.go new file mode 100644 index 00000000..b392a13e --- /dev/null +++ b/sast-engine/graph/callgraph/resolution/c_types_test.go @@ -0,0 +1,252 @@ +package resolution_test + +import ( + "strconv" + "sync" + "testing" + + "github.com/shivasurya/code-pathfinder/sast-engine/graph/callgraph/core" + "github.com/shivasurya/code-pathfinder/sast-engine/graph/callgraph/resolution" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestNewCTypeInferenceEngine_AllocatesMaps(t *testing.T) { + registry := core.NewCModuleRegistry("/projects/myapp") + engine := resolution.NewCTypeInferenceEngine(registry) + + require.NotNil(t, engine) + assert.Same(t, registry, engine.Registry, "registry pointer must round-trip") + assert.NotNil(t, engine.Scopes) + assert.NotNil(t, engine.ReturnTypes) + + // nil-registry construction is permitted (used by tests + future + // callers that want type-only extraction without FQN context). + nilEngine := resolution.NewCTypeInferenceEngine(nil) + require.NotNil(t, nilEngine) + assert.Nil(t, nilEngine.Registry) +} + +func TestCTypeInferenceEngine_ExtractReturnType(t *testing.T) { + engine := resolution.NewCTypeInferenceEngine(nil) + fqn := "src/main.c::compute" + + engine.ExtractReturnType(fqn, "int") + + got := engine.GetReturnType(fqn) + require.NotNil(t, got) + assert.Equal(t, "int", got.TypeFQN) + assert.InDelta(t, 1.0, got.Confidence, 1e-6) + assert.Equal(t, "declaration", got.Source) + assert.True(t, engine.HasReturnType(fqn)) +} + +func TestCTypeInferenceEngine_ExtractReturnType_VoidIsDropped(t *testing.T) { + engine := resolution.NewCTypeInferenceEngine(nil) + fqn := "src/main.c::do_nothing" + + engine.ExtractReturnType(fqn, "void") + + assert.Nil(t, engine.GetReturnType(fqn), "void must not be stored") + assert.False(t, engine.HasReturnType(fqn)) + + // Empty arguments are also no-ops. + engine.ExtractReturnType("", "int") + engine.ExtractReturnType(fqn, "") + assert.False(t, engine.HasReturnType(fqn)) +} + +func TestCTypeInferenceEngine_AddReturnType_PreservesProvidedTypeInfo(t *testing.T) { + engine := resolution.NewCTypeInferenceEngine(nil) + fqn := "src/main.c::adopt" + + custom := &core.TypeInfo{TypeFQN: "Buffer*", Confidence: 0.7, Source: "declaration"} + engine.AddReturnType(fqn, custom) + + got := engine.GetReturnType(fqn) + require.NotNil(t, got) + assert.Equal(t, "Buffer*", got.TypeFQN) + assert.InDelta(t, 0.7, got.Confidence, 1e-6) + assert.Equal(t, "declaration", got.Source) + + // nil typeInfo and empty fqn are silent no-ops. + engine.AddReturnType(fqn, nil) + engine.AddReturnType("", custom) + assert.Same(t, custom, engine.GetReturnType(fqn), "existing entry must not be overwritten by nil/empty calls") +} + +func TestCTypeInferenceEngine_GetAllReturnTypes_ReturnsCopy(t *testing.T) { + engine := resolution.NewCTypeInferenceEngine(nil) + engine.ExtractReturnType("src/a.c::a", "int") + engine.ExtractReturnType("src/b.c::b", "char*") + + all := engine.GetAllReturnTypes() + require.Len(t, all, 2) + + // Mutating the snapshot does not affect engine state. + delete(all, "src/a.c::a") + assert.Len(t, engine.GetAllReturnTypes(), 2, "snapshot must be a copy") +} + +func TestCTypeInferenceEngine_ExtractVariableType(t *testing.T) { + engine := resolution.NewCTypeInferenceEngine(nil) + fqn := "src/main.c::main" + + loc := resolution.Location{File: "/projects/myapp/src/main.c", Line: 10, Column: 5} + engine.ExtractVariableType(fqn, "buf", "char*", loc) + + scope := engine.GetScope(fqn) + require.NotNil(t, scope) + assert.Equal(t, fqn, scope.FunctionFQN) + assert.True(t, scope.HasVariable("buf")) + + binding := scope.GetVariable("buf") + require.NotNil(t, binding) + assert.Equal(t, "buf", binding.VarName) + require.NotNil(t, binding.Type) + assert.Equal(t, "char*", binding.Type.TypeFQN) + assert.InDelta(t, 1.0, binding.Type.Confidence, 1e-6) + assert.Equal(t, "declaration", binding.Type.Source) + assert.Equal(t, loc, binding.Location) +} + +func TestCTypeInferenceEngine_ExtractVariableType_LatestWins(t *testing.T) { + engine := resolution.NewCTypeInferenceEngine(nil) + fqn := "src/main.c::main" + + engine.ExtractVariableType(fqn, "n", "int", resolution.Location{Line: 1}) + engine.ExtractVariableType(fqn, "n", "long", resolution.Location{Line: 5}) + + binding := engine.GetScope(fqn).GetVariable("n") + require.NotNil(t, binding) + assert.Equal(t, "long", binding.Type.TypeFQN, "GetVariable must return the most recent binding") + + all := engine.GetScope(fqn).GetAllBindings("n") + require.Len(t, all, 2) + assert.Equal(t, "int", all[0].Type.TypeFQN) + assert.Equal(t, "long", all[1].Type.TypeFQN) +} + +func TestCTypeInferenceEngine_ExtractVariableType_DropsEmptyInputs(t *testing.T) { + engine := resolution.NewCTypeInferenceEngine(nil) + loc := resolution.Location{} + + engine.ExtractVariableType("", "x", "int", loc) + engine.ExtractVariableType("src/m.c::m", "", "int", loc) + engine.ExtractVariableType("src/m.c::m", "x", "", loc) + + assert.Nil(t, engine.GetScope("src/m.c::m"), "no scope should be created from empty inputs") +} + +func TestCTypeInferenceEngine_GetScope_Miss(t *testing.T) { + engine := resolution.NewCTypeInferenceEngine(nil) + assert.Nil(t, engine.GetScope("nonexistent")) + assert.False(t, engine.HasScope("nonexistent")) +} + +func TestCTypeInferenceEngine_AddScope_StandaloneInsertion(t *testing.T) { + engine := resolution.NewCTypeInferenceEngine(nil) + + scope := resolution.NewCFunctionScope("src/x.c::f") + scope.AddVariable(&resolution.CVariableBinding{ + VarName: "p", + Type: &core.TypeInfo{TypeFQN: "int*", Confidence: 1.0, Source: "declaration"}, + }) + engine.AddScope(scope) + + got := engine.GetScope("src/x.c::f") + require.NotNil(t, got) + assert.Equal(t, "int*", got.GetVariable("p").Type.TypeFQN) + + // nil scopes and empty FQNs are dropped. + engine.AddScope(nil) + engine.AddScope(resolution.NewCFunctionScope("")) + assert.Len(t, engine.GetAllScopes(), 1) +} + +func TestCFunctionScope_DefensiveAdd(t *testing.T) { + scope := resolution.NewCFunctionScope("src/x.c::f") + + // nil binding is silently dropped. + scope.AddVariable(nil) + // Empty VarName is silently dropped. + scope.AddVariable(&resolution.CVariableBinding{VarName: ""}) + + assert.Empty(t, scope.Variables) + assert.Nil(t, scope.GetVariable("anything")) + assert.Empty(t, scope.GetAllBindings("anything")) +} + +// TestCTypeInferenceEngine_ConcurrentAccess intentionally mixes reads +// and writes from multiple goroutines so `go test -race` exercises the +// internal RWMutex pair. Failures here indicate a missing lock, a +// double-Unlock, or a map mutation outside the critical section. +func TestCTypeInferenceEngine_ConcurrentAccess(t *testing.T) { + engine := resolution.NewCTypeInferenceEngine(nil) + const goroutines = 16 + const opsPerGoroutine = 200 + + var wg sync.WaitGroup + wg.Add(goroutines * 2) + + // Writers: each goroutine writes a unique slice of FQNs. + for g := range goroutines { + go func(seed int) { + defer wg.Done() + for i := range opsPerGoroutine { + fqn := "src/c" + strconv.Itoa(seed) + ".c::fn" + strconv.Itoa(i) + engine.ExtractReturnType(fqn, "int") + engine.ExtractVariableType(fqn, "x", "int", resolution.Location{Line: uint32(i)}) + } + }(g) + } + // Readers: hammer the snapshot accessors and lookup methods. + for range goroutines { + go func() { + defer wg.Done() + for range opsPerGoroutine { + _ = engine.GetAllReturnTypes() + _ = engine.GetAllScopes() + _ = engine.GetReturnType("src/c0.c::fn0") + if scope := engine.GetScope("src/c0.c::fn0"); scope != nil { + _ = scope.GetVariable("x") + } + } + }() + } + wg.Wait() + + assert.Len(t, engine.GetAllReturnTypes(), goroutines*opsPerGoroutine) + assert.Len(t, engine.GetAllScopes(), goroutines*opsPerGoroutine) +} + +// TestCTypeInferenceEngine_ComplexCTypes verifies the engine preserves +// the exact type string the parser produced — pointer modifiers, const +// qualifiers, multi-word types, and tag-prefixed struct references must +// all round-trip without normalisation. +func TestCTypeInferenceEngine_ComplexCTypes(t *testing.T) { + engine := resolution.NewCTypeInferenceEngine(nil) + fqn := "src/main.c::work" + loc := resolution.Location{} + + cases := []struct { + name string + typ string + }{ + {"const_pointer", "const char*"}, + {"unsigned_long_long", "unsigned long long"}, + {"struct_pointer", "struct Buffer*"}, + {"void_pointer", "void*"}, + {"size_t", "size_t"}, + {"function_pointer", "int (*)(int, int)"}, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + engine.ExtractVariableType(fqn, tc.name, tc.typ, loc) + binding := engine.GetScope(fqn).GetVariable(tc.name) + require.NotNil(t, binding) + assert.Equal(t, tc.typ, binding.Type.TypeFQN, "complex type must round-trip verbatim") + }) + } +} diff --git a/sast-engine/graph/callgraph/resolution/cpp_types.go b/sast-engine/graph/callgraph/resolution/cpp_types.go new file mode 100644 index 00000000..8359acbb --- /dev/null +++ b/sast-engine/graph/callgraph/resolution/cpp_types.go @@ -0,0 +1,205 @@ +package resolution + +import ( + "sync" + + "github.com/shivasurya/code-pathfinder/sast-engine/graph/callgraph/core" +) + +// autoTypeName is the literal C++ keyword for placeholder types (`auto x = ...`). +// Detected exactly — qualified forms such as `auto*` or `auto&` are kept +// as-is because the parser already strips the keyword from those. +const autoTypeName = "auto" + +// autoSource is the TypeInfo.Source value used for unresolved `auto` +// declarations. Distinct from declarationSource so resolvers can skip +// auto bindings until Phase 2 deduces a concrete type. +const autoSource = "unresolved_auto" + +// CppTypeInferenceEngine extends CTypeInferenceEngine with C++ class +// member tracking. By embedding the C engine it inherits every +// scope- and return-type method, so callers can use a single engine to +// resolve both C-style functions and C++ classes. +// +// In addition to the C-level data, it indexes: +// +// - Method return types per class — used by call-graph resolution to +// compute the type of `obj.method()` once the receiver type is +// known. +// - Field types per class — used by call-graph resolution when a +// method is invoked via a member like `this->buffer.write(...)`. +// +// The maps are keyed by bare class name (e.g. "Socket") rather than +// fully-qualified class FQN; that mirrors how the parser emits class +// declarations and keeps lookups fast on hot paths. Callers requiring +// disambiguation across namespaces should pass FQNs explicitly to +// RegisterClassMethod. +type CppTypeInferenceEngine struct { + // CTypeInferenceEngine provides function- and variable-level + // indexing. Embedded by value so methods like ExtractReturnType, + // GetScope, and GetVariable resolve uniformly through the C++ engine. + CTypeInferenceEngine + + // CppRegistry is the C++-aware module registry. The embedded C + // engine holds a pointer to its CModuleRegistry for the C-only + // lookups; CppRegistry preserves access to NamespaceIndex and + // ClassIndex without forcing callers to type-assert. + CppRegistry *core.CppModuleRegistry + + // ClassMethods maps className -> methodName -> return type. nil + // outer entries are created lazily on first registration. + ClassMethods map[string]map[string]*core.TypeInfo + + // ClassFields maps className -> fieldName -> field type. Same + // lazy-allocation contract as ClassMethods. + ClassFields map[string]map[string]*core.TypeInfo + + classMethodMutex sync.RWMutex + classFieldMutex sync.RWMutex +} + +// NewCppTypeInferenceEngine constructs an engine wired to a C++ module +// registry. The embedded C engine is bound to the same root by +// reference (it borrows registry's CModuleRegistry), so any field +// added to the registry post-construction is visible to both. +// +// A nil registry is permitted; the engine still functions for tests +// and isolated extraction. +func NewCppTypeInferenceEngine(registry *core.CppModuleRegistry) *CppTypeInferenceEngine { + var cReg *core.CModuleRegistry + if registry != nil { + cReg = ®istry.CModuleRegistry + } + return &CppTypeInferenceEngine{ + CTypeInferenceEngine: *NewCTypeInferenceEngine(cReg), + CppRegistry: registry, + ClassMethods: make(map[string]map[string]*core.TypeInfo), + ClassFields: make(map[string]map[string]*core.TypeInfo), + } +} + +// ============================================================================= +// auto handling +// ============================================================================= + +// ExtractVariableType overrides the embedded C engine's behaviour to +// recognise the C++ `auto` placeholder. Auto declarations are recorded +// with Confidence=0 and Source="unresolved_auto" so later inference +// phases can find and refine them; resolvers gate on Confidence>=1.0 +// for explicit-only resolution and skip these. +// +// All non-auto types delegate to the C engine for identical handling. +func (e *CppTypeInferenceEngine) ExtractVariableType(functionFQN, varName, typeStr string, loc Location) { + if functionFQN == "" || varName == "" || typeStr == "" { + return + } + if typeStr != autoTypeName { + e.CTypeInferenceEngine.ExtractVariableType(functionFQN, varName, typeStr, loc) + return + } + binding := &CVariableBinding{ + VarName: varName, + Type: &core.TypeInfo{ + TypeFQN: autoTypeName, + Confidence: 0.0, + Source: autoSource, + }, + Location: loc, + } + e.appendBinding(functionFQN, binding) +} + +// ============================================================================= +// Class method registration +// ============================================================================= + +// RegisterClassMethod records the explicit return type of methodName on +// className. Empty arguments are silently dropped. Calling the function +// twice for the same key replaces the previous entry — the most recent +// declaration wins, mirroring C++ overload behaviour where redeclarations +// must agree. +// +// Safe for concurrent use. +func (e *CppTypeInferenceEngine) RegisterClassMethod(className, methodName, returnType string) { + if className == "" || methodName == "" || returnType == "" || returnType == "void" { + return + } + info := &core.TypeInfo{ + TypeFQN: returnType, + Confidence: 1.0, + Source: declarationSource, + } + e.classMethodMutex.Lock() + defer e.classMethodMutex.Unlock() + methods, ok := e.ClassMethods[className] + if !ok { + methods = make(map[string]*core.TypeInfo) + e.ClassMethods[className] = methods + } + methods[methodName] = info +} + +// GetMethodReturnType looks up the recorded return type of methodName +// on className. Returns nil when the class is unknown or the method is +// unregistered (including void methods, which are intentionally not +// stored). +func (e *CppTypeInferenceEngine) GetMethodReturnType(className, methodName string) *core.TypeInfo { + e.classMethodMutex.RLock() + defer e.classMethodMutex.RUnlock() + if methods, ok := e.ClassMethods[className]; ok { + return methods[methodName] + } + return nil +} + +// HasClassMethod reports whether a method type has been registered for +// className/methodName. +func (e *CppTypeInferenceEngine) HasClassMethod(className, methodName string) bool { + return e.GetMethodReturnType(className, methodName) != nil +} + +// ============================================================================= +// Class field registration +// ============================================================================= + +// RegisterClassField records the explicit type of fieldName on +// className. Empty arguments are silently dropped. Like +// RegisterClassMethod, repeated calls overwrite — duplicate field +// declarations should never happen in well-formed C++. +// +// Safe for concurrent use. +func (e *CppTypeInferenceEngine) RegisterClassField(className, fieldName, typeStr string) { + if className == "" || fieldName == "" || typeStr == "" { + return + } + info := &core.TypeInfo{ + TypeFQN: typeStr, + Confidence: 1.0, + Source: declarationSource, + } + e.classFieldMutex.Lock() + defer e.classFieldMutex.Unlock() + fields, ok := e.ClassFields[className] + if !ok { + fields = make(map[string]*core.TypeInfo) + e.ClassFields[className] = fields + } + fields[fieldName] = info +} + +// GetFieldType looks up the recorded type of fieldName on className. +// Returns nil when the class is unknown or the field is unregistered. +func (e *CppTypeInferenceEngine) GetFieldType(className, fieldName string) *core.TypeInfo { + e.classFieldMutex.RLock() + defer e.classFieldMutex.RUnlock() + if fields, ok := e.ClassFields[className]; ok { + return fields[fieldName] + } + return nil +} + +// HasClassField reports whether a field type has been registered for +// className/fieldName. +func (e *CppTypeInferenceEngine) HasClassField(className, fieldName string) bool { + return e.GetFieldType(className, fieldName) != nil +} diff --git a/sast-engine/graph/callgraph/resolution/cpp_types_test.go b/sast-engine/graph/callgraph/resolution/cpp_types_test.go new file mode 100644 index 00000000..5b54c266 --- /dev/null +++ b/sast-engine/graph/callgraph/resolution/cpp_types_test.go @@ -0,0 +1,250 @@ +package resolution_test + +import ( + "strconv" + "sync" + "testing" + + "github.com/shivasurya/code-pathfinder/sast-engine/graph/callgraph/core" + "github.com/shivasurya/code-pathfinder/sast-engine/graph/callgraph/resolution" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestNewCppTypeInferenceEngine_AllocatesMaps(t *testing.T) { + cppRegistry := core.NewCppModuleRegistry("/projects/cppapp") + engine := resolution.NewCppTypeInferenceEngine(cppRegistry) + + require.NotNil(t, engine) + assert.Same(t, cppRegistry, engine.CppRegistry, "C++ registry must round-trip") + require.NotNil(t, engine.Registry, "embedded C registry must point to the embedded CModuleRegistry") + assert.Same(t, &cppRegistry.CModuleRegistry, engine.Registry, "embedded registry must alias the C++ registry's C facet") + assert.NotNil(t, engine.ClassMethods) + assert.NotNil(t, engine.ClassFields) + + // nil registry construction does not panic. + nilEngine := resolution.NewCppTypeInferenceEngine(nil) + require.NotNil(t, nilEngine) + assert.Nil(t, nilEngine.CppRegistry) + assert.Nil(t, nilEngine.Registry) +} + +// TestCppTypeInferenceEngine_EmbeddedCMethods exercises the spec's +// "embedded engine" requirement: every C-engine method must be callable +// directly on the C++ engine without re-implementation. +func TestCppTypeInferenceEngine_EmbeddedCMethods(t *testing.T) { + engine := resolution.NewCppTypeInferenceEngine(nil) + fqn := "src/main.cpp::main" + + engine.ExtractReturnType(fqn, "int") + got := engine.GetReturnType(fqn) + require.NotNil(t, got) + assert.Equal(t, "int", got.TypeFQN) + assert.True(t, engine.HasReturnType(fqn)) + + engine.ExtractVariableType(fqn, "n", "int", resolution.Location{Line: 3}) + scope := engine.GetScope(fqn) + require.NotNil(t, scope) + assert.Equal(t, "int", scope.GetVariable("n").Type.TypeFQN) +} + +func TestCppTypeInferenceEngine_AutoStoresWithZeroConfidence(t *testing.T) { + engine := resolution.NewCppTypeInferenceEngine(nil) + fqn := "src/main.cpp::main" + loc := resolution.Location{Line: 7} + + engine.ExtractVariableType(fqn, "x", "auto", loc) + + scope := engine.GetScope(fqn) + require.NotNil(t, scope) + binding := scope.GetVariable("x") + require.NotNil(t, binding) + assert.Equal(t, "auto", binding.Type.TypeFQN) + assert.InDelta(t, 0.0, binding.Type.Confidence, 1e-6) + assert.Equal(t, "unresolved_auto", binding.Type.Source) + assert.Equal(t, loc, binding.Location) +} + +// TestCppTypeInferenceEngine_AutoExactMatch verifies that the auto +// detection uses an exact equality on the type string. Modifiers like +// `auto*` and `auto&` must NOT trigger the unresolved branch — those +// are concrete types in their own right (modifying a deduced type). +// TestCppTypeInferenceEngine_ExtractVariableType_DropsEmptyInputs +// guards the early-return branch that mirrors the C engine's contract. +// Empty FQN, var name, or type string must not produce a binding. +func TestCppTypeInferenceEngine_ExtractVariableType_DropsEmptyInputs(t *testing.T) { + engine := resolution.NewCppTypeInferenceEngine(nil) + loc := resolution.Location{} + + engine.ExtractVariableType("", "x", "auto", loc) + engine.ExtractVariableType("src/m.cpp::m", "", "auto", loc) + engine.ExtractVariableType("src/m.cpp::m", "x", "", loc) + + assert.Nil(t, engine.GetScope("src/m.cpp::m")) +} + +func TestCppTypeInferenceEngine_AutoExactMatch(t *testing.T) { + engine := resolution.NewCppTypeInferenceEngine(nil) + fqn := "src/main.cpp::main" + + engine.ExtractVariableType(fqn, "p", "auto*", resolution.Location{}) + engine.ExtractVariableType(fqn, "r", "auto&", resolution.Location{}) + + pBinding := engine.GetScope(fqn).GetVariable("p") + require.NotNil(t, pBinding) + assert.Equal(t, "auto*", pBinding.Type.TypeFQN) + assert.InDelta(t, 1.0, pBinding.Type.Confidence, 1e-6, "auto* is concrete, must keep full confidence") + assert.Equal(t, "declaration", pBinding.Type.Source) + + rBinding := engine.GetScope(fqn).GetVariable("r") + require.NotNil(t, rBinding) + assert.InDelta(t, 1.0, rBinding.Type.Confidence, 1e-6) +} + +func TestCppTypeInferenceEngine_RegisterClassMethod(t *testing.T) { + engine := resolution.NewCppTypeInferenceEngine(nil) + + engine.RegisterClassMethod("Socket", "connect", "bool") + + info := engine.GetMethodReturnType("Socket", "connect") + require.NotNil(t, info) + assert.Equal(t, "bool", info.TypeFQN) + assert.InDelta(t, 1.0, info.Confidence, 1e-6) + assert.Equal(t, "declaration", info.Source) + assert.True(t, engine.HasClassMethod("Socket", "connect")) +} + +func TestCppTypeInferenceEngine_RegisterClassMethod_DropsVoidAndEmpty(t *testing.T) { + engine := resolution.NewCppTypeInferenceEngine(nil) + + engine.RegisterClassMethod("Socket", "init", "void") + engine.RegisterClassMethod("", "connect", "bool") + engine.RegisterClassMethod("Socket", "", "bool") + engine.RegisterClassMethod("Socket", "connect", "") + + assert.Nil(t, engine.GetMethodReturnType("Socket", "init"), "void return must not be stored") + assert.Nil(t, engine.GetMethodReturnType("Socket", "connect"), "incomplete inputs must not register anything") + assert.False(t, engine.HasClassMethod("Socket", "init")) + assert.False(t, engine.HasClassMethod("Other", "x")) +} + +func TestCppTypeInferenceEngine_RegisterClassMethod_RedeclarationOverwrites(t *testing.T) { + engine := resolution.NewCppTypeInferenceEngine(nil) + + engine.RegisterClassMethod("Socket", "connect", "bool") + engine.RegisterClassMethod("Socket", "connect", "Status") + + info := engine.GetMethodReturnType("Socket", "connect") + require.NotNil(t, info) + assert.Equal(t, "Status", info.TypeFQN, "the most recent registration must win") +} + +func TestCppTypeInferenceEngine_RegisterClassField(t *testing.T) { + engine := resolution.NewCppTypeInferenceEngine(nil) + + engine.RegisterClassField("Socket", "port", "int") + engine.RegisterClassField("Socket", "name", "std::string") + + port := engine.GetFieldType("Socket", "port") + require.NotNil(t, port) + assert.Equal(t, "int", port.TypeFQN) + assert.InDelta(t, 1.0, port.Confidence, 1e-6) + assert.Equal(t, "declaration", port.Source) + assert.True(t, engine.HasClassField("Socket", "port")) + assert.True(t, engine.HasClassField("Socket", "name")) + assert.False(t, engine.HasClassField("Socket", "missing")) + assert.False(t, engine.HasClassField("Other", "port")) +} + +func TestCppTypeInferenceEngine_RegisterClassField_DropsEmpty(t *testing.T) { + engine := resolution.NewCppTypeInferenceEngine(nil) + + engine.RegisterClassField("", "port", "int") + engine.RegisterClassField("Socket", "", "int") + engine.RegisterClassField("Socket", "port", "") + + assert.Nil(t, engine.GetFieldType("Socket", "port")) +} + +// TestCppTypeInferenceEngine_ComplexCppTypes verifies template +// instantiations, references, and multi-argument templates round-trip +// verbatim through every registration path. +func TestCppTypeInferenceEngine_ComplexCppTypes(t *testing.T) { + engine := resolution.NewCppTypeInferenceEngine(nil) + fqn := "src/main.cpp::process" + + cases := []struct { + name string + typ string + }{ + {"vector_int", "std::vector"}, + {"const_string_ref", "const std::string&"}, + {"map_string_int", "std::map"}, + {"unique_ptr", "std::unique_ptr"}, + {"nested_template", "std::vector>"}, + } + for _, tc := range cases { + engine.ExtractVariableType(fqn, tc.name, tc.typ, resolution.Location{}) + engine.RegisterClassMethod("Manager", "make_"+tc.name, tc.typ) + engine.RegisterClassField("Manager", "field_"+tc.name, tc.typ) + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + binding := engine.GetScope(fqn).GetVariable(tc.name) + require.NotNil(t, binding) + assert.Equal(t, tc.typ, binding.Type.TypeFQN) + + method := engine.GetMethodReturnType("Manager", "make_"+tc.name) + require.NotNil(t, method) + assert.Equal(t, tc.typ, method.TypeFQN) + + field := engine.GetFieldType("Manager", "field_"+tc.name) + require.NotNil(t, field) + assert.Equal(t, tc.typ, field.TypeFQN) + }) + } +} + +// TestCppTypeInferenceEngine_ConcurrentAccess covers the C++-only +// classMethodMutex / classFieldMutex pair. The C-level mutexes are +// already exercised by TestCTypeInferenceEngine_ConcurrentAccess, so +// this test focuses on the new locks introduced by the C++ engine. +func TestCppTypeInferenceEngine_ConcurrentAccess(t *testing.T) { + engine := resolution.NewCppTypeInferenceEngine(nil) + const goroutines = 16 + const opsPerGoroutine = 200 + + var wg sync.WaitGroup + wg.Add(goroutines * 2) + + for g := range goroutines { + go func(seed int) { + defer wg.Done() + for i := range opsPerGoroutine { + className := "Class" + strconv.Itoa(seed) + name := "m" + strconv.Itoa(i) + engine.RegisterClassMethod(className, name, "int") + engine.RegisterClassField(className, "f"+name, "int") + } + }(g) + } + for range goroutines { + go func() { + defer wg.Done() + for range opsPerGoroutine { + _ = engine.GetMethodReturnType("Class0", "m0") + _ = engine.GetFieldType("Class0", "fm0") + _ = engine.HasClassMethod("Class0", "m0") + _ = engine.HasClassField("Class0", "fm0") + } + }() + } + wg.Wait() + + for g := range goroutines { + className := "Class" + strconv.Itoa(g) + assert.True(t, engine.HasClassMethod(className, "m0")) + assert.True(t, engine.HasClassField(className, "fm0")) + } +}