% Chapter with its own authors inside an edited handbook (author + editor + booktitle).
% Typed as incollection: classic BibTeX styles do not print booktitle for inbook entries.
% Names use the unambiguous "Last, First" form; acronyms in titles are brace-protected
% so that sentence-casing styles do not lowercase them.
% NOTE(review): address holds a country rather than the publisher's city, and the
% series volume number is absent -- confirm both against the publisher record.
@incollection{c4d874fe80cf4478afbf0f3438257ab5,
  author    = {Ilievski, Filip and Khan, M. Jaleed and Curry, Edward},
  title     = {Neurosymbolic Visual Reasoning with Scene Graphs and Multimodal {LLMs}},
  editor    = {Hitzler, Pascal and Dalal, Abhilekha and Mahdavinejad, Mohammad Saeid and Norouzi, Sanaz Saki},
  booktitle = {Handbook on Neurosymbolic {AI} and Knowledge Graphs},
  series    = {Frontiers in Artificial Intelligence and Applications},
  publisher = {IOS Press},
  address   = {Netherlands},
  year      = {2025},
  pages     = {689--711},
  doi       = {10.3233/FAIA250227},
  isbn      = {9781643685786},
  language  = {English},
  abstract  = {This chapter explores the advancements and challenges in achieving comprehensive scene understanding and visual reasoning through neurosymbolic integration and Multimodal Large Language Models (MLLMs). It begins by highlighting the limitations of basic vision tasks in extracting contextual and relational information from scenes, introducing scene graphs as a structured representation to bridge this gap. The chapter delves into Scene Graph Generation (SGG) methods, emphasising the importance of incorporating common sense knowledge from knowledge graphs to enhance the accuracy and expressiveness of scene graphs. The NeuSyRE framework is presented as a neurosymbolic approach for enriched scene graph generation and reasoning, demonstrating its effectiveness in downstream tasks such as image captioning and visual question answering. The chapter also examines the role of MLLMs in visual reasoning, discussing their architectures, performance on zero-shot tasks and challenges in handling fine-grained visual details. MARVEL, a novel benchmark for abstract visual reasoning, is introduced to evaluate the perceptual and reasoning capabilities of MLLMs. Insights from MARVEL highlight the limitations of current MLLMs in solving complex reasoning tasks and underscore the potential of neurosymbolic systems to address these challenges. The chapter concludes by emphasising the synergy between neurosymbolic approaches and MLLMs in advancing visual intelligence and achieving robust and explainable AI systems.},
}