<?xml version="1.0" encoding="utf-8" standalone="yes"?>
<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom">
  <channel>
    <title>Yiming Xu</title>
    <link>https://ym-xu.github.io/</link>
    <description>Recent content on Yiming Xu</description>
    <generator>Hugo</generator>
    <language>en</language>
    <lastBuildDate>Mon, 01 Jun 2026 00:00:00 +0000</lastBuildDate>
    <atom:link href="https://ym-xu.github.io/index.xml" rel="self" type="application/rss+xml" />
    <item>
      <title>Hierarchical Planner</title>
      <link>https://ym-xu.github.io/projects/hierarchical-planner/</link>
      <pubDate>Wed, 01 Jan 2025 00:00:00 +0000</pubDate>
      <guid>https://ym-xu.github.io/projects/hierarchical-planner/</guid>
      <description>A navigation-first planner that walks a document’s hierarchy instead of flat top-k retrieval, using layered embeddings as routing hints.</description>
    </item>
    <item>
      <title>E-commerce AI Agent</title>
      <link>https://ym-xu.github.io/projects/ecommerce-agent/</link>
      <pubDate>Mon, 01 Jan 2024 00:00:00 +0000</pubDate>
      <guid>https://ym-xu.github.io/projects/ecommerce-agent/</guid>
      <description>Flagship conversational shopping agent. Led development end-to-end; the project went on to raise close to USD 7M in funding.</description>
    </item>
    <item>
      <title>Multimodal RAG</title>
      <link>https://ym-xu.github.io/projects/multimodal-rag/</link>
      <pubDate>Sun, 01 Jan 2023 00:00:00 +0000</pubDate>
      <guid>https://ym-xu.github.io/projects/multimodal-rag/</guid>
      <description>Master’s thesis on retrieval-augmented generation across text and image, supervised by Benno Kruit and Jan-Christoph Kalo.</description>
    </item>
    <item>
      <title>Strategic Navigation or Stochastic Search? How Agents and Humans Reason Over Document Collections</title>
      <link>https://ym-xu.github.io/publications/icml-madqa/</link>
      <pubDate>Mon, 01 Jun 2026 00:00:00 +0000</pubDate>
      <guid>https://ym-xu.github.io/publications/icml-madqa/</guid>
      <description>MADQA — a multimodal agentic document-QA benchmark that scores full search trajectories, not just answers, and shows agents match human accuracy only by working ~5× harder.</description>
    </item>
    <item>
      <title>AdaNav: Query-Adaptive Multi-Granularity Navigation for Long Document Understanding</title>
      <link>https://ym-xu.github.io/publications/icdar-adanav/</link>
      <pubDate>Wed, 01 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://ym-xu.github.io/publications/icdar-adanav/</guid>
      <description>AdaNav builds a multimodal document tree and navigates it at query-adaptive granularity — no embedding retriever — beating open-source VLM agent systems by over 5% on MMLongBench-Doc while reading fewer pages.</description>
    </item>
    <item>
      <title>Planner design</title>
      <link>https://ym-xu.github.io/posts/2025/10/planner-design/</link>
      <pubDate>Tue, 07 Oct 2025 00:00:00 +0000</pubDate>
      <guid>https://ym-xu.github.io/posts/2025/10/planner-design/</guid>
      <description>Hierarchical + leaf embeddings for navigation, not naive top-k retrieval.</description>
    </item>
    <item>
      <title>Context engineering</title>
      <link>https://ym-xu.github.io/posts/2025/09/context-engineering/</link>
      <pubDate>Sun, 21 Sep 2025 00:00:00 +0000</pubDate>
      <guid>https://ym-xu.github.io/posts/2025/09/context-engineering/</guid>
      <description>Notes on shaping the context window as a first-class design surface.</description>
    </item>
    <item>
      <title>DocLens</title>
      <link>https://ym-xu.github.io/posts/2025/08/doclens/</link>
      <pubDate>Thu, 14 Aug 2025 00:00:00 +0000</pubDate>
      <guid>https://ym-xu.github.io/posts/2025/08/doclens/</guid>
      <description>A layout-aware observer for structured document understanding.</description>
    </item>
    <item>
      <title>Reading list</title>
      <link>https://ym-xu.github.io/posts/2025/07/reading-list/</link>
      <pubDate>Wed, 02 Jul 2025 00:00:00 +0000</pubDate>
      <guid>https://ym-xu.github.io/posts/2025/07/reading-list/</guid>
      <description>A running list of papers on multimodal retrieval and document AI.</description>
    </item>
    <item>
      <title>Retrieval-based Question Answering with Passage Expansion Using a Knowledge Graph</title>
      <link>https://ym-xu.github.io/publications/lrec-coling-2024-kg-qa/</link>
      <pubDate>Wed, 01 May 2024 00:00:00 +0000</pubDate>
      <guid>https://ym-xu.github.io/publications/lrec-coling-2024-kg-qa/</guid>
      <description>A multimodal retriever that combines knowledge-graph entity features with dense text retrieval, improving open-domain QA precision on rare, entity-centric questions where dense retrievers fall short.</description>
    </item>
    <item>
      <title>Fine-grained label learning via siamese network for cross-modal information retrieval</title>
      <link>https://ym-xu.github.io/publications/iccs-fglab/</link>
      <pubDate>Sat, 08 Jun 2019 00:00:00 +0000</pubDate>
      <guid>https://ym-xu.github.io/publications/iccs-fglab/</guid>
      <description>Fine-grained labels capture the “hardness” of text–image pairs; a siamese network and a weighted pairwise loss exploit them to improve cross-modal retrieval on three benchmarks.</description>
    </item>
  </channel>
</rss>
