<?xml version="1.0" encoding="UTF-8"?>
<!-- RSS 2.0 feed for the "Research" blog category: one channel carrying one item. -->
<rss version="2.0"
     xmlns:atom="http://www.w3.org/2005/Atom"
     xmlns:content="http://purl.org/rss/1.0/modules/content/"
     xmlns:dc="http://purl.org/dc/elements/1.1/">
    <channel>
        <!-- Channel-level metadata. -->
        <title>Research — Mohammad Shaker</title>
        <description>Research articles by Mohammad Shaker</description>
        <link>https://mohammadshaker.com/en/blog/category/Research</link>
        <!-- Channel image; its title and link repeat the channel's own values. -->
        <image>
            <url>https://mohammadshaker.com/opengraph-image</url>
            <title>Research — Mohammad Shaker</title>
            <link>https://mohammadshaker.com/en/blog/category/Research</link>
        </image>
        <generator>RSS for Node</generator>
        <lastBuildDate>Wed, 15 Apr 2026 10:20:23 GMT</lastBuildDate>
        <!-- Self-reference: the URL of this feed document. -->
        <atom:link rel="self"
                   type="application/rss+xml"
                   href="https://mohammadshaker.com/en/blog/category/Research/feed.xml"/>
        <language>en-US</language>
        <!-- Feed entries. Text-bearing elements stay on one line so no indentation leaks into their values. -->
        <item>
            <title>GAIA's S-Curve of Agent Effectiveness</title>
            <description>Agent effectiveness on the GAIA benchmark follows an S-curve. Plain LLMs plateau early because the failure mode is execution, not reasoning. Tool use drives the steep middle phase. The top systems now reach ~91–92% — matching human-level performance — but the remaining errors live in the long tail of edge cases.</description>
            <link>https://mohammadshaker.com/en/blog/gaia-s-curve-of-agent-effectiveness</link>
            <!-- The guid is flagged as a permalink and carries the same URL as <link>. -->
            <guid isPermaLink="true">https://mohammadshaker.com/en/blog/gaia-s-curve-of-agent-effectiveness</guid>
            <category>AI</category>
            <category>agents</category>
            <category>benchmarks</category>
            <dc:creator>Mohammad Shaker</dc:creator>
            <pubDate>Fri, 13 Feb 2026 00:00:00 GMT</pubDate>
        </item>
    </channel>
</rss>