From 421a4c2bf42566ddcb185e6f8cac4ca46d79e8fd Mon Sep 17 00:00:00 2001 From: Sam Harshe Date: Wed, 27 May 2026 02:46:37 +0000 Subject: [PATCH] refactor(quotes): grouped sections, drop italics, InferenceX brand cleanup Refactors the /quotes page to mirror the ClusterMAX layout: - Group quotes into 6 sections (AI Industry Leaders, AI Companies and Startups, Infrastructure Providers, Inference Engines and Frameworks, Researchers and Academia, Community and Content Creators) rendered as separate cards with section headers - Move Jensen Huang's quote to the top - Replace 'InferenceMAX' with 'InferenceX' in quote prose - Strip superscripted trademark symbols from inside quotes (intro card also drops the inline TM) - Remove italic styling on quote text; use foreground color for better readability - Render spans in author title field (e.g. Stas Bekman's book title) Adds 'group' field + QuoteGroup type + QUOTE_GROUPS to quotes-data.ts. Existing QUOTES, CAROUSEL_ORGS, and CAROUSEL_LABELS exports remain compatible with intro-section.tsx and faq-data.ts consumers. --- .../src/components/quotes/quotes-content.tsx | 84 ++-- .../app/src/components/quotes/quotes-data.ts | 397 ++++++++++-------- 2 files changed, 273 insertions(+), 208 deletions(-) diff --git a/packages/app/src/components/quotes/quotes-content.tsx b/packages/app/src/components/quotes/quotes-content.tsx index 90052d6f..c7a13bab 100644 --- a/packages/app/src/components/quotes/quotes-content.tsx +++ b/packages/app/src/components/quotes/quotes-content.tsx @@ -5,9 +5,18 @@ import { Card } from '@/components/ui/card'; import { ExternalLinkIcon } from '@/components/ui/external-link-icon'; import { CompanyLogo, highlightBrand } from './quote-utils'; -import { QUOTES } from './quotes-data'; +import { QUOTES, QUOTE_GROUPS, type Quote, type QuoteGroup } from './quotes-data'; -/** Deduplicated logos from all quote orgs. */ +/** Render `...` spans inside an otherwise plain string as real . */ +function renderEm(text: string) { + if (!text.includes('')) return text; + return text.split(/(.*?<\/em>)/gu).map((part, i) => { + const m = part.match(/^(.*?)<\/em>$/u); + return m ? {m[1]} : part; + }); +} + +/** Deduplicated logos from all quote orgs, ordered by first appearance. */ const orgLogos: { org: string; logo: string }[] = []; const seenOrgs = new Set(); for (const q of QUOTES) { @@ -17,24 +26,10 @@ for (const q of QUOTES) { } } -function QuoteCard({ - text, - name, - title, - org, - logo, - link, -}: { - text: string; - name: string; - title: string; - org: string; - logo?: string; - link?: string; -}) { - const content = ( +function QuoteCard({ text, name, title, org, logo, link }: Quote) { + return (
-

+

“{highlightBrand(text)}”

@@ -54,28 +49,32 @@ function QuoteCard({ ) : ( {name} )} - {title} + {renderEm(title)}
); - - return content; } export function QuotesContent() { + // Group quotes by their group field, preserving in-array order within each group. + const grouped = new Map(); + for (const g of QUOTE_GROUPS) grouped.set(g, []); + for (const q of QUOTES) grouped.get(q.group)?.push(q); + return (
+ {/* Intro card */}

- InferenceX™ Initiative Supporters + InferenceX Initiative Supporters

- InferenceX™ initiative is supported by many major buyers of compute and - prominent members of the ML community including those from OpenAI, Microsoft, vLLM, - PyTorch Foundation, Oracle and more. + InferenceX is supported by many major buyers of compute and prominent members of the + ML community including those from OpenAI, Microsoft, vLLM, PyTorch Foundation, Oracle + and more.

{orgLogos.map(({ org, logo }) => ( @@ -90,23 +89,26 @@ export function QuotesContent() {
))}
-
-
- {QUOTES.map((quote) => ( - - ))} -
-
+ + {/* Grouped quote cards */} + {QUOTE_GROUPS.map((group) => { + const quotes = grouped.get(group) ?? []; + if (quotes.length === 0) return null; + return ( +
+ +

{group}

+
+ {quotes.map((q) => ( + + ))} +
+
+
+ ); + })}
); diff --git a/packages/app/src/components/quotes/quotes-data.ts b/packages/app/src/components/quotes/quotes-data.ts index 757b2fd9..139e1dfc 100644 --- a/packages/app/src/components/quotes/quotes-data.ts +++ b/packages/app/src/components/quotes/quotes-data.ts @@ -1,333 +1,396 @@ +export type QuoteGroup = + | 'AI Industry Leaders' + | 'AI Companies and Startups' + | 'Infrastructure Providers' + | 'Inference Engines and Frameworks' + | 'Researchers and Academia' + | 'Community and Content Creators'; + +/** Order in which groups render on /quotes. */ +export const QUOTE_GROUPS: QuoteGroup[] = [ + 'AI Industry Leaders', + 'AI Companies and Startups', + 'Infrastructure Providers', + 'Inference Engines and Frameworks', + 'Researchers and Academia', + 'Community and Content Creators', +]; + export interface Quote { text: string; name: string; + group: QuoteGroup; title: string; org: string; logo?: string; link?: string; + /** True when the logo asset is a photograph (or any image whose colors are + * meaningful), so dark-mode rendering should skip the default `invert`. */ + logoIsPhoto?: boolean; } export const QUOTES: Quote[] = [ { - text: "As we build systems at unprecedented scale, it's critical for the ML community to have open, transparent benchmarks that reflect how inference really performs across hardware and software. InferenceMAX\u2122's head-to-head benchmarks cut through the noise and provide a living picture of token throughput, performance per dollar, and tokens per Megawatt. This kind of open source effort strengthens the entire ecosystem and helps everyone, from researchers to operators of frontier datacenters, make smarter decisions.", + text: "Inference demand is growing exponentially, driven by long-context reasoning. NVIDIA Grace Blackwell NVL72 was invented for this new era of thinking AI. NVIDIA is meeting that demand through constant hardware and software innovation to enable what's next in AI. By benchmarking frequently, InferenceX gives the industry a transparent view of LLM inference performance on real-world workloads. The results are clear: Grace Blackwell NVL72 with TRT-LLM and Dynamo delivers unmatched performance per dollar and per megawatt\u2014powering the most productive and cost-effective AI factories in the world.", + name: 'Jensen Huang', + group: 'AI Industry Leaders', + title: 'Founder & CEO, NVIDIA', + org: 'NVIDIA', + logo: 'nvidia.svg', + link: 'https://www.linkedin.com/in/jenhsunhuang/', + }, + { + text: "As we build systems at unprecedented scale, it's critical for the ML community to have open, transparent benchmarks that reflect how inference really performs across hardware and software. InferenceX's head-to-head benchmarks cut through the noise and provide a living picture of token throughput, performance per dollar, and tokens per Megawatt. This kind of open source effort strengthens the entire ecosystem and helps everyone, from researchers to operators of frontier datacenters, make smarter decisions.", name: 'Peter Hoeschele', + group: 'AI Industry Leaders', title: 'VP of Infrastructure and Industrial Compute, OpenAI Stargate', org: 'OpenAI', logo: 'openai.svg', link: 'https://www.linkedin.com/in/peter-hoeschele/', }, { - text: 'Open collaboration is driving the next era of AI innovation. The open-source InferenceMAX benchmark gives the community transparent, nightly results that inspire trust and accelerate progress. It highlights the competitive TCO performance of our AMD Instinct MI300, MI325X, and MI355X GPUs across diverse workloads, underscoring the strength of our platform and our commitment to giving developers real-time visibility into our software progress.', + text: 'Open collaboration is driving the next era of AI innovation. The open-source InferenceX benchmark gives the community transparent, nightly results that inspire trust and accelerate progress. It highlights the competitive TCO performance of our AMD Instinct MI300, MI325X, and MI355X GPUs across diverse workloads, underscoring the strength of our platform and our commitment to giving developers real-time visibility into our software progress.', name: 'Dr. Lisa Su', + group: 'AI Industry Leaders', title: 'Chair and CEO, AMD', org: 'AMD', logo: 'amd.svg', link: 'https://www.linkedin.com/in/lisasu-amd/', }, { - text: "Inference demand is growing exponentially, driven by long-context reasoning. NVIDIA Grace Blackwell NVL72 was invented for this new era of thinking AI. NVIDIA is meeting that demand through constant hardware and software innovation to enable what's next in AI. By benchmarking frequently, InferenceMAX\u2122 gives the industry a transparent view of LLM inference performance on real-world workloads. The results are clear: Grace Blackwell NVL72 with TRT-LLM and Dynamo delivers unmatched performance per dollar and per megawatt\u2014powering the most productive and cost-effective AI factories in the world.", - name: 'Jensen Huang', - title: 'Founder & CEO, NVIDIA', - org: 'NVIDIA', - logo: 'nvidia.svg', - link: 'https://www.linkedin.com/in/jenhsunhuang/', - }, - { - text: "Speed is the moat. InferenceMAX\u2122's nightly benchmarks match the speed of improvement of the AMD software stack. It's fantastic to see AMD's MI300, MI325, and MI355 GPUs performing so well across diverse workloads and interactivity levels.", + text: "Speed is the moat. InferenceX's nightly benchmarks match the speed of improvement of the AMD software stack. It's fantastic to see AMD's MI300, MI325, and MI355 GPUs performing so well across diverse workloads and interactivity levels.", name: 'Anush Elangovan', + group: 'AI Industry Leaders', title: 'VP GPU Software, AMD', org: 'AMD', logo: 'amd.svg', link: 'https://www.linkedin.com/in/anushelangovan/', }, { - text: 'InferenceMAX\u2122 highlights workloads that the ML community cares about. At NVIDIA, we welcome these comparisons because they underscore the advantage of our full-stack approach\u2014from GPUs hardware to NVLink networking to NVL72 Rack Scale to Dynamo disaggregated serving that consistently delivers industry-leading inference performance and ROI at scale.', + text: 'InferenceX highlights workloads that the ML community cares about. At NVIDIA, we welcome these comparisons because they underscore the advantage of our full-stack approach\u2014from GPUs hardware to NVLink networking to NVL72 Rack Scale to Dynamo disaggregated serving that consistently delivers industry-leading inference performance and ROI at scale.', name: 'Ian Buck', + group: 'AI Industry Leaders', title: 'VP & GM, Hyperscale, NVIDIA & Inventor of CUDA', org: 'NVIDIA', logo: 'nvidia.svg', link: 'https://www.linkedin.com/in/ian-buck-19201315/', }, { - text: "InferenceMAX\u2122's nightly results highlight the rapid pace of progress in the AMD software stack. It's exciting to witness the birth of an open project that provides a tied feedback loop between what the software team works on here at AMD and how it affects specific ML use cases across our MI300, MI325, and MI355 GPUs. I'm looking forward to see what's next for InferenceMAX and to showcase what the AMD platform can do. AMD GPUs will continue to get faster every week.", + text: "InferenceX's nightly results highlight the rapid pace of progress in the AMD software stack. It's exciting to witness the birth of an open project that provides a tied feedback loop between what the software team works on here at AMD and how it affects specific ML use cases across our MI300, MI325, and MI355 GPUs. I'm looking forward to see what's next for InferenceX and to showcase what the AMD platform can do. AMD GPUs will continue to get faster every week.", name: 'Quentin Colombet', + group: 'AI Industry Leaders', title: 'Senior Director, AMD, Ex-Brium CEO', org: 'AMD', logo: 'amd.svg', link: 'https://www.linkedin.com/in/quentincolombet/', }, { - text: "Our mission at Azure is to give customers the most performant, efficient, and cost-effective cloud for AI. SemiAnalysis InferenceMAX\u2122 supports that mission by providing transparent, reproducible benchmarks that track inference performance across GPUs and software stacks under realistic workloads. This continuous data on throughput, efficiency, and cost per watt strengthens our ability to tune Azure's inference platform for scale, helping customers build with confidence on Microsoft Cloud.", + text: "Our mission at Azure is to give customers the most performant, efficient, and cost-effective cloud for AI. SemiAnalysis InferenceX supports that mission by providing transparent, reproducible benchmarks that track inference performance across GPUs and software stacks under realistic workloads. This continuous data on throughput, efficiency, and cost per watt strengthens our ability to tune Azure's inference platform for scale, helping customers build with confidence on Microsoft Cloud.", name: 'Scott Guthrie', + group: 'AI Industry Leaders', title: 'Executive Vice President, Microsoft Cloud & AI', org: 'Microsoft', logo: 'microsoft.svg', link: 'https://www.linkedin.com/in/guthriescott/', }, { - text: 'At Microsoft, delivering the best inference performance and economics for our customers at scale requires a deep understanding of how AI models interact with real-world hardware and software. Open-source, reproducible benchmarks, like InferenceMAX\u2122, are essential for generating transparent insights into throughput, efficiency, and cost under realistic workloads. These continuous signals help guide our platform strategy, enabling us to optimize the entire stack from silicon, to systems, to software, so that every layer works together to unlock the full potential of our infrastructure.', + text: 'At Microsoft, delivering the best inference performance and economics for our customers at scale requires a deep understanding of how AI models interact with real-world hardware and software. Open-source, reproducible benchmarks, like InferenceX, are essential for generating transparent insights into throughput, efficiency, and cost under realistic workloads. These continuous signals help guide our platform strategy, enabling us to optimize the entire stack from silicon, to systems, to software, so that every layer works together to unlock the full potential of our infrastructure.', name: 'Saurabh Dighe', + group: 'AI Industry Leaders', title: 'Corporate Vice President, Azure Strategic Planning & Architecture', org: 'Microsoft', logo: 'microsoft.svg', link: 'https://www.linkedin.com/in/saurabhdighe/', }, { - text: 'The gap between theoretical peak and real-world inference throughput is often determined by systems software: inference engine, distributed strategies, and low-level kernels. InferenceMAX\u2122 is valuable because it benchmarks the latest software showing how optimizations like FP4, MTP, speculative decode, and wide-EP actually play out across various hardware. Open, reproducible results like these help the whole community move faster.', + text: 'Oracle Cloud Infrastructure is built to give frontier labs & enterprises flexibility and choice, with many GPU SKUs available for AI at scale. InferenceX strengthens that mission by delivering open source, reproducible benchmarks that reflect real-world performance, efficiency, and cost on the latest hardware and software. With this transparency, customers can confidently select the platforms that best align with their AI strategies.', + name: 'Jay Jackson', + group: 'AI Industry Leaders', + title: 'Vice President, Oracle Cloud Infrastructure', + org: 'Oracle', + logo: 'oracle.svg', + link: 'https://www.linkedin.com/in/jayejackson/', + }, + { + text: 'Supermicro is excited about the launch of InferenceX, the SemiAnalysis benchmarking system that measures real-world throughput, performance per dollar, and energy efficiency. This open-source tool provides reproducible benchmarks running on the latest hardware and software enabling AI labs and enterprises to choose the best platforms at scale.', + name: 'Charles Liang', + group: 'AI Industry Leaders', + title: 'Founder & CEO, Supermicro', + org: 'Supermicro', + logo: 'supermicro.svg', + link: 'https://en.wikipedia.org/wiki/Charles_Liang', + }, + { + text: 'PyTorch was built on the belief that open tools accelerate the entire AI ecosystem. InferenceX embodies that same philosophy\u2014open, reproducible, and vendor-neutral benchmarks that give the community real data on real hardware. As inference workloads scale to serve billions of users, having a continuously updated, transparent performance baseline across accelerators is essential for practitioners and platform teams making critical infrastructure decisions.', + name: 'Joseph Spisak', + group: 'AI Industry Leaders', + title: 'Product Director, Meta Super Intelligence Lab', + org: 'Meta Superintelligence Labs', + logo: 'meta.svg', + link: 'https://www.linkedin.com/in/jspisak', + }, + { + text: 'InferenceX demonstrates how an open ecosystem can operate in practice. Many leading inference stacks such as vLLM, SGLang, and TensorRT-LLM are built on PyTorch, and benchmarks like this show how innovations across kernels, runtimes, and frameworks translate into measurable performance on a range of hardware platforms, including NVIDIA and AMD GPUs. By being open source and running nightly, InferenceX offers a transparent, community-driven approach to tracking progress and providing PyTorch users with data-driven insights.', + name: 'Matt White', + group: 'AI Industry Leaders', + title: 'Executive Director, PyTorch Foundation', + org: 'PyTorch Foundation', + logo: 'pytorch.svg', + link: 'https://www.linkedin.com/in/mdwdata/', + }, + { + text: 'Hugging Face exists to make AI open and accessible to everyone. InferenceX extends that mission to ai chip performance, pulling models directly from the Hub and benchmarking them across every major accelerator, continuously and transparently. When the community can see exactly how frontier open models perform on real hardware in real time, it raises the bar for the entire ecosystem.', + name: 'Clement Delangue', + group: 'AI Industry Leaders', + title: 'CEO, Hugging Face', + org: 'Hugging Face', + logo: 'huggingface.svg', + link: 'https://www.linkedin.com/in/cdelangue/', + }, + { + text: 'The gap between theoretical peak and real-world inference throughput is often determined by systems software: inference engine, distributed strategies, and low-level kernels. InferenceX is valuable because it benchmarks the latest software showing how optimizations like FP4, MTP, speculative decode, and wide-EP actually play out across various hardware. Open, reproducible results like these help the whole community move faster.', name: 'Tri Dao', + group: 'AI Companies and Startups', title: 'Chief Scientist of Together AI & Inventor of Flash Attention', org: 'Together AI', logo: 'together-ai.svg', link: 'https://tridao.me/', }, { - text: "The industry needs many public, reproducible benchmarks of inference performance. We're excited to collaborate with InferenceMAX\u2122 from the vLLM team. More diverse workloads and scenarios that everyone can trust and reference will help the ecosystem move forward. Fair, transparent measurements drive progress across every layer of the stack, from model architectures to inference engines to hardware.", - name: 'Simon Mo', - title: 'vLLM Project Co-Lead', - org: 'vLLM', - logo: 'vllm.svg', - link: 'https://www.linkedin.com/in/simon-mo-834217162/', - }, - { - text: 'The benchmark is good sir', - name: 'Michael Goin', - title: 'vLLM Core Maintainer & Senior Principal Engineer at Red Hat', - org: 'Red Hat', - logo: 'redhat.svg', - link: 'https://www.linkedin.com/in/michael-goin/', + text: "At Prime Intellect, we're pushing the frontier of AI post-training and open research. InferenceX complements that work by providing open, reproducible benchmarks that track real-world inference performance across hardware and software stacks as they evolve. For researchers like us, having transparent, continuously updated data on throughput and efficiency means we can focus on building better models instead of second-guessing infrastructure. This is the kind of community-driven effort that accelerates progress for everyone.", + name: 'Jack Min Ong', + group: 'AI Companies and Startups', + title: 'Researcher, Prime Intellect', + org: 'Prime Intellect', + logo: 'prime-intellect.svg', + link: 'https://www.linkedin.com/in/jackminong/', }, { - text: 'InferenceMAX\u2122 benchmark is pogchamp & W in chat', - name: 'Kaichao You', - title: 'vLLM Project Co-Lead & PhD Student @ Tsinghua University', - org: 'vLLM', - logo: 'vllm.svg', - link: 'https://www.linkedin.com/in/youkaichao/', + text: 'We use InferenceX benchmarks ourselves as one of the key datapoints to help us make infrastructure decisions at Adaptive ML. Inference performance is critical for large-scale RL workloads, where fast generation directly impacts time to market & revenue for our customers. InferenceX benchmarks the full stack continuously \u2014 engine, model, software, and hardware across rack-scale systems like GB300 NVL72. This is the kind of open, transparent, reproducible signal the ecosystem has been missing.', + name: 'Julien Launay', + group: 'AI Companies and Startups', + title: 'Co-Founder & CEO, Adaptive ML', + org: 'Adaptive ML', + logo: 'adaptive-ml.svg', + link: 'https://www.linkedin.com/in/julienlaunay/', }, { - text: 'Arguably the most important OSS benchmark suite out today InferenceX', - name: 'Mark Saroufim', - title: 'GPU Mode Founder & Meta PyTorch Engineer', - org: 'GPU Mode', - logo: 'gpu-mode.png', - link: 'https://x.com/marksaroufim', + text: "Our customers ship AI to production using frontier open-source models \u2014 and at scale, every token per second and every dollar per million tokens matters. InferenceX gives the ecosystem something we've always needed: an objective, open benchmark that tracks real inference performance continuously across hardware such as GB300 NVL72, GB200 NVL72, H100 & soon Rubin & TPU & Trainium. Very helpful in allowing the wider community to understand the landscape and creating a clear taxonomy around performance.", + name: 'Alex Ker', + group: 'AI Companies and Startups', + title: 'Engineer, Baseten', + org: 'Baseten', + logo: 'baseten.svg', + link: 'https://www.linkedin.com/in/alex-ker/', }, { - text: 'InferenceMAX\u2122 demonstrates how an open ecosystem can operate in practice. Many leading inference stacks such as vLLM, SGLang, and TensorRT-LLM are built on PyTorch, and benchmarks like this show how innovations across kernels, runtimes, and frameworks translate into measurable performance on a range of hardware platforms, including NVIDIA and AMD GPUs. By being open source and running nightly, InferenceMAX\u2122 offers a transparent, community-driven approach to tracking progress and providing PyTorch users with data-driven insights.', - name: 'Matt White', - title: 'Executive Director, PyTorch Foundation', - org: 'PyTorch Foundation', - logo: 'pytorch.svg', - link: 'https://www.linkedin.com/in/mdwdata/', + text: "At Periodic Labs, we're building AI scientists that turn compute into real-world scientific discoveries. That means we care deeply about what each GPU actually delivers. InferenceX provides open, reproducible benchmarks that cut through spec sheets and show real-world throughput, efficiency, and cost across the latest hardware and software stacks. Having done inference across thousands of GPUs, I can say this kind of transparent, continuously updated data is exactly what practitioners need to make smart infrastructure decisions.", + name: 'Xander Dunn', + group: 'AI Companies and Startups', + title: 'Founding Team, Periodic Labs', + org: 'Periodic Labs', + logo: 'periodic-labs.png', + link: 'https://www.linkedin.com/in/xanderdunn/', }, { - text: 'Oracle Cloud Infrastructure is built to give frontier labs & enterprises flexibility and choice, with many GPU SKUs available for AI at scale. InferenceMAX strengthens that mission by delivering open source, reproducible benchmarks that reflect real-world performance, efficiency, and cost on the latest hardware and software. With this transparency, customers can confidently select the platforms that best align with their AI strategies.', - name: 'Jay Jackson', - title: 'Vice President, Oracle Cloud Infrastructure', - org: 'Oracle', - logo: 'oracle.svg', - link: 'https://www.linkedin.com/in/jayejackson/', + text: 'InferenceX has been useful for us even if Dylan Patel is a nice little guy with feelings', + name: 'Matthew Leavitt', + group: 'AI Companies and Startups', + title: 'Chief Science Officer, DatologyAI', + org: 'DatologyAI', + logo: 'datologyai.svg', + link: 'https://www.linkedin.com/in/matthew-leavitt-6797703b/', }, { - text: 'InferenceMAX\u2122 raises the bar by delivering open, transparent benchmarks that track how inference really performs across the latest GPUs and software stacks. For customers, having reproducible data that measures real world tokens per dollar & tokens per watt, turns abstract marketing numbers into actionable insight. At CoreWeave, we support this effort because it brings clarity to a fast-moving space and helps the entire ecosystem build with confidence.', + text: 'InferenceX raises the bar by delivering open, transparent benchmarks that track how inference really performs across the latest GPUs and software stacks. For customers, having reproducible data that measures real world tokens per dollar & tokens per watt, turns abstract marketing numbers into actionable insight. At CoreWeave, we support this effort because it brings clarity to a fast-moving space and helps the entire ecosystem build with confidence.', name: 'Peter Salanki', + group: 'Infrastructure Providers', title: 'CTO, CoreWeave', org: 'CoreWeave', logo: 'coreweave.svg', link: 'https://www.linkedin.com/in/salanki/', }, { - text: "InferenceMAX\u2122 sets a new standard by providing open, transparent benchmarks that reveal how inference performs across today's leading GPUs and software stacks. With reproducible data measuring real-world tokens per dollar and tokens per watt, customers can move beyond marketing claims to actionable insights. For us at Nebius, as a full-stack AI cloud provider, this initiative helps us build our inference platform with confidence and ensure we are aligned with the ecosystem.", + text: "InferenceX sets a new standard by providing open, transparent benchmarks that reveal how inference performs across today's leading GPUs and software stacks. With reproducible data measuring real-world tokens per dollar and tokens per watt, customers can move beyond marketing claims to actionable insights. For us at Nebius, as a full-stack AI cloud provider, this initiative helps us build our inference platform with confidence and ensure we are aligned with the ecosystem.", name: 'Roman Chernin', + group: 'Infrastructure Providers', title: 'Co-Founder & Chief Business Officer, Nebius', org: 'Nebius', logo: 'nebius.svg', link: 'https://www.linkedin.com/in/roman-chernin-1b4b8758/', }, { - text: "At Crusoe, we believe being a great partner means empowering our customers with choice and clarity. That's why we're proud to support InferenceMAX\u2122, which provides the entire AI community with open-source, reproducible benchmarks for the latest hardware. By delivering transparent, real-world data on throughput, efficiency, and cost, InferenceMAX\u2122 cuts through the hype and helps our customers confidently select the very best platform for their unique workloads.", + text: "At Crusoe, we believe being a great partner means empowering our customers with choice and clarity. That's why we're proud to support InferenceX, which provides the entire AI community with open-source, reproducible benchmarks for the latest hardware. By delivering transparent, real-world data on throughput, efficiency, and cost, InferenceX cuts through the hype and helps our customers confidently select the very best platform for their unique workloads.", name: 'Chase Lochmiller', + group: 'Infrastructure Providers', title: 'Co-Founder & CEO, Crusoe', org: 'Crusoe', logo: 'crusoe.svg', link: 'https://www.linkedin.com/in/chase-lochmiller-604483341/', }, { - text: 'Supermicro is excited about the launch of InferenceMAX\u2122, the SemiAnalysis benchmarking system that measures real-world throughput, performance per dollar, and energy efficiency. This open-source tool provides reproducible benchmarks running on the latest hardware and software enabling AI labs and enterprises to choose the best platforms at scale.', - name: 'Charles Liang', - title: 'Founder & CEO, Supermicro', - org: 'Supermicro', - logo: 'supermicro.svg', - link: 'https://en.wikipedia.org/wiki/Charles_Liang', - }, - { - text: "At TensorWave, we're building a next-generation cloud on AMD GPUs because we believe innovation thrives when customers have strong alternatives. InferenceMAX\u2122 reinforces that vision by providing open source, reproducible benchmarks that track throughput, efficiency, and cost across the latest hardware and software. By cutting through synthetic numbers and highlighting real-world inference performance, it helps customers see the full potential of AMD platforms for AI at scale.", + text: "At TensorWave, we're building a next-generation cloud on AMD GPUs because we believe innovation thrives when customers have strong alternatives. InferenceX reinforces that vision by providing open source, reproducible benchmarks that track throughput, efficiency, and cost across the latest hardware and software. By cutting through synthetic numbers and highlighting real-world inference performance, it helps customers see the full potential of AMD platforms for AI at scale.", name: 'Darrick Horton', + group: 'Infrastructure Providers', title: 'CEO, TensorWave', org: 'TensorWave', logo: 'tensorwave.svg', link: 'https://www.linkedin.com/in/darrick-horton/', }, { - text: 'Vultr is committed to providing an open ecosystem that gives developers freedom in how they build and scale AI \u2014 whether on NVIDIA or AMD GPUs. With InferenceMAX\u2122, customers gain open, reproducible benchmarks that deliver clear insights into throughput, efficiency, and cost across cutting-edge hardware and software. By showcasing real-world performance, we empower teams to confidently choose the right platform for their AI workloads.', + text: 'Vultr is committed to providing an open ecosystem that gives developers freedom in how they build and scale AI \u2014 whether on NVIDIA or AMD GPUs. With InferenceX, customers gain open, reproducible benchmarks that deliver clear insights into throughput, efficiency, and cost across cutting-edge hardware and software. By showcasing real-world performance, we empower teams to confidently choose the right platform for their AI workloads.', name: 'Nathan Goulding', + group: 'Infrastructure Providers', title: 'SVP of Engineering, Vultr', org: 'Vultr', logo: 'vultr.svg', link: 'https://www.linkedin.com/in/nathangoulding/', }, { - text: "At Prime Intellect, we're pushing the frontier of AI post-training and open research. InferenceX\u2122 complements that work by providing open, reproducible benchmarks that track real-world inference performance across hardware and software stacks as they evolve. For researchers like us, having transparent, continuously updated data on throughput and efficiency means we can focus on building better models instead of second-guessing infrastructure. This is the kind of community-driven effort that accelerates progress for everyone.", - name: 'Jack Min Ong', - title: 'Researcher, Prime Intellect', - org: 'Prime Intellect', - logo: 'prime-intellect.svg', - link: 'https://www.linkedin.com/in/jackminong/', - }, - { - text: "At Firmus, we're building the most energy-efficient AI Factories in the world \u2014 and efficiency only matters if you can measure it. InferenceX\u2122 gives the industry open, reproducible benchmarks that track real-world throughput, cost, and performance per watt across the latest GPU platforms and software stacks. As we scale gigawatts of renewable-powered AI infrastructure across Asia-Pacific & Australia, this kind of transparent, continuously updated data helps the entire ecosystem understand what these systems actually deliver.", + text: "At Firmus, we're building the most energy-efficient AI Factories in the world \u2014 and efficiency only matters if you can measure it. InferenceX gives the industry open, reproducible benchmarks that track real-world throughput, cost, and performance per watt across the latest GPU platforms and software stacks. As we scale gigawatts of renewable-powered AI infrastructure across Asia-Pacific & Australia, this kind of transparent, continuously updated data helps the entire ecosystem understand what these systems actually deliver.", name: 'Tim Rosenfield', + group: 'Infrastructure Providers', title: 'Co-Founder & Co-CEO, Firmus', org: 'Firmus', logo: 'firmus.svg', link: 'https://www.linkedin.com/in/tim-rosenfield-a735a4112', }, { - text: 'InferenceMAX has been useful for us even if Dylan Patel is a nice little guy with feelings', - name: 'Matthew Leavitt', - title: 'Chief Science Officer, DatologyAI', - org: 'DatologyAI', - logo: 'datologyai.svg', - link: 'https://www.linkedin.com/in/matthew-leavitt-6797703b/', - }, - { - text: "InferenceX\u2122 provides the open source measurements the community needs \u2014 nightly results across real workloads, real hardware, and real software stacks. As someone who has written extensively about the gap between theoretical and actual system performance, I'm glad to see a project that makes that gap visible and trackable for everyone.", - name: 'Stas Bekman', - title: 'Developer & Author of Machine Learning Engineering Open Book (17.5K+ ⭐)', - org: 'Stas Bekman', - link: 'https://github.com/stas00/ml-engineering', - }, - { - text: "SGLang is the inference engine behind many production inference factories such as xAI's Grok, earning its recognition as THE Inference King. At scale, we see firsthand how much performance varies across hardware, models, and configurations. InferenceX\u2122 benchmarks SGLang across every major GPU platform nightly, capturing that variance in a way no other benchmark does, continuously, & reproducibly.", - name: 'Mingyi Lu', - title: 'SGLang Product Lead', - org: 'SGLang', - logo: 'sglang.webp', - link: 'https://www.linkedin.com/in/mingyi-lu/', - }, - { - text: 'We use InferenceX benchmarks ourselves as one of the key datapoints to help us make infrastructure decisions at Adaptive ML. Inference performance is critical for large-scale RL workloads, where fast generation directly impacts time to market & revenue for our customers. InferenceX\u2122 benchmarks the full stack continuously \u2014 engine, model, software, and hardware across rack-scale systems like GB300 NVL72. This is the kind of open, transparent, reproducible signal the ecosystem has been missing.', - name: 'Julien Launay', - title: 'Co-Founder & CEO, Adaptive ML', - org: 'Adaptive ML', - logo: 'adaptive-ml.svg', - link: 'https://www.linkedin.com/in/julienlaunay/', - }, - { - text: "Our customers ship AI to production using frontier open-source models \u2014 and at scale, every token per second and every dollar per million tokens matters. InferenceX\u2122 gives the ecosystem something we've always needed: an objective, open benchmark that tracks real inference performance continuously across hardware such as GB300 NVL72, GB200 NVL72, H100 & soon Rubin & TPU & Trainium. Very helpful in allowing the wider community to understand the landscape and creating a clear taxonomy around performance.", - name: 'Alex Ker', - title: 'Engineer, Baseten', - org: 'Baseten', - logo: 'baseten.svg', - link: 'https://www.linkedin.com/in/alex-ker/', - }, - { - text: 'We founded Verda to give AI engineers frictionless access to cutting-edge compute without gatekeeping. InferenceX supports this mission by giving AI builders open, reproducible benchmarks that show what GPUs actually deliver under real inference workloads. We want our customers to see transparent, continuously updated performance data, without marketing fluff. InferenceX provides exactly that.', - name: 'Ruben Bryon', - title: 'Founder & CEO, Verda', - org: 'Verda', - logo: 'verda.svg', - link: 'https://www.linkedin.com/in/ruben-bryon/', - }, - { - text: "InferenceX\u2122 ensembles precisely that \u2014 open, reproducible benchmarks that are continuously updated as xPU accelerators (GPUs/TPUs/LPUs), memory, storage, and software stacks evolve. I'm excited to see the InferenceX benchmarking roadmap include agentic coding workloads that stress CPU KV Cache offloading & soon NVMe KV Cache offloading from xPUs. As WEKA helps scale the Memory Wall by building the KV Cache infrastructure that feeds these xPUs, having this level of visibility into inference performance helps the entire ecosystem make smarter decisions about where to invest.", + text: "InferenceX ensembles precisely that \u2014 open, reproducible benchmarks that are continuously updated as xPU accelerators (GPUs/TPUs/LPUs), memory, storage, and software stacks evolve. I'm excited to see the InferenceX benchmarking roadmap include agentic coding workloads that stress CPU KV Cache offloading & soon NVMe KV Cache offloading from xPUs. As WEKA helps scale the Memory Wall by building the KV Cache infrastructure that feeds these xPUs, having this level of visibility into inference performance helps the entire ecosystem make smarter decisions about where to invest.", name: 'Val Bercovici', + group: 'Infrastructure Providers', title: 'Chief AI Officer, WEKA', org: 'WEKA', logo: 'weka.svg', link: 'https://www.linkedin.com/in/valentinbercovici/', }, { - text: 'Voltage Park is built to give AI teams fast, affordable access to GPU compute at scale. InferenceX\u2122 supports that goal by providing open, reproducible benchmarks that show how inference actually performs across the latest hardware and software stacks. With transparent, continuously updated data on throughput, efficiency, and cost, teams can make confident compute decisions instead of guessing. We\u2019re happy to back an effort that brings this level of clarity to the ecosystem.', + text: 'Voltage Park is built to give AI teams fast, affordable access to GPU compute at scale. InferenceX supports that goal by providing open, reproducible benchmarks that show how inference actually performs across the latest hardware and software stacks. With transparent, continuously updated data on throughput, efficiency, and cost, teams can make confident compute decisions instead of guessing. We\u2019re happy to back an effort that brings this level of clarity to the ecosystem.', name: 'Saurabh Giri', + group: 'Infrastructure Providers', title: 'CTO, Voltage Park', org: 'Voltage Park', logo: 'voltage-park.svg', link: 'https://www.linkedin.com/in/saurabh-giri/', }, { - text: "At Periodic Labs, we're building AI scientists that turn compute into real-world scientific discoveries. That means we care deeply about what each GPU actually delivers. InferenceX\u2122 provides open, reproducible benchmarks that cut through spec sheets and show real-world throughput, efficiency, and cost across the latest hardware and software stacks. Having done inference across thousands of GPUs, I can say this kind of transparent, continuously updated data is exactly what practitioners need to make smart infrastructure decisions.", - name: 'Xander Dunn', - title: 'Founding Team, Periodic Labs', - org: 'Periodic Labs', - logo: 'periodic-labs.png', - link: 'https://www.linkedin.com/in/xanderdunn/', + text: 'Lambda exists to make GPU compute simple and accessible for AI teams, from individual researchers to the largest labs. InferenceX aligns with that mission by giving the community open, reproducible benchmarks that measure what actually matters: real-world throughput, cost efficiency, and performance per watt across the latest hardware and software stacks. Teams can make informed compute choices grounded in transparent, continuously updated data.', + name: 'Stephen Balaban', + group: 'Infrastructure Providers', + title: 'Co-founder and CEO, Lambda', + org: 'Lambda', + logo: 'lambda.svg', + link: 'https://www.linkedin.com/in/sbalaban/', }, { - text: 'For researchers working on inference optimizations, understanding how new techniques interact across the software and hardware stack is critical yet incredibly hard to measure. InferenceX\u2122 provides much-needed insights into how inference performance evolves across major hardware platforms, moving the field forward with open, reproducible data that makes the gaps and progress visible.', - name: 'Simon Guo', - title: 'PhD Student, Stanford CS', - org: 'Stanford', - logo: 'stanford.svg', - link: 'https://simonguo.tech/', + text: 'At GMI Cloud, we believe inference has become the center of AI value creation. SemiAnalysis has done something the industry has long needed with InferenceX\u2014they\u2019ve turned inference from a black box into a continuously measured, real-world system. By benchmarking not just hardware, but the full stack\u2014models, runtimes, and distributed systems\u2014InferenceX reflects how AI actually runs in production, not how it\u2019s marketed.', + name: 'Alex Yeh', + group: 'Infrastructure Providers', + title: 'Founder & CEO, GMI Cloud', + org: 'GMI Cloud', + logo: 'gmi-cloud.svg', + link: 'https://www.linkedin.com/in/gmi-yeh', + }, + { + text: 'We founded Verda to give AI engineers frictionless access to cutting-edge compute without gatekeeping. InferenceX supports this mission by giving AI builders open, reproducible benchmarks that show what GPUs actually deliver under real inference workloads. We want our customers to see transparent, continuously updated performance data, without marketing fluff. InferenceX provides exactly that.', + name: 'Ruben Bryon', + group: 'Infrastructure Providers', + title: 'Founder & CEO, Verda', + org: 'Verda', + logo: 'verda.svg', + link: 'https://www.linkedin.com/in/ruben-bryon/', }, { text: 'As AI infrastructure scales globally, no single vendor or region can define the benchmarks that matter for everyone. InferenceX is an important step toward a shared, transparent view of inference performance and TCO, enabling more rational investments for sovereign AI Cloud operators, as well as healthier competition, and ultimately more accessible AI capacity worldwide.', name: 'Talal M. Al Kaissi', + group: 'Infrastructure Providers', title: 'CEO', org: 'Core42', logo: 'core42.webp', }, { - text: 'PyTorch was built on the belief that open tools accelerate the entire AI ecosystem. InferenceX\u2122 embodies that same philosophy\u2014open, reproducible, and vendor-neutral benchmarks that give the community real data on real hardware. As inference workloads scale to serve billions of users, having a continuously updated, transparent performance baseline across accelerators is essential for practitioners and platform teams making critical infrastructure decisions.', - name: 'Joseph Spisak', - title: 'Product Director, Meta Super Intelligence Lab', - org: 'Meta Superintelligence Labs', - logo: 'meta.svg', - link: 'https://www.linkedin.com/in/jspisak', + text: "The industry needs many public, reproducible benchmarks of inference performance. We're excited to collaborate with InferenceX from the vLLM team. More diverse workloads and scenarios that everyone can trust and reference will help the ecosystem move forward. Fair, transparent measurements drive progress across every layer of the stack, from model architectures to inference engines to hardware.", + name: 'Simon Mo', + group: 'Inference Engines and Frameworks', + title: 'vLLM Project Co-Lead', + org: 'vLLM', + logo: 'vllm.svg', + link: 'https://www.linkedin.com/in/simon-mo-834217162/', }, { - text: 'Hugging Face exists to make AI open and accessible to everyone. InferenceX\u2122 extends that mission to ai chip performance, pulling models directly from the Hub and benchmarking them across every major accelerator, continuously and transparently. When the community can see exactly how frontier open models perform on real hardware in real time, it raises the bar for the entire ecosystem.', - name: 'Clement Delangue', - title: 'CEO, Hugging Face', - org: 'Hugging Face', - logo: 'huggingface.svg', - link: 'https://www.linkedin.com/in/cdelangue/', + text: 'The benchmark is good sir', + name: 'Michael Goin', + group: 'Inference Engines and Frameworks', + title: 'vLLM Core Maintainer & Senior Principal Engineer at Red Hat', + org: 'Red Hat', + logo: 'redhat.svg', + link: 'https://www.linkedin.com/in/michael-goin/', }, { - text: 'It is important to have an open and continuously updated platform for benchmarking inference engines across real workloads and diverse hardware. InferenceX provides this kind of transparent and practical evaluation, helping the community better understand real system bottlenecks and tradeoffs. Benchmarks like this are essential for building more efficient and scalable AI systems. Moreover, as LLM agents become increasingly capable at improving systems, such a platform can provide the reliable feedback needed to close the automatic optimization loop, further driving progress in this field.', - name: 'Cao Shiyi', - title: 'Researcher, Sky Computing Lab', - org: 'UC Berkeley', - logo: 'sky-berkeley.webp', + text: 'InferenceX benchmark is pogchamp & W in chat', + name: 'Kaichao You', + group: 'Inference Engines and Frameworks', + title: 'vLLM Project Co-Lead & PhD Student @ Tsinghua University', + org: 'vLLM', + logo: 'vllm.svg', + link: 'https://www.linkedin.com/in/youkaichao/', }, { - text: 'Lambda exists to make GPU compute simple and accessible for AI teams, from individual researchers to the largest labs. InferenceX\u2122 aligns with that mission by giving the community open, reproducible benchmarks that measure what actually matters: real-world throughput, cost efficiency, and performance per watt across the latest hardware and software stacks. Teams can make informed compute choices grounded in transparent, continuously updated data.', - name: 'Stephen Balaban', - title: 'Co-founder and CEO, Lambda', - org: 'Lambda', - logo: 'lambda.svg', - link: 'https://www.linkedin.com/in/sbalaban/', + text: "SGLang is the inference engine behind many production inference factories such as xAI's Grok, earning its recognition as THE Inference King. At scale, we see firsthand how much performance varies across hardware, models, and configurations. InferenceX benchmarks SGLang across every major GPU platform nightly, capturing that variance in a way no other benchmark does, continuously, & reproducibly.", + name: 'Mingyi Lu', + group: 'Inference Engines and Frameworks', + title: 'SGLang Product Lead', + org: 'SGLang', + logo: 'sglang.webp', + link: 'https://www.linkedin.com/in/mingyi-lu/', }, { - text: 'When we introduced DistServe, the thesis was simple: split prefill and decode and optimize each on its own terms. Eighteen months later, disaggregation is the default architecture across the industry. InferenceX\u2122 is the benchmark that comparing disaggregated and aggregated serving across the whole pareto curve. InferenceX shows exactly when and where P/D separation pays off in TTFT, TPOT, throughput, and cost.', + text: 'When we introduced DistServe, the thesis was simple: split prefill and decode and optimize each on its own terms. Eighteen months later, disaggregation is the default architecture across the industry. InferenceX is the benchmark that comparing disaggregated and aggregated serving across the whole pareto curve. InferenceX shows exactly when and where P/D separation pays off in TTFT, TPOT, throughput, and cost.', name: 'Hao Zhang', + group: 'Researchers and Academia', title: 'Assistant Professor, UC San Diego & Co-Creator of DistServe, vLLM, and FastVideo', org: 'UC San Diego', logo: 'uc-san-diego.svg', link: 'https://haozhang.ai/', }, { - text: 'At GMI Cloud, we believe inference has become the center of AI value creation. SemiAnalysis has done something the industry has long needed with InferenceX—they’ve turned inference from a black box into a continuously measured, real-world system. By benchmarking not just hardware, but the full stack—models, runtimes, and distributed systems—InferenceX reflects how AI actually runs in production, not how it’s marketed.', - name: 'Alex Yeh', - title: 'Founder & CEO, GMI Cloud', - org: 'GMI Cloud', - logo: 'gmi-cloud.svg', - link: 'https://www.linkedin.com/in/gmi-yeh', + text: 'For researchers working on inference optimizations, understanding how new techniques interact across the software and hardware stack is critical yet incredibly hard to measure. InferenceX provides much-needed insights into how inference performance evolves across major hardware platforms, moving the field forward with open, reproducible data that makes the gaps and progress visible.', + name: 'Simon Guo', + group: 'Researchers and Academia', + title: 'PhD Student, Stanford CS', + org: 'Stanford', + logo: 'stanford.svg', + link: 'https://simonguo.tech/', + }, + { + text: 'It is important to have an open and continuously updated platform for benchmarking inference engines across real workloads and diverse hardware. InferenceX provides this kind of transparent and practical evaluation, helping the community better understand real system bottlenecks and tradeoffs. Benchmarks like this are essential for building more efficient and scalable AI systems. Moreover, as LLM agents become increasingly capable at improving systems, such a platform can provide the reliable feedback needed to close the automatic optimization loop, further driving progress in this field.', + name: 'Cao Shiyi', + group: 'Researchers and Academia', + title: 'Researcher, Sky Computing Lab', + org: 'UC Berkeley', + logo: 'sky-berkeley.webp', + }, + { + text: "InferenceX provides the open source measurements the community needs \u2014 nightly results across real workloads, real hardware, and real software stacks. As someone who has written extensively about the gap between theoretical and actual system performance, I'm glad to see a project that makes that gap visible and trackable for everyone.", + name: 'Stas Bekman', + group: 'Researchers and Academia', + title: 'Developer & Author of Machine Learning Engineering Open Book (17.5K+ \u2B50)', + org: 'Stas Bekman', + link: 'https://github.com/stas00/ml-engineering', + }, + { + text: 'Arguably the most important OSS benchmark suite out today InferenceX', + name: 'Mark Saroufim', + group: 'Community and Content Creators', + title: 'GPU Mode Founder & Meta PyTorch Engineer', + org: 'GPU Mode', + logo: 'gpu-mode.png', + link: 'https://x.com/marksaroufim', }, { text: 'Now commonly hearing "We want to the Semianalysis for X". Testament to what @dylan522p has built.', name: 'Sriram Krishnan', + group: 'Community and Content Creators', title: 'White House Senior AI Advisor', org: 'White House', logo: 'white-house.svg',