{
  "tool": {
    "slug": "inference-cost-calculator",
    "layer": "chips",
    "layer_name": "Chips & Compute",
    "name": "Inference Cost Calculator",
    "question": "What does it cost to self-host a 70B model at 100k QPS?",
    "short_description": "Per-million-tokens cost for self-hosted inference across H100 / H200 / B200 / MI300.",
    "status": "coming_soon",
    "inputs": [
      "Model size + variant",
      "Throughput target (QPS)",
      "Hardware mix"
    ],
    "outputs": [
      "$ / 1M tokens",
      "GPU hours / day",
      "Recommended cluster shape"
    ],
    "topic_slug": null
  },
  "related_tools": [
    {
      "slug": "h100-vs-h200-vs-b200-tco",
      "layer": "chips",
      "name": "H100 vs H200 vs B200 TCO",
      "question": "Is upgrading from H100 to B200 worth the cost?",
      "url": "/tools/chips/h100-vs-h200-vs-b200-tco"
    },
    {
      "slug": "memory-bandwidth-bottleneck-detector",
      "layer": "chips",
      "name": "Memory Bandwidth Bottleneck Detector",
      "question": "Is my 70B inference bandwidth-bound on H100?",
      "url": "/tools/chips/memory-bandwidth-bottleneck-detector"
    }
  ],
  "links": {
    "page": "/tools/chips/inference-cost-calculator",
    "layer_index": "/tools/chips",
    "topic": null
  },
  "citation_url": "/tools/chips/inference-cost-calculator",
  "generated_at": "2026-05-11T22:08:14.439Z"
}