14:["$","div",null,{"children":[["$","$L32",null,{}],["$","$L33",null,{"resourceGroup":"playground","resource":"playground","action":"*","loginRequired":false,"children":["$","$L34",null,{"modelList":[{"type":"Chat","id":"deepseek/deepseek-v3-0324","name":"deepseek-v3-0324","displayName":"DeepSeek V3 0324","description":"DeepSeek V3, a 685B-parameter, mixture-of-experts model, is the latest iteration of the flagship chat model family from the DeepSeek team.","context_size":163840,"input_token_price_per_m":2800,"output_token_price_per_m":11400,"input_token_price_per_m_toString":"0.28","output_token_price_per_m_toString":"1.14","features":["function-calling"],"link":"/models/llm/deepseek-deepseek-v3-0324","infos":["$$0.28/1.14 in/out MTokens","163840 Context"],"status":1,"input_pricing":{"originPricePerM":2800,"pricePerM":2800},"output_pricing":{"originPricePerM":11400,"pricePerM":11400},"max_output_tokens":163840,"rpm":10,"tmp":"$undefined","labels":[{"key":"specification","value":"MoE"},{"key":"display","value":"HOT"}],"tags":[],"series":"DeepSeek","quota_items":[{"tier":"T1","rpm":10,"tpm":50000000},{"tier":"T2","rpm":100,"tpm":50000000},{"tier":"T3","rpm":1000,"tpm":50000000},{"tier":"T4","rpm":3000,"tpm":50000000},{"tier":"T5","rpm":6000,"tpm":50000000}],"isCompletion":true,"isDiscount":false,"isNew":false,"isHot":true,"discount":0},{"type":"Chat","id":"moonshotai/kimi-k2-instruct","name":"kimi-k2-instruct","displayName":"Kimi K2 Instruct","description":"Kimi K2 is a state-of-the-art mixture-of-experts (MoE) language model with 32 billion activated parameters and 1 trillion total parameters. Trained with the Muon optimizer, Kimi K2 achieves exceptional performance across frontier knowledge, reasoning, and coding tasks while being meticulously optimized for agentic capabilities.Specifically designed for tool use, reasoning, and autonomous problem-solving.","context_size":131072,"input_token_price_per_m":5700,"output_token_price_per_m":23000,"input_token_price_per_m_toString":"0.57","output_token_price_per_m_toString":"2.3","features":["function-calling","structured-outputs"],"link":"/models/llm/moonshotai-kimi-k2-instruct","infos":["$$0.57/2.3 in/out MTokens","131072 Context"],"status":1,"input_pricing":{"originPricePerM":5700,"pricePerM":5700},"output_pricing":{"originPricePerM":23000,"pricePerM":23000},"max_output_tokens":131072,"rpm":10,"tmp":"$undefined","labels":[{"key":"specification","value":"MoE"},{"key":"display","value":"NEW"}],"tags":[],"series":"","quota_items":[{"tier":"T1","rpm":10,"tpm":50000000},{"tier":"T2","rpm":100,"tpm":50000000},{"tier":"T3","rpm":300,"tpm":50000000},{"tier":"T4","rpm":300,"tpm":50000000},{"tier":"T5","rpm":300,"tpm":50000000}],"isCompletion":false,"isDiscount":false,"isNew":true,"isHot":false,"discount":0},{"type":"Chat","id":"deepseek/deepseek-r1-0528","name":"deepseek-r1-0528","displayName":"DeepSeek R1 0528","description":"DeepSeek R1 0528 is the latest open-source model released by the DeepSeek team, featuring impressive reasoning capabilities, particularly achieving performance comparable to OpenAI's o1 model in mathematics, coding, and reasoning tasks.","context_size":163840,"input_token_price_per_m":7000,"output_token_price_per_m":25000,"input_token_price_per_m_toString":"0.7","output_token_price_per_m_toString":"2.5","features":["function-calling","structured-outputs"],"link":"/models/llm/deepseek-deepseek-r1-0528","infos":["$$0.7/2.5 in/out MTokens","163840 Context"],"status":1,"input_pricing":{"originPricePerM":7000,"pricePerM":7000},"output_pricing":{"originPricePerM":25000,"pricePerM":25000},"max_output_tokens":163840,"rpm":10,"tmp":"$undefined","labels":[{"key":"specification","value":"MoE"},{"key":"display","value":"NEW"}],"tags":[],"series":"DeepSeek","quota_items":[{"tier":"T1","rpm":10,"tpm":50000000},{"tier":"T2","rpm":100,"tpm":50000000},{"tier":"T3","rpm":1000,"tpm":50000000},{"tier":"T4","rpm":3000,"tpm":50000000},{"tier":"T5","rpm":6000,"tpm":50000000}],"isCompletion":true,"isDiscount":false,"isNew":true,"isHot":false,"discount":0},{"type":"Chat","id":"baidu/ernie-4.5-vl-424b-a47b","name":"ernie-4.5-vl-424b-a47b","displayName":"ERNIE 4.5 VL 424B A47B","description":"$35","context_size":123000,"input_token_price_per_m":4200,"output_token_price_per_m":12500,"input_token_price_per_m_toString":"0.42","output_token_price_per_m_toString":"1.25","features":["function-calling","vision"],"link":"/models/llm/baidu-ernie-4.5-vl-424b-a47b","infos":["$$0.42/1.25 in/out MTokens","123000 Context"],"status":1,"input_pricing":{"originPricePerM":4200,"pricePerM":4200},"output_pricing":{"originPricePerM":12500,"pricePerM":12500},"max_output_tokens":16000,"rpm":0,"tmp":"$undefined","labels":[{"key":"specification","value":"MoE"},{"key":"display","value":"NEW"}],"tags":[],"series":"BAIDU","quota_items":[],"isCompletion":true,"isDiscount":false,"isNew":true,"isHot":false,"discount":0},{"type":"Chat","id":"baidu/ernie-4.5-300b-a47b-paddle","name":"ernie-4.5-300b-a47b-paddle","displayName":"ERNIE 4.5 300B A47B","description":"$36","context_size":123000,"input_token_price_per_m":2800,"output_token_price_per_m":11000,"input_token_price_per_m_toString":"0.28","output_token_price_per_m_toString":"1.1","features":["function-calling","structured-outputs"],"link":"/models/llm/baidu-ernie-4.5-300b-a47b-paddle","infos":["$$0.28/1.1 in/out MTokens","123000 Context"],"status":1,"input_pricing":{"originPricePerM":2800,"pricePerM":2800},"output_pricing":{"originPricePerM":11000,"pricePerM":11000},"max_output_tokens":12000,"rpm":300,"tmp":"$undefined","labels":[{"key":"specification","value":"MoE"},{"key":"display","value":"NEW"}],"tags":[],"series":"BAIDU","quota_items":[{"tier":"T1","rpm":300,"tpm":50000000},{"tier":"T2","rpm":300,"tpm":50000000},{"tier":"T3","rpm":300,"tpm":50000000},{"tier":"T4","rpm":300,"tpm":50000000},{"tier":"T5","rpm":300,"tpm":50000000}],"isCompletion":true,"isDiscount":false,"isNew":true,"isHot":false,"discount":0},{"type":"Chat","id":"qwen/qwen3-30b-a3b-fp8","name":"qwen3-30b-a3b-fp8","displayName":"Qwen3 30B A3B","description":"Achieves effective integration of inference and non-inference modes, allowing seamless switching between modes during conversations. Its inference capability matches that of QwQ-32B with a smaller parameter size, and its general capabilities significantly surpass those of Qwen2.5-14B, reaching the state-of-the-art (SOTA) level among models of the same scale.","context_size":40960,"input_token_price_per_m":1000,"output_token_price_per_m":4500,"input_token_price_per_m_toString":"0.1","output_token_price_per_m_toString":"0.45","features":[],"link":"/models/llm/qwen-qwen3-30b-a3b-fp8","infos":["$$0.1/0.45 in/out MTokens","40960 Context"],"status":1,"input_pricing":{"originPricePerM":1000,"pricePerM":1000},"output_pricing":{"originPricePerM":4500,"pricePerM":4500},"max_output_tokens":20000,"rpm":20,"tmp":"$undefined","labels":[{"key":"specification","value":"MoE"},{"key":"display","value":"NEW"}],"tags":[],"series":"Qwen","quota_items":[{"tier":"T1","rpm":20,"tpm":50000000},{"tier":"T2","rpm":100,"tpm":50000000},{"tier":"T3","rpm":1000,"tpm":50000000},{"tier":"T4","rpm":3000,"tpm":50000000},{"tier":"T5","rpm":6000,"tpm":50000000}],"isCompletion":true,"isDiscount":false,"isNew":true,"isHot":false,"discount":0},{"type":"Chat","id":"minimaxai/minimax-m1-80k","name":"minimax-m1-80k","displayName":"MiniMax M1","description":"MiniMax-M1: The World's First Open-Weight, Large-Scale Hybrid Attention Inference Model\n\nMiniMax-M1 adopts a Mixture of Experts (MoE) architecture and integrates the Flash Attention mechanism. The model contains a total of 456 billion parameters, with 45.9 billion parameters activated per token.\n\nNatively, the M1 model supports a context length of 1 million tokens—8 times that of DeepSeek R1. Additionally, by combining the CISPO algorithm with an efficient hybrid attention design for reinforcement learning training, MiniMax-M1 achieves industry-leading performance in long-context reasoning and real-world software engineering scenarios.","context_size":1000000,"input_token_price_per_m":5500,"output_token_price_per_m":22000,"input_token_price_per_m_toString":"0.55","output_token_price_per_m_toString":"2.2","features":["function-calling"],"link":"/models/llm/minimaxai-minimax-m1-80k","infos":["$$0.55/2.2 in/out MTokens","1000000 Context"],"status":1,"input_pricing":{"originPricePerM":5500,"pricePerM":5500},"output_pricing":{"originPricePerM":22000,"pricePerM":22000},"max_output_tokens":40000,"rpm":300,"tmp":"$undefined","labels":[{"key":"specification","value":"MoE"},{"key":"display","value":"NEW"}],"tags":[],"series":"MiniMax","quota_items":[{"tier":"T1","rpm":300,"tpm":50000000},{"tier":"T2","rpm":300,"tpm":50000000},{"tier":"T3","rpm":300,"tpm":50000000},{"tier":"T4","rpm":300,"tpm":50000000},{"tier":"T5","rpm":300,"tpm":50000000}],"isCompletion":false,"isDiscount":false,"isNew":true,"isHot":false,"discount":0},{"type":"Chat","id":"deepseek/deepseek-r1-0528-qwen3-8b","name":"deepseek-r1-0528-qwen3-8b","displayName":"DeepSeek R1 0528 Qwen3 8B","description":"DeepSeek-R1-0528-Qwen3-8B is a high-performance reasoning model based on the Qwen3 8B Base model, enhanced through the integration of DeepSeek-R1-0528's Chain-of-Thought (CoT) optimization. In the AIME 2024 evaluation, this open-source model achieved state-of-the-art (SOTA) performance, delivering a 10% improvement over the original Qwen3 8B while matching the reasoning capabilities of the much larger 235-billion-parameter Qwen3-235B-thinking. ","context_size":128000,"input_token_price_per_m":600,"output_token_price_per_m":900,"input_token_price_per_m_toString":"0.06","output_token_price_per_m_toString":"0.09","features":[],"link":"/models/llm/deepseek-deepseek-r1-0528-qwen3-8b","infos":["$$0.06/0.09 in/out MTokens","128000 Context"],"status":1,"input_pricing":{"originPricePerM":600,"pricePerM":600},"output_pricing":{"originPricePerM":900,"pricePerM":900},"max_output_tokens":32000,"rpm":300,"tmp":"$undefined","labels":[{"key":"display","value":"NEW"},{"key":"display","value":"Dedicated"}],"tags":[],"series":"","quota_items":[{"tier":"T1","rpm":300,"tpm":50000000},{"tier":"T2","rpm":300,"tpm":50000000},{"tier":"T3","rpm":300,"tpm":50000000},{"tier":"T4","rpm":300,"tpm":50000000},{"tier":"T5","rpm":300,"tpm":50000000}],"isCompletion":true,"isDiscount":false,"isNew":true,"isHot":false,"discount":0},{"type":"Chat","id":"qwen/qwen3-32b-fp8","name":"qwen3-32b-fp8","displayName":"Qwen3 32B","description":"Achieves effective integration of inference and non-inference modes, allowing seamless switching between modes during conversations. Its inference capability matches that of QwQ-32B with a smaller parameter size, and its general capabilities significantly surpass those of Qwen2.5-14B, reaching the state-of-the-art (SOTA) level among models of the same scale.","context_size":40960,"input_token_price_per_m":1000,"output_token_price_per_m":4500,"input_token_price_per_m_toString":"0.1","output_token_price_per_m_toString":"0.45","features":[],"link":"/models/llm/qwen-qwen3-32b-fp8","infos":["$$0.1/0.45 in/out MTokens","40960 Context"],"status":1,"input_pricing":{"originPricePerM":1000,"pricePerM":1000},"output_pricing":{"originPricePerM":4500,"pricePerM":4500},"max_output_tokens":20000,"rpm":20,"tmp":"$undefined","labels":[{"key":"specification","value":"MoE"},{"key":"display","value":"NEW"}],"tags":[],"series":"Qwen","quota_items":[{"tier":"T1","rpm":20,"tpm":50000000},{"tier":"T2","rpm":100,"tpm":50000000},{"tier":"T3","rpm":1000,"tpm":50000000},{"tier":"T4","rpm":3000,"tpm":50000000},{"tier":"T5","rpm":6000,"tpm":50000000}],"isCompletion":true,"isDiscount":false,"isNew":true,"isHot":false,"discount":0},{"type":"Chat","id":"qwen/qwen2.5-vl-72b-instruct","name":"qwen2.5-vl-72b-instruct","displayName":"Qwen2.5 VL 72B Instruct","description":"Qwen2.5-VL, the latest vision-language model in the Qwen2.5 series, delivers enhanced multimodal capabilities including advanced visual comprehension for object/text recognition, chart/layout analysis, and agent-based dynamic tool orchestration. It processes long-form videos (>1 hour) with key event detection while enabling precise spatial annotation through bounding boxes or coordinate points. The model specializes in structured data extraction from scanned documents (invoices, tables, etc.) and achieves state-of-the-art performance across multimodal benchmarks encompassing image understanding, temporal video analysis, and agent task evaluations.","context_size":32768,"input_token_price_per_m":8000,"output_token_price_per_m":8000,"input_token_price_per_m_toString":"0.8","output_token_price_per_m_toString":"0.8","features":["vision"],"link":"/models/llm/qwen-qwen2.5-vl-72b-instruct","infos":["$$0.8/0.8 in/out MTokens","32768 Context"],"status":1,"input_pricing":{"originPricePerM":8000,"pricePerM":8000},"output_pricing":{"originPricePerM":8000,"pricePerM":8000},"max_output_tokens":32768,"rpm":20,"tmp":"$undefined","labels":[{"key":"specification","value":"50～100B"}],"tags":[],"series":"Qwen","quota_items":[{"tier":"T1","rpm":20,"tpm":50000000},{"tier":"T2","rpm":100,"tpm":50000000},{"tier":"T3","rpm":1000,"tpm":50000000},{"tier":"T4","rpm":3000,"tpm":50000000},{"tier":"T5","rpm":6000,"tpm":50000000}],"isCompletion":true,"isDiscount":false,"isNew":false,"isHot":false,"discount":0},{"type":"Chat","id":"qwen/qwen3-235b-a22b-fp8","name":"qwen3-235b-a22b-fp8","displayName":"Qwen3 235B A22B","description":"Achieves effective integration of inference and non-inference modes, enabling seamless switching between modes during conversations. The model's inference capability significantly surpasses that of QwQ, and its general capabilities exceed those of Qwen2.5-72B-Instruct, reaching the state-of-the-art (SOTA) level among models of the same scale.","context_size":40960,"input_token_price_per_m":2000,"output_token_price_per_m":8000,"input_token_price_per_m_toString":"0.2","output_token_price_per_m_toString":"0.8","features":[],"link":"/models/llm/qwen-qwen3-235b-a22b-fp8","infos":["$$0.2/0.8 in/out MTokens","40960 Context"],"status":1,"input_pricing":{"originPricePerM":2000,"pricePerM":2000},"output_pricing":{"originPricePerM":8000,"pricePerM":8000},"max_output_tokens":20000,"rpm":10,"tmp":"$undefined","labels":[{"key":"specification","value":"MoE"},{"key":"display","value":"NEW"}],"tags":[],"series":"Qwen","quota_items":[{"tier":"T1","rpm":10,"tpm":50000000},{"tier":"T2","rpm":100,"tpm":50000000},{"tier":"T3","rpm":1000,"tpm":50000000},{"tier":"T4","rpm":3000,"tpm":50000000},{"tier":"T5","rpm":6000,"tpm":50000000}],"isCompletion":true,"isDiscount":false,"isNew":true,"isHot":false,"discount":0},{"type":"Chat","id":"deepseek/deepseek-v3-turbo","name":"deepseek-v3-turbo","displayName":"DeepSeek V3 (Turbo)\t","description":"DeepSeek-V3 is the latest model from the DeepSeek team, building upon the instruction following and coding abilities of the previous versions. Pre-trained on nearly 15 trillion tokens, the reported evaluations reveal that the model outperforms other open-source models and rivals leading closed-source models.","context_size":64000,"input_token_price_per_m":4000,"output_token_price_per_m":13000,"input_token_price_per_m_toString":"0.4","output_token_price_per_m_toString":"1.3","features":["function-calling"],"link":"/models/llm/deepseek-deepseek-v3-turbo","infos":["$$0.4/1.3 in/out MTokens","64000 Context"],"status":1,"input_pricing":{"originPricePerM":4000,"pricePerM":4000},"output_pricing":{"originPricePerM":13000,"pricePerM":13000},"max_output_tokens":16000,"rpm":10,"tmp":"$undefined","labels":[{"key":"specification","value":"MoE"}],"tags":[],"series":"DeepSeek","quota_items":[{"tier":"T1","rpm":10,"tpm":50000000},{"tier":"T2","rpm":100,"tpm":50000000},{"tier":"T3","rpm":1000,"tpm":50000000},{"tier":"T4","rpm":3000,"tpm":50000000},{"tier":"T5","rpm":6000,"tpm":50000000}],"isCompletion":true,"isDiscount":false,"isNew":false,"isHot":false,"discount":0},{"type":"Chat","id":"thudm/glm-4.1v-9b-thinking","name":"glm-4.1v-9b-thinking","displayName":"GLM 4.1V 9B Thinking","description":"GLM-4.1V-9B-Thinking is an open-source Vision-Language Model (VLM) jointly released by Zhipu AI and Tsinghua University’s KEG Lab, specifically designed to handle complex multimodal cognitive tasks. Built upon the GLM-4-9B-0414 base model, it integrates Chain-of-Thought (CoT) reasoning and employs reinforcement learning strategies, significantly enhancing its cross-modal reasoning capabilities and stability. As a lightweight model with 9B parameters, it strikes an optimal balance between deployment efficiency and performance. Across 28 authoritative benchmark evaluations, it matches or surpasses the performance of the 72B-parameter Qwen-2.5-VL-72B in 18 metrics. The model excels in tasks such as image-text understanding, mathematical and scientific reasoning, and video comprehension, while also supporting 4K-resolution images and arbitrary aspect ratios.","context_size":65536,"input_token_price_per_m":350,"output_token_price_per_m":1380,"input_token_price_per_m_toString":"0.035","output_token_price_per_m_toString":"0.138","features":["vision"],"link":"/models/llm/thudm-glm-4.1v-9b-thinking","infos":["$$0.035/0.138 in/out MTokens","65536 Context"],"status":1,"input_pricing":{"originPricePerM":350,"pricePerM":350},"output_pricing":{"originPricePerM":1380,"pricePerM":1380},"max_output_tokens":8000,"rpm":50,"tmp":"$undefined","labels":[{"key":"display","value":"NEW"}],"tags":[],"series":"THUDM","quota_items":[{"tier":"T1","rpm":50,"tpm":50000000},{"tier":"T2","rpm":100,"tpm":50000000},{"tier":"T3","rpm":1000,"tpm":50000000},{"tier":"T4","rpm":3000,"tpm":50000000},{"tier":"T5","rpm":6000,"tpm":50000000}],"isCompletion":false,"isDiscount":false,"isNew":true,"isHot":false,"discount":0},{"type":"Chat","id":"meta-llama/llama-4-maverick-17b-128e-instruct-fp8","name":"llama-4-maverick-17b-128e-instruct-fp8","displayName":"Llama 4 Maverick Instruct","description":"Llama 4 Maverick 17B Instruct (128E) is a high-capacity multimodal language model from Meta, built on a mixture-of-experts (MoE) architecture with 128 experts and 17 billion active parameters per forward pass (400B total). It supports multilingual text and image input, and produces multilingual text and code output across 12 supported languages. Optimized for vision-language tasks, Maverick is instruction-tuned for assistant-like behavior, image reasoning, and general-purpose multimodal interaction.\n\nMaverick features early fusion for native multimodality and a 1 million token context window. It was trained on a curated mixture of public, licensed, and Meta-platform data, covering ~22 trillion tokens, with a knowledge cutoff in August 2024. Released on April 5, 2025 under the Llama 4 Community License, Maverick is suited for research and commercial applications requiring advanced multimodal understanding and high model throughput.","context_size":1048576,"input_token_price_per_m":1700,"output_token_price_per_m":8500,"input_token_price_per_m_toString":"0.17","output_token_price_per_m_toString":"0.85","features":["function-calling","vision"],"link":"/models/llm/meta-llama-llama-4-maverick-17b-128e-instruct-fp8","infos":["$$0.17/0.85 in/out MTokens","1048576 Context"],"status":1,"input_pricing":{"originPricePerM":1700,"pricePerM":1700},"output_pricing":{"originPricePerM":8500,"pricePerM":8500},"max_output_tokens":1048576,"rpm":10,"tmp":"$undefined","labels":[{"key":"specification","value":"MoE"}],"tags":[],"series":"Llama","quota_items":[{"tier":"T1","rpm":10,"tpm":50000000},{"tier":"T2","rpm":100,"tpm":50000000},{"tier":"T3","rpm":1000,"tpm":50000000},{"tier":"T4","rpm":3000,"tpm":50000000},{"tier":"T5","rpm":6000,"tpm":50000000}],"isCompletion":true,"isDiscount":false,"isNew":false,"isHot":false,"discount":0},{"type":"Chat","id":"google/gemma-3-27b-it","name":"gemma-3-27b-it","displayName":"Gemma 3 27B","description":"Gemma 3 introduces multimodality, supporting vision-language input and text outputs. It handles context windows up to 32k tokens, understands over 140 languages, and offers improved math, reasoning, and chat capabilities, including structured outputs. Gemma 3 27B is Google's latest open source model, successor to Gemma.","context_size":32000,"input_token_price_per_m":1190,"output_token_price_per_m":2000,"input_token_price_per_m_toString":"0.119","output_token_price_per_m_toString":"0.2","features":["structured-outputs","vision"],"link":"/models/llm/google-gemma-3-27b-it","infos":["$$0.119/0.2 in/out MTokens","32000 Context"],"status":1,"input_pricing":{"originPricePerM":1190,"pricePerM":1190},"output_pricing":{"originPricePerM":2000,"pricePerM":2000},"max_output_tokens":32000,"rpm":20,"tmp":"$undefined","labels":[{"key":"specification","value":"10～50B"}],"tags":[],"series":"Gemma","quota_items":[{"tier":"T1","rpm":20,"tpm":50000000},{"tier":"T2","rpm":100,"tpm":50000000},{"tier":"T3","rpm":1000,"tpm":50000000},{"tier":"T4","rpm":3000,"tpm":50000000},{"tier":"T5","rpm":6000,"tpm":50000000}],"isCompletion":true,"isDiscount":false,"isNew":false,"isHot":false,"discount":0},{"type":"Chat","id":"deepseek/deepseek-r1-turbo","name":"deepseek-r1-turbo","displayName":"DeepSeek R1 (Turbo)\t","description":"DeepSeek R1 is the latest open-source model released by the DeepSeek team, featuring impressive reasoning capabilities, particularly achieving performance comparable to OpenAI's o1 model in mathematics, coding, and reasoning tasks.","context_size":64000,"input_token_price_per_m":7000,"output_token_price_per_m":25000,"input_token_price_per_m_toString":"0.7","output_token_price_per_m_toString":"2.5","features":["function-calling"],"link":"/models/llm/deepseek-deepseek-r1-turbo","infos":["$$0.7/2.5 in/out MTokens","64000 Context"],"status":1,"input_pricing":{"originPricePerM":7000,"pricePerM":7000},"output_pricing":{"originPricePerM":25000,"pricePerM":25000},"max_output_tokens":16000,"rpm":10,"tmp":"$undefined","labels":[{"key":"specification","value":"MoE"}],"tags":[],"series":"DeepSeek","quota_items":[{"tier":"T1","rpm":10,"tpm":50000000},{"tier":"T2","rpm":100,"tpm":50000000},{"tier":"T3","rpm":1000,"tpm":50000000},{"tier":"T4","rpm":3000,"tpm":50000000},{"tier":"T5","rpm":6000,"tpm":50000000}],"isCompletion":true,"isDiscount":false,"isNew":false,"isHot":false,"discount":0},{"type":"Chat","id":"Sao10K/L3-8B-Stheno-v3.2","name":"L3-8B-Stheno-v3.2","displayName":"L3 8B Stheno V3.2","description":"Sao10K/L3-8B-Stheno-v3.2 is a highly skilled actor that excels at fully immersing itself in any role assigned.","context_size":8192,"input_token_price_per_m":500,"output_token_price_per_m":500,"input_token_price_per_m_toString":"0.05","output_token_price_per_m_toString":"0.05","features":[],"link":"/models/llm/Sao10K-L3-8B-Stheno-v3.2","infos":["$$0.05/0.05 in/out MTokens","8192 Context"],"status":1,"input_pricing":{"originPricePerM":500,"pricePerM":500},"output_pricing":{"originPricePerM":500,"pricePerM":500},"max_output_tokens":32000,"rpm":50,"tmp":"$undefined","labels":[{"key":"specification","value":"< 10B"}],"tags":[],"series":"Sao10K","quota_items":[{"tier":"T1","rpm":50,"tpm":50000000},{"tier":"T2","rpm":100,"tpm":50000000},{"tier":"T3","rpm":1000,"tpm":50000000},{"tier":"T4","rpm":3000,"tpm":50000000},{"tier":"T5","rpm":6000,"tpm":50000000}],"isCompletion":true,"isDiscount":false,"isNew":false,"isHot":false,"discount":0},{"type":"Chat","id":"gryphe/mythomax-l2-13b","name":"mythomax-l2-13b","displayName":"Mythomax L2 13B","description":"The idea behind this merge is that each layer is composed of several tensors, which are in turn responsible for specific functions. Using MythoLogic-L2's robust understanding as its input and Huginn's extensive writing capability as its output seems to have resulted in a model that exceeds at both, confirming my theory. (More details to be released at a later time).","context_size":4096,"input_token_price_per_m":900,"output_token_price_per_m":900,"input_token_price_per_m_toString":"0.09","output_token_price_per_m_toString":"0.09","features":[],"link":"/models/llm/gryphe-mythomax-l2-13b","infos":["$$0.09/0.09 in/out MTokens","4096 Context"],"status":1,"input_pricing":{"originPricePerM":900,"pricePerM":900},"output_pricing":{"originPricePerM":900,"pricePerM":900},"max_output_tokens":32000,"rpm":50,"tmp":"$undefined","labels":[{"key":"specification","value":"10～50B"}],"tags":[],"series":"Mythomax","quota_items":[{"tier":"T1","rpm":50,"tpm":50000000},{"tier":"T2","rpm":100,"tpm":50000000},{"tier":"T3","rpm":1000,"tpm":50000000},{"tier":"T4","rpm":3000,"tpm":50000000},{"tier":"T5","rpm":6000,"tpm":50000000}],"isCompletion":true,"isDiscount":false,"isNew":false,"isHot":false,"discount":0},{"type":"Chat","id":"deepseek/deepseek-prover-v2-671b","name":"deepseek-prover-v2-671b","displayName":"Deepseek Prover V2 671B","description":"DeepSeek Launches Open-Source Model DeepSeek-Prover-V2-671B, Specializing in Mathematical Theorem Proving\nThe new model employs a Mixture of Experts (MoE) architecture and is trained using the Lean 4 framework for formal reasoning. With 671 billion parameters, it leverages reinforcement learning and large-scale synthetic data to significantly enhance automated theorem-proving capabilities.","context_size":160000,"input_token_price_per_m":7000,"output_token_price_per_m":25000,"input_token_price_per_m_toString":"0.7","output_token_price_per_m_toString":"2.5","features":[],"link":"/models/llm/deepseek-deepseek-prover-v2-671b","infos":["$$0.7/2.5 in/out MTokens","160000 Context"],"status":1,"input_pricing":{"originPricePerM":7000,"pricePerM":7000},"output_pricing":{"originPricePerM":25000,"pricePerM":25000},"max_output_tokens":160000,"rpm":10,"tmp":"$undefined","labels":[{"key":"specification","value":"MoE"},{"key":"display","value":"NEW"}],"tags":[],"series":"DeepSeek","quota_items":[{"tier":"T1","rpm":10,"tpm":50000000},{"tier":"T2","rpm":100,"tpm":50000000},{"tier":"T3","rpm":1000,"tpm":50000000},{"tier":"T4","rpm":3000,"tpm":50000000},{"tier":"T5","rpm":6000,"tpm":50000000}],"isCompletion":true,"isDiscount":false,"isNew":true,"isHot":false,"discount":0},{"type":"Chat","id":"meta-llama/llama-4-scout-17b-16e-instruct","name":"llama-4-scout-17b-16e-instruct","displayName":"Llama 4 Scout Instruct","description":"Llama 4 Scout 17B Instruct (16E) is a mixture-of-experts (MoE) language model developed by Meta, activating 17 billion parameters out of a total of 109B. It supports native multimodal input (text and image) and multilingual output (text and code) across 12 supported languages. Designed for assistant-style interaction and visual reasoning, Scout uses 16 experts per forward pass and features a context length of 10 million tokens, with a training corpus of ~40 trillion tokens.\n\nBuilt for high efficiency and local or commercial deployment, Llama 4 Scout incorporates early fusion for seamless modality integration. It is instruction-tuned for use in multilingual chat, captioning, and image understanding tasks. Released under the Llama 4 Community License, it was last trained on data up to August 2024 and launched publicly on April 5, 2025.","context_size":131072,"input_token_price_per_m":1000,"output_token_price_per_m":5000,"input_token_price_per_m_toString":"0.1","output_token_price_per_m_toString":"0.5","features":["function-calling","vision"],"link":"/models/llm/meta-llama-llama-4-scout-17b-16e-instruct","infos":["$$0.1/0.5 in/out MTokens","131072 Context"],"status":1,"input_pricing":{"originPricePerM":1000,"pricePerM":1000},"output_pricing":{"originPricePerM":5000,"pricePerM":5000},"max_output_tokens":131072,"rpm":50,"tmp":"$undefined","labels":[{"key":"specification","value":"MoE"},{"key":"display","value":"Dedicated"}],"tags":[],"series":"Llama","quota_items":[{"tier":"T1","rpm":50,"tpm":50000000},{"tier":"T2","rpm":100,"tpm":50000000},{"tier":"T3","rpm":1000,"tpm":50000000},{"tier":"T4","rpm":3000,"tpm":50000000},{"tier":"T5","rpm":6000,"tpm":50000000}],"isCompletion":true,"isDiscount":false,"isNew":false,"isHot":false,"discount":0},{"type":"Chat","id":"deepseek/deepseek-r1-distill-llama-8b","name":"deepseek-r1-distill-llama-8b","displayName":"DeepSeek R1 Distill Llama 8B","description":"DeepSeek R1 Distill Llama 8B is a distilled large language model based on Llama-3.1-8B-Instruct, using outputs from DeepSeek R1. ","context_size":32000,"input_token_price_per_m":400,"output_token_price_per_m":400,"input_token_price_per_m_toString":"0.04","output_token_price_per_m_toString":"0.04","features":["structured-outputs"],"link":"/models/llm/deepseek-deepseek-r1-distill-llama-8b","infos":["$$0.04/0.04 in/out MTokens","32000 Context"],"status":1,"input_pricing":{"originPricePerM":400,"pricePerM":400},"output_pricing":{"originPricePerM":400,"pricePerM":400},"max_output_tokens":32000,"rpm":50,"tmp":"$undefined","labels":[{"key":"specification","value":"< 10B"},{"key":"display","value":"Dedicated"}],"tags":[],"series":"DeepSeek","quota_items":[{"tier":"T1","rpm":50,"tpm":50000000},{"tier":"T2","rpm":100,"tpm":50000000},{"tier":"T3","rpm":1000,"tpm":50000000},{"tier":"T4","rpm":3000,"tpm":50000000},{"tier":"T5","rpm":6000,"tpm":50000000}],"isCompletion":true,"isDiscount":false,"isNew":false,"isHot":false,"discount":0},{"type":"Chat","id":"meta-llama/llama-3.1-8b-instruct","name":"llama-3.1-8b-instruct","displayName":"Llama 3.1 8B Instruct","description":"Meta's latest class of models, Llama 3.1, launched with a variety of sizes and configurations. The 8B instruct-tuned version is particularly fast and efficient. It has demonstrated strong performance in human evaluations, outperforming several leading closed-source models.","context_size":16384,"input_token_price_per_m":200,"output_token_price_per_m":500,"input_token_price_per_m_toString":"0.02","output_token_price_per_m_toString":"0.05","features":[],"link":"/models/llm/meta-llama-llama-3.1-8b-instruct","infos":["$$0.02/0.05 in/out MTokens","16384 Context"],"status":1,"input_pricing":{"originPricePerM":200,"pricePerM":200},"output_pricing":{"originPricePerM":500,"pricePerM":500},"max_output_tokens":131072,"rpm":50,"tmp":"$undefined","labels":[{"key":"specification","value":"< 10B"}],"tags":[],"series":"Llama","quota_items":[{"tier":"T1","rpm":50,"tpm":50000000},{"tier":"T2","rpm":100,"tpm":50000000},{"tier":"T3","rpm":1000,"tpm":50000000},{"tier":"T4","rpm":3000,"tpm":50000000},{"tier":"T5","rpm":6000,"tpm":50000000}],"isCompletion":true,"isDiscount":false,"isNew":false,"isHot":false,"discount":0},{"type":"Chat","id":"deepseek/deepseek-r1-distill-qwen-14b","name":"deepseek-r1-distill-qwen-14b","displayName":"DeepSeek R1 Distill Qwen 14B","description":"DeepSeek R1 Distill Qwen 14B is a distilled large language model based on Qwen 2.5 14B, using outputs from DeepSeek R1. It outperforms OpenAI's o1-mini across various benchmarks, achieving new state-of-the-art results for dense models.\n\nOther benchmark results include:\n\nAIME 2024 pass@1: 69.7\nMATH-500 pass@1: 93.9\nCodeForces Rating: 1481\nThe model leverages fine-tuning from DeepSeek R1's outputs, enabling competitive performance comparable to larger frontier models.","context_size":64000,"input_token_price_per_m":1500,"output_token_price_per_m":1500,"input_token_price_per_m_toString":"0.15","output_token_price_per_m_toString":"0.15","features":["structured-outputs"],"link":"/models/llm/deepseek-deepseek-r1-distill-qwen-14b","infos":["$$0.15/0.15 in/out MTokens","64000 Context"],"status":1,"input_pricing":{"originPricePerM":1500,"pricePerM":1500},"output_pricing":{"originPricePerM":1500,"pricePerM":1500},"max_output_tokens":32000,"rpm":50,"tmp":"$undefined","labels":[{"key":"specification","value":"10～50B"}],"tags":[],"series":"DeepSeek","quota_items":[{"tier":"T1","rpm":50,"tpm":50000000},{"tier":"T2","rpm":100,"tpm":50000000},{"tier":"T3","rpm":1000,"tpm":50000000},{"tier":"T4","rpm":3000,"tpm":50000000},{"tier":"T5","rpm":6000,"tpm":50000000}],"isCompletion":true,"isDiscount":false,"isNew":false,"isHot":false,"discount":0},{"type":"Chat","id":"meta-llama/llama-3.3-70b-instruct","name":"llama-3.3-70b-instruct","displayName":"Llama 3.3 70B Instruct","description":"The Meta Llama 3.3 multilingual large language model (LLM) is a pretrained and instruction tuned generative model in 70B (text in/text out). The Llama 3.3 instruction tuned text only model is optimized for multilingual dialogue use cases and outperforms many of the available open source and closed chat models on common industry benchmarks.\n\nSupported languages: English, German, French, Italian, Portuguese, Hindi, Spanish, and Thai.","context_size":131072,"input_token_price_per_m":1300,"output_token_price_per_m":3900,"input_token_price_per_m_toString":"0.13","output_token_price_per_m_toString":"0.39","features":["function-calling"],"link":"/models/llm/meta-llama-llama-3.3-70b-instruct","infos":["$$0.13/0.39 in/out MTokens","131072 Context"],"status":1,"input_pricing":{"originPricePerM":1300,"pricePerM":1300},"output_pricing":{"originPricePerM":3900,"pricePerM":3900},"max_output_tokens":120000,"rpm":20,"tmp":"$undefined","labels":[{"key":"specification","value":"50～100B"},{"key":"display","value":"HOT"}],"tags":[],"series":"Llama","quota_items":[{"tier":"T1","rpm":20,"tpm":50000000},{"tier":"T2","rpm":100,"tpm":50000000},{"tier":"T3","rpm":1000,"tpm":50000000},{"tier":"T4","rpm":3000,"tpm":50000000},{"tier":"T5","rpm":6000,"tpm":50000000}],"isCompletion":true,"isDiscount":false,"isNew":false,"isHot":true,"discount":0},{"type":"Chat","id":"qwen/qwen-2.5-72b-instruct","name":"qwen-2.5-72b-instruct","displayName":"Qwen 2.5 72B Instruct","description":"Qwen2.5 is the latest series of Qwen large language models. For Qwen2.5, we release a number of base language models and instruction-tuned language models ranging from 0.5 to 72 billion parameters.","context_size":32000,"input_token_price_per_m":3800,"output_token_price_per_m":4000,"input_token_price_per_m_toString":"0.38","output_token_price_per_m_toString":"0.4","features":["structured-outputs","function-calling"],"link":"/models/llm/qwen-qwen-2.5-72b-instruct","infos":["$$0.38/0.4 in/out MTokens","32000 Context"],"status":1,"input_pricing":{"originPricePerM":3800,"pricePerM":3800},"output_pricing":{"originPricePerM":4000,"pricePerM":4000},"max_output_tokens":32000,"rpm":20,"tmp":"$undefined","labels":[{"key":"specification","value":"50～100B"}],"tags":[],"series":"Qwen","quota_items":[{"tier":"T1","rpm":20,"tpm":50000000},{"tier":"T2","rpm":100,"tpm":50000000},{"tier":"T3","rpm":1000,"tpm":50000000},{"tier":"T4","rpm":3000,"tpm":50000000},{"tier":"T5","rpm":6000,"tpm":50000000}],"isCompletion":true,"isDiscount":false,"isNew":false,"isHot":false,"discount":0},{"type":"Chat","id":"mistralai/mistral-nemo","name":"mistral-nemo","displayName":"Mistral Nemo","description":"A 12B parameter model with a 128k token context length built by Mistral in collaboration with NVIDIA. The model is multilingual, supporting English, French, German, Spanish, Italian, Portuguese, Chinese, Japanese, Korean, Arabic, and Hindi. It supports function calling and is released under the Apache 2.0 license.","context_size":60288,"input_token_price_per_m":400,"output_token_price_per_m":1700,"input_token_price_per_m_toString":"0.04","output_token_price_per_m_toString":"0.17","features":["structured-outputs"],"link":"/models/llm/mistralai-mistral-nemo","infos":["$$0.04/0.17 in/out MTokens","60288 Context"],"status":1,"input_pricing":{"originPricePerM":400,"pricePerM":400},"output_pricing":{"originPricePerM":1700,"pricePerM":1700},"max_output_tokens":32000,"rpm":50,"tmp":"$undefined","labels":[{"key":"specification","value":"10～50B"}],"tags":[],"series":"Mistral","quota_items":[{"tier":"T1","rpm":50,"tpm":50000000},{"tier":"T2","rpm":100,"tpm":50000000},{"tier":"T3","rpm":1000,"tpm":50000000},{"tier":"T4","rpm":3000,"tpm":50000000},{"tier":"T5","rpm":6000,"tpm":50000000}],"isCompletion":true,"isDiscount":false,"isNew":false,"isHot":false,"discount":0},{"type":"Chat","id":"deepseek/deepseek-r1-distill-qwen-32b","name":"deepseek-r1-distill-qwen-32b","displayName":"DeepSeek R1 Distill Qwen 32B","description":"DeepSeek R1 Distill Qwen 32B is a distilled large language model based on Qwen 2.5 32B, using outputs from DeepSeek R1. It outperforms OpenAI's o1-mini across various benchmarks, achieving new state-of-the-art results for dense models.\n\nOther benchmark results include:\nAIME 2024 pass@1: 72.6\nMATH-500 pass@1: 94.3\nCodeForces Rating: 1691\nThe model leverages fine-tuning from DeepSeek R1's outputs, enabling competitive performance comparable to larger frontier models.","context_size":64000,"input_token_price_per_m":3000,"output_token_price_per_m":3000,"input_token_price_per_m_toString":"0.3","output_token_price_per_m_toString":"0.3","features":["structured-outputs"],"link":"/models/llm/deepseek-deepseek-r1-distill-qwen-32b","infos":["$$0.3/0.3 in/out MTokens","64000 Context"],"status":1,"input_pricing":{"originPricePerM":3000,"pricePerM":3000},"output_pricing":{"originPricePerM":3000,"pricePerM":3000},"max_output_tokens":32000,"rpm":20,"tmp":"$undefined","labels":[{"key":"specification","value":"10～50B"}],"tags":[],"series":"DeepSeek","quota_items":[{"tier":"T1","rpm":20,"tpm":50000000},{"tier":"T2","rpm":100,"tpm":50000000},{"tier":"T3","rpm":1000,"tpm":50000000},{"tier":"T4","rpm":3000,"tpm":50000000},{"tier":"T5","rpm":6000,"tpm":50000000}],"isCompletion":true,"isDiscount":false,"isNew":false,"isHot":false,"discount":0},{"type":"Chat","id":"meta-llama/llama-3-8b-instruct","name":"llama-3-8b-instruct","displayName":"Llama 3 8B Instruct","description":"Meta's latest class of model (Llama 3) launched with a variety of sizes & flavors. This 8B instruct-tuned version was optimized for high quality dialogue usecases. It has demonstrated strong performance compared to leading closed-source models in human evaluations.","context_size":8192,"input_token_price_per_m":400,"output_token_price_per_m":400,"input_token_price_per_m_toString":"0.04","output_token_price_per_m_toString":"0.04","features":[],"link":"/models/llm/meta-llama-llama-3-8b-instruct","infos":["$$0.04/0.04 in/out MTokens","8192 Context"],"status":1,"input_pricing":{"originPricePerM":400,"pricePerM":400},"output_pricing":{"originPricePerM":400,"pricePerM":400},"max_output_tokens":8192,"rpm":50,"tmp":"$undefined","labels":[{"key":"specification","value":"< 10B"}],"tags":[],"series":"Llama","quota_items":[{"tier":"T1","rpm":50,"tpm":50000000},{"tier":"T2","rpm":100,"tpm":50000000},{"tier":"T3","rpm":1000,"tpm":50000000},{"tier":"T4","rpm":3000,"tpm":50000000},{"tier":"T5","rpm":6000,"tpm":50000000}],"isCompletion":true,"isDiscount":false,"isNew":false,"isHot":false,"discount":0},{"type":"Chat","id":"microsoft/wizardlm-2-8x22b","name":"wizardlm-2-8x22b","displayName":"Wizardlm 2 8x22B","description":"WizardLM-2 8x22B is Microsoft AI's most advanced Wizard model. It demonstrates highly competitive performance compared to leading proprietary models, and it consistently outperforms all existing state-of-the-art opensource models.","context_size":65535,"input_token_price_per_m":6200,"output_token_price_per_m":6200,"input_token_price_per_m_toString":"0.62","output_token_price_per_m_toString":"0.62","features":[],"link":"/models/llm/microsoft-wizardlm-2-8x22b","infos":["$$0.62/0.62 in/out MTokens","65535 Context"],"status":1,"input_pricing":{"originPricePerM":6200,"pricePerM":6200},"output_pricing":{"originPricePerM":6200,"pricePerM":6200},"max_output_tokens":8000,"rpm":20,"tmp":"$undefined","labels":[{"key":"specification","value":"> 100B"}],"tags":[],"series":"Others","quota_items":[{"tier":"T1","rpm":20,"tpm":50000000},{"tier":"T2","rpm":100,"tpm":50000000},{"tier":"T3","rpm":1000,"tpm":50000000},{"tier":"T4","rpm":3000,"tpm":50000000},{"tier":"T5","rpm":6000,"tpm":50000000}],"isCompletion":true,"isDiscount":false,"isNew":false,"isHot":false,"discount":0},{"type":"Chat","id":"deepseek/deepseek-r1-distill-llama-70b","name":"deepseek-r1-distill-llama-70b","displayName":"DeepSeek R1 Distill LLama 70B","description":"DeepSeek R1 Distill LLama 70B","context_size":32000,"input_token_price_per_m":8000,"output_token_price_per_m":8000,"input_token_price_per_m_toString":"0.8","output_token_price_per_m_toString":"0.8","features":["structured-outputs"],"link":"/models/llm/deepseek-deepseek-r1-distill-llama-70b","infos":["$$0.8/0.8 in/out MTokens","32000 Context"],"status":1,"input_pricing":{"originPricePerM":8000,"pricePerM":8000},"output_pricing":{"originPricePerM":8000,"pricePerM":8000},"max_output_tokens":32000,"rpm":20,"tmp":"$undefined","labels":[{"key":"specification","value":"50～100B"}],"tags":[],"series":"DeepSeek","quota_items":[{"tier":"T1","rpm":20,"tpm":50000000},{"tier":"T2","rpm":100,"tpm":50000000},{"tier":"T3","rpm":1000,"tpm":50000000},{"tier":"T4","rpm":3000,"tpm":50000000},{"tier":"T5","rpm":6000,"tpm":50000000}],"isCompletion":true,"isDiscount":false,"isNew":false,"isHot":false,"discount":0},{"type":"Chat","id":"mistralai/mistral-7b-instruct","name":"mistral-7b-instruct","displayName":"Mistral 7B Instruct","description":"A high-performing, industry-standard 7.3B parameter model, with optimizations for speed and context length.","context_size":32768,"input_token_price_per_m":290,"output_token_price_per_m":590,"input_token_price_per_m_toString":"0.029","output_token_price_per_m_toString":"0.059","features":[],"link":"/models/llm/mistralai-mistral-7b-instruct","infos":["$$0.029/0.059 in/out MTokens","32768 Context"],"status":1,"input_pricing":{"originPricePerM":290,"pricePerM":290},"output_pricing":{"originPricePerM":590,"pricePerM":590},"max_output_tokens":8192,"rpm":50,"tmp":"$undefined","labels":[{"key":"specification","value":"< 10B"}],"tags":[],"series":"Mistral","quota_items":[{"tier":"T1","rpm":50,"tpm":50000000},{"tier":"T2","rpm":100,"tpm":50000000},{"tier":"T3","rpm":1000,"tpm":50000000},{"tier":"T4","rpm":3000,"tpm":50000000},{"tier":"T5","rpm":6000,"tpm":50000000}],"isCompletion":true,"isDiscount":false,"isNew":false,"isHot":false,"discount":0},{"type":"Chat","id":"meta-llama/llama-3-70b-instruct","name":"llama-3-70b-instruct","displayName":"Llama3 70B Instruct","description":"Meta's latest class of model (Llama 3) launched with a variety of sizes & flavors. This 70B instruct-tuned version was optimized for high quality dialogue usecases. It has demonstrated strong performance compared to leading closed-source models in human evaluations.","context_size":8192,"input_token_price_per_m":5100,"output_token_price_per_m":7400,"input_token_price_per_m_toString":"0.51","output_token_price_per_m_toString":"0.74","features":["structured-outputs"],"link":"/models/llm/meta-llama-llama-3-70b-instruct","infos":["$$0.51/0.74 in/out MTokens","8192 Context"],"status":1,"input_pricing":{"originPricePerM":5100,"pricePerM":5100},"output_pricing":{"originPricePerM":7400,"pricePerM":7400},"max_output_tokens":8000,"rpm":20,"tmp":"$undefined","labels":[{"key":"specification","value":"50～100B"}],"tags":[],"series":"Llama","quota_items":[{"tier":"T1","rpm":20,"tpm":50000000},{"tier":"T2","rpm":100,"tpm":50000000},{"tier":"T3","rpm":1000,"tpm":50000000},{"tier":"T4","rpm":3000,"tpm":50000000},{"tier":"T5","rpm":6000,"tpm":50000000}],"isCompletion":true,"isDiscount":false,"isNew":false,"isHot":false,"discount":0},{"type":"Chat","id":"nousresearch/hermes-2-pro-llama-3-8b","name":"hermes-2-pro-llama-3-8b","displayName":"Hermes 2 Pro Llama 3 8B","description":"Hermes 2 Pro is an upgraded, retrained version of Nous Hermes 2, consisting of an updated and cleaned version of the OpenHermes 2.5 Dataset, as well as a newly introduced Function Calling and JSON Mode dataset developed in-house.","context_size":8192,"input_token_price_per_m":1400,"output_token_price_per_m":1400,"input_token_price_per_m_toString":"0.14","output_token_price_per_m_toString":"0.14","features":["structured-outputs"],"link":"/models/llm/nousresearch-hermes-2-pro-llama-3-8b","infos":["$$0.14/0.14 in/out MTokens","8192 Context"],"status":1,"input_pricing":{"originPricePerM":1400,"pricePerM":1400},"output_pricing":{"originPricePerM":1400,"pricePerM":1400},"max_output_tokens":8192,"rpm":50,"tmp":"$undefined","labels":[{"key":"specification","value":"< 10B"}],"tags":[],"series":"Others","quota_items":[{"tier":"T1","rpm":50,"tpm":50000000},{"tier":"T2","rpm":100,"tpm":50000000},{"tier":"T3","rpm":1000,"tpm":50000000},{"tier":"T4","rpm":3000,"tpm":50000000},{"tier":"T5","rpm":6000,"tpm":50000000}],"isCompletion":true,"isDiscount":false,"isNew":false,"isHot":false,"discount":0},{"type":"Chat","id":"sao10k/l3-70b-euryale-v2.1","name":"l3-70b-euryale-v2.1","displayName":"L3 70B Euryale V2.1\t","description":"The uncensored llama3 model is a powerhouse of creativity, excelling in both roleplay and story writing. It offers a liberating experience during roleplays, free from any restrictions. This model stands out for its immense creativity, boasting a vast array of unique ideas and plots, truly a treasure trove for those seeking originality. Its unrestricted nature during roleplays allows for the full breadth of imagination to unfold, akin to an enhanced, big-brained version of Stheno. Perfect for creative minds seeking a boundless platform for their imaginative expressions, the uncensored llama3 model is an ideal choice","context_size":8192,"input_token_price_per_m":14800,"output_token_price_per_m":14800,"input_token_price_per_m_toString":"1.48","output_token_price_per_m_toString":"1.48","features":[],"link":"/models/llm/sao10k-l3-70b-euryale-v2.1","infos":["$$1.48/1.48 in/out MTokens","8192 Context"],"status":1,"input_pricing":{"originPricePerM":14800,"pricePerM":14800},"output_pricing":{"originPricePerM":14800,"pricePerM":14800},"max_output_tokens":8192,"rpm":20,"tmp":"$undefined","labels":[{"key":"specification","value":"50～100B"}],"tags":[],"series":"Sao10K","quota_items":[{"tier":"T1","rpm":20,"tpm":50000000},{"tier":"T2","rpm":100,"tpm":50000000},{"tier":"T3","rpm":1000,"tpm":50000000},{"tier":"T4","rpm":3000,"tpm":50000000},{"tier":"T5","rpm":6000,"tpm":50000000}],"isCompletion":true,"isDiscount":false,"isNew":false,"isHot":false,"discount":0},{"type":"Chat","id":"cognitivecomputations/dolphin-mixtral-8x22b","name":"dolphin-mixtral-8x22b","displayName":"Dolphin Mixtral 8x22B","description":"Dolphin 2.9 is designed for instruction following, conversational, and coding. This model is a finetune of Mixtral 8x22B Instruct. It features a 64k context length and was fine-tuned with a 16k sequence length using ChatML templates.The model is uncensored and is stripped of alignment and bias. It requires an external alignment layer for ethical use.","context_size":16000,"input_token_price_per_m":9000,"output_token_price_per_m":9000,"input_token_price_per_m_toString":"0.9","output_token_price_per_m_toString":"0.9","features":["structured-outputs"],"link":"/models/llm/cognitivecomputations-dolphin-mixtral-8x22b","infos":["$$0.9/0.9 in/out MTokens","16000 Context"],"status":1,"input_pricing":{"originPricePerM":9000,"pricePerM":9000},"output_pricing":{"originPricePerM":9000,"pricePerM":9000},"max_output_tokens":8192,"rpm":10,"tmp":"$undefined","labels":[{"key":"specification","value":"10～50B"}],"tags":[],"series":"Others","quota_items":[{"tier":"T1","rpm":10,"tpm":50000000},{"tier":"T2","rpm":100,"tpm":50000000},{"tier":"T3","rpm":1000,"tpm":50000000},{"tier":"T4","rpm":3000,"tpm":50000000},{"tier":"T5","rpm":6000,"tpm":50000000}],"isCompletion":true,"isDiscount":false,"isNew":false,"isHot":false,"discount":0},{"type":"Chat","id":"sophosympatheia/midnight-rose-70b","name":"midnight-rose-70b","displayName":"Midnight Rose 70B","description":"A merge with a complex family tree, this model was crafted for roleplaying and storytelling. Midnight Rose is a successor to Rogue Rose and Aurora Nights and improves upon them both. It wants to produce lengthy output by default and is the best creative writing merge produced so far by sophosympatheia.","context_size":4096,"input_token_price_per_m":8000,"output_token_price_per_m":8000,"input_token_price_per_m_toString":"0.8","output_token_price_per_m_toString":"0.8","features":[],"link":"/models/llm/sophosympatheia-midnight-rose-70b","infos":["$$0.8/0.8 in/out MTokens","4096 Context"],"status":1,"input_pricing":{"originPricePerM":8000,"pricePerM":8000},"output_pricing":{"originPricePerM":8000,"pricePerM":8000},"max_output_tokens":2048,"rpm":20,"tmp":"$undefined","labels":[{"key":"specification","value":"50～100B"}],"tags":[],"series":"Others","quota_items":[{"tier":"T1","rpm":20,"tpm":50000000},{"tier":"T2","rpm":100,"tpm":50000000},{"tier":"T3","rpm":1000,"tpm":50000000},{"tier":"T4","rpm":3000,"tpm":50000000},{"tier":"T5","rpm":6000,"tpm":50000000}],"isCompletion":true,"isDiscount":false,"isNew":false,"isHot":false,"discount":0},{"type":"Chat","id":"sao10k/l3-8b-lunaris","name":"l3-8b-lunaris","displayName":"Sao10k L3 8B Lunaris\t","description":"A generalist / roleplaying model merge based on Llama 3.","context_size":8192,"input_token_price_per_m":500,"output_token_price_per_m":500,"input_token_price_per_m_toString":"0.05","output_token_price_per_m_toString":"0.05","features":["structured-outputs"],"link":"/models/llm/sao10k-l3-8b-lunaris","infos":["$$0.05/0.05 in/out MTokens","8192 Context"],"status":1,"input_pricing":{"originPricePerM":500,"pricePerM":500},"output_pricing":{"originPricePerM":500,"pricePerM":500},"max_output_tokens":32000,"rpm":50,"tmp":"$undefined","labels":[{"key":"specification","value":"< 10B"}],"tags":[],"series":"Sao10K","quota_items":[{"tier":"T1","rpm":50,"tpm":50000000},{"tier":"T2","rpm":100,"tpm":50000000},{"tier":"T3","rpm":1000,"tpm":50000000},{"tier":"T4","rpm":3000,"tpm":50000000},{"tier":"T5","rpm":6000,"tpm":50000000}],"isCompletion":true,"isDiscount":false,"isNew":false,"isHot":false,"discount":0},{"type":"Chat","id":"baidu/ernie-4.5-vl-28b-a3b","name":"ernie-4.5-vl-28b-a3b","displayName":"ERNIE 4.5 VL 28B A3B","description":"$37","context_size":30000,"input_token_price_per_m":0,"output_token_price_per_m":0,"input_token_price_per_m_toString":"0","output_token_price_per_m_toString":"0","features":["function-calling"],"link":"/models/llm/baidu-ernie-4.5-vl-28b-a3b","infos":["$$0/0 in/out MTokens","30000 Context"],"status":1,"input_pricing":{"originPricePerM":0,"pricePerM":0},"output_pricing":{"originPricePerM":0,"pricePerM":0},"max_output_tokens":8000,"rpm":300,"tmp":"$undefined","labels":[{"key":"specification","value":"MoE"},{"key":"display","value":"FREE"}],"tags":[],"series":"BAIDU","quota_items":[{"tier":"T1","rpm":300,"tpm":50000000},{"tier":"T2","rpm":300,"tpm":50000000},{"tier":"T3","rpm":300,"tpm":50000000},{"tier":"T4","rpm":300,"tpm":50000000},{"tier":"T5","rpm":300,"tpm":50000000}],"isCompletion":false,"isDiscount":false,"isNew":false,"isHot":false,"discount":0},{"type":"Chat","id":"baidu/ernie-4.5-21B-a3b","name":"ernie-4.5-21B-a3b","displayName":"ERNIE 4.5 21B A3B","description":"$38","context_size":120000,"input_token_price_per_m":0,"output_token_price_per_m":0,"input_token_price_per_m_toString":"0","output_token_price_per_m_toString":"0","features":["function-calling"],"link":"/models/llm/baidu-ernie-4.5-21B-a3b","infos":["$$0/0 in/out MTokens","120000 Context"],"status":1,"input_pricing":{"originPricePerM":0,"pricePerM":0},"output_pricing":{"originPricePerM":0,"pricePerM":0},"max_output_tokens":8000,"rpm":0,"tmp":"$undefined","labels":[{"key":"specification","value":"MoE"},{"key":"display","value":"FREE"}],"tags":[],"series":"BAIDU","quota_items":[],"isCompletion":true,"isDiscount":false,"isNew":false,"isHot":false,"discount":0},{"type":"Chat","id":"baidu/ernie-4.5-0.3b","name":"ernie-4.5-0.3b","displayName":"ERNIE 4.5 0.3B","description":"$39","context_size":120000,"input_token_price_per_m":0,"output_token_price_per_m":0,"input_token_price_per_m_toString":"0","output_token_price_per_m_toString":"0","features":["vision","function-calling"],"link":"/models/llm/baidu-ernie-4.5-0.3b","infos":["$$0/0 in/out MTokens","120000 Context"],"status":1,"input_pricing":{"originPricePerM":0,"pricePerM":0},"output_pricing":{"originPricePerM":0,"pricePerM":0},"max_output_tokens":8000,"rpm":0,"tmp":"$undefined","labels":[{"key":"specification","value":"MoE"},{"key":"display","value":"NEW"},{"key":"footer","value":"FREE"}],"tags":[],"series":"BAIDU","quota_items":[],"isCompletion":true,"isDiscount":false,"isNew":true,"isHot":false,"discount":0},{"type":"Chat","id":"google/gemma-3-1b-it","name":"gemma-3-1b-it","displayName":"Gemma3 1B IT","description":"Gemma is a family of lightweight, state-of-the-art open models from Google, built from the same research and technology used to create the Gemini models. Gemma 3 models are multimodal, handling text and image input and generating text output, with open weights for both pre-trained variants and instruction-tuned variants. Gemma 3 has a large, 128K context window, multilingual support in over 140 languages, and is available in more sizes than previous versions. Gemma 3 models are well-suited for a variety of text generation and image understanding tasks, including question answering, summarization, and reasoning. Their relatively small size makes it possible to deploy them in environments with limited resources such as laptops, desktops or your own cloud infrastructure, democratizing access to state of the art AI models and helping foster innovation for everyone.","context_size":32768,"input_token_price_per_m":0,"output_token_price_per_m":0,"input_token_price_per_m_toString":"0","output_token_price_per_m_toString":"0","features":[],"link":"/models/llm/google-gemma-3-1b-it","infos":["$$0/0 in/out MTokens","32768 Context"],"status":1,"input_pricing":{"originPricePerM":0,"pricePerM":0},"output_pricing":{"originPricePerM":0,"pricePerM":0},"max_output_tokens":131072,"rpm":300,"tmp":"$undefined","labels":[{"key":"display","value":"NEW"}],"tags":[],"series":"Gemma","quota_items":[{"tier":"T1","rpm":300,"tpm":50000000},{"tier":"T2","rpm":300,"tpm":50000000},{"tier":"T3","rpm":300,"tpm":50000000},{"tier":"T4","rpm":300,"tpm":50000000},{"tier":"T5","rpm":300,"tpm":50000000}],"isCompletion":false,"isDiscount":false,"isNew":true,"isHot":false,"discount":0},{"type":"Chat","id":"qwen/qwen3-8b-fp8","name":"qwen3-8b-fp8","displayName":"Qwen3 8B","description":"Achieves effective integration of reasoning and non-reasoning modes, allowing seamless mode switching during conversations. Its reasoning capability reaches state-of-the-art (SOTA) performance among models of the same scale, and its general capabilities significantly outperform those of Qwen2.5-7B.","context_size":128000,"input_token_price_per_m":350,"output_token_price_per_m":1380,"input_token_price_per_m_toString":"0.035","output_token_price_per_m_toString":"0.138","features":[],"link":"/models/llm/qwen-qwen3-8b-fp8","infos":["$$0.035/0.138 in/out MTokens","128000 Context"],"status":1,"input_pricing":{"originPricePerM":350,"pricePerM":350},"output_pricing":{"originPricePerM":1380,"pricePerM":1380},"max_output_tokens":20000,"rpm":50,"tmp":"$undefined","labels":[{"key":"specification","value":"< 10B"},{"key":"display","value":"NEW"},{"key":"display","value":"Dedicated"}],"tags":[],"series":"Qwen","quota_items":[{"tier":"T1","rpm":50,"tpm":50000000},{"tier":"T2","rpm":100,"tpm":50000000},{"tier":"T3","rpm":1000,"tpm":50000000},{"tier":"T4","rpm":3000,"tpm":50000000},{"tier":"T5","rpm":6000,"tpm":50000000}],"isCompletion":true,"isDiscount":false,"isNew":true,"isHot":false,"discount":0},{"type":"Chat","id":"qwen/qwen3-4b-fp8","name":"qwen3-4b-fp8","displayName":"Qwen3 4B","description":"Achieves effective integration of reasoning and non-reasoning modes, allowing seamless switching during conversations. The model delivers state-of-the-art (SOTA) reasoning performance among models of the same scale, with significantly enhanced human preference alignment. Notable improvements are seen in creative writing, role-playing, multi-turn dialogue, and instruction following, leading to a clearly improved user experience.","context_size":128000,"input_token_price_per_m":0,"output_token_price_per_m":0,"input_token_price_per_m_toString":"0","output_token_price_per_m_toString":"0","features":[],"link":"/models/llm/qwen-qwen3-4b-fp8","infos":["$$0/0 in/out MTokens","128000 Context"],"status":1,"input_pricing":{"originPricePerM":0,"pricePerM":0},"output_pricing":{"originPricePerM":0,"pricePerM":0},"max_output_tokens":20000,"rpm":1000,"tmp":"$undefined","labels":[{"key":"specification","value":"< 10B"},{"key":"display","value":"Free"},{"key":"display","value":"Dedicated"}],"tags":[],"series":"Qwen","quota_items":[{"tier":"T1","rpm":1000,"tpm":50000000},{"tier":"T2","rpm":1000,"tpm":50000000},{"tier":"T3","rpm":1000,"tpm":50000000},{"tier":"T4","rpm":3000,"tpm":50000000},{"tier":"T5","rpm":6000,"tpm":50000000}],"isCompletion":true,"isDiscount":false,"isNew":false,"isHot":false,"discount":0},{"type":"Chat","id":"thudm/glm-4-32b-0414","name":"glm-4-32b-0414","displayName":"GLM-4-32B-0414","description":"$3a","context_size":32000,"input_token_price_per_m":2400,"output_token_price_per_m":2400,"input_token_price_per_m_toString":"0.24","output_token_price_per_m_toString":"0.24","features":["function-calling","structured-outputs"],"link":"/models/llm/thudm-glm-4-32b-0414","infos":["$$0.24/0.24 in/out MTokens","32000 Context"],"status":1,"input_pricing":{"originPricePerM":2400,"pricePerM":2400},"output_pricing":{"originPricePerM":2400,"pricePerM":2400},"max_output_tokens":32000,"rpm":20,"tmp":"$undefined","labels":[{"key":"specification","value":"10～50B"},{"key":"display","value":"NEW"}],"tags":[],"series":"THUDM","quota_items":[{"tier":"T1","rpm":20,"tpm":50000000},{"tier":"T2","rpm":100,"tpm":50000000},{"tier":"T3","rpm":1000,"tpm":50000000},{"tier":"T4","rpm":3000,"tpm":50000000},{"tier":"T5","rpm":6000,"tpm":50000000}],"isCompletion":true,"isDiscount":false,"isNew":true,"isHot":false,"discount":0},{"type":"Chat","id":"qwen/qwen2.5-7b-instruct","name":"qwen2.5-7b-instruct","displayName":"Qwen2.5 7B Instruct","description":"Qwen2.5 is the latest series of Qwen large language models. For Qwen2.5, we release a number of base language models and instruction-tuned language models ranging from 0.5 to 72 billion parameters. Qwen2.5 brings the following improvements upon Qwen2:\n- Significantly more knowledge and has greatly improved capabilities in coding and mathematics, thanks to our specialized expert models in these domains.\n- Significant improvements in instruction following, generating long texts (over 8K tokens), understanding structured data (e.g, tables), and generating structured outputs especially JSON. More resilient to the diversity of system prompts, enhancing role-play implementation and condition-setting for chatbots.\n- Long-context Support up to 128K tokens and can generate up to 8K tokens.\n- Multilingual support for over 29 languages, including Chinese, English, French, Spanish, Portuguese, German, Italian, Russian, Japanese, Korean, Vietnamese, Thai, Arabic, and more.","context_size":32000,"input_token_price_per_m":0,"output_token_price_per_m":0,"input_token_price_per_m_toString":"0","output_token_price_per_m_toString":"0","features":["function-calling","structured-outputs"],"link":"/models/llm/qwen-qwen2.5-7b-instruct","infos":["$$0/0 in/out MTokens","32000 Context"],"status":1,"input_pricing":{"originPricePerM":700,"pricePerM":0},"output_pricing":{"originPricePerM":700,"pricePerM":0},"max_output_tokens":32000,"rpm":1500,"tmp":"$undefined","labels":[{"key":"specification","value":"< 10B"},{"key":"display","value":"Free"}],"tags":[],"series":"Qwen","quota_items":[{"tier":"T1","rpm":1500,"tpm":50000000},{"tier":"T2","rpm":1500,"tpm":50000000},{"tier":"T3","rpm":1500,"tpm":50000000},{"tier":"T4","rpm":1500,"tpm":50000000},{"tier":"T5","rpm":1500,"tpm":50000000}],"isCompletion":true,"isDiscount":false,"isNew":false,"isHot":false,"discount":0},{"type":"Chat","id":"meta-llama/llama-3.2-1b-instruct","name":"llama-3.2-1b-instruct","displayName":"Llama 3.2 1B Instruct\t","description":"The Meta Llama 3.2 collection of multilingual large language models (LLMs) is a collection of pretrained and instruction-tuned generative models in 1B and 3B sizes (text in/text out).","context_size":131000,"input_token_price_per_m":0,"output_token_price_per_m":0,"input_token_price_per_m_toString":"0","output_token_price_per_m_toString":"0","features":[],"link":"/models/llm/meta-llama-llama-3.2-1b-instruct","infos":["$$0/0 in/out MTokens","131000 Context"],"status":1,"input_pricing":{"originPricePerM":200,"pricePerM":0},"output_pricing":{"originPricePerM":200,"pricePerM":0},"max_output_tokens":32000,"rpm":1500,"tmp":"$undefined","labels":[{"key":"specification","value":"< 10B"},{"key":"display","value":"Free"}],"tags":[],"series":"Llama","quota_items":[{"tier":"T1","rpm":1500,"tpm":50000000},{"tier":"T2","rpm":1500,"tpm":50000000},{"tier":"T3","rpm":1500,"tpm":50000000},{"tier":"T4","rpm":1500,"tpm":50000000},{"tier":"T5","rpm":1500,"tpm":50000000}],"isCompletion":true,"isDiscount":false,"isNew":false,"isHot":false,"discount":0},{"type":"Chat","id":"meta-llama/llama-3.2-3b-instruct","name":"llama-3.2-3b-instruct","displayName":"Llama 3.2 3B Instruct","description":"The Meta Llama 3.2 collection of multilingual large language models (LLMs) is a collection of pretrained and instruction-tuned generative models in 1B and 3B sizes (text in/text out)","context_size":32768,"input_token_price_per_m":300,"output_token_price_per_m":500,"input_token_price_per_m_toString":"0.03","output_token_price_per_m_toString":"0.05","features":["function-calling"],"link":"/models/llm/meta-llama-llama-3.2-3b-instruct","infos":["$$0.03/0.05 in/out MTokens","32768 Context"],"status":1,"input_pricing":{"originPricePerM":300,"pricePerM":300},"output_pricing":{"originPricePerM":500,"pricePerM":500},"max_output_tokens":32000,"rpm":50,"tmp":"$undefined","labels":[{"key":"specification","value":"< 10B"}],"tags":[],"series":"Llama","quota_items":[{"tier":"T1","rpm":50,"tpm":50000000},{"tier":"T2","rpm":100,"tpm":50000000},{"tier":"T3","rpm":1000,"tpm":50000000},{"tier":"T4","rpm":3000,"tpm":50000000},{"tier":"T5","rpm":6000,"tpm":50000000}],"isCompletion":true,"isDiscount":false,"isNew":false,"isHot":false,"discount":0},{"type":"Chat","id":"sao10k/l31-70b-euryale-v2.2","name":"l31-70b-euryale-v2.2","displayName":"L31 70B Euryale V2.2","description":"Euryale L3.1 70B v2.2 is a model focused on creative roleplay from Sao10k. It is the successor of Euryale L3 70B v2.1.","context_size":8192,"input_token_price_per_m":14800,"output_token_price_per_m":14800,"input_token_price_per_m_toString":"1.48","output_token_price_per_m_toString":"1.48","features":[],"link":"/models/llm/sao10k-l31-70b-euryale-v2.2","infos":["$$1.48/1.48 in/out MTokens","8192 Context"],"status":1,"input_pricing":{"originPricePerM":14800,"pricePerM":14800},"output_pricing":{"originPricePerM":14800,"pricePerM":14800},"max_output_tokens":8192,"rpm":20,"tmp":"$undefined","labels":[{"key":"specification","value":"50～100B"}],"tags":[],"series":"Sao10K","quota_items":[{"tier":"T1","rpm":20,"tpm":50000000},{"tier":"T2","rpm":100,"tpm":50000000},{"tier":"T3","rpm":1000,"tpm":50000000},{"tier":"T4","rpm":3000,"tpm":50000000},{"tier":"T5","rpm":6000,"tpm":50000000}],"isCompletion":true,"isDiscount":false,"isNew":false,"isHot":false,"discount":0}],"modelReadMe":[{"readme":"$3b","modelId":"meta-llama/llama-3-8b-instruct","title":"Llama 3 8B Instruct: Novita AI's Integration Guide for Advanced AI Solutions","description":"Discover Novita AI's Llama 3 8B Instruct model. Learn to generate responses, stream data, and integrate with Python, HTTP, and JavaScript via our API. Enhance your AI applications with our comprehensive guide.","createdAt":"2024-11-25T05:41:30.244Z","updatedAt":"2024-11-26T10:38:57.865Z","publishedAt":"2024-11-26T09:16:18.351Z"},{"readme":"$3c","modelId":"meta-llama/llama-3.1-8b-instruct","title":"Llama 3.1 8B Instruct: Novita AI's API Integration Guide","description":"Discover how to use Novita AI's Llama 3.1 8B Instruct model. Learn to generate responses, stream data, and integrate with Python, HTTP, and JavaScript via our API.","createdAt":"2024-11-26T09:17:02.236Z","updatedAt":"2024-11-26T10:36:40.700Z","publishedAt":"2024-11-26T09:37:21.621Z"},{"readme":"$3d","modelId":"meta-llama/llama-3-70b-instruct","title":"Llama 3 70B Instruct: Novita AI's API Integration Guide","description":"Discover Novita AI's Llama 3 70B Instruct model. Learn to generate responses, stream data, and integrate with Python, HTTP, and JavaScript via our API.","createdAt":"2024-11-26T09:24:10.033Z","updatedAt":"2024-11-26T10:37:36.875Z","publishedAt":"2024-11-26T09:24:12.755Z"},{"readme":"$3e","modelId":"meta-llama/llama-3.1-70b-instruct","title":"Llama 3.1 70B Instruct: Novita AI's API Integration Guide","description":"Discover how to use Novita AI's Llama 3.1 70B Instruct model. Learn to generate responses, stream data, and integrate with Python, HTTP, and JavaScript via our API.","createdAt":"2024-11-26T09:40:35.846Z","updatedAt":"2024-11-27T09:44:35.949Z","publishedAt":"2024-11-26T09:40:38.322Z"},{"readme":"$3f","modelId":"meta-llama/llama-3.1-405b-instruct","title":"Llama 3.1 405B Instruct: Novita AI's Integration Guide","description":"Discover how to use Novita AI's Llama 3.1 405B Instruct model. Learn to generate responses, stream data, and integrate with Python, HTTP, and JavaScript via our API.","createdAt":"2024-11-27T03:23:50.346Z","updatedAt":"2024-11-27T09:13:08.534Z","publishedAt":"2024-11-27T03:23:52.545Z"},{"readme":"$40","modelId":"gryphe/mythomax-l2-13b","title":"MythoMax L2 13B: Advanced AI Model | Novita AI","description":"Discover MythoMax L2 13B, Novita AI's robust model merging MythoLogic-L2's understanding & Huginn's writing. Access via HTTP, Python, & JavaScript","createdAt":"2024-11-27T03:26:42.128Z","updatedAt":"2024-11-27T09:45:13.982Z","publishedAt":"2024-11-27T03:26:44.386Z"},{"readme":"$41","modelId":"google/gemma-2-9b-it","title":"Gemma 2 9B: Google's Lightweight AI Model for Text Generation","description":"Discover Gemma 2 9B, Google's state-of-the-art AI model for text generation tasks like Q&A, summarization, and reasoning. Ideal for limited resource environments.","createdAt":"2024-11-27T03:34:34.107Z","updatedAt":"2024-11-27T09:40:27.185Z","publishedAt":"2024-11-27T03:34:36.570Z"},{"readme":"$42","modelId":"mistralai/mistral-nemo","title":"Mistral Nemo: Advanced LLM by Mistral AI & NVIDIA","description":"Discover Mistral Nemo, a powerful LLM fine-tuned by Mistral AI and NVIDIA. Outperforms similar models with multilingual and code data training.","createdAt":"2024-11-27T03:43:48.224Z","updatedAt":"2024-11-29T11:36:07.752Z","publishedAt":"2024-11-27T03:43:50.440Z"},{"readme":"$43","modelId":"microsoft/wizardlm-2-8x22b","title":"WizardLM-2 8x22B: Advanced AI Model by Microsoft","description":"Discover WizardLM-2 8x22B, a powerful AI model by Microsoft with 141B parameters. Explore its multilingual capabilities, competitive performance, and API integration.","createdAt":"2024-11-27T03:48:57.715Z","updatedAt":"2024-11-29T11:51:21.492Z","publishedAt":"2024-11-27T03:48:59.656Z"},{"readme":"$44","modelId":"mistralai/mistral-7b-instruct","title":"Mistral 7B Instruct: Advanced LLM by Mistral AI","description":"Discover Mistral 7B Instruct, an advanced LLM by Mistral AI with extended vocabulary and function calling support. Learn how to use it via HTTP/cURL, Python, and JavaScript.","createdAt":"2024-11-27T05:57:14.546Z","updatedAt":"2024-11-29T11:37:33.643Z","publishedAt":"2024-11-27T05:57:16.650Z"},{"readme":"$45","modelId":"microsoft/wizardlm-2-7b","title":"WizardLM-2 7B: Advanced AI Model by Microsoft AI","description":"Discover WizardLM-2 7B, a multilingual AI model with 7B parameters developed by Microsoft AI. Explore its capabilities, performance, and how to use it with HTTP/cURL, Python, and JavaScript.","createdAt":"2024-11-27T07:13:11.301Z","updatedAt":"2024-11-29T11:53:03.725Z","publishedAt":"2024-11-27T07:13:13.386Z"},{"readme":"$46","modelId":"openchat/openchat-7b","title":"OpenChat 7B Model: Advanced AI by Novita AI","description":"Discover OpenChat 7B, an innovative open-source language model achieving comparable results to ChatGPT. Fine-tuned with C-RLFT, it excels in mixed-quality data learning.","createdAt":"2024-11-27T07:27:05.576Z","updatedAt":"2024-12-10T05:59:16.861Z","publishedAt":"2024-11-27T07:27:07.274Z"},{"readme":"$47","modelId":"nousresearch/hermes-2-pro-llama-3-8b","title":"Hermes 2 Pro Llama 3 8B: Enhanced AI for Function Calling & JSON Outputs","description":"Explore Hermes 2 Pro Llama 3 8B, an advanced AI model with improved function calling, JSON outputs, and general task capabilities. Developed by Nous Research, it offers structured prompt formats for better conversations.","createdAt":"2024-11-27T07:36:53.137Z","updatedAt":"2024-11-29T11:34:35.303Z","publishedAt":"2024-11-27T07:36:54.856Z"},{"readme":"$48","modelId":"sao10k/l3-70b-euryale-v2.1","title":"L3 70B Euryale v2.1 Model: Enhanced AI for Better Prompt Adherence","description":"Discover the L3 70B Euryale v2.1 model, designed for improved prompt adherence, spatial awareness, and creativity. Ideal for unique formatting and roleplays.","createdAt":"2024-11-27T07:38:26.308Z","updatedAt":"2025-04-28T07:36:59.639Z","publishedAt":"2024-11-27T07:38:27.754Z"},{"readme":"$49","modelId":"cognitivecomputations/dolphin-mixtral-8x22b","title":"Dolphin Mixtral 8x22B Model | Novita AI","description":"Discover Dolphin Mixtral 8x22B, an advanced AI model with 64k context and 16k sequence length. Trained with SystemChat 2.0 for enhanced compliance.","createdAt":"2024-11-27T07:53:48.895Z","updatedAt":"2024-11-28T02:55:40.985Z","publishedAt":"2024-11-27T07:53:50.321Z"},{"readme":"$4a","modelId":"jondurbin/airoboros-l2-70b","title":"Airoboros L2 70B Model: Efficient AI with AWQ Quantization | Novita AI","description":"Discover the Airoboros L2 70B model with AWQ quantization for fast, efficient AI inference. Use our API in HTTP/cURL, Python, or JavaScript. Optimize GPU usage and reduce costs.","createdAt":"2024-11-27T08:25:36.945Z","updatedAt":"2024-11-27T09:24:56.268Z","publishedAt":"2024-11-27T08:25:38.478Z"},{"readme":"$4b","modelId":"lzlv_70b","title":"Lzlv 70b Model: Fast, Efficient AI with AWQ Quantization | Novita AI","description":"Discover the lzlv 70b model with AWQ quantization for blazing-fast, high-quality AI inference. Supported by Text Generation Webui, vLLM, Hugging Face TGI, and AutoAWQ.","createdAt":"2024-11-27T08:50:05.909Z","updatedAt":"2025-01-22T03:24:01.462Z","publishedAt":"2024-11-27T08:50:08.057Z"},{"readme":"$4c","modelId":"nousresearch/nous-hermes-llama2-13b","title":"Nous Hermes Llama2 13B Model: Advanced AI Solutions","description":"Discover Nous Hermes Llama2 13B, a state-of-the-art language model fine-tuned on over 300,000 instructions. Known for long responses, lower hallucination rates, and no OpenAI censorship.","createdAt":"2024-11-27T09:43:53.700Z","updatedAt":"2024-11-29T11:33:13.358Z","publishedAt":"2024-11-27T09:43:55.318Z"},{"readme":"$4d","modelId":"teknium/openhermes-2.5-mistral-7b","title":"OpenHermes 2.5 Mistral 7B | Novita AI","description":"Discover OpenHermes 2.5 Mistral 7B, a state-of-the-art AI model fine-tuned for superior communication and code instruction benchmarks. Explore its features and API integration.","createdAt":"2024-11-27T09:48:20.787Z","updatedAt":"2024-11-29T12:06:46.302Z","publishedAt":"2024-11-27T09:48:23.191Z"},{"readme":"$4e","modelId":"qwen/qwen-2.5-72b-instruct","title":"Qwen2.5-72B-Instruct: Advanced AI Model | Novita AI","description":"Discover Qwen2.5-72B-Instruct, a powerful AI model with improved coding, math, and multilingual support. Ideal for long-text generation and structured data understanding.","createdAt":"2024-11-27T10:14:55.051Z","updatedAt":"2024-11-29T12:02:13.336Z","publishedAt":"2024-11-27T10:14:57.216Z"},{"readme":"$4f","modelId":"sao10k/l31-70b-euryale-v2.2","title":"Llama-3.1-70B-Euryale-v2.2 | Novita AI","description":"Discover Llama-3.1-70B-Euryale-v2.2, finetuned for conversational and creative writing. Learn about its training, datasets, and unique features for improved AI performance","createdAt":"2024-11-27T10:16:32.544Z","updatedAt":"2024-11-29T12:05:34.831Z","publishedAt":"2024-11-27T10:16:34.251Z"},{"readme":"$50","modelId":"qwen/qwen-2-7b-instruct","title":"Qwen2-7B-Instruct: Advanced AI Model | Novita AI","description":"Discover Qwen2-7B-Instruct, a cutting-edge AI model with up to 131,072 token context length, surpassing open-source and proprietary models in language understanding, generation, and more.","createdAt":"2024-11-27T10:21:43.799Z","updatedAt":"2024-11-29T12:00:56.901Z","publishedAt":"2024-11-27T10:21:45.950Z"},{"readme":"$51","modelId":"qwen/qwen-2-72b-instruct","title":"Qwen2-72B Instruct: Advanced AI Language Model | Novita AI","description":"Discover Qwen2-72B Instruct, advanced language model with 72 billion parameters. Excels in language understanding, coding, and multilingual tasks. Explore now!","createdAt":"2024-11-27T10:25:29.307Z","updatedAt":"2024-11-29T11:59:23.993Z","publishedAt":"2024-11-28T03:04:56.490Z"},{"readme":"$52","modelId":"meta-llama/llama-3.2-1b-instruct","title":"Meta Llama 3.2 1B Instruct: Multilingual AI for Dialogue & Summarization","description":"Explore Meta Llama 3.2 1B Instruct, a powerful multilingual AI model for dialogue and summarization, excelling in industry benchmarks.","createdAt":"2024-11-28T06:14:21.744Z","updatedAt":"2024-11-29T11:56:45.092Z","publishedAt":"2024-11-28T06:14:25.559Z"},{"readme":"$53","modelId":"meta-llama/llama-3.2-11b-vision-instruct","title":"Llama 3.2 11B Vision Instruct: Advanced Image Reasoning AI","description":"Explore Llama 3.2 11B Vision Instruct by Meta, a multimodal AI excelling in visual recognition, image reasoning, and captioning. Outperforms industry benchmarks.","createdAt":"2024-11-28T06:56:47.952Z","updatedAt":"2024-11-29T11:57:57.281Z","publishedAt":"2024-11-28T06:56:50.028Z"},{"readme":"$54","modelId":"meta-llama/llama-3.2-3b-instruct","title":"Meta Llama 3.2 3B: Multilingual AI Model | Novita AI","description":"Explore Meta Llama 3.2 3B Instruct, a powerful multilingual AI model for dialogue, summarization, and more. Learn about its features and API integration.","createdAt":"2024-11-29T02:57:03.421Z","updatedAt":"2024-11-29T11:55:17.241Z","publishedAt":"2024-11-29T02:57:05.054Z"},{"readme":"$55","modelId":"qwen/qwq-32b-preview","title":"QwQ 32B Preview: Advanced AI Reasoning Model by Qwen Team","description":"Discover QwQ 32B Preview, an experimental AI model by Qwen Team, designed for enhanced reasoning. Learn about its capabilities, limitations, and how to use it via HTTP/cURL, Python, and JavaScript.","createdAt":"2024-12-09T07:46:49.301Z","updatedAt":"2024-12-09T08:48:57.376Z","publishedAt":"2024-12-09T07:46:51.683Z"},{"readme":"$56","modelId":"meta-llama/llama-3.3-70b-instruct","title":"Meta Llama 3.3 70B Instruct: Multilingual AI Model for Dialogue","description":"Discover Meta's Llama 3.3 70B Instruct, a multilingual AI model optimized for dialogue. Outperforms competitors on industry benchmarks. Learn more!","createdAt":"2024-12-09T08:01:45.486Z","updatedAt":"2024-12-09T08:47:25.300Z","publishedAt":"2024-12-09T08:01:48.670Z"},{"readme":"$57","modelId":"sao10k/l3-8b-lunaris","title":"Llama 3 8B Lunaris: Enhanced AI Model for Roleplaying","description":"Discover Llama 3 8B Lunaris, an advanced AI model for roleplaying & general use. Improved creativity & logic over Stheno v3.2. Explore settings & more!","createdAt":"2024-12-09T08:06:49.880Z","updatedAt":"2024-12-09T09:58:37.994Z","publishedAt":"2024-12-09T08:06:51.463Z"},{"readme":"$58","modelId":"deepseek/deepseek_v3","title":"DeepSeek-V3: Advanced 671B MoE Model with FP8 Training | Novita AI","description":"Open-source language model with 37B activated parameters, featuring efficient MLA architecture and FP8 training. Outperforms existing models on math and code tasks.","createdAt":"2025-01-22T03:15:45.088Z","updatedAt":"2025-01-22T11:15:11.658Z","publishedAt":"2025-01-22T03:23:24.283Z"},{"readme":"$59","modelId":"deepseek/deepseek-r1","title":"DeepSeek-R1: Advanced Reasoning LLM with RL-Based Training | Novita AI","description":"Explore DeepSeek-R1, a state-of-the-art language model trained via reinforcement learning, offering superior reasoning capabilities and open-source distilled versions.","createdAt":"2025-02-06T03:20:17.320Z","updatedAt":"2025-02-06T03:45:38.497Z","publishedAt":"2025-02-06T03:33:06.916Z"},{"readme":"$5a","modelId":"deepseek/deepseek-r1-distill-llama-70b","title":"DeepSeek R1 Distill Llama 70B Demo","description":"DeepSeek R1 Distill Llama 70B: A 70B-parameter LLM fine-tuned for enhanced reasoning, achieving SOTA performance in math, code & general tasks.","createdAt":"2025-02-06T09:11:55.591Z","updatedAt":"2025-02-06T09:28:27.767Z","publishedAt":"2025-02-06T09:16:40.498Z"},{"readme":"$5b","modelId":"deepseek/deepseek-r1-distill-qwen-32b","title":"DeepSeek R1 Distill Qwen 32B Demo","description":"DeepSeek R1 Distill Qwen 32B is a distilled model with state-of-the-art reasoning, surpassing benchmarks for dense models.","createdAt":"2025-02-06T09:26:34.793Z","updatedAt":"2025-02-06T09:30:02.563Z","publishedAt":"2025-02-06T09:30:02.560Z"},{"readme":"$5c","modelId":"deepseek/deepseek-r1-distill-qwen-14b","title":"Deepseek R1 Distill Qwen 14B","description":"DeepSeek R1 Distill Qwen 14B is a distilled large language model derived from Qwen 14B, fine-tuned using outputs from DeepSeek R1.","createdAt":"2025-02-06T09:33:59.266Z","updatedAt":"2025-02-06T09:34:01.307Z","publishedAt":"2025-02-06T09:34:01.304Z"},{"readme":"$5d","modelId":"deepseek/deepseek-r1-distill-llama-8b","title":"DeepSeek R1 Distill Llama 8B","description":"DeepSeek R1 Distill Llama 8B is a distilled large language model based on Llama 8B, using outputs from DeepSeek R1.","createdAt":"2025-02-06T09:36:12.287Z","updatedAt":"2025-04-07T03:07:21.345Z","publishedAt":"2025-04-07T03:07:21.340Z"},{"readme":"$5e","modelId":"deepseek/deepseek-r1-distill-llama-8b","title":"DeepSeek R1 Distill Llama 8B Demo","description":"DeepSeek R1 Distill Llama 8B is a powerful language model that combines Llama's architecture with DeepSeek R1's reasoning capabilities through distillation, optimized for math & coding tasks.","createdAt":"2025-02-20T04:11:08.969Z","updatedAt":"2025-02-20T05:32:54.924Z","publishedAt":"2025-02-20T05:32:54.921Z"},{"readme":"$5f","modelId":"qwen/qwq-32b","title":"QwQ-32B: A Powerful Reasoning Language Model by Qwen","description":"QwQ-32B is a powerful reasoning LLM that matches performance of models 20x larger. Excels at math, coding, and complex problem-solving with RL-enhanced capabilities.","createdAt":"2025-03-07T11:35:30.767Z","updatedAt":"2025-03-07T11:50:14.839Z","publishedAt":"2025-03-07T11:40:28.426Z"},{"readme":"$60","modelId":"deepseek/deepseek-v3-0324","title":"DeepSeek-V3-0324: An Advanced AI Model for Reasoning and Development","description":"DeepSeek-V3-0324 is a cutting-edge AI model with enhanced reasoning, coding, and Chinese language capabilities. Excels in front-end web development, function calling, and complex problem-solving with improved accuracy and performance.","createdAt":"2025-03-27T04:26:55.066Z","updatedAt":"2025-03-28T07:20:35.890Z","publishedAt":"2025-03-28T07:15:27.871Z"},{"readme":"$61","modelId":"meta-llama/llama-4-maverick-17b-128e-instruct-fp8","title":"Llama 4 Maverick 17B 128E Instruct Demo | Novita AI","description":"Explore Llama 4 Maverick 17B 128E Instruct – Meta’s cutting-edge multimodal AI model with 128 experts. Designed for advanced vision-language tasks, multilingual reasoning, and next-gen generative AI at scale. ","createdAt":"2025-04-07T02:34:20.305Z","updatedAt":"2025-04-07T03:20:57.891Z","publishedAt":"2025-04-07T03:20:57.886Z"},{"readme":"$62","modelId":"meta-llama/llama-4-scout-17b-16e-instruct","title":"Llama 4 Scout 17B 16E Instruct Demo | Novita AI","description":"Explore Llama 4 Scout 17B 16E Instruct, Meta’s efficient multimodal model built with a mixture-of-experts architecture. Supports multilingual text, images, and code. Fine-tuned for chat, visual reasoning, and more on Novita AI.","createdAt":"2025-04-07T02:50:36.127Z","updatedAt":"2025-04-07T03:30:03.759Z","publishedAt":"2025-04-07T03:30:03.753Z"},{"readme":"$63","modelId":"deepseek/deepseek-v3-turbo","title":"DeepSeek V3 Turbo Demo | Novita AI LLM Playground","description":"Explore DeepSeek V3 Turbo on Novita AI — a powerful MoE LLM with 671B parameters, high efficiency, and top benchmark performance. Try it now!","createdAt":"2025-04-28T02:47:18.189Z","updatedAt":"2025-04-28T07:42:46.087Z","publishedAt":"2025-04-28T07:42:46.082Z"},{"readme":"$64","modelId":"deepseek/deepseek-r1-turbo","title":"DeepSeek R1 Turbo Demo: Advanced Reasoning with RL Models","description":"Explore DeepSeek R1 Turbo, a cutting-edge reasoning model trained via reinforcement learning. Try the demo on Novita AI and experience top-tier AI reasoning.","createdAt":"2025-04-28T02:52:03.178Z","updatedAt":"2025-04-28T07:42:39.803Z","publishedAt":"2025-04-28T07:42:39.799Z"},{"readme":"$65","modelId":"qwen/qwen2.5-vl-72b-instruct","title":"Qwen2.5 VL 72B Instruct Demo: Advanced Vision-Language Model","description":"Discover Qwen2.5 VL 72B Instruct on Novita AI — a powerful vision-language model with enhanced video understanding, visual reasoning, and structured output.","createdAt":"2025-04-28T03:02:10.414Z","updatedAt":"2025-04-28T07:43:02.888Z","publishedAt":"2025-04-28T07:43:02.884Z"},{"readme":"$66","modelId":"google/gemma-3-27b-it","title":"Gemma 3 27B Instruct Demo: Advanced Multimodal AI Model","description":"Explore Gemma 3 27B Instruct on Novita AI — a powerful multimodal model by Google DeepMind for text, image understanding, and advanced reasoning.","createdAt":"2025-04-28T03:08:40.927Z","updatedAt":"2025-04-28T07:48:56.091Z","publishedAt":"2025-04-28T07:42:54.501Z"}],"defaultModel":"$undefined","deEndpoint":"$undefined"}]}]]}]