
Getting Started

1. Get Your API Key

Subscribe to the API on RapidAPI to obtain your personal API key.

2. Authentication Headers

Include both of these headers with every request:

X-RapidAPI-Key: YOUR_RAPIDAPI_KEY
X-RapidAPI-Host: llm-tool.p.rapidapi.com
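
In Python, the same headers can be attached once to a session and reused across requests. A minimal sketch, assuming the requests library and a YOUR_RAPIDAPI_KEY placeholder:

import requests

session = requests.Session()
session.headers.update({
    "X-RapidAPI-Key": "YOUR_RAPIDAPI_KEY",  # replace with your own key
    "X-RapidAPI-Host": "llm-tool.p.rapidapi.com",
})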

3. Your First Request

POST the Hugging Face model ID you want to size to the /v1/calculate endpoint:

curl -X POST "https://llm-tool.p.rapidapi.com/v1/calculate" \
  -H "X-RapidAPI-Key: YOUR_KEY" \
  -H "X-RapidAPI-Host: llm-tool.p.rapidapi.com" \
  -H "Content-Type: application/json" \
  -d '{"hf_model": "meta-llama/Llama-3.1-8B"}'

4. Result

The API responds with the estimated hardware requirements and a recommended vLLM parameter set:

{
  "model_family": "llama",
  "model_architecture": "LlamaForCausalLM",
  "estimated_parameters": "8B",
  "is_mixture_of_experts": false,
  "gpu_requirements": {
    "recommended_gpu_count": 1,
    "gpu_type": "L40",
    "memory_per_gpu_gb": 48,
    "total_vram_required_gb": 15,
    "cpu_cores": 12,
    "ram_gb": 128
  },
  "vllm_parameters": {
    "core": {
      "model": "meta-llama/Llama-3.1-8B",
      "tokenizer": "meta-llama/Llama-3.1-8B",
      "reasoning_parser": null,
      "dtype": "auto",
      "load_format": "auto",
      "quantization": null
    },
    "parallel": {
      "tensor_parallel_size": 1,
      "pipeline_parallel_size": 1
    },
    "memory": {
      "gpu_memory_utilization": 0.85,
      "max_model_len": 2048,
      "max_num_seqs": 8,
      "max_num_batched_tokens": 4096,
      "block_size": 16,
      "cpu_offload_gb": 0
    },
    "performance": {
      "enforce_eager": false,
      "enable_prefix_caching": false,
      "enable_chunked_prefill": false,
      "disable_frontend_multiprocessing": false,
      "disable_custom_all_reduce": false
    },
    "features": {
      "trust_remote_code": false,
      "multimodal": false,
      "enable_lora": true,
      "max_loras": 1,
      "max_lora_rank": 16,
      "max_cpu_loras": 2,
      "allow_runtime_lora_updating": false,
      "lora_modules": null,
      "chat_template": null
    }
  },
  "deployment_configuration": {
    "container_disk_gb": 50,
    "deployment_complexity": "low"
  },
  "notes": [
    "Using empirical VRAM measurement: 14.5GB",
    "Selected L40 with 1x GPU configuration for optimal cost/performance"
  ],
  "confidence": "high",
  "error": null
}
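
How you consume the response is up to your deployment tooling, but as a hypothetical sketch, the vllm_parameters block maps naturally onto vLLM's serve flags. The build_vllm_command helper below is illustrative and not part of this API; the flag names follow vLLM's CLI conventions:

def build_vllm_command(result: dict) -> str:
    """Assemble a `vllm serve` command line from the API response (illustrative)."""
    core = result["vllm_parameters"]["core"]
    parallel = result["vllm_parameters"]["parallel"]
    memory = result["vllm_parameters"]["memory"]
    args = [
        "vllm", "serve", core["model"],
        "--dtype", core["dtype"],
        "--load-format", core["load_format"],
        "--tensor-parallel-size", str(parallel["tensor_parallel_size"]),
        "--pipeline-parallel-size", str(parallel["pipeline_parallel_size"]),
        "--gpu-memory-utilization", str(memory["gpu_memory_utilization"]),
        "--max-model-len", str(memory["max_model_len"]),
        "--max-num-seqs", str(memory["max_num_seqs"]),
    ]
    if core.get("quantization"):  # only add the flag when a method is set
        args += ["--quantization", core["quantization"]]
    return " ".join(args)

# With `result` parsed from the response above:
# print(build_vllm_command(result))
# -> vllm serve meta-llama/Llama-3.1-8B --dtype auto --load-format auto ...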