id stringlengths 4 123 | downloads int64 0 2.48M | downloadsAllTime int64 0 143M | likes int64 0 9.74k | tags listlengths 1 7.92k | organization stringlengths 2 42 | has_audio bool 2
classes | has_speech bool 2
classes | has_music bool 2
classes | has_robot bool 2
classes | has_bio bool 2
classes | has_med bool 2
classes | has_series bool 2
classes | has_video bool 2
classes | has_image bool 2
classes | has_text bool 2
classes | has_science bool 2
classes | is_biomed bool 2
classes | data_download_timestamp timestamp[us, tz=UTC]date 2026-06-18 06:46:43 2026-06-18 06:46:43 |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Glint-Research/Fable-5-traces | 3,118 | 3,118 | 273 | [
"license:agpl-3.0",
"region:us"
] | Glint-Research | false | false | false | false | false | false | false | false | false | false | false | false | 2026-06-18T06:46:43.753000 |
armand0e/claude-fable-5-claude-code | 3,307 | 3,307 | 129 | [
"task_categories:text-generation",
"size_categories:n<1K",
"format:json",
"format:agent-traces",
"modality:tabular",
"modality:text",
"library:datasets",
"library:dask",
"library:polars",
"library:mlcroissant",
"region:us",
"agent-traces",
"format:agent-traces",
"claude",
"distillation",... | armand0e | false | false | false | false | false | false | false | false | false | true | false | false | 2026-06-18T06:46:43.753000 |
lazarus19/Vibe-Coding-Instruct | 634 | 634 | 89 | [
"size_categories:1M<n<10M",
"format:json",
"modality:text",
"library:datasets",
"library:pandas",
"library:polars",
"library:mlcroissant",
"region:us"
] | lazarus19 | false | false | false | false | false | false | false | false | false | true | false | false | 2026-06-18T06:46:43.753000 |
agents-last-exam/agents-last-exam | 7,525 | 7,557 | 185 | [
"language:en",
"license:cc-by-4.0",
"size_categories:n<1K",
"format:parquet",
"modality:text",
"library:datasets",
"library:pandas",
"library:polars",
"library:mlcroissant",
"region:us",
"computer-use-agents",
"agent-benchmark",
"benchmark",
"evaluation"
] | agents-last-exam | false | false | false | false | false | false | false | false | false | true | false | false | 2026-06-18T06:46:43.753000 |
angrygiraffe/claude-opus-4.6-4.7-reasoning-8.7k | 10,153 | 13,268 | 382 | [
"task_categories:text-generation",
"task_categories:question-answering",
"language:en",
"license:apache-2.0",
"size_categories:10K<n<100K",
"format:json",
"modality:text",
"library:datasets",
"library:dask",
"library:mlcroissant",
"region:us",
"sft",
"chain-of-thought",
"coding",
"math",... | angrygiraffe | false | false | false | false | false | false | false | false | false | true | true | false | 2026-06-18T06:46:43.753000 |
WithinUsAI/claude_mythos_distilled_25k | 2,068 | 2,120 | 73 | [
"language:en",
"license:apache-2.0",
"size_categories:10K<n<100K",
"format:json",
"modality:text",
"library:datasets",
"library:pandas",
"library:polars",
"library:mlcroissant",
"region:us",
"synthetic",
"claude",
"mythos",
"distillation",
"cybersecurity",
"coding",
"reasoning",
"a... | WithinUsAI | false | false | false | false | false | false | false | false | false | true | false | false | 2026-06-18T06:46:43.753000 |
stanford-vision-lab/gpic | 182,053 | 186,188 | 137 | [
"language:en",
"license:mit",
"arxiv:2605.30341",
"region:us"
] | stanford-vision-lab | false | false | false | false | false | false | false | false | false | false | false | false | 2026-06-18T06:46:43.753000 |
wikimedia/structured-wikipedia | 18,249 | 41,123 | 378 | [
"language:en",
"language:fr",
"license:cc-by-sa-4.0",
"size_categories:10M<n<100M",
"format:parquet",
"modality:text",
"library:datasets",
"library:dask",
"library:polars",
"library:mlcroissant",
"region:us",
"wikipedia",
"wikimedia",
"structured-data",
"parquet",
"knowledge-base",
"... | wikimedia | false | false | false | false | false | false | false | false | false | true | false | false | 2026-06-18T06:46:43.753000 |
victor/fable-5-boeing-747-trace | 767 | 767 | 23 | [
"license:mit",
"size_categories:n<1K",
"format:json",
"format:agent-traces",
"modality:tabular",
"modality:text",
"library:datasets",
"library:pandas",
"library:polars",
"library:mlcroissant",
"region:us",
"agent-traces",
"claude-code",
"threejs",
"fable-5"
] | victor | false | false | false | false | false | false | false | false | false | true | false | false | 2026-06-18T06:46:43.753000 |
K-intelligence/KSAFE-MM | 144 | 144 | 23 | [
"size_categories:10K<n<100K",
"format:parquet",
"format:optimized-parquet",
"modality:image",
"modality:text",
"library:datasets",
"library:pandas",
"library:polars",
"library:mlcroissant",
"arxiv:2605.28013",
"region:us"
] | K-intelligence | false | false | false | false | false | false | false | false | true | true | false | false | 2026-06-18T06:46:43.753000 |
liumindmind/Neko_Audio-80K_Short | 9,873 | 9,873 | 24 | [
"size_categories:10K<n<100K",
"format:json",
"modality:audio",
"modality:text",
"library:datasets",
"library:pandas",
"library:polars",
"library:mlcroissant",
"region:us"
] | liumindmind | true | false | false | false | false | false | false | false | false | true | false | false | 2026-06-18T06:46:43.753000 |
HuggingFaceFW/fineweb-edu | 462,571 | 7,621,387 | 1,150 | [
"task_categories:text-generation",
"language:en",
"license:odc-by",
"size_categories:1B<n<10B",
"format:parquet",
"modality:tabular",
"modality:text",
"library:datasets",
"library:dask",
"library:polars",
"library:mlcroissant",
"arxiv:2406.17557",
"arxiv:2404.14219",
"arxiv:2401.10020",
... | HuggingFaceFW | false | false | false | false | false | false | false | false | false | true | false | false | 2026-06-18T06:46:43.753000 |
openbmb/Ultra-FineWeb | 80,903 | 629,424 | 390 | [
"task_categories:text-generation",
"language:en",
"language:zh",
"license:apache-2.0",
"size_categories:1B<n<10B",
"format:parquet",
"modality:text",
"library:datasets",
"library:dask",
"library:polars",
"library:mlcroissant",
"arxiv:2505.05427",
"arxiv:2602.09003",
"arxiv:2412.04315",
"... | openbmb | false | false | false | false | false | false | false | false | false | true | false | false | 2026-06-18T06:46:43.753000 |
openbmb/Ultra-FineWeb-L3 | 79,175 | 81,625 | 296 | [
"task_categories:text-generation",
"language:en",
"language:zh",
"license:apache-2.0",
"size_categories:1B<n<10B",
"format:parquet",
"modality:text",
"library:datasets",
"library:dask",
"library:polars",
"library:mlcroissant",
"arxiv:2505.05427",
"arxiv:2602.09003",
"region:us",
"llm",
... | openbmb | false | false | false | false | false | false | false | false | false | true | false | false | 2026-06-18T06:46:43.753000 |
redmadrobot-rnd/pii_benchmark | 315 | 315 | 15 | [
"task_categories:token-classification",
"language:ru",
"license:mit",
"size_categories:1K<n<10K",
"format:csv",
"modality:text",
"library:datasets",
"library:pandas",
"library:polars",
"library:mlcroissant",
"region:us",
"pii",
"ner",
"named-entity-recognition",
"pii-detection",
"priva... | redmadrobot-rnd | false | false | false | false | false | false | false | false | false | true | false | false | 2026-06-18T06:46:43.753000 |
trace-commons/agent-traces | 312 | 312 | 14 | [
"task_categories:text-generation",
"language:en",
"license:cc-by-4.0",
"size_categories:n<1K",
"format:parquet",
"format:optimized-parquet",
"modality:tabular",
"modality:text",
"library:datasets",
"library:pandas",
"library:polars",
"library:mlcroissant",
"region:us",
"agent",
"agent-tr... | trace-commons | false | false | false | false | false | false | false | false | false | true | false | false | 2026-06-18T06:46:43.753000 |
HuggingFaceFW/fineweb | 418,904 | 8,493,495 | 2,889 | [
"task_categories:text-generation",
"language:en",
"license:odc-by",
"size_categories:10B<n<100B",
"modality:tabular",
"modality:text",
"arxiv:2306.01116",
"arxiv:2109.07445",
"arxiv:2406.17557",
"doi:10.57967/hf/2493",
"region:us"
] | HuggingFaceFW | false | false | false | false | false | false | false | false | false | true | false | false | 2026-06-18T06:46:43.753000 |
SakanaAI/AI-CUDA-Engineer-Archive | 964 | 28,575 | 186 | [
"license:cc-by-4.0",
"size_categories:10K<n<100K",
"format:parquet",
"modality:tabular",
"modality:text",
"library:datasets",
"library:pandas",
"library:mlcroissant",
"library:polars",
"region:us",
"code"
] | SakanaAI | false | false | false | false | false | false | false | false | false | true | false | false | 2026-06-18T06:46:43.753000 |
Jackrong/Claude-opus-4.7-TraceInversion-5000x | 2,215 | 2,215 | 62 | [
"task_categories:text-generation",
"annotations_creators:machine-generated",
"language:en",
"language:zh",
"language:ko",
"language:ru",
"language:ja",
"language:es",
"license:apache-2.0",
"size_categories:1K<n<10K",
"format:json",
"modality:text",
"library:datasets",
"library:pandas",
"... | Jackrong | false | false | false | false | false | false | false | false | false | true | false | false | 2026-06-18T06:46:43.753000 |
lordx64/agentic-distill-fable-5-sft | 128 | 128 | 13 | [
"task_categories:text-generation",
"language:en",
"license:agpl-3.0",
"size_categories:1K<n<10K",
"format:parquet",
"modality:text",
"library:datasets",
"library:pandas",
"library:polars",
"library:mlcroissant",
"region:us",
"agentic",
"chain-of-thought",
"distillation",
"claude",
"cla... | lordx64 | false | false | false | false | false | false | false | false | false | true | false | false | 2026-06-18T06:46:43.753000 |
openai/gsm8k | 896,385 | 12,499,818 | 1,391 | [
"benchmark:official",
"benchmark:eval-yaml",
"task_categories:text-generation",
"annotations_creators:crowdsourced",
"language_creators:crowdsourced",
"multilinguality:monolingual",
"source_datasets:original",
"language:en",
"license:mit",
"size_categories:10K<n<100K",
"format:parquet",
"modal... | openai | false | false | false | false | false | false | false | false | false | true | false | false | 2026-06-18T06:46:43.753000 |
aidigestorg/ai-village | 202 | 202 | 13 | [
"language:en",
"license:other",
"size_categories:1M<n<10M",
"region:us",
"agents",
"llm-agents",
"computer-use",
"ai-safety",
"agentic-behavior"
] | aidigestorg | false | false | false | false | false | false | false | false | false | true | false | false | 2026-06-18T06:46:43.753000 |
Anthropic/hh-rlhf | 32,091 | 1,921,576 | 1,794 | [
"license:mit",
"size_categories:100K<n<1M",
"format:json",
"modality:text",
"library:datasets",
"library:dask",
"library:mlcroissant",
"library:polars",
"arxiv:2204.05862",
"region:us",
"human-feedback"
] | Anthropic | false | false | false | false | false | false | false | false | false | true | false | false | 2026-06-18T06:46:43.753000 |
zlab-princeton/i1-captions | 3,859 | 3,903 | 14 | [
"task_categories:text-to-image",
"size_categories:100M<n<1B",
"format:parquet",
"modality:text",
"library:datasets",
"library:dask",
"library:polars",
"library:mlcroissant",
"arxiv:2606.11289",
"region:us"
] | zlab-princeton | false | false | false | false | false | false | false | false | true | true | false | false | 2026-06-18T06:46:43.753000 |
qualialabsAI/SmoothConv | 17,820 | 17,820 | 11 | [
"language:zh",
"license:cc-by-nc-4.0",
"arxiv:0000.00000",
"region:us",
"speech",
"conversational-speech",
"chinese"
] | qualialabsAI | false | true | false | false | false | false | false | false | false | false | false | false | 2026-06-18T06:46:43.753000 |
perplexity-ai/draco | 1,146 | 12,076 | 105 | [
"language:en",
"license:mit",
"size_categories:n<1K",
"format:json",
"modality:text",
"library:datasets",
"library:pandas",
"library:polars",
"library:mlcroissant",
"arxiv:2602.11685",
"region:us",
"deep-research"
] | perplexity-ai | false | false | false | false | false | false | false | false | false | true | false | false | 2026-06-18T06:46:43.753000 |
nvidia/Open-SWE-Traces | 333 | 345 | 10 | [
"license:cc-by-4.0",
"size_categories:100K<n<1M",
"format:parquet",
"modality:text",
"library:datasets",
"library:dask",
"library:polars",
"library:mlcroissant",
"arxiv:2606.16038",
"region:us",
"code",
"synthetic",
"tools",
"agents",
"software"
] | nvidia | false | false | false | false | false | false | false | false | false | true | false | false | 2026-06-18T06:46:43.753000 |
Jackrong/GLM-5.1-Reasoning-1M-Cleaned | 6,488 | 18,759 | 279 | [
"task_categories:text-generation",
"task_categories:question-answering",
"language:en",
"language:zh",
"license:apache-2.0",
"size_categories:100K<n<1M",
"format:json",
"modality:text",
"library:datasets",
"library:pandas",
"library:polars",
"library:mlcroissant",
"region:us",
"reasoning",... | Jackrong | false | false | false | false | false | false | false | false | false | true | false | false | 2026-06-18T06:46:43.753000 |
openbmb/UltraData-SFT-2605 | 44,940 | 44,940 | 346 | [
"task_categories:text-generation",
"task_categories:question-answering",
"language:en",
"language:zh",
"license:apache-2.0",
"size_categories:10B<n<100B",
"arxiv:2602.09003",
"region:us",
"llm",
"sft",
"supervised-fine-tuning",
"post-training",
"deep-thinking",
"reasoning",
"instruction-... | openbmb | false | false | false | false | false | false | false | false | false | true | false | false | 2026-06-18T06:46:43.753000 |
nvidia/Nemotron-Pretraining-Code-v3 | 1,786 | 1,786 | 51 | [
"task_categories:text-generation",
"language:code",
"license:cc-by-4.0",
"size_categories:100M<n<1B",
"format:parquet",
"modality:text",
"library:datasets",
"library:pandas",
"library:polars",
"library:mlcroissant",
"region:us",
"text",
"pre-training",
"human",
"legal",
"Nemotron_3_Ult... | nvidia | false | false | false | false | false | false | false | false | false | true | false | false | 2026-06-18T06:46:43.753000 |
HelioAI/Fable-5-Distill-Reasoning-462x | 495 | 495 | 20 | [
"task_categories:text-generation",
"annotations_creators:machine-generated",
"language:en",
"language:ru",
"license:unknown",
"size_categories:n<1K",
"region:us",
"reasoning",
"long-context",
"reasoning-traces",
"synthetic-data",
"chain-of-thought",
"process-supervision",
"mythos-v2",
"d... | HelioAI | false | false | false | false | true | true | false | false | true | true | false | true | 2026-06-18T06:46:43.753000 |
nvidia/Nemotron-Personas-Belgium | 43 | 43 | 10 | [
"task_categories:text-generation",
"language:nl",
"language:fr",
"language:de",
"language:en",
"license:cc-by-4.0",
"size_categories:1M<n<10M",
"format:parquet",
"modality:text",
"library:datasets",
"library:pandas",
"library:polars",
"library:mlcroissant",
"library:datadesigner",
"regio... | nvidia | false | false | false | false | false | false | false | false | false | true | false | false | 2026-06-18T06:46:43.753000 |
roneneldan/TinyStories | 87,064 | 1,472,382 | 1,031 | [
"task_categories:text-generation",
"language:en",
"license:cdla-sharing-1.0",
"size_categories:1M<n<10M",
"format:parquet",
"modality:text",
"library:datasets",
"library:dask",
"library:polars",
"library:mlcroissant",
"arxiv:2305.07759",
"region:us"
] | roneneldan | false | false | false | false | false | false | false | false | false | true | false | false | 2026-06-18T06:46:43.753000 |
bones-studio/seed | 4,122 | 16,714 | 149 | [
"task_categories:robotics",
"task_categories:text-to-video",
"task_categories:video-text-to-text",
"language:en",
"license:other",
"size_categories:100K<n<1M",
"region:us",
"motion-capture",
"humanoid-robotics",
"human-motion",
"physical-ai",
"whole-body-control",
"NVIDIA-SOMA",
"Unitree-G... | bones-studio | false | false | false | true | false | false | false | true | false | true | false | false | 2026-06-18T06:46:43.753000 |
Jackrong/Claude-opus-4.6-TraceInversion-9000x | 2,681 | 2,681 | 69 | [
"task_categories:text-generation",
"annotations_creators:machine-generated",
"language:en",
"language:zh",
"language:ko",
"language:ja",
"language:ru",
"language:es",
"license:apache-2.0",
"size_categories:1K<n<10K",
"format:json",
"modality:text",
"library:datasets",
"library:pandas",
"... | Jackrong | false | false | false | false | false | false | false | false | false | true | false | false | 2026-06-18T06:46:43.753000 |
qualialabsAI/DuplexConv | 14,507 | 14,507 | 9 | [
"language:zh",
"license:cc-by-nc-4.0",
"arxiv:0000.00000",
"region:us",
"speech",
"conversational-speech",
"chinese"
] | qualialabsAI | false | true | false | false | false | false | false | false | false | false | false | false | 2026-06-18T06:46:43.753000 |
tahoebio/EmeraldBay | 1,122 | 1,122 | 11 | [
"license:cc-by-4.0",
"size_categories:1M<n<10M",
"format:parquet",
"format:optimized-parquet",
"modality:text",
"library:datasets",
"library:dask",
"library:polars",
"library:mlcroissant",
"region:us",
"biology",
"single-cell",
"RNA",
"drug-sensitivity",
"perturbation",
"chemistry"
] | tahoebio | false | false | false | false | true | false | false | false | false | true | false | true | 2026-06-18T06:46:43.753000 |
AweAI-Team/Scale-SWE-Distilled-DeepSeek-v4-Pro-High-41k | 504 | 504 | 8 | [
"arxiv:2602.09892",
"region:us"
] | AweAI-Team | false | false | false | false | false | false | false | false | false | false | false | false | 2026-06-18T06:46:43.753000 |
allenai/c4 | 833,248 | 13,495,543 | 598 | [
"task_categories:text-generation",
"task_categories:fill-mask",
"task_ids:language-modeling",
"task_ids:masked-language-modeling",
"annotations_creators:no-annotation",
"language_creators:found",
"multilinguality:multilingual",
"source_datasets:original",
"language:af",
"language:am",
"language:... | allenai | false | false | false | false | false | false | false | false | false | true | false | false | 2026-06-18T06:46:43.753000 |
google/fleurs | 72,599 | 1,576,273 | 415 | [
"task_categories:automatic-speech-recognition",
"annotations_creators:expert-generated",
"annotations_creators:crowdsourced",
"annotations_creators:machine-generated",
"language_creators:crowdsourced",
"language_creators:expert-generated",
"multilinguality:multilingual",
"language:afr",
"language:am... | google | true | true | false | false | false | false | false | false | false | true | false | false | 2026-06-18T06:46:43.753000 |
nvidia/PhysicalAI-Autonomous-Vehicles | 175,553 | 2,460,686 | 916 | [
"license:other",
"region:us"
] | nvidia | false | false | false | false | false | false | false | false | false | false | false | false | 2026-06-18T06:46:43.753000 |
open-thoughts/OpenThoughts3-1.2M | 23,751 | 201,110 | 242 | [
"task_categories:text-generation",
"license:apache-2.0",
"size_categories:1M<n<10M",
"format:parquet",
"modality:text",
"library:datasets",
"library:dask",
"library:mlcroissant",
"library:polars",
"arxiv:2506.04178",
"region:us",
"reasoning",
"mathematics",
"code",
"science"
] | open-thoughts | false | false | false | false | false | false | false | false | false | true | true | false | 2026-06-18T06:46:43.753000 |
nvidia/PhysicalAI-Autonomous-Vehicles-NuRec | 17,838 | 109,368 | 183 | [
"license:other",
"region:us"
] | nvidia | false | false | false | false | false | false | false | false | false | false | false | false | 2026-06-18T06:46:43.753000 |
openbmb/UltraData-Math | 35,416 | 200,827 | 318 | [
"task_categories:text-generation",
"language:en",
"language:zh",
"license:apache-2.0",
"size_categories:100M<n<1B",
"format:parquet",
"modality:text",
"library:datasets",
"library:dask",
"library:polars",
"library:mlcroissant",
"arxiv:2602.09003",
"region:us",
"llm",
"pretraining",
"ma... | openbmb | false | false | false | false | false | false | false | false | false | true | false | false | 2026-06-18T06:46:43.753000 |
jasperai/monet | 180,476 | 406,464 | 133 | [
"task_categories:text-to-image",
"task_categories:image-feature-extraction",
"task_categories:zero-shot-image-classification",
"language:en",
"license:apache-2.0",
"size_categories:100M<n<1B",
"arxiv:2605.21272",
"region:us",
"multimodal",
"image-text",
"captioning",
"text-to-image",
"synthe... | jasperai | false | false | false | false | false | false | false | false | true | true | false | false | 2026-06-18T06:46:43.753000 |
WithinUsAI/GPT_5.5_Distilled | 647 | 732 | 11 | [
"license:apache-2.0",
"size_categories:10K<n<100K",
"format:json",
"modality:text",
"library:datasets",
"library:pandas",
"library:polars",
"library:mlcroissant",
"region:us"
] | WithinUsAI | false | false | false | false | false | false | false | false | false | true | false | false | 2026-06-18T06:46:43.753000 |
IndexTeam/CASTER-Bench | 182 | 182 | 8 | [
"task_categories:video-classification",
"task_categories:text-classification",
"language:zh",
"license:cc-by-nc-4.0",
"size_categories:1K<n<10K",
"format:json",
"modality:image",
"modality:text",
"modality:video",
"library:datasets",
"library:pandas",
"library:polars",
"library:mlcroissant",... | IndexTeam | false | false | false | false | false | false | false | true | true | true | false | false | 2026-06-18T06:46:43.753000 |
WithinUsAI/claude_opus_4.8_distill_5k | 363 | 363 | 11 | [
"license:apache-2.0",
"size_categories:1K<n<10K",
"format:json",
"modality:text",
"library:datasets",
"library:pandas",
"library:polars",
"library:mlcroissant",
"region:us"
] | WithinUsAI | false | false | false | false | false | false | false | false | false | true | false | false | 2026-06-18T06:46:43.753000 |
tencent/Hy-Embodied-0.5-VLA-Data | 22,267 | 22,267 | 7 | [
"task_categories:robotics",
"task_categories:reinforcement-learning",
"license:cc-by-4.0",
"size_categories:n<1K",
"format:parquet",
"modality:tabular",
"modality:text",
"library:datasets",
"library:pandas",
"library:polars",
"library:mlcroissant",
"library:lerobot",
"library:lance",
"arxi... | tencent | false | false | false | true | false | false | false | false | false | true | false | false | 2026-06-18T06:46:43.753000 |
meituan-longcat/LoHoSearch | 329 | 329 | 8 | [
"task_categories:question-answering",
"language:en",
"license:mit",
"size_categories:1K<n<10K",
"format:csv",
"modality:text",
"library:datasets",
"library:pandas",
"library:polars",
"library:mlcroissant",
"arxiv:2606.12837",
"region:us",
"search-agent",
"benchmark",
"knowledge-graph",
... | meituan-longcat | false | false | false | false | false | false | false | false | false | true | false | false | 2026-06-18T06:46:43.753000 |
attentionAllYouNeed/Vibe-Coding-Claude-Fable-5 | 183 | 183 | 8 | [
"size_categories:1M<n<10M",
"format:json",
"modality:text",
"library:datasets",
"library:pandas",
"library:polars",
"library:mlcroissant",
"region:us"
] | attentionAllYouNeed | false | false | false | false | false | false | false | false | false | true | false | false | 2026-06-18T06:46:43.753000 |
MiG-NJU/OmniVideo-100K | 750 | 750 | 7 | [
"license:apache-2.0",
"size_categories:10K<n<100K",
"modality:video",
"modality:text",
"modality:image",
"arxiv:2606.14702",
"region:us",
"video",
"text",
"image"
] | MiG-NJU | false | false | false | false | false | false | false | true | true | true | false | false | 2026-06-18T06:46:43.753000 |
tatsu-lab/alpaca | 83,636 | 2,135,370 | 992 | [
"task_categories:text-generation",
"language:en",
"license:cc-by-nc-4.0",
"size_categories:10K<n<100K",
"format:parquet",
"modality:text",
"library:datasets",
"library:pandas",
"library:polars",
"library:mlcroissant",
"region:us",
"instruction-finetuning"
] | tatsu-lab | false | false | false | false | false | false | false | false | false | true | false | false | 2026-06-18T06:46:43.753000 |
QuixiAI/ultrachat-uncensored | 172 | 4,116 | 64 | [
"license:mit",
"size_categories:100K<n<1M",
"format:json",
"modality:text",
"library:datasets",
"library:pandas",
"library:mlcroissant",
"library:polars",
"region:us"
] | QuixiAI | false | false | false | false | false | false | false | false | false | true | false | false | 2026-06-18T06:46:43.753000 |
Idavidrein/gpqa | 114,728 | 1,825,009 | 461 | [
"benchmark:official",
"benchmark:eval-yaml",
"task_categories:question-answering",
"task_categories:text-generation",
"language:en",
"license:cc-by-4.0",
"size_categories:1K<n<10K",
"format:csv",
"modality:tabular",
"modality:text",
"library:datasets",
"library:pandas",
"library:polars",
"... | Idavidrein | false | false | false | false | false | false | false | false | false | true | false | false | 2026-06-18T06:46:43.753000 |
bigcode/the-stack-v2 | 16,554 | 309,541 | 585 | [
"task_categories:text-generation",
"language_creators:crowdsourced",
"language_creators:expert-generated",
"multilinguality:multilingual",
"language:code",
"license:other",
"size_categories:1B<n<10B",
"format:parquet",
"modality:tabular",
"modality:text",
"library:datasets",
"library:dask",
... | bigcode | false | false | false | false | false | false | false | false | false | true | false | false | 2026-06-18T06:46:43.753000 |
NousResearch/hermes-function-calling-v1 | 30,598 | 100,141 | 423 | [
"task_categories:text-generation",
"task_categories:question-answering",
"task_categories:feature-extraction",
"language:en",
"license:apache-2.0",
"size_categories:10K<n<100K",
"format:json",
"modality:text",
"library:datasets",
"library:pandas",
"library:polars",
"library:mlcroissant",
"re... | NousResearch | false | false | false | false | false | false | false | false | false | true | false | false | 2026-06-18T06:46:43.753000 |
lambda/hermes-agent-reasoning-traces | 3,032 | 13,845 | 360 | [
"task_categories:text-generation",
"language:en",
"license:apache-2.0",
"size_categories:10K<n<100K",
"format:parquet",
"format:optimized-parquet",
"modality:text",
"library:datasets",
"library:pandas",
"library:polars",
"library:mlcroissant",
"region:us",
"tool-calling",
"function-calling... | lambda | false | false | false | false | false | false | false | false | false | true | false | false | 2026-06-18T06:46:43.753000 |
badlogicgames/pi-mono | 2,875 | 24,583 | 155 | [
"task_categories:text-generation",
"language:en",
"language:code",
"license:other",
"region:us",
"agent-traces",
"coding-agent",
"pi-share-hf"
] | badlogicgames | false | false | false | false | false | false | false | false | false | true | false | false | 2026-06-18T06:46:43.753000 |
InternScience/Scholar-kg | 54 | 54 | 6 | [
"region:us"
] | InternScience | false | false | false | false | false | false | false | false | false | false | false | false | 2026-06-18T06:46:43.753000 |
xwm/WildGUI | 14,995 | 14,995 | 6 | [
"language:en",
"license:cc-by-nc-4.0",
"size_categories:10M<n<100M",
"format:webdataset",
"modality:image",
"modality:text",
"library:datasets",
"library:webdataset",
"library:mlcroissant",
"arxiv:2605.14747",
"region:us",
"gui-agents",
"gui-grounding",
"interaction-trajectories",
"video... | xwm | false | false | false | false | false | false | false | true | true | true | false | false | 2026-06-18T06:46:43.753000 |
armand0e/minimax-m3-claude-code-traces | 927 | 927 | 11 | [
"task_categories:text-generation",
"size_categories:n<1K",
"format:json",
"format:agent-traces",
"modality:tabular",
"modality:text",
"library:datasets",
"library:dask",
"library:polars",
"library:mlcroissant",
"region:us",
"agent-traces",
"format:agent-traces",
"claude-code",
"distillat... | armand0e | false | false | false | false | false | false | false | false | false | true | false | false | 2026-06-18T06:46:43.753000 |
zhiqix/PUM-MATH | 231 | 231 | 12 | [
"task_categories:text-generation",
"task_categories:text-classification",
"language:en",
"license:cc-by-4.0",
"size_categories:100K<n<1M",
"format:json",
"modality:tabular",
"modality:text",
"library:datasets",
"library:pandas",
"library:polars",
"library:mlcroissant",
"arxiv:2606.07190",
... | zhiqix | false | false | false | false | false | false | false | false | false | true | false | false | 2026-06-18T06:46:43.753000 |
dieKarotte/SO-Bench | 2,059 | 2,059 | 7 | [
"size_categories:1K<n<10K",
"format:audiofolder",
"modality:audio",
"library:datasets",
"library:mlcroissant",
"arxiv:2606.10738",
"region:us"
] | dieKarotte | true | false | false | false | false | false | false | false | false | false | false | false | 2026-06-18T06:46:43.753000 |
Jrmyrion/Fable-5-traces | 162 | 162 | 6 | [
"license:agpl-3.0",
"size_categories:1K<n<10K",
"format:json",
"modality:text",
"library:datasets",
"library:pandas",
"library:polars",
"library:mlcroissant",
"region:us"
] | Jrmyrion | false | false | false | false | false | false | false | false | false | true | false | false | 2026-06-18T06:46:43.753000 |
cfahlgren1/Fable-5-traces | 372 | 372 | 6 | [
"license:agpl-3.0",
"size_categories:n<1K",
"format:json",
"format:agent-traces",
"modality:tabular",
"modality:text",
"library:datasets",
"library:dask",
"library:polars",
"library:mlcroissant",
"region:us"
] | cfahlgren1 | false | false | false | false | false | false | false | false | false | true | false | false | 2026-06-18T06:46:43.753000 |
hotpotqa/hotpot_qa | 84,008 | 1,065,742 | 306 | [
"task_categories:question-answering",
"annotations_creators:crowdsourced",
"language_creators:found",
"multilinguality:monolingual",
"source_datasets:original",
"language:en",
"license:cc-by-sa-4.0",
"size_categories:100K<n<1M",
"format:parquet",
"modality:text",
"library:datasets",
"library:d... | hotpotqa | false | false | false | false | false | false | false | false | false | true | false | false | 2026-06-18T06:46:43.753000 |
HuggingFaceH4/ultrachat_200k | 63,115 | 1,003,936 | 733 | [
"task_categories:text-generation",
"language:en",
"license:mit",
"size_categories:100K<n<1M",
"format:parquet",
"modality:text",
"library:datasets",
"library:dask",
"library:mlcroissant",
"library:polars",
"arxiv:2305.14233",
"region:us"
] | HuggingFaceH4 | false | false | false | false | false | false | false | false | false | true | false | false | 2026-06-18T06:46:43.753000 |
Salesforce/xlam-function-calling-60k | 32,313 | 148,222 | 638 | [
"task_categories:question-answering",
"task_categories:text-generation",
"task_categories:reinforcement-learning",
"language:en",
"license:cc-by-4.0",
"size_categories:10K<n<100K",
"format:json",
"modality:text",
"library:datasets",
"library:pandas",
"library:mlcroissant",
"library:polars",
... | Salesforce | false | false | false | false | false | false | false | false | false | true | false | false | 2026-06-18T06:46:43.753000 |
HuggingFaceFW/fineweb-2 | 90,571 | 1,995,838 | 822 | [
"task_categories:text-generation",
"language:aai",
"language:aak",
"language:aau",
"language:aaz",
"language:aba",
"language:abi",
"language:abk",
"language:abn",
"language:abq",
"language:abs",
"language:abt",
"language:abx",
"language:aby",
"language:abz",
"language:aca",
"language... | HuggingFaceFW | false | false | false | false | false | false | false | false | false | true | false | false | 2026-06-18T06:46:43.753000 |
cais/hle | 34,984 | 347,481 | 836 | [
"benchmark:official",
"license:mit",
"size_categories:1K<n<10K",
"format:parquet",
"modality:image",
"modality:text",
"library:datasets",
"library:pandas",
"library:polars",
"library:mlcroissant",
"region:us"
] | cais | false | false | false | false | false | false | false | false | true | true | false | false | 2026-06-18T06:46:43.753000 |
Anthropic/EconomicIndex | 38,588 | 157,115 | 544 | [
"language:en",
"license:mit",
"arxiv:2503.04761",
"region:us",
"AI",
"LLM",
"Economic Impacts",
"Anthropic"
] | Anthropic | false | false | false | false | false | false | false | false | false | true | false | false | 2026-06-18T06:46:43.753000 |
whale99/Interaction2Code | 4,276 | 8,816 | 10 | [
"task_categories:image-text-to-text",
"language:en",
"size_categories:1K<n<10K",
"format:imagefolder",
"modality:image",
"library:datasets",
"library:mlcroissant",
"arxiv:2411.03292",
"region:us"
] | whale99 | false | false | false | false | false | false | false | false | true | true | false | false | 2026-06-18T06:46:43.753000 |
nvidia/OpenCodeInstruct | 8,443 | 49,533 | 97 | [
"task_categories:text-generation",
"language:en",
"license:cc-by-4.0",
"size_categories:1M<n<10M",
"format:parquet",
"modality:text",
"library:datasets",
"library:dask",
"library:polars",
"library:mlcroissant",
"arxiv:2504.04030",
"region:us",
"code",
"synthetic"
] | nvidia | false | false | false | false | false | false | false | false | false | true | false | false | 2026-06-18T06:46:43.753000 |
ScaleAI/SWE-bench_Pro | 74,071 | 1,087,655 | 128 | [
"benchmark:official",
"benchmark:eval-yaml",
"size_categories:n<1K",
"format:parquet",
"modality:text",
"library:datasets",
"library:pandas",
"library:polars",
"library:mlcroissant",
"region:us"
] | ScaleAI | false | false | false | false | false | false | false | false | false | true | false | false | 2026-06-18T06:46:43.753000 |
openai/gdpval | 89,750 | 354,752 | 511 | [
"size_categories:n<1K",
"format:parquet",
"modality:text",
"library:datasets",
"library:pandas",
"library:polars",
"library:mlcroissant",
"region:us"
] | openai | false | false | false | false | false | false | false | false | false | true | false | false | 2026-06-18T06:46:43.753000 |
TeichAI/claude-4.5-opus-high-reasoning-250x | 961 | 24,654 | 397 | [
"size_categories:n<1K",
"format:json",
"modality:text",
"library:datasets",
"library:pandas",
"library:mlcroissant",
"library:polars",
"region:us"
] | TeichAI | false | false | false | false | false | false | false | false | false | true | false | false | 2026-06-18T06:46:43.753000 |
qixuewei/nuReasoning | 14,073 | 16,153 | 19 | [
"task_categories:visual-question-answering",
"task_categories:text-generation",
"task_categories:robotics",
"license:other",
"size_categories:10K<n<100K",
"region:us"
] | qixuewei | false | false | false | true | false | false | false | false | false | true | false | false | 2026-06-18T06:46:43.753000 |
ansulev/GPT-5.5-Thinking-Max-Distill-25k | 216 | 268 | 6 | [
"language:en",
"license:apache-2.0",
"size_categories:10K<n<100K",
"format:json",
"modality:text",
"library:datasets",
"library:pandas",
"library:polars",
"library:mlcroissant",
"region:us",
"gpt-5-5",
"thinking-max-distill",
"god-level-recursive-seed-ai",
"o1-style-reasoning",
"test-tim... | ansulev | false | false | false | false | false | false | false | false | false | true | false | false | 2026-06-18T06:46:43.753000 |
SWE-Explore-Bench/SWE-Explore-Bench | 270 | 337 | 11 | [
"license:cc-by-nc-nd-4.0",
"size_categories:n<1K",
"format:json",
"modality:text",
"library:datasets",
"library:pandas",
"library:polars",
"library:mlcroissant",
"arxiv:2606.07297",
"region:us"
] | SWE-Explore-Bench | false | false | false | false | false | false | false | false | false | true | false | false | 2026-06-18T06:46:43.753000 |
WithinUsAI/claude_Opus_4.7_Distilled | 468 | 566 | 20 | [
"license:apache-2.0",
"size_categories:10K<n<100K",
"format:json",
"modality:text",
"library:datasets",
"library:pandas",
"library:polars",
"library:mlcroissant",
"region:us"
] | WithinUsAI | false | false | false | false | false | false | false | false | false | true | false | false | 2026-06-18T06:46:43.753000 |
nvidia/Nemotron-SFT-ARC-AGI-v1 | 636 | 636 | 12 | [
"task_categories:text-generation",
"language:en",
"license:other",
"size_categories:100K<n<1M",
"format:json",
"modality:text",
"library:datasets",
"library:pandas",
"library:polars",
"library:mlcroissant",
"region:us",
"Nemotron_3_Ultra",
"code",
"reasoning",
"synthetic",
"text",
"s... | nvidia | false | false | false | false | false | false | false | false | false | true | false | false | 2026-06-18T06:46:43.753000 |
kyutai/interactivity-alignment-samples | 2,020 | 2,020 | 7 | [
"language:en",
"license:cc-by-4.0",
"size_categories:1K<n<10K",
"format:audiofolder",
"modality:audio",
"library:datasets",
"library:mlcroissant",
"arxiv:2606.11167",
"region:us"
] | kyutai | true | false | false | false | false | false | false | false | false | false | false | false | 2026-06-18T06:46:43.753000 |
nvidia/Nemotron-Personas-El-Salvador | 4,501 | 4,501 | 53 | [
"task_categories:text-generation",
"language:es",
"license:cc-by-4.0",
"size_categories:100K<n<1M",
"format:parquet",
"format:optimized-parquet",
"modality:text",
"library:datasets",
"library:dask",
"library:polars",
"library:mlcroissant",
"library:datadesigner",
"region:us",
"synthetic",
... | nvidia | false | false | false | false | false | false | false | false | false | true | false | false | 2026-06-18T06:46:43.753000 |
inclusionAI/FinixDocBench | 2,940 | 2,940 | 7 | [
"task_categories:image-to-text",
"task_categories:object-detection",
"language:zh",
"language:en",
"license:cc-by-nc-sa-4.0",
"size_categories:n<1K",
"format:imagefolder",
"modality:image",
"modality:text",
"library:datasets",
"library:mlcroissant",
"region:us",
"document-parsing",
"ocr",
... | inclusionAI | false | false | false | false | false | false | false | false | true | true | false | false | 2026-06-18T06:46:43.753000 |
GenAI4ELab/papercli-papers | 10,332 | 10,332 | 7 | [
"license:cc-by-4.0",
"size_categories:100K<n<1M",
"format:parquet",
"modality:text",
"library:datasets",
"library:pandas",
"library:polars",
"library:mlcroissant",
"region:us"
] | GenAI4ELab | false | false | false | false | false | false | false | false | false | true | false | false | 2026-06-18T06:46:43.753000 |
Voxel51/kitscenes-multimodal | 6,180 | 6,180 | 9 | [
"task_categories:object-detection",
"language:en",
"license:cc-by-nc-4.0",
"size_categories:10K<n<100K",
"format:imagefolder",
"modality:image",
"library:datasets",
"library:mlcroissant",
"library:fiftyone",
"arxiv:2606.02956",
"region:us",
"autonomous-driving",
"fiftyone",
"group",
"hd-... | Voxel51 | false | false | false | false | false | false | false | false | true | false | false | false | 2026-06-18T06:46:43.753000 |
carpedkm/CustoMDiT | 223 | 223 | 16 | [
"task_categories:text-to-video",
"license:cc-by-4.0",
"size_categories:1M<n<10M",
"format:csv",
"modality:text",
"library:datasets",
"library:pandas",
"library:polars",
"library:mlcroissant",
"arxiv:2606.11783",
"region:us",
"video-customization",
"identity-preserving",
"open-domain",
"d... | carpedkm | false | false | false | false | false | false | false | true | false | true | false | false | 2026-06-18T06:46:43.753000 |
dynamic-maps/hard-intersection-multimodal-sample | 1,385 | 1,385 | 5 | [
"task_categories:image-to-3d",
"task_categories:image-classification",
"task_categories:image-segmentation",
"task_categories:depth-estimation",
"task_categories:object-detection",
"task_categories:other",
"annotations_creators:expert-generated",
"annotations_creators:human-annotated",
"language:en"... | dynamic-maps | false | false | false | false | false | false | false | false | true | false | false | false | 2026-06-18T06:46:43.753000 |
openbmb/MA-ProofBench | 192 | 192 | 6 | [
"task_categories:text-generation",
"language:en",
"license:mit",
"size_categories:n<1K",
"format:json",
"modality:text",
"library:datasets",
"library:pandas",
"library:polars",
"library:mlcroissant",
"arxiv:2606.13782",
"region:us",
"mathematics",
"mathematical-analysis",
"theorem-provin... | openbmb | false | false | false | false | false | false | false | false | false | true | false | false | 2026-06-18T06:46:43.753000 |
TokenRhythm/Claw-SWE-Bench | 205 | 205 | 5 | [
"task_categories:text-generation",
"multilinguality:monolingual",
"language:en",
"license:mit",
"size_categories:n<1K",
"format:parquet",
"format:optimized-parquet",
"modality:text",
"library:datasets",
"library:pandas",
"library:polars",
"library:mlcroissant",
"arxiv:2606.12344",
"arxiv:2... | TokenRhythm | false | false | false | false | false | false | false | false | false | true | false | false | 2026-06-18T06:46:43.753000 |
victor/claude-fable-worldcup-2026-session | 228 | 228 | 5 | [
"language:en",
"license:cc-by-4.0",
"size_categories:n<1K",
"format:json",
"format:agent-traces",
"modality:tabular",
"modality:text",
"library:datasets",
"library:pandas",
"library:polars",
"library:mlcroissant",
"region:us",
"claude-code",
"agent-transcript",
"session-log",
"football... | victor | false | false | false | false | false | false | false | false | false | true | false | false | 2026-06-18T06:46:43.753000 |
england-lobster/zhang-xuefeng-data | 123 | 123 | 5 | [
"task_categories:text-generation",
"language:zh",
"license:other",
"size_categories:1K<n<10K",
"format:parquet",
"modality:text",
"library:datasets",
"library:pandas",
"library:polars",
"library:mlcroissant",
"region:us",
"conversational",
"chinese",
"style-transfer",
"persona",
"sft",... | england-lobster | false | false | false | false | false | false | false | false | false | true | false | false | 2026-06-18T06:46:43.753000 |
King3Djbl/fable5-dataset | 102 | 102 | 5 | [
"task_categories:text-generation",
"language:en",
"language:code",
"license:mit",
"size_categories:1K<n<10K",
"region:us"
] | King3Djbl | false | false | false | false | false | false | false | false | false | true | false | false | 2026-06-18T06:46:43.753000 |
build-small-hackathon/CVE_Vulnerailities_Detailed | 70 | 70 | 5 | [
"size_categories:10K<n<100K",
"format:parquet",
"format:optimized-parquet",
"modality:text",
"library:datasets",
"library:dask",
"library:polars",
"library:mlcroissant",
"region:us"
] | build-small-hackathon | false | false | false | false | false | false | false | false | false | true | false | false | 2026-06-18T06:46:43.753000 |
cais/mmlu | 480,483 | 41,907,685 | 769 | [
"task_categories:question-answering",
"task_ids:multiple-choice-qa",
"annotations_creators:no-annotation",
"language_creators:expert-generated",
"multilinguality:monolingual",
"source_datasets:original",
"language:en",
"license:mit",
"size_categories:100K<n<1M",
"format:parquet",
"modality:text"... | cais | false | false | false | false | false | false | false | false | false | true | false | false | 2026-06-18T06:46:43.753000 |
bigcode/starcoderdata | 25,781 | 354,008 | 524 | [
"task_categories:text-generation",
"language_creators:crowdsourced",
"language_creators:expert-generated",
"multilinguality:multilingual",
"language:code",
"license:other",
"size_categories:100M<n<1B",
"format:parquet",
"modality:text",
"library:datasets",
"library:dask",
"library:mlcroissant"... | bigcode | false | false | false | false | false | false | false | false | false | true | false | false | 2026-06-18T06:46:43.753000 |
gaia-benchmark/GAIA | 42,218 | 311,598 | 694 | [
"language:en",
"size_categories:n<1K",
"format:parquet",
"modality:audio",
"modality:document",
"modality:image",
"modality:text",
"library:datasets",
"library:pandas",
"library:polars",
"library:mlcroissant",
"arxiv:2311.12983",
"region:us"
] | gaia-benchmark | true | false | false | false | false | false | false | false | true | true | false | false | 2026-06-18T06:46:43.753000 |
irlspbru/RusLawOD | 871 | 5,500 | 17 | [
"language:ru",
"license:cc-by-nc-4.0",
"size_categories:100K<n<1M",
"format:parquet",
"modality:text",
"library:datasets",
"library:dask",
"library:polars",
"library:mlcroissant",
"arxiv:2406.04855",
"region:us",
"legal",
"corpus"
] | irlspbru | false | false | false | false | false | false | false | false | false | true | false | false | 2026-06-18T06:46:43.753000 |
nonchev/TCGA_virtual_spatial_transcriptomics | 14,052 | 17,576 | 15 | [
"language:en",
"license:cc-by-nc-nd-4.0",
"size_categories:1K<n<10K",
"format:csv",
"modality:text",
"library:datasets",
"library:pandas",
"library:mlcroissant",
"library:polars",
"region:us",
"spatial-transcriptomics",
"histology",
"pathology",
"transcriptomics",
"machine-learning"
] | nonchev | false | false | false | false | false | false | false | false | false | true | false | false | 2026-06-18T06:46:43.753000 |
End of preview. Expand in Data Studio
No dataset card yet
- Downloads last month
- 450