Dataset Viewer
Auto-converted to Parquet Duplicate
id
stringlengths
4
123
downloads
int64
0
2.48M
downloadsAllTime
int64
0
143M
likes
int64
0
9.74k
tags
listlengths
1
7.92k
organization
stringlengths
2
42
has_audio
bool
2 classes
has_speech
bool
2 classes
has_music
bool
2 classes
has_robot
bool
2 classes
has_bio
bool
2 classes
has_med
bool
2 classes
has_series
bool
2 classes
has_video
bool
2 classes
has_image
bool
2 classes
has_text
bool
2 classes
has_science
bool
2 classes
is_biomed
bool
2 classes
data_download_timestamp
timestamp[us, tz=UTC]date
2026-06-18 06:46:43
2026-06-18 06:46:43
Glint-Research/Fable-5-traces
3,118
3,118
273
[ "license:agpl-3.0", "region:us" ]
Glint-Research
false
false
false
false
false
false
false
false
false
false
false
false
2026-06-18T06:46:43.753000
armand0e/claude-fable-5-claude-code
3,307
3,307
129
[ "task_categories:text-generation", "size_categories:n<1K", "format:json", "format:agent-traces", "modality:tabular", "modality:text", "library:datasets", "library:dask", "library:polars", "library:mlcroissant", "region:us", "agent-traces", "format:agent-traces", "claude", "distillation",...
armand0e
false
false
false
false
false
false
false
false
false
true
false
false
2026-06-18T06:46:43.753000
lazarus19/Vibe-Coding-Instruct
634
634
89
[ "size_categories:1M<n<10M", "format:json", "modality:text", "library:datasets", "library:pandas", "library:polars", "library:mlcroissant", "region:us" ]
lazarus19
false
false
false
false
false
false
false
false
false
true
false
false
2026-06-18T06:46:43.753000
agents-last-exam/agents-last-exam
7,525
7,557
185
[ "language:en", "license:cc-by-4.0", "size_categories:n<1K", "format:parquet", "modality:text", "library:datasets", "library:pandas", "library:polars", "library:mlcroissant", "region:us", "computer-use-agents", "agent-benchmark", "benchmark", "evaluation" ]
agents-last-exam
false
false
false
false
false
false
false
false
false
true
false
false
2026-06-18T06:46:43.753000
angrygiraffe/claude-opus-4.6-4.7-reasoning-8.7k
10,153
13,268
382
[ "task_categories:text-generation", "task_categories:question-answering", "language:en", "license:apache-2.0", "size_categories:10K<n<100K", "format:json", "modality:text", "library:datasets", "library:dask", "library:mlcroissant", "region:us", "sft", "chain-of-thought", "coding", "math",...
angrygiraffe
false
false
false
false
false
false
false
false
false
true
true
false
2026-06-18T06:46:43.753000
WithinUsAI/claude_mythos_distilled_25k
2,068
2,120
73
[ "language:en", "license:apache-2.0", "size_categories:10K<n<100K", "format:json", "modality:text", "library:datasets", "library:pandas", "library:polars", "library:mlcroissant", "region:us", "synthetic", "claude", "mythos", "distillation", "cybersecurity", "coding", "reasoning", "a...
WithinUsAI
false
false
false
false
false
false
false
false
false
true
false
false
2026-06-18T06:46:43.753000
stanford-vision-lab/gpic
182,053
186,188
137
[ "language:en", "license:mit", "arxiv:2605.30341", "region:us" ]
stanford-vision-lab
false
false
false
false
false
false
false
false
false
false
false
false
2026-06-18T06:46:43.753000
wikimedia/structured-wikipedia
18,249
41,123
378
[ "language:en", "language:fr", "license:cc-by-sa-4.0", "size_categories:10M<n<100M", "format:parquet", "modality:text", "library:datasets", "library:dask", "library:polars", "library:mlcroissant", "region:us", "wikipedia", "wikimedia", "structured-data", "parquet", "knowledge-base", "...
wikimedia
false
false
false
false
false
false
false
false
false
true
false
false
2026-06-18T06:46:43.753000
victor/fable-5-boeing-747-trace
767
767
23
[ "license:mit", "size_categories:n<1K", "format:json", "format:agent-traces", "modality:tabular", "modality:text", "library:datasets", "library:pandas", "library:polars", "library:mlcroissant", "region:us", "agent-traces", "claude-code", "threejs", "fable-5" ]
victor
false
false
false
false
false
false
false
false
false
true
false
false
2026-06-18T06:46:43.753000
K-intelligence/KSAFE-MM
144
144
23
[ "size_categories:10K<n<100K", "format:parquet", "format:optimized-parquet", "modality:image", "modality:text", "library:datasets", "library:pandas", "library:polars", "library:mlcroissant", "arxiv:2605.28013", "region:us" ]
K-intelligence
false
false
false
false
false
false
false
false
true
true
false
false
2026-06-18T06:46:43.753000
liumindmind/Neko_Audio-80K_Short
9,873
9,873
24
[ "size_categories:10K<n<100K", "format:json", "modality:audio", "modality:text", "library:datasets", "library:pandas", "library:polars", "library:mlcroissant", "region:us" ]
liumindmind
true
false
false
false
false
false
false
false
false
true
false
false
2026-06-18T06:46:43.753000
HuggingFaceFW/fineweb-edu
462,571
7,621,387
1,150
[ "task_categories:text-generation", "language:en", "license:odc-by", "size_categories:1B<n<10B", "format:parquet", "modality:tabular", "modality:text", "library:datasets", "library:dask", "library:polars", "library:mlcroissant", "arxiv:2406.17557", "arxiv:2404.14219", "arxiv:2401.10020", ...
HuggingFaceFW
false
false
false
false
false
false
false
false
false
true
false
false
2026-06-18T06:46:43.753000
openbmb/Ultra-FineWeb
80,903
629,424
390
[ "task_categories:text-generation", "language:en", "language:zh", "license:apache-2.0", "size_categories:1B<n<10B", "format:parquet", "modality:text", "library:datasets", "library:dask", "library:polars", "library:mlcroissant", "arxiv:2505.05427", "arxiv:2602.09003", "arxiv:2412.04315", "...
openbmb
false
false
false
false
false
false
false
false
false
true
false
false
2026-06-18T06:46:43.753000
openbmb/Ultra-FineWeb-L3
79,175
81,625
296
[ "task_categories:text-generation", "language:en", "language:zh", "license:apache-2.0", "size_categories:1B<n<10B", "format:parquet", "modality:text", "library:datasets", "library:dask", "library:polars", "library:mlcroissant", "arxiv:2505.05427", "arxiv:2602.09003", "region:us", "llm", ...
openbmb
false
false
false
false
false
false
false
false
false
true
false
false
2026-06-18T06:46:43.753000
redmadrobot-rnd/pii_benchmark
315
315
15
[ "task_categories:token-classification", "language:ru", "license:mit", "size_categories:1K<n<10K", "format:csv", "modality:text", "library:datasets", "library:pandas", "library:polars", "library:mlcroissant", "region:us", "pii", "ner", "named-entity-recognition", "pii-detection", "priva...
redmadrobot-rnd
false
false
false
false
false
false
false
false
false
true
false
false
2026-06-18T06:46:43.753000
trace-commons/agent-traces
312
312
14
[ "task_categories:text-generation", "language:en", "license:cc-by-4.0", "size_categories:n<1K", "format:parquet", "format:optimized-parquet", "modality:tabular", "modality:text", "library:datasets", "library:pandas", "library:polars", "library:mlcroissant", "region:us", "agent", "agent-tr...
trace-commons
false
false
false
false
false
false
false
false
false
true
false
false
2026-06-18T06:46:43.753000
HuggingFaceFW/fineweb
418,904
8,493,495
2,889
[ "task_categories:text-generation", "language:en", "license:odc-by", "size_categories:10B<n<100B", "modality:tabular", "modality:text", "arxiv:2306.01116", "arxiv:2109.07445", "arxiv:2406.17557", "doi:10.57967/hf/2493", "region:us" ]
HuggingFaceFW
false
false
false
false
false
false
false
false
false
true
false
false
2026-06-18T06:46:43.753000
SakanaAI/AI-CUDA-Engineer-Archive
964
28,575
186
[ "license:cc-by-4.0", "size_categories:10K<n<100K", "format:parquet", "modality:tabular", "modality:text", "library:datasets", "library:pandas", "library:mlcroissant", "library:polars", "region:us", "code" ]
SakanaAI
false
false
false
false
false
false
false
false
false
true
false
false
2026-06-18T06:46:43.753000
Jackrong/Claude-opus-4.7-TraceInversion-5000x
2,215
2,215
62
[ "task_categories:text-generation", "annotations_creators:machine-generated", "language:en", "language:zh", "language:ko", "language:ru", "language:ja", "language:es", "license:apache-2.0", "size_categories:1K<n<10K", "format:json", "modality:text", "library:datasets", "library:pandas", "...
Jackrong
false
false
false
false
false
false
false
false
false
true
false
false
2026-06-18T06:46:43.753000
lordx64/agentic-distill-fable-5-sft
128
128
13
[ "task_categories:text-generation", "language:en", "license:agpl-3.0", "size_categories:1K<n<10K", "format:parquet", "modality:text", "library:datasets", "library:pandas", "library:polars", "library:mlcroissant", "region:us", "agentic", "chain-of-thought", "distillation", "claude", "cla...
lordx64
false
false
false
false
false
false
false
false
false
true
false
false
2026-06-18T06:46:43.753000
openai/gsm8k
896,385
12,499,818
1,391
[ "benchmark:official", "benchmark:eval-yaml", "task_categories:text-generation", "annotations_creators:crowdsourced", "language_creators:crowdsourced", "multilinguality:monolingual", "source_datasets:original", "language:en", "license:mit", "size_categories:10K<n<100K", "format:parquet", "modal...
openai
false
false
false
false
false
false
false
false
false
true
false
false
2026-06-18T06:46:43.753000
aidigestorg/ai-village
202
202
13
[ "language:en", "license:other", "size_categories:1M<n<10M", "region:us", "agents", "llm-agents", "computer-use", "ai-safety", "agentic-behavior" ]
aidigestorg
false
false
false
false
false
false
false
false
false
true
false
false
2026-06-18T06:46:43.753000
Anthropic/hh-rlhf
32,091
1,921,576
1,794
[ "license:mit", "size_categories:100K<n<1M", "format:json", "modality:text", "library:datasets", "library:dask", "library:mlcroissant", "library:polars", "arxiv:2204.05862", "region:us", "human-feedback" ]
Anthropic
false
false
false
false
false
false
false
false
false
true
false
false
2026-06-18T06:46:43.753000
zlab-princeton/i1-captions
3,859
3,903
14
[ "task_categories:text-to-image", "size_categories:100M<n<1B", "format:parquet", "modality:text", "library:datasets", "library:dask", "library:polars", "library:mlcroissant", "arxiv:2606.11289", "region:us" ]
zlab-princeton
false
false
false
false
false
false
false
false
true
true
false
false
2026-06-18T06:46:43.753000
qualialabsAI/SmoothConv
17,820
17,820
11
[ "language:zh", "license:cc-by-nc-4.0", "arxiv:0000.00000", "region:us", "speech", "conversational-speech", "chinese" ]
qualialabsAI
false
true
false
false
false
false
false
false
false
false
false
false
2026-06-18T06:46:43.753000
perplexity-ai/draco
1,146
12,076
105
[ "language:en", "license:mit", "size_categories:n<1K", "format:json", "modality:text", "library:datasets", "library:pandas", "library:polars", "library:mlcroissant", "arxiv:2602.11685", "region:us", "deep-research" ]
perplexity-ai
false
false
false
false
false
false
false
false
false
true
false
false
2026-06-18T06:46:43.753000
nvidia/Open-SWE-Traces
333
345
10
[ "license:cc-by-4.0", "size_categories:100K<n<1M", "format:parquet", "modality:text", "library:datasets", "library:dask", "library:polars", "library:mlcroissant", "arxiv:2606.16038", "region:us", "code", "synthetic", "tools", "agents", "software" ]
nvidia
false
false
false
false
false
false
false
false
false
true
false
false
2026-06-18T06:46:43.753000
Jackrong/GLM-5.1-Reasoning-1M-Cleaned
6,488
18,759
279
[ "task_categories:text-generation", "task_categories:question-answering", "language:en", "language:zh", "license:apache-2.0", "size_categories:100K<n<1M", "format:json", "modality:text", "library:datasets", "library:pandas", "library:polars", "library:mlcroissant", "region:us", "reasoning",...
Jackrong
false
false
false
false
false
false
false
false
false
true
false
false
2026-06-18T06:46:43.753000
openbmb/UltraData-SFT-2605
44,940
44,940
346
[ "task_categories:text-generation", "task_categories:question-answering", "language:en", "language:zh", "license:apache-2.0", "size_categories:10B<n<100B", "arxiv:2602.09003", "region:us", "llm", "sft", "supervised-fine-tuning", "post-training", "deep-thinking", "reasoning", "instruction-...
openbmb
false
false
false
false
false
false
false
false
false
true
false
false
2026-06-18T06:46:43.753000
nvidia/Nemotron-Pretraining-Code-v3
1,786
1,786
51
[ "task_categories:text-generation", "language:code", "license:cc-by-4.0", "size_categories:100M<n<1B", "format:parquet", "modality:text", "library:datasets", "library:pandas", "library:polars", "library:mlcroissant", "region:us", "text", "pre-training", "human", "legal", "Nemotron_3_Ult...
nvidia
false
false
false
false
false
false
false
false
false
true
false
false
2026-06-18T06:46:43.753000
HelioAI/Fable-5-Distill-Reasoning-462x
495
495
20
[ "task_categories:text-generation", "annotations_creators:machine-generated", "language:en", "language:ru", "license:unknown", "size_categories:n<1K", "region:us", "reasoning", "long-context", "reasoning-traces", "synthetic-data", "chain-of-thought", "process-supervision", "mythos-v2", "d...
HelioAI
false
false
false
false
true
true
false
false
true
true
false
true
2026-06-18T06:46:43.753000
nvidia/Nemotron-Personas-Belgium
43
43
10
[ "task_categories:text-generation", "language:nl", "language:fr", "language:de", "language:en", "license:cc-by-4.0", "size_categories:1M<n<10M", "format:parquet", "modality:text", "library:datasets", "library:pandas", "library:polars", "library:mlcroissant", "library:datadesigner", "regio...
nvidia
false
false
false
false
false
false
false
false
false
true
false
false
2026-06-18T06:46:43.753000
roneneldan/TinyStories
87,064
1,472,382
1,031
[ "task_categories:text-generation", "language:en", "license:cdla-sharing-1.0", "size_categories:1M<n<10M", "format:parquet", "modality:text", "library:datasets", "library:dask", "library:polars", "library:mlcroissant", "arxiv:2305.07759", "region:us" ]
roneneldan
false
false
false
false
false
false
false
false
false
true
false
false
2026-06-18T06:46:43.753000
bones-studio/seed
4,122
16,714
149
[ "task_categories:robotics", "task_categories:text-to-video", "task_categories:video-text-to-text", "language:en", "license:other", "size_categories:100K<n<1M", "region:us", "motion-capture", "humanoid-robotics", "human-motion", "physical-ai", "whole-body-control", "NVIDIA-SOMA", "Unitree-G...
bones-studio
false
false
false
true
false
false
false
true
false
true
false
false
2026-06-18T06:46:43.753000
Jackrong/Claude-opus-4.6-TraceInversion-9000x
2,681
2,681
69
[ "task_categories:text-generation", "annotations_creators:machine-generated", "language:en", "language:zh", "language:ko", "language:ja", "language:ru", "language:es", "license:apache-2.0", "size_categories:1K<n<10K", "format:json", "modality:text", "library:datasets", "library:pandas", "...
Jackrong
false
false
false
false
false
false
false
false
false
true
false
false
2026-06-18T06:46:43.753000
qualialabsAI/DuplexConv
14,507
14,507
9
[ "language:zh", "license:cc-by-nc-4.0", "arxiv:0000.00000", "region:us", "speech", "conversational-speech", "chinese" ]
qualialabsAI
false
true
false
false
false
false
false
false
false
false
false
false
2026-06-18T06:46:43.753000
tahoebio/EmeraldBay
1,122
1,122
11
[ "license:cc-by-4.0", "size_categories:1M<n<10M", "format:parquet", "format:optimized-parquet", "modality:text", "library:datasets", "library:dask", "library:polars", "library:mlcroissant", "region:us", "biology", "single-cell", "RNA", "drug-sensitivity", "perturbation", "chemistry" ]
tahoebio
false
false
false
false
true
false
false
false
false
true
false
true
2026-06-18T06:46:43.753000
AweAI-Team/Scale-SWE-Distilled-DeepSeek-v4-Pro-High-41k
504
504
8
[ "arxiv:2602.09892", "region:us" ]
AweAI-Team
false
false
false
false
false
false
false
false
false
false
false
false
2026-06-18T06:46:43.753000
allenai/c4
833,248
13,495,543
598
[ "task_categories:text-generation", "task_categories:fill-mask", "task_ids:language-modeling", "task_ids:masked-language-modeling", "annotations_creators:no-annotation", "language_creators:found", "multilinguality:multilingual", "source_datasets:original", "language:af", "language:am", "language:...
allenai
false
false
false
false
false
false
false
false
false
true
false
false
2026-06-18T06:46:43.753000
google/fleurs
72,599
1,576,273
415
[ "task_categories:automatic-speech-recognition", "annotations_creators:expert-generated", "annotations_creators:crowdsourced", "annotations_creators:machine-generated", "language_creators:crowdsourced", "language_creators:expert-generated", "multilinguality:multilingual", "language:afr", "language:am...
google
true
true
false
false
false
false
false
false
false
true
false
false
2026-06-18T06:46:43.753000
nvidia/PhysicalAI-Autonomous-Vehicles
175,553
2,460,686
916
[ "license:other", "region:us" ]
nvidia
false
false
false
false
false
false
false
false
false
false
false
false
2026-06-18T06:46:43.753000
open-thoughts/OpenThoughts3-1.2M
23,751
201,110
242
[ "task_categories:text-generation", "license:apache-2.0", "size_categories:1M<n<10M", "format:parquet", "modality:text", "library:datasets", "library:dask", "library:mlcroissant", "library:polars", "arxiv:2506.04178", "region:us", "reasoning", "mathematics", "code", "science" ]
open-thoughts
false
false
false
false
false
false
false
false
false
true
true
false
2026-06-18T06:46:43.753000
nvidia/PhysicalAI-Autonomous-Vehicles-NuRec
17,838
109,368
183
[ "license:other", "region:us" ]
nvidia
false
false
false
false
false
false
false
false
false
false
false
false
2026-06-18T06:46:43.753000
openbmb/UltraData-Math
35,416
200,827
318
[ "task_categories:text-generation", "language:en", "language:zh", "license:apache-2.0", "size_categories:100M<n<1B", "format:parquet", "modality:text", "library:datasets", "library:dask", "library:polars", "library:mlcroissant", "arxiv:2602.09003", "region:us", "llm", "pretraining", "ma...
openbmb
false
false
false
false
false
false
false
false
false
true
false
false
2026-06-18T06:46:43.753000
jasperai/monet
180,476
406,464
133
[ "task_categories:text-to-image", "task_categories:image-feature-extraction", "task_categories:zero-shot-image-classification", "language:en", "license:apache-2.0", "size_categories:100M<n<1B", "arxiv:2605.21272", "region:us", "multimodal", "image-text", "captioning", "text-to-image", "synthe...
jasperai
false
false
false
false
false
false
false
false
true
true
false
false
2026-06-18T06:46:43.753000
WithinUsAI/GPT_5.5_Distilled
647
732
11
[ "license:apache-2.0", "size_categories:10K<n<100K", "format:json", "modality:text", "library:datasets", "library:pandas", "library:polars", "library:mlcroissant", "region:us" ]
WithinUsAI
false
false
false
false
false
false
false
false
false
true
false
false
2026-06-18T06:46:43.753000
IndexTeam/CASTER-Bench
182
182
8
[ "task_categories:video-classification", "task_categories:text-classification", "language:zh", "license:cc-by-nc-4.0", "size_categories:1K<n<10K", "format:json", "modality:image", "modality:text", "modality:video", "library:datasets", "library:pandas", "library:polars", "library:mlcroissant",...
IndexTeam
false
false
false
false
false
false
false
true
true
true
false
false
2026-06-18T06:46:43.753000
WithinUsAI/claude_opus_4.8_distill_5k
363
363
11
[ "license:apache-2.0", "size_categories:1K<n<10K", "format:json", "modality:text", "library:datasets", "library:pandas", "library:polars", "library:mlcroissant", "region:us" ]
WithinUsAI
false
false
false
false
false
false
false
false
false
true
false
false
2026-06-18T06:46:43.753000
tencent/Hy-Embodied-0.5-VLA-Data
22,267
22,267
7
[ "task_categories:robotics", "task_categories:reinforcement-learning", "license:cc-by-4.0", "size_categories:n<1K", "format:parquet", "modality:tabular", "modality:text", "library:datasets", "library:pandas", "library:polars", "library:mlcroissant", "library:lerobot", "library:lance", "arxi...
tencent
false
false
false
true
false
false
false
false
false
true
false
false
2026-06-18T06:46:43.753000
meituan-longcat/LoHoSearch
329
329
8
[ "task_categories:question-answering", "language:en", "license:mit", "size_categories:1K<n<10K", "format:csv", "modality:text", "library:datasets", "library:pandas", "library:polars", "library:mlcroissant", "arxiv:2606.12837", "region:us", "search-agent", "benchmark", "knowledge-graph", ...
meituan-longcat
false
false
false
false
false
false
false
false
false
true
false
false
2026-06-18T06:46:43.753000
attentionAllYouNeed/Vibe-Coding-Claude-Fable-5
183
183
8
[ "size_categories:1M<n<10M", "format:json", "modality:text", "library:datasets", "library:pandas", "library:polars", "library:mlcroissant", "region:us" ]
attentionAllYouNeed
false
false
false
false
false
false
false
false
false
true
false
false
2026-06-18T06:46:43.753000
MiG-NJU/OmniVideo-100K
750
750
7
[ "license:apache-2.0", "size_categories:10K<n<100K", "modality:video", "modality:text", "modality:image", "arxiv:2606.14702", "region:us", "video", "text", "image" ]
MiG-NJU
false
false
false
false
false
false
false
true
true
true
false
false
2026-06-18T06:46:43.753000
tatsu-lab/alpaca
83,636
2,135,370
992
[ "task_categories:text-generation", "language:en", "license:cc-by-nc-4.0", "size_categories:10K<n<100K", "format:parquet", "modality:text", "library:datasets", "library:pandas", "library:polars", "library:mlcroissant", "region:us", "instruction-finetuning" ]
tatsu-lab
false
false
false
false
false
false
false
false
false
true
false
false
2026-06-18T06:46:43.753000
QuixiAI/ultrachat-uncensored
172
4,116
64
[ "license:mit", "size_categories:100K<n<1M", "format:json", "modality:text", "library:datasets", "library:pandas", "library:mlcroissant", "library:polars", "region:us" ]
QuixiAI
false
false
false
false
false
false
false
false
false
true
false
false
2026-06-18T06:46:43.753000
Idavidrein/gpqa
114,728
1,825,009
461
[ "benchmark:official", "benchmark:eval-yaml", "task_categories:question-answering", "task_categories:text-generation", "language:en", "license:cc-by-4.0", "size_categories:1K<n<10K", "format:csv", "modality:tabular", "modality:text", "library:datasets", "library:pandas", "library:polars", "...
Idavidrein
false
false
false
false
false
false
false
false
false
true
false
false
2026-06-18T06:46:43.753000
bigcode/the-stack-v2
16,554
309,541
585
[ "task_categories:text-generation", "language_creators:crowdsourced", "language_creators:expert-generated", "multilinguality:multilingual", "language:code", "license:other", "size_categories:1B<n<10B", "format:parquet", "modality:tabular", "modality:text", "library:datasets", "library:dask", ...
bigcode
false
false
false
false
false
false
false
false
false
true
false
false
2026-06-18T06:46:43.753000
NousResearch/hermes-function-calling-v1
30,598
100,141
423
[ "task_categories:text-generation", "task_categories:question-answering", "task_categories:feature-extraction", "language:en", "license:apache-2.0", "size_categories:10K<n<100K", "format:json", "modality:text", "library:datasets", "library:pandas", "library:polars", "library:mlcroissant", "re...
NousResearch
false
false
false
false
false
false
false
false
false
true
false
false
2026-06-18T06:46:43.753000
lambda/hermes-agent-reasoning-traces
3,032
13,845
360
[ "task_categories:text-generation", "language:en", "license:apache-2.0", "size_categories:10K<n<100K", "format:parquet", "format:optimized-parquet", "modality:text", "library:datasets", "library:pandas", "library:polars", "library:mlcroissant", "region:us", "tool-calling", "function-calling...
lambda
false
false
false
false
false
false
false
false
false
true
false
false
2026-06-18T06:46:43.753000
badlogicgames/pi-mono
2,875
24,583
155
[ "task_categories:text-generation", "language:en", "language:code", "license:other", "region:us", "agent-traces", "coding-agent", "pi-share-hf" ]
badlogicgames
false
false
false
false
false
false
false
false
false
true
false
false
2026-06-18T06:46:43.753000
InternScience/Scholar-kg
54
54
6
[ "region:us" ]
InternScience
false
false
false
false
false
false
false
false
false
false
false
false
2026-06-18T06:46:43.753000
xwm/WildGUI
14,995
14,995
6
[ "language:en", "license:cc-by-nc-4.0", "size_categories:10M<n<100M", "format:webdataset", "modality:image", "modality:text", "library:datasets", "library:webdataset", "library:mlcroissant", "arxiv:2605.14747", "region:us", "gui-agents", "gui-grounding", "interaction-trajectories", "video...
xwm
false
false
false
false
false
false
false
true
true
true
false
false
2026-06-18T06:46:43.753000
armand0e/minimax-m3-claude-code-traces
927
927
11
[ "task_categories:text-generation", "size_categories:n<1K", "format:json", "format:agent-traces", "modality:tabular", "modality:text", "library:datasets", "library:dask", "library:polars", "library:mlcroissant", "region:us", "agent-traces", "format:agent-traces", "claude-code", "distillat...
armand0e
false
false
false
false
false
false
false
false
false
true
false
false
2026-06-18T06:46:43.753000
zhiqix/PUM-MATH
231
231
12
[ "task_categories:text-generation", "task_categories:text-classification", "language:en", "license:cc-by-4.0", "size_categories:100K<n<1M", "format:json", "modality:tabular", "modality:text", "library:datasets", "library:pandas", "library:polars", "library:mlcroissant", "arxiv:2606.07190", ...
zhiqix
false
false
false
false
false
false
false
false
false
true
false
false
2026-06-18T06:46:43.753000
dieKarotte/SO-Bench
2,059
2,059
7
[ "size_categories:1K<n<10K", "format:audiofolder", "modality:audio", "library:datasets", "library:mlcroissant", "arxiv:2606.10738", "region:us" ]
dieKarotte
true
false
false
false
false
false
false
false
false
false
false
false
2026-06-18T06:46:43.753000
Jrmyrion/Fable-5-traces
162
162
6
[ "license:agpl-3.0", "size_categories:1K<n<10K", "format:json", "modality:text", "library:datasets", "library:pandas", "library:polars", "library:mlcroissant", "region:us" ]
Jrmyrion
false
false
false
false
false
false
false
false
false
true
false
false
2026-06-18T06:46:43.753000
cfahlgren1/Fable-5-traces
372
372
6
[ "license:agpl-3.0", "size_categories:n<1K", "format:json", "format:agent-traces", "modality:tabular", "modality:text", "library:datasets", "library:dask", "library:polars", "library:mlcroissant", "region:us" ]
cfahlgren1
false
false
false
false
false
false
false
false
false
true
false
false
2026-06-18T06:46:43.753000
hotpotqa/hotpot_qa
84,008
1,065,742
306
[ "task_categories:question-answering", "annotations_creators:crowdsourced", "language_creators:found", "multilinguality:monolingual", "source_datasets:original", "language:en", "license:cc-by-sa-4.0", "size_categories:100K<n<1M", "format:parquet", "modality:text", "library:datasets", "library:d...
hotpotqa
false
false
false
false
false
false
false
false
false
true
false
false
2026-06-18T06:46:43.753000
HuggingFaceH4/ultrachat_200k
63,115
1,003,936
733
[ "task_categories:text-generation", "language:en", "license:mit", "size_categories:100K<n<1M", "format:parquet", "modality:text", "library:datasets", "library:dask", "library:mlcroissant", "library:polars", "arxiv:2305.14233", "region:us" ]
HuggingFaceH4
false
false
false
false
false
false
false
false
false
true
false
false
2026-06-18T06:46:43.753000
Salesforce/xlam-function-calling-60k
32,313
148,222
638
[ "task_categories:question-answering", "task_categories:text-generation", "task_categories:reinforcement-learning", "language:en", "license:cc-by-4.0", "size_categories:10K<n<100K", "format:json", "modality:text", "library:datasets", "library:pandas", "library:mlcroissant", "library:polars", ...
Salesforce
false
false
false
false
false
false
false
false
false
true
false
false
2026-06-18T06:46:43.753000
HuggingFaceFW/fineweb-2
90,571
1,995,838
822
[ "task_categories:text-generation", "language:aai", "language:aak", "language:aau", "language:aaz", "language:aba", "language:abi", "language:abk", "language:abn", "language:abq", "language:abs", "language:abt", "language:abx", "language:aby", "language:abz", "language:aca", "language...
HuggingFaceFW
false
false
false
false
false
false
false
false
false
true
false
false
2026-06-18T06:46:43.753000
cais/hle
34,984
347,481
836
[ "benchmark:official", "license:mit", "size_categories:1K<n<10K", "format:parquet", "modality:image", "modality:text", "library:datasets", "library:pandas", "library:polars", "library:mlcroissant", "region:us" ]
cais
false
false
false
false
false
false
false
false
true
true
false
false
2026-06-18T06:46:43.753000
Anthropic/EconomicIndex
38,588
157,115
544
[ "language:en", "license:mit", "arxiv:2503.04761", "region:us", "AI", "LLM", "Economic Impacts", "Anthropic" ]
Anthropic
false
false
false
false
false
false
false
false
false
true
false
false
2026-06-18T06:46:43.753000
whale99/Interaction2Code
4,276
8,816
10
[ "task_categories:image-text-to-text", "language:en", "size_categories:1K<n<10K", "format:imagefolder", "modality:image", "library:datasets", "library:mlcroissant", "arxiv:2411.03292", "region:us" ]
whale99
false
false
false
false
false
false
false
false
true
true
false
false
2026-06-18T06:46:43.753000
nvidia/OpenCodeInstruct
8,443
49,533
97
[ "task_categories:text-generation", "language:en", "license:cc-by-4.0", "size_categories:1M<n<10M", "format:parquet", "modality:text", "library:datasets", "library:dask", "library:polars", "library:mlcroissant", "arxiv:2504.04030", "region:us", "code", "synthetic" ]
nvidia
false
false
false
false
false
false
false
false
false
true
false
false
2026-06-18T06:46:43.753000
ScaleAI/SWE-bench_Pro
74,071
1,087,655
128
[ "benchmark:official", "benchmark:eval-yaml", "size_categories:n<1K", "format:parquet", "modality:text", "library:datasets", "library:pandas", "library:polars", "library:mlcroissant", "region:us" ]
ScaleAI
false
false
false
false
false
false
false
false
false
true
false
false
2026-06-18T06:46:43.753000
openai/gdpval
89,750
354,752
511
[ "size_categories:n<1K", "format:parquet", "modality:text", "library:datasets", "library:pandas", "library:polars", "library:mlcroissant", "region:us" ]
openai
false
false
false
false
false
false
false
false
false
true
false
false
2026-06-18T06:46:43.753000
TeichAI/claude-4.5-opus-high-reasoning-250x
961
24,654
397
[ "size_categories:n<1K", "format:json", "modality:text", "library:datasets", "library:pandas", "library:mlcroissant", "library:polars", "region:us" ]
TeichAI
false
false
false
false
false
false
false
false
false
true
false
false
2026-06-18T06:46:43.753000
qixuewei/nuReasoning
14,073
16,153
19
[ "task_categories:visual-question-answering", "task_categories:text-generation", "task_categories:robotics", "license:other", "size_categories:10K<n<100K", "region:us" ]
qixuewei
false
false
false
true
false
false
false
false
false
true
false
false
2026-06-18T06:46:43.753000
ansulev/GPT-5.5-Thinking-Max-Distill-25k
216
268
6
[ "language:en", "license:apache-2.0", "size_categories:10K<n<100K", "format:json", "modality:text", "library:datasets", "library:pandas", "library:polars", "library:mlcroissant", "region:us", "gpt-5-5", "thinking-max-distill", "god-level-recursive-seed-ai", "o1-style-reasoning", "test-tim...
ansulev
false
false
false
false
false
false
false
false
false
true
false
false
2026-06-18T06:46:43.753000
SWE-Explore-Bench/SWE-Explore-Bench
270
337
11
[ "license:cc-by-nc-nd-4.0", "size_categories:n<1K", "format:json", "modality:text", "library:datasets", "library:pandas", "library:polars", "library:mlcroissant", "arxiv:2606.07297", "region:us" ]
SWE-Explore-Bench
false
false
false
false
false
false
false
false
false
true
false
false
2026-06-18T06:46:43.753000
WithinUsAI/claude_Opus_4.7_Distilled
468
566
20
[ "license:apache-2.0", "size_categories:10K<n<100K", "format:json", "modality:text", "library:datasets", "library:pandas", "library:polars", "library:mlcroissant", "region:us" ]
WithinUsAI
false
false
false
false
false
false
false
false
false
true
false
false
2026-06-18T06:46:43.753000
nvidia/Nemotron-SFT-ARC-AGI-v1
636
636
12
[ "task_categories:text-generation", "language:en", "license:other", "size_categories:100K<n<1M", "format:json", "modality:text", "library:datasets", "library:pandas", "library:polars", "library:mlcroissant", "region:us", "Nemotron_3_Ultra", "code", "reasoning", "synthetic", "text", "s...
nvidia
false
false
false
false
false
false
false
false
false
true
false
false
2026-06-18T06:46:43.753000
kyutai/interactivity-alignment-samples
2,020
2,020
7
[ "language:en", "license:cc-by-4.0", "size_categories:1K<n<10K", "format:audiofolder", "modality:audio", "library:datasets", "library:mlcroissant", "arxiv:2606.11167", "region:us" ]
kyutai
true
false
false
false
false
false
false
false
false
false
false
false
2026-06-18T06:46:43.753000
nvidia/Nemotron-Personas-El-Salvador
4,501
4,501
53
[ "task_categories:text-generation", "language:es", "license:cc-by-4.0", "size_categories:100K<n<1M", "format:parquet", "format:optimized-parquet", "modality:text", "library:datasets", "library:dask", "library:polars", "library:mlcroissant", "library:datadesigner", "region:us", "synthetic", ...
nvidia
false
false
false
false
false
false
false
false
false
true
false
false
2026-06-18T06:46:43.753000
inclusionAI/FinixDocBench
2,940
2,940
7
[ "task_categories:image-to-text", "task_categories:object-detection", "language:zh", "language:en", "license:cc-by-nc-sa-4.0", "size_categories:n<1K", "format:imagefolder", "modality:image", "modality:text", "library:datasets", "library:mlcroissant", "region:us", "document-parsing", "ocr", ...
inclusionAI
false
false
false
false
false
false
false
false
true
true
false
false
2026-06-18T06:46:43.753000
GenAI4ELab/papercli-papers
10,332
10,332
7
[ "license:cc-by-4.0", "size_categories:100K<n<1M", "format:parquet", "modality:text", "library:datasets", "library:pandas", "library:polars", "library:mlcroissant", "region:us" ]
GenAI4ELab
false
false
false
false
false
false
false
false
false
true
false
false
2026-06-18T06:46:43.753000
Voxel51/kitscenes-multimodal
6,180
6,180
9
[ "task_categories:object-detection", "language:en", "license:cc-by-nc-4.0", "size_categories:10K<n<100K", "format:imagefolder", "modality:image", "library:datasets", "library:mlcroissant", "library:fiftyone", "arxiv:2606.02956", "region:us", "autonomous-driving", "fiftyone", "group", "hd-...
Voxel51
false
false
false
false
false
false
false
false
true
false
false
false
2026-06-18T06:46:43.753000
carpedkm/CustoMDiT
223
223
16
[ "task_categories:text-to-video", "license:cc-by-4.0", "size_categories:1M<n<10M", "format:csv", "modality:text", "library:datasets", "library:pandas", "library:polars", "library:mlcroissant", "arxiv:2606.11783", "region:us", "video-customization", "identity-preserving", "open-domain", "d...
carpedkm
false
false
false
false
false
false
false
true
false
true
false
false
2026-06-18T06:46:43.753000
dynamic-maps/hard-intersection-multimodal-sample
1,385
1,385
5
[ "task_categories:image-to-3d", "task_categories:image-classification", "task_categories:image-segmentation", "task_categories:depth-estimation", "task_categories:object-detection", "task_categories:other", "annotations_creators:expert-generated", "annotations_creators:human-annotated", "language:en"...
dynamic-maps
false
false
false
false
false
false
false
false
true
false
false
false
2026-06-18T06:46:43.753000
openbmb/MA-ProofBench
192
192
6
[ "task_categories:text-generation", "language:en", "license:mit", "size_categories:n<1K", "format:json", "modality:text", "library:datasets", "library:pandas", "library:polars", "library:mlcroissant", "arxiv:2606.13782", "region:us", "mathematics", "mathematical-analysis", "theorem-provin...
openbmb
false
false
false
false
false
false
false
false
false
true
false
false
2026-06-18T06:46:43.753000
TokenRhythm/Claw-SWE-Bench
205
205
5
[ "task_categories:text-generation", "multilinguality:monolingual", "language:en", "license:mit", "size_categories:n<1K", "format:parquet", "format:optimized-parquet", "modality:text", "library:datasets", "library:pandas", "library:polars", "library:mlcroissant", "arxiv:2606.12344", "arxiv:2...
TokenRhythm
false
false
false
false
false
false
false
false
false
true
false
false
2026-06-18T06:46:43.753000
victor/claude-fable-worldcup-2026-session
228
228
5
[ "language:en", "license:cc-by-4.0", "size_categories:n<1K", "format:json", "format:agent-traces", "modality:tabular", "modality:text", "library:datasets", "library:pandas", "library:polars", "library:mlcroissant", "region:us", "claude-code", "agent-transcript", "session-log", "football...
victor
false
false
false
false
false
false
false
false
false
true
false
false
2026-06-18T06:46:43.753000
england-lobster/zhang-xuefeng-data
123
123
5
[ "task_categories:text-generation", "language:zh", "license:other", "size_categories:1K<n<10K", "format:parquet", "modality:text", "library:datasets", "library:pandas", "library:polars", "library:mlcroissant", "region:us", "conversational", "chinese", "style-transfer", "persona", "sft",...
england-lobster
false
false
false
false
false
false
false
false
false
true
false
false
2026-06-18T06:46:43.753000
King3Djbl/fable5-dataset
102
102
5
[ "task_categories:text-generation", "language:en", "language:code", "license:mit", "size_categories:1K<n<10K", "region:us" ]
King3Djbl
false
false
false
false
false
false
false
false
false
true
false
false
2026-06-18T06:46:43.753000
build-small-hackathon/CVE_Vulnerailities_Detailed
70
70
5
[ "size_categories:10K<n<100K", "format:parquet", "format:optimized-parquet", "modality:text", "library:datasets", "library:dask", "library:polars", "library:mlcroissant", "region:us" ]
build-small-hackathon
false
false
false
false
false
false
false
false
false
true
false
false
2026-06-18T06:46:43.753000
cais/mmlu
480,483
41,907,685
769
[ "task_categories:question-answering", "task_ids:multiple-choice-qa", "annotations_creators:no-annotation", "language_creators:expert-generated", "multilinguality:monolingual", "source_datasets:original", "language:en", "license:mit", "size_categories:100K<n<1M", "format:parquet", "modality:text"...
cais
false
false
false
false
false
false
false
false
false
true
false
false
2026-06-18T06:46:43.753000
bigcode/starcoderdata
25,781
354,008
524
[ "task_categories:text-generation", "language_creators:crowdsourced", "language_creators:expert-generated", "multilinguality:multilingual", "language:code", "license:other", "size_categories:100M<n<1B", "format:parquet", "modality:text", "library:datasets", "library:dask", "library:mlcroissant"...
bigcode
false
false
false
false
false
false
false
false
false
true
false
false
2026-06-18T06:46:43.753000
gaia-benchmark/GAIA
42,218
311,598
694
[ "language:en", "size_categories:n<1K", "format:parquet", "modality:audio", "modality:document", "modality:image", "modality:text", "library:datasets", "library:pandas", "library:polars", "library:mlcroissant", "arxiv:2311.12983", "region:us" ]
gaia-benchmark
true
false
false
false
false
false
false
false
true
true
false
false
2026-06-18T06:46:43.753000
irlspbru/RusLawOD
871
5,500
17
[ "language:ru", "license:cc-by-nc-4.0", "size_categories:100K<n<1M", "format:parquet", "modality:text", "library:datasets", "library:dask", "library:polars", "library:mlcroissant", "arxiv:2406.04855", "region:us", "legal", "corpus" ]
irlspbru
false
false
false
false
false
false
false
false
false
true
false
false
2026-06-18T06:46:43.753000
nonchev/TCGA_virtual_spatial_transcriptomics
14,052
17,576
15
[ "language:en", "license:cc-by-nc-nd-4.0", "size_categories:1K<n<10K", "format:csv", "modality:text", "library:datasets", "library:pandas", "library:mlcroissant", "library:polars", "region:us", "spatial-transcriptomics", "histology", "pathology", "transcriptomics", "machine-learning" ]
nonchev
false
false
false
false
false
false
false
false
false
true
false
false
2026-06-18T06:46:43.753000
End of preview. Expand in Data Studio

No dataset card yet

Downloads last month
450

Space using evijit/dataverse_daily_data 1