Steveeeeeeen HF Staff committed on
Commit
5a0b713
·
1 Parent(s): 5216e19
Files changed (3) hide show
  1. app.py +31 -7
  2. constants.py +1 -2
  3. utils_display.py +0 -2
app.py CHANGED
@@ -20,7 +20,6 @@ expanded_languages = set() # Track which languages are expanded
20
 
21
  column_names = {
22
  "MODEL": "Model",
23
- "Avg. WER": "Average WER ⬇️",
24
  "RTFx": "RTFx ⬆️️",
25
  "AMI WER": "AMI",
26
  "Earnings22 WER": "Earnings22",
@@ -28,11 +27,10 @@ column_names = {
28
  "LS Clean WER": "LS Clean",
29
  "LS Other WER": "LS Other",
30
  "SPGISpeech WER": "SPGISpeech",
31
- "Tedlium WER": "Tedlium",
32
  "Voxpopuli WER": "Voxpopuli",
33
  }
34
  always_visible = ["model", "Average WER ⬇️", "Rank Δ"]
35
- default_datasets = ["AMI", "Earnings22", "Gigaspeech", "LS Clean", "LS Other", "SPGISpeech", "Tedlium", "Voxpopuli"]
36
 
37
  AUDIO_LM_MODELS = {
38
  "nvidia/canary-qwen-2.5b",
@@ -70,6 +68,31 @@ if not csv_results.exists():
70
  # Get csv with data and parse columns
71
  original_df = pd.read_csv(csv_results)
72
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73
  # Formats the columns
74
  def formatter(x, col=None):
75
  # Special rule for "# Languages"
@@ -98,7 +121,10 @@ for col in original_df.columns:
98
  else:
99
  original_df[col] = original_df[col].apply(lambda x: formatter(x, col))
100
  original_df.rename(columns=column_names, inplace=True)
101
- original_df.sort_values(by='Average WER ⬇️', inplace=True)
 
 
 
102
 
103
  COLS = [c.name for c in fields(AutoEvalColumn)]
104
  TYPES = [c.type for c in fields(AutoEvalColumn)]
@@ -305,12 +331,11 @@ def create_longform_dataframe():
305
  # Get values from CSV, similar to other tabs
306
  earnings21_wer = row_data.get('earnings21', -1)
307
  earnings22_wer = row_data.get('earnings22', -1)
308
- tedlium_wer = row_data.get('tedlium', -1)
309
  coraal_wer = row_data.get('coraal_avg', -1)
310
  rtfx_value = row_data.get('RTFx', 0)
311
 
312
  # Calculate average WER from available datasets
313
- available_wers = [w for w in [earnings21_wer, earnings22_wer, tedlium_wer, coraal_wer] if w != -1 and w > 0]
314
  avg_wer = round(np.mean(available_wers), 2) if available_wers else 0.0
315
 
316
  row = {
@@ -319,7 +344,6 @@ def create_longform_dataframe():
319
  "RTFx ⬆️️": rtfx_value if rtfx_value > 0 else "NA",
320
  "Earnings21": earnings21_wer if earnings21_wer != -1 else "NA",
321
  "Earnings22": earnings22_wer if earnings22_wer != -1 else "NA",
322
- "Tedlium": tedlium_wer if tedlium_wer != -1 else "NA",
323
  "CORAAL": coraal_wer if coraal_wer != -1 else "NA",
324
  }
325
  longform_data.append(row)
 
20
 
21
  column_names = {
22
  "MODEL": "Model",
 
23
  "RTFx": "RTFx ⬆️️",
24
  "AMI WER": "AMI",
25
  "Earnings22 WER": "Earnings22",
 
27
  "LS Clean WER": "LS Clean",
28
  "LS Other WER": "LS Other",
29
  "SPGISpeech WER": "SPGISpeech",
 
30
  "Voxpopuli WER": "Voxpopuli",
31
  }
32
  always_visible = ["model", "Average WER ⬇️", "Rank Δ"]
33
+ default_datasets = ["AMI", "Earnings22", "Gigaspeech", "LS Clean", "LS Other", "SPGISpeech", "Voxpopuli"]
34
 
35
  AUDIO_LM_MODELS = {
36
  "nvidia/canary-qwen-2.5b",
 
68
  # Get csv with data and parse columns
69
  original_df = pd.read_csv(csv_results)
70
 
71
+
72
def _compute_average_wer_from_default_datasets(df, datasets=None):
    """Return a copy of ``df`` with an "Average WER ⬇️" column added.

    The average is taken per row over the dataset columns that are actually
    present in ``df``. A row only gets a numeric average when *every* one of
    those columns holds a usable number; otherwise the row is marked "NA" so
    that partially evaluated models are not ranked on fewer datasets.

    Args:
        df: Leaderboard frame whose dataset columns have already been renamed
            to their display names (e.g. "AMI", "LS Clean").
        datasets: Column names to average over. Defaults to the module-level
            ``default_datasets`` list when omitted.

    Returns:
        A new DataFrame (the input is not modified) with the
        "Average WER ⬇️" column holding either a float rounded to two
        decimals or the string "NA".
    """
    df = df.copy()
    if datasets is None:
        datasets = default_datasets
    wer_cols = [c for c in datasets if c in df.columns]

    # None of the requested datasets exist in this frame: nothing to average.
    if not wer_cols:
        df["Average WER ⬇️"] = "NA"
        return df

    def compute_avg(values):
        numbers = []
        for value in values:
            if value == "NA" or value is None:
                return "NA"
            try:
                number = float(value)
            except (TypeError, ValueError):
                return "NA"
            # Blank CSV cells are read as NaN; float() accepts them, so they
            # must be rejected explicitly or the average silently becomes NaN.
            if np.isnan(number):
                return "NA"
            numbers.append(number)
        return round(np.mean(numbers), 2)

    # Built from a plain list rather than df.apply(axis=1) so that a frame
    # with zero rows still receives a well-formed (empty) column.
    df["Average WER ⬇️"] = [
        compute_avg(row)
        for row in df[wer_cols].itertuples(index=False, name=None)
    ]
    return df
95
+
96
  # Formats the columns
97
  def formatter(x, col=None):
98
  # Special rule for "# Languages"
 
121
  else:
122
  original_df[col] = original_df[col].apply(lambda x: formatter(x, col))
123
  original_df.rename(columns=column_names, inplace=True)
124
+ if "Avg. WER" in original_df.columns:
125
+ original_df = original_df.drop(columns=["Avg. WER"])
126
+ original_df = _compute_average_wer_from_default_datasets(original_df)
127
+ original_df = original_df.sort_values(by='Average WER ⬇️', key=lambda col: pd.to_numeric(col, errors="coerce"), na_position="last")
128
 
129
  COLS = [c.name for c in fields(AutoEvalColumn)]
130
  TYPES = [c.type for c in fields(AutoEvalColumn)]
 
331
  # Get values from CSV, similar to other tabs
332
  earnings21_wer = row_data.get('earnings21', -1)
333
  earnings22_wer = row_data.get('earnings22', -1)
 
334
  coraal_wer = row_data.get('coraal_avg', -1)
335
  rtfx_value = row_data.get('RTFx', 0)
336
 
337
  # Calculate average WER from available datasets
338
+ available_wers = [w for w in [earnings21_wer, earnings22_wer, coraal_wer] if w != -1 and w > 0]
339
  avg_wer = round(np.mean(available_wers), 2) if available_wers else 0.0
340
 
341
  row = {
 
344
  "RTFx ⬆️️": rtfx_value if rtfx_value > 0 else "NA",
345
  "Earnings21": earnings21_wer if earnings21_wer != -1 else "NA",
346
  "Earnings22": earnings22_wer if earnings22_wer != -1 else "NA",
 
347
  "CORAAL": coraal_wer if coraal_wer != -1 else "NA",
348
  }
349
  longform_data.append(row)
constants.py CHANGED
@@ -31,6 +31,7 @@ CITATION_TEXT = """@misc{srivastav2025openasrleaderboardreproducible,
31
 
32
  # For new changes, add a bullet with the date at the start (it is extracted for the "Last updated..." info in the UI)
33
  CHANGELOG_TEXT = """
 
34
  - **5 May 2026** — Added 🔒 Private Data tab with benchmarks from Appen Inc. and DataoceanAI (11 datasets covering scripted and conversational speech across US, British, Australian, Canadian, and Indian accents). Private data average WER is now available as a toggleable column in the main leaderboard. Added rank column to show how ordering changes.
35
  """
36
 
@@ -52,7 +53,6 @@ For convenience, they are aggregated into a single dataset [here](https://huggin
52
  | [LibriSpeech (clean)](https://huggingface.co/datasets/openslr/librispeech_asr) | 5.4 | CC-BY-4.0 | Audiobooks | Read | Normalized |
53
  | [LibriSpeech (other)](https://huggingface.co/datasets/openslr/librispeech_asr) | 5.1 | CC-BY-4.0 | Audiobooks (noisier) | Read | Normalized |
54
  | [SPGISpeech](https://huggingface.co/datasets/kensho/spgispeech) | 100 | User Agreement | Financial meetings | Oratory, spontaneous | Punctuated, cased |
55
- | [TED-LIUM v3](https://arxiv.org/abs/1805.04699) | 3 | CC-BY-NC-ND 3.0 | TED Talks | Oratory | Disfluencies |
56
  | [VoxPopuli](https://huggingface.co/datasets/facebook/voxpopuli) | 5 | CC0 | European Parliament | Oratory | Punctuated |
57
 
58
  The "Multilingual" tab uses the test split of the following datasets.
@@ -74,7 +74,6 @@ CORAAL is [separate](https://huggingface.co/datasets/bezzam/coraal) as each spli
74
  | [CORAAL](https://oraal.github.io/coraal) | 159 | CC-BY-NC-4.0 | Sociolinguistic interviews | Spontaneous | Punctuated, cased, disfluencies |
75
  | [Earnings21](https://huggingface.co/datasets/Revai/earnings21) | 39 | CC-BY-SA-4.0 | Earnings calls | Oratory, spontaneous | Punctuated, cased, disfluencies |
76
  | [Earnings22](https://huggingface.co/datasets/distil-whisper/earnings22) | 119 | CC-BY-SA-4.0 | Earnings calls | Oratory, spontaneous | Punctuated, cased, disfluencies |
77
- | [TED-LIUM v3](https://arxiv.org/abs/1805.04699) | 3 | CC-BY-NC-ND 3.0 | TED Talks | Oratory | Disfluencies |
78
 
79
 
80
  The "Private data" tab uses the following datasets, which are not publicly available.
 
31
 
32
  # For new changes, add a bullet with the date at the start (it is extracted for the "Last updated..." info in the UI)
33
  CHANGELOG_TEXT = """
34
+ - **20 May 2026** - Removed Tedlium v3 from main and longform tabs due to license change in original data. Related commit for updated leaderboard results: https://huggingface.co/datasets/hf-audio/open-asr-leaderboard-results/commit/f3ff7c9d583f4beaf908f2b2c18f3055040e515b
35
  - **5 May 2026** — Added 🔒 Private Data tab with benchmarks from Appen Inc. and DataoceanAI (11 datasets covering scripted and conversational speech across US, British, Australian, Canadian, and Indian accents). Private data average WER is now available as a toggleable column in the main leaderboard. Added rank column to show how ordering changes.
36
  """
37
 
 
53
  | [LibriSpeech (clean)](https://huggingface.co/datasets/openslr/librispeech_asr) | 5.4 | CC-BY-4.0 | Audiobooks | Read | Normalized |
54
  | [LibriSpeech (other)](https://huggingface.co/datasets/openslr/librispeech_asr) | 5.1 | CC-BY-4.0 | Audiobooks (noisier) | Read | Normalized |
55
  | [SPGISpeech](https://huggingface.co/datasets/kensho/spgispeech) | 100 | User Agreement | Financial meetings | Oratory, spontaneous | Punctuated, cased |
 
56
  | [VoxPopuli](https://huggingface.co/datasets/facebook/voxpopuli) | 5 | CC0 | European Parliament | Oratory | Punctuated |
57
 
58
  The "Multilingual" tab uses the test split of the following datasets.
 
74
  | [CORAAL](https://oraal.github.io/coraal) | 159 | CC-BY-NC-4.0 | Sociolinguistic interviews | Spontaneous | Punctuated, cased, disfluencies |
75
  | [Earnings21](https://huggingface.co/datasets/Revai/earnings21) | 39 | CC-BY-SA-4.0 | Earnings calls | Oratory, spontaneous | Punctuated, cased, disfluencies |
76
  | [Earnings22](https://huggingface.co/datasets/distil-whisper/earnings22) | 119 | CC-BY-SA-4.0 | Earnings calls | Oratory, spontaneous | Punctuated, cased, disfluencies |
 
77
 
78
 
79
  The "Private data" tab uses the following datasets, which are not publicly available.
utils_display.py CHANGED
@@ -21,7 +21,6 @@ class AutoEvalColumn: # Auto evals column
21
  lsc_wer = ColumnContent("LS Clean", "number")
22
  lso_wer = ColumnContent("LS Other", "number")
23
  ss_wer = ColumnContent("SPGISpeech", "number")
24
- tl_wer = ColumnContent("Tedlium", "number")
25
  vp_wer = ColumnContent("Voxpopuli", "number")
26
 
27
  @dataclass(frozen=True)
@@ -47,7 +46,6 @@ class LongformColumn: # Long-form ASR benchmark columns
47
  rtf = ColumnContent("RTFx ⬆️️", "number")
48
  earnings21_wer = ColumnContent("Earnings21", "number")
49
  earnings22_wer = ColumnContent("Earnings22", "number")
50
- tedlium_wer = ColumnContent("Tedlium", "number")
51
 
52
 
53
  @dataclass(frozen=True)
 
21
  lsc_wer = ColumnContent("LS Clean", "number")
22
  lso_wer = ColumnContent("LS Other", "number")
23
  ss_wer = ColumnContent("SPGISpeech", "number")
 
24
  vp_wer = ColumnContent("Voxpopuli", "number")
25
 
26
  @dataclass(frozen=True)
 
46
  rtf = ColumnContent("RTFx ⬆️️", "number")
47
  earnings21_wer = ColumnContent("Earnings21", "number")
48
  earnings22_wer = ColumnContent("Earnings22", "number")
 
49
 
50
 
51
  @dataclass(frozen=True)