Spaces:

hf-audio
/

open_asr_leaderboard

Running on CPU Upgrade

App Files Files Community

Steveeeeeeen HF Staff committed 10 days ago

Commit

5a0b713

1 Parent(s): 5216e19

remove tedlium (#67)

Browse files

- remove tedlium (07f45bfaafaa92c9bff0a40677e80170abe8f5eb)

Files changed (3) hide show

app.py +31 -7
constants.py +1 -2
utils_display.py +0 -2

app.py CHANGED Viewed

@@ -20,7 +20,6 @@ expanded_languages = set()  # Track which languages are expanded
 column_names = {
     "MODEL": "Model",
-    "Avg. WER": "Average WER ⬇️",
     "RTFx": "RTFx ⬆️️",
     "AMI WER": "AMI",
     "Earnings22 WER": "Earnings22",
@@ -28,11 +27,10 @@ column_names = {
     "LS Clean WER": "LS Clean",
     "LS Other WER": "LS Other",
     "SPGISpeech WER": "SPGISpeech",
-    "Tedlium WER": "Tedlium",
     "Voxpopuli WER": "Voxpopuli",
 }
 always_visible = ["model", "Average WER ⬇️", "Rank Δ"]
-default_datasets = ["AMI", "Earnings22", "Gigaspeech", "LS Clean", "LS Other", "SPGISpeech", "Tedlium", "Voxpopuli"]
 AUDIO_LM_MODELS = {
     "nvidia/canary-qwen-2.5b",
@@ -70,6 +68,31 @@ if not csv_results.exists():
 # Get csv with data and parse columns
 original_df = pd.read_csv(csv_results)
 # Formats the columns
 def formatter(x, col=None):
     # Special rule for "# Languages"
@@ -98,7 +121,10 @@ for col in original_df.columns:
     else:
         original_df[col] = original_df[col].apply(lambda x: formatter(x, col))
 original_df.rename(columns=column_names, inplace=True)
-original_df.sort_values(by='Average WER ⬇️', inplace=True)
 COLS = [c.name for c in fields(AutoEvalColumn)]
 TYPES = [c.type for c in fields(AutoEvalColumn)]
@@ -305,12 +331,11 @@ def create_longform_dataframe():
             # Get values from CSV, similar to other tabs
             earnings21_wer = row_data.get('earnings21', -1)
             earnings22_wer = row_data.get('earnings22', -1)
-            tedlium_wer = row_data.get('tedlium', -1)
             coraal_wer = row_data.get('coraal_avg', -1)
             rtfx_value = row_data.get('RTFx', 0)
             # Calculate average WER from available datasets
-            available_wers = [w for w in [earnings21_wer, earnings22_wer, tedlium_wer, coraal_wer] if w != -1 and w > 0]
             avg_wer = round(np.mean(available_wers), 2) if available_wers else 0.0
             row = {
@@ -319,7 +344,6 @@ def create_longform_dataframe():
                 "RTFx ⬆️️": rtfx_value if rtfx_value > 0 else "NA",
                 "Earnings21": earnings21_wer if earnings21_wer != -1 else "NA",
                 "Earnings22": earnings22_wer if earnings22_wer != -1 else "NA",
-                "Tedlium": tedlium_wer if tedlium_wer != -1 else "NA",
                 "CORAAL": coraal_wer if coraal_wer != -1 else "NA",
             }
             longform_data.append(row)

 column_names = {
     "MODEL": "Model",
     "RTFx": "RTFx ⬆️️",
     "AMI WER": "AMI",
     "Earnings22 WER": "Earnings22",
     "LS Clean WER": "LS Clean",
     "LS Other WER": "LS Other",
     "SPGISpeech WER": "SPGISpeech",
     "Voxpopuli WER": "Voxpopuli",
 }
 always_visible = ["model", "Average WER ⬇️", "Rank Δ"]
+default_datasets = ["AMI", "Earnings22", "Gigaspeech", "LS Clean", "LS Other", "SPGISpeech", "Voxpopuli"]
 AUDIO_LM_MODELS = {
     "nvidia/canary-qwen-2.5b",
 # Get csv with data and parse columns
 original_df = pd.read_csv(csv_results)
+def _compute_average_wer_from_default_datasets(df):
+    """Compute Average WER from the default leaderboard datasets."""
+    df = df.copy()
+    wer_cols = [c for c in default_datasets if c in df.columns]
+    if wer_cols:
+        def compute_avg(row):
+            values = []
+            for col in wer_cols:
+                value = row[col]
+                if value == "NA" or value is None:
+                    return "NA"
+                try:
+                    values.append(float(value))
+                except (TypeError, ValueError):
+                    return "NA"
+            return round(np.mean(values), 2) if values else "NA"
+        df["Average WER ⬇️"] = df.apply(compute_avg, axis=1)
+    else:
+        df["Average WER ⬇️"] = "NA"
+    return df
 # Formats the columns
 def formatter(x, col=None):
     # Special rule for "# Languages"
     else:
         original_df[col] = original_df[col].apply(lambda x: formatter(x, col))
 original_df.rename(columns=column_names, inplace=True)
+if "Avg. WER" in original_df.columns:
+    original_df = original_df.drop(columns=["Avg. WER"])
+original_df = _compute_average_wer_from_default_datasets(original_df)
+original_df = original_df.sort_values(by='Average WER ⬇️', key=lambda col: pd.to_numeric(col, errors="coerce"), na_position="last")
 COLS = [c.name for c in fields(AutoEvalColumn)]
 TYPES = [c.type for c in fields(AutoEvalColumn)]
             # Get values from CSV, similar to other tabs
             earnings21_wer = row_data.get('earnings21', -1)
             earnings22_wer = row_data.get('earnings22', -1)
             coraal_wer = row_data.get('coraal_avg', -1)
             rtfx_value = row_data.get('RTFx', 0)
             # Calculate average WER from available datasets
+            available_wers = [w for w in [earnings21_wer, earnings22_wer, coraal_wer] if w != -1 and w > 0]
             avg_wer = round(np.mean(available_wers), 2) if available_wers else 0.0
             row = {
                 "RTFx ⬆️️": rtfx_value if rtfx_value > 0 else "NA",
                 "Earnings21": earnings21_wer if earnings21_wer != -1 else "NA",
                 "Earnings22": earnings22_wer if earnings22_wer != -1 else "NA",
                 "CORAAL": coraal_wer if coraal_wer != -1 else "NA",
             }
             longform_data.append(row)

constants.py CHANGED Viewed

@@ -31,6 +31,7 @@ CITATION_TEXT = """@misc{srivastav2025openasrleaderboardreproducible,
 # For new changes, add a bullet with the date at the start (it is extracted for the "Last updated..." info in the UI)
 CHANGELOG_TEXT = """
 - **5 May 2026** — Added 🔒 Private Data tab with benchmarks from Appen Inc. and DataoceanAI (11 datasets covering scripted and conversational speech across US, British, Australian, Canadian, and Indian accents). Private data average WER is now available as a toggleable column in the main leaderboard. Added rank column to show how ordering changes.
 """
@@ -52,7 +53,6 @@ For convenience, they are aggregated into a single dataset [here](https://huggin
 | [LibriSpeech (clean)](https://huggingface.co/datasets/openslr/librispeech_asr)          | 5.4          | CC-BY-4.0       | Audiobooks                  | Read                 | Normalized                      |
 | [LibriSpeech (other)](https://huggingface.co/datasets/openslr/librispeech_asr)          | 5.1          | CC-BY-4.0       | Audiobooks (noisier)        | Read                 | Normalized                      |
 | [SPGISpeech](https://huggingface.co/datasets/kensho/spgispeech)                         | 100          | User Agreement  | Financial meetings          | Oratory, spontaneous | Punctuated, cased               |
-| [TED-LIUM v3](https://arxiv.org/abs/1805.04699)                                         | 3            | CC-BY-NC-ND 3.0 | TED Talks                   | Oratory              | Disfluencies                    |
 | [VoxPopuli](https://huggingface.co/datasets/facebook/voxpopuli)                         | 5            | CC0             | European Parliament         | Oratory              | Punctuated                      |
 The "Multilingual" tab uses the test split of the following datasets.
@@ -74,7 +74,6 @@ CORAAL is [separate](https://huggingface.co/datasets/bezzam/coraal) as each spli
 | [CORAAL](https://oraal.github.io/coraal)                                     | 159          | CC-BY-NC-4.0    | Sociolinguistic interviews | Spontaneous          | Punctuated, cased, disfluencies |
 | [Earnings21](https://huggingface.co/datasets/Revai/earnings21)               | 39           | CC-BY-SA-4.0    | Earnings calls             | Oratory, spontaneous | Punctuated, cased, disfluencies |
 | [Earnings22](https://huggingface.co/datasets/distil-whisper/earnings22)      | 119          | CC-BY-SA-4.0    | Earnings calls             | Oratory, spontaneous | Punctuated, cased, disfluencies |
-| [TED-LIUM v3](https://arxiv.org/abs/1805.04699)                              | 3            | CC-BY-NC-ND 3.0 | TED Talks                  | Oratory              | Disfluencies                    |
 The "Private data" tab uses the following datasets, which are not publicly available.

 # For new changes, add a bullet with the date at the start (it is extracted for the "Last updated..." info in the UI)
 CHANGELOG_TEXT = """
+- **20 May 2026** - Removed Tedlium v3 from main and longform tabs due to license change in original data. Related commit for updated leaderboard results: https://huggingface.co/datasets/hf-audio/open-asr-leaderboard-results/commit/f3ff7c9d583f4beaf908f2b2c18f3055040e515b
 - **5 May 2026** — Added 🔒 Private Data tab with benchmarks from Appen Inc. and DataoceanAI (11 datasets covering scripted and conversational speech across US, British, Australian, Canadian, and Indian accents). Private data average WER is now available as a toggleable column in the main leaderboard. Added rank column to show how ordering changes.
 """
 | [LibriSpeech (clean)](https://huggingface.co/datasets/openslr/librispeech_asr)          | 5.4          | CC-BY-4.0       | Audiobooks                  | Read                 | Normalized                      |
 | [LibriSpeech (other)](https://huggingface.co/datasets/openslr/librispeech_asr)          | 5.1          | CC-BY-4.0       | Audiobooks (noisier)        | Read                 | Normalized                      |
 | [SPGISpeech](https://huggingface.co/datasets/kensho/spgispeech)                         | 100          | User Agreement  | Financial meetings          | Oratory, spontaneous | Punctuated, cased               |
 | [VoxPopuli](https://huggingface.co/datasets/facebook/voxpopuli)                         | 5            | CC0             | European Parliament         | Oratory              | Punctuated                      |
 The "Multilingual" tab uses the test split of the following datasets.
 | [CORAAL](https://oraal.github.io/coraal)                                     | 159          | CC-BY-NC-4.0    | Sociolinguistic interviews | Spontaneous          | Punctuated, cased, disfluencies |
 | [Earnings21](https://huggingface.co/datasets/Revai/earnings21)               | 39           | CC-BY-SA-4.0    | Earnings calls             | Oratory, spontaneous | Punctuated, cased, disfluencies |
 | [Earnings22](https://huggingface.co/datasets/distil-whisper/earnings22)      | 119          | CC-BY-SA-4.0    | Earnings calls             | Oratory, spontaneous | Punctuated, cased, disfluencies |
 The "Private data" tab uses the following datasets, which are not publicly available.

utils_display.py CHANGED Viewed

@@ -21,7 +21,6 @@ class AutoEvalColumn: # Auto evals column
     lsc_wer = ColumnContent("LS Clean", "number")
     lso_wer = ColumnContent("LS Other", "number")
     ss_wer = ColumnContent("SPGISpeech", "number")
-    tl_wer = ColumnContent("Tedlium", "number")
     vp_wer = ColumnContent("Voxpopuli", "number")
 @dataclass(frozen=True)
@@ -47,7 +46,6 @@ class LongformColumn: # Long-form ASR benchmark columns
     rtf = ColumnContent("RTFx ⬆️️", "number")
     earnings21_wer = ColumnContent("Earnings21", "number")
     earnings22_wer = ColumnContent("Earnings22", "number")
-    tedlium_wer = ColumnContent("Tedlium", "number")
 @dataclass(frozen=True)

     lsc_wer = ColumnContent("LS Clean", "number")
     lso_wer = ColumnContent("LS Other", "number")
     ss_wer = ColumnContent("SPGISpeech", "number")
     vp_wer = ColumnContent("Voxpopuli", "number")
 @dataclass(frozen=True)
     rtf = ColumnContent("RTFx ⬆️️", "number")
     earnings21_wer = ColumnContent("Earnings21", "number")
     earnings22_wer = ColumnContent("Earnings22", "number")
 @dataclass(frozen=True)