[GH-ISSUE #142] [WhoScored] Can't pull data from custom league #32

Closed
opened 2026-03-02 15:55:11 +03:00 by kerem · 2 comments
Owner

Originally created by @OnlineAnalytics on GitHub (Jan 12, 2023).
Original GitHub issue: https://github.com/probberechts/soccerdata/issues/142

Hey!

I'm trying to use a custom league, the Argentinian Liga Profesional:

```python
ws = sd.WhoScored(leagues =['ARG-Liga Profesional'], seasons = '2022', proxy='tor',headless=False)
ws.read_schedule()
```

but I'm getting the following error:

---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
Input In [12], in <cell line: 1>()
----> 1 ws.read_schedule()

File ~\AppData\Roaming\Python\Python310\site-packages\soccerdata\whoscored.py:270, in WhoScored.read_schedule(self, force_cache)
    257 def read_schedule(self, force_cache: bool = False) -> pd.DataFrame:
    258     """Retrieve the game schedule for the selected leagues and seasons.
    259 
    260     Parameters
   (...)
    268     pd.DataFrame
    269     """
--> 270     df_seasons = self.read_seasons()
    271     filemask = "matches/{}_{}.csv"
    273     all_schedules = []

File ~\AppData\Roaming\Python\Python310\site-packages\soccerdata\whoscored.py:151, in WhoScored.read_seasons(self)
    144 def read_seasons(self) -> pd.DataFrame:
    145     """Retrieve the selected seasons for the selected leagues.
    146 
    147     Returns
    148     -------
    149     pd.DataFrame
    150     """
--> 151     df_leagues = self.read_leagues()
    153     seasons = []
    154     for lkey, league in df_leagues.iterrows():

File ~\AppData\Roaming\Python\Python310\site-packages\soccerdata\whoscored.py:135, in WhoScored.read_leagues(self)
    123     for league in region["tournaments"]:
    124         leagues.append(
    125             {
    126                 "region_id": region["id"],
   (...)
    131             }
    132         )
    134 df = (
--> 135     pd.DataFrame(leagues)
    136     .assign(league=lambda x: x.region + " - " + x.league)
    137     .pipe(self._translate_league)
    138     .set_index("league")
    139     .loc[self._selected_leagues.keys()]
    140     .sort_index()
    141 )
    142 return df

File ~\AppData\Roaming\Python\Python310\site-packages\pandas\core\indexing.py:967, in _LocationIndexer.__getitem__(self, key)
    964 axis = self.axis or 0
    966 maybe_callable = com.apply_if_callable(key, self.obj)
--> 967 return self._getitem_axis(maybe_callable, axis=axis)

File ~\AppData\Roaming\Python\Python310\site-packages\pandas\core\indexing.py:1191, in _LocIndexer._getitem_axis(self, key, axis)
   1188     if hasattr(key, "ndim") and key.ndim > 1:
   1189         raise ValueError("Cannot index with multidimensional key")
-> 1191     return self._getitem_iterable(key, axis=axis)
   1193 # nested tuple slicing
   1194 if is_nested_tuple(key, labels):

File ~\AppData\Roaming\Python\Python310\site-packages\pandas\core\indexing.py:1132, in _LocIndexer._getitem_iterable(self, key, axis)
   1129 self._validate_key(key, axis)
   1131 # A collection of keys
-> 1132 keyarr, indexer = self._get_listlike_indexer(key, axis)
   1133 return self.obj._reindex_with_indexers(
   1134     {axis: [keyarr, indexer]}, copy=True, allow_dups=True
   1135 )

File ~\AppData\Roaming\Python\Python310\site-packages\pandas\core\indexing.py:1327, in _LocIndexer._get_listlike_indexer(self, key, axis)
   1324 ax = self.obj._get_axis(axis)
   1325 axis_name = self.obj._get_axis_name(axis)
-> 1327 keyarr, indexer = ax._get_indexer_strict(key, axis_name)
   1329 return keyarr, indexer

File ~\AppData\Roaming\Python\Python310\site-packages\pandas\core\indexes\base.py:5782, in Index._get_indexer_strict(self, key, axis_name)
   5779 else:
   5780     keyarr, indexer, new_indexer = self._reindex_non_unique(keyarr)
-> 5782 self._raise_if_missing(keyarr, indexer, axis_name)
   5784 keyarr = self.take(indexer)
   5785 if isinstance(key, Index):
   5786     # GH 42790 - Preserve name from an Index

File ~\AppData\Roaming\Python\Python310\site-packages\pandas\core\indexes\base.py:5842, in Index._raise_if_missing(self, key, indexer, axis_name)
   5840     if use_interval_msg:
   5841         key = list(key)
-> 5842     raise KeyError(f"None of [{key}] are in the [{axis_name}]")
   5844 not_found = list(ensure_index(key)[missing_mask.nonzero()[0]].unique())
   5845 raise KeyError(f"{not_found} not in index")

KeyError: "None of [Index(['ARG-Liga Profesional'], dtype='object', name='league')] are in the [index]"


This is my current JSON file:

{
  "ENG-Premier League": {
      "ClubElo": "ENG_1",
      "MatchHistory": "E0",
      "FiveThirtyEight": "premier-league",
      "FBref": "Premier League",
      "ESPN": "eng.1",
      "SoFIFA": "English Premier League (1)",
      "WhoScored": "England - Premier League",
      "season_start": "Aug",
      "season_end": "May"
  },
  "ESP-La Liga": {
      "ClubElo": "ESP_1",
      "MatchHistory": "SP1",
      "FiveThirtyEight": "la-liga",
      "FBref": "La Liga",
      "ESPN": "esp.1",
      "SoFIFA": "Spain Primera Division (1)",
      "WhoScored": "Spain - LaLiga",
      "season_start": "Aug",
      "season_end": "May"
  },
  "ITA-Serie A": {
      "ClubElo": "ITA_1",
      "MatchHistory": "I1",
      "FiveThirtyEight": "serie-a",
      "FBref": "Serie A",
      "ESPN": "ita.1",
      "SoFIFA": " Italian Serie A (1)",
      "WhoScored": "Italy - Serie A",
      "season_start": "Aug",
      "season_end": "May"
  },
  "GER-Bundesliga": {
      "ClubElo": "GER_1",
      "MatchHistory": "D1",
      "FiveThirtyEight": "bundesliga",
      "FBref": "Fußball-Bundesliga",
      "ESPN": "ger.1",
      "SoFIFA": "German 1. Bundesliga (1)",
      "WhoScored": "Germany - Bundesliga",
      "season_start": "Aug",
      "season_end": "May"
  },
  "FRA-Ligue 1": {
      "ClubElo": "FRA_1",
      "MatchHistory": "F1",
      "FiveThirtyEight": "ligue-1",
      "FBref": "Ligue 1",
      "ESPN": "fra.1",
      "SoFIFA": "French Ligue 1 (1)",
      "WhoScored": "France - Ligue 1",
      "season_start": "Aug",
      "season_end": "May"
  },
  "EFL Championship": {
      "FBref": "EFL Championship",
      "season_start": "Aug",
      "season_end": "May"
  },
  "NED-Eredivisie": {
      "ClubElo": "NED_1",
      "MatchHistory": "N1",
      "SoFIFA": "Holland Eredivisie (1)",
      "FBref": "Dutch Eredivisie",
      "ESPN": "ned.1",
      "FiveThirtyEight": "eredivisie",
      "WhoScored": "Netherlands - Eredivisie",
      "season_start": "Aug",
      "season_end": "May"
  },
 "ARG-Liga Profesional": {
      "WhoScored":"Argentina-Liga-Profesional"
  }
}
Originally created by @OnlineAnalytics on GitHub (Jan 12, 2023). Original GitHub issue: https://github.com/probberechts/soccerdata/issues/142 Hey! So I'm trying to run my custom league, the Argentinian Liga Profesional. `ws = sd.WhoScored(leagues =['ARG-Liga Profesional'], seasons = '2022', proxy='tor',headless=False)` `ws.read_schedule()` and I'm getting the error ``` --------------------------------------------------------------------------- KeyError Traceback (most recent call last) Input In [12], in <cell line: 1>() ----> 1 ws.read_schedule() File ~\AppData\Roaming\Python\Python310\site-packages\soccerdata\whoscored.py:270, in WhoScored.read_schedule(self, force_cache) 257 def read_schedule(self, force_cache: bool = False) -> pd.DataFrame: 258 """Retrieve the game schedule for the selected leagues and seasons. 259 260 Parameters (...) 268 pd.DataFrame 269 """ --> 270 df_seasons = self.read_seasons() 271 filemask = "matches/{}_{}.csv" 273 all_schedules = [] File ~\AppData\Roaming\Python\Python310\site-packages\soccerdata\whoscored.py:151, in WhoScored.read_seasons(self) 144 def read_seasons(self) -> pd.DataFrame: 145 """Retrieve the selected seasons for the selected leagues. 146 147 Returns 148 ------- 149 pd.DataFrame 150 """ --> 151 df_leagues = self.read_leagues() 153 seasons = [] 154 for lkey, league in df_leagues.iterrows(): File ~\AppData\Roaming\Python\Python310\site-packages\soccerdata\whoscored.py:135, in WhoScored.read_leagues(self) 123 for league in region["tournaments"]: 124 leagues.append( 125 { 126 "region_id": region["id"], (...) 
131 } 132 ) 134 df = ( --> 135 pd.DataFrame(leagues) 136 .assign(league=lambda x: x.region + " - " + x.league) 137 .pipe(self._translate_league) 138 .set_index("league") 139 .loc[self._selected_leagues.keys()] 140 .sort_index() 141 ) 142 return df File ~\AppData\Roaming\Python\Python310\site-packages\pandas\core\indexing.py:967, in _LocationIndexer.__getitem__(self, key) 964 axis = self.axis or 0 966 maybe_callable = com.apply_if_callable(key, self.obj) --> 967 return self._getitem_axis(maybe_callable, axis=axis) File ~\AppData\Roaming\Python\Python310\site-packages\pandas\core\indexing.py:1191, in _LocIndexer._getitem_axis(self, key, axis) 1188 if hasattr(key, "ndim") and key.ndim > 1: 1189 raise ValueError("Cannot index with multidimensional key") -> 1191 return self._getitem_iterable(key, axis=axis) 1193 # nested tuple slicing 1194 if is_nested_tuple(key, labels): File ~\AppData\Roaming\Python\Python310\site-packages\pandas\core\indexing.py:1132, in _LocIndexer._getitem_iterable(self, key, axis) 1129 self._validate_key(key, axis) 1131 # A collection of keys -> 1132 keyarr, indexer = self._get_listlike_indexer(key, axis) 1133 return self.obj._reindex_with_indexers( 1134 {axis: [keyarr, indexer]}, copy=True, allow_dups=True 1135 ) File ~\AppData\Roaming\Python\Python310\site-packages\pandas\core\indexing.py:1327, in _LocIndexer._get_listlike_indexer(self, key, axis) 1324 ax = self.obj._get_axis(axis) 1325 axis_name = self.obj._get_axis_name(axis) -> 1327 keyarr, indexer = ax._get_indexer_strict(key, axis_name) 1329 return keyarr, indexer File ~\AppData\Roaming\Python\Python310\site-packages\pandas\core\indexes\base.py:5782, in Index._get_indexer_strict(self, key, axis_name) 5779 else: 5780 keyarr, indexer, new_indexer = self._reindex_non_unique(keyarr) -> 5782 self._raise_if_missing(keyarr, indexer, axis_name) 5784 keyarr = self.take(indexer) 5785 if isinstance(key, Index): 5786 # GH 42790 - Preserve name from an Index File 
~\AppData\Roaming\Python\Python310\site-packages\pandas\core\indexes\base.py:5842, in Index._raise_if_missing(self, key, indexer, axis_name) 5840 if use_interval_msg: 5841 key = list(key) -> 5842 raise KeyError(f"None of [{key}] are in the [{axis_name}]") 5844 not_found = list(ensure_index(key)[missing_mask.nonzero()[0]].unique()) 5845 raise KeyError(f"{not_found} not in index") KeyError: "None of [Index(['ARG-Liga Profesional'], dtype='object', name='league')] are in the [index]" ``` ​ This is my current JSON File ``` { "ENG-Premier League": { "ClubElo": "ENG_1", "MatchHistory": "E0", "FiveThirtyEight": "premier-league", "FBref": "Premier League", "ESPN": "eng.1", "SoFIFA": "English Premier League (1)", "WhoScored": "England - Premier League", "season_start": "Aug", "season_end": "May" }, "ESP-La Liga": { "ClubElo": "ESP_1", "MatchHistory": "SP1", "FiveThirtyEight": "la-liga", "FBref": "La Liga", "ESPN": "esp.1", "SoFIFA": "Spain Primera Division (1)", "WhoScored": "Spain - LaLiga", "season_start": "Aug", "season_end": "May" }, "ITA-Serie A": { "ClubElo": "ITA_1", "MatchHistory": "I1", "FiveThirtyEight": "serie-a", "FBref": "Serie A", "ESPN": "ita.1", "SoFIFA": " Italian Serie A (1)", "WhoScored": "Italy - Serie A", "season_start": "Aug", "season_end": "May" }, "GER-Bundesliga": { "ClubElo": "GER_1", "MatchHistory": "D1", "FiveThirtyEight": "bundesliga", "FBref": "Fußball-Bundesliga", "ESPN": "ger.1", "SoFIFA": "German 1. 
Bundesliga (1)", "WhoScored": "Germany - Bundesliga", "season_start": "Aug", "season_end": "May" }, "FRA-Ligue 1": { "ClubElo": "FRA_1", "MatchHistory": "F1", "FiveThirtyEight": "ligue-1", "FBref": "Ligue 1", "ESPN": "fra.1", "SoFIFA": "French Ligue 1 (1)", "WhoScored": "France - Ligue 1", "season_start": "Aug", "season_end": "May" }, "EFL Championship": { "FBref": "EFL Championship", "season_start": "Aug", "season_end": "May" }, "NED-Eredivisie": { "ClubElo": "NED_1", "MatchHistory": "N1", "SoFIFA": "Holland Eredivisie (1)", "FBref": "Dutch Eredivisie", "ESPN": "ned.1", "FiveThirtyEight": "eredivisie", "WhoScored": "Netherlands - Eredivisie", "season_start": "Aug", "season_end": "May" }, "ARG-Liga Profesional": { "WhoScored":"Argentina-Liga-Profesional" } } ```
kerem closed this issue 2026-03-02 15:55:11 +03:00
Author
Owner

@probberechts commented on GitHub (Jan 12, 2023):

I think you've misspelled the WhoScored league name: `"Argentina-Liga-Profesional"` should be `"Argentina - Liga Profesional"`.

<!-- gh-comment-id:1380052130 --> @probberechts commented on GitHub (Jan 12, 2023): I think you've misspelled the WhoScored league name. `"Argentina-Liga-Profesional"` should be `"Argentina - Liga Profesional"`.
Author
Owner

@OnlineAnalytics commented on GitHub (Jan 12, 2023):

You're correct! Sorry to bother you.

<!-- gh-comment-id:1380062328 --> @OnlineAnalytics commented on GitHub (Jan 12, 2023): You're correct! Sorry to bother you.
Sign in to join this conversation.
No milestone
No project
No assignees
1 participant
Notifications
Due date
The due date is invalid or out of range. Please use the format "yyyy-mm-dd".

No due date set.

Dependencies

No dependencies set.

Reference
starred/soccerdata#32
No description provided.