Source code for cowidev.testing.incremental.sweden

import re
from datetime import datetime, timedelta
from typing import Iterator

from epiweeks import Week
from bs4 import BeautifulSoup, element
import pandas as pd


from cowidev.utils.web import get_soup
from cowidev.utils.clean import clean_date, clean_count
from cowidev.testing import CountryTestBase


class Sweden(CountryTestBase):
    """Incremental scraper for Sweden's weekly COVID-19 testing figures.

    The source page publishes one figure per ISO week. This class reads the
    latest week, converts it to a daily average and spreads that value over
    the seven days of the week before exporting.
    """

    location = "Sweden"
    units = "tests performed"
    source_label = "Swedish Public Health Agency"
    notes = ""
    source_url = "https://www.folkhalsomyndigheten.se/smittskydd-beredskap/utbrott/aktuella-utbrott/covid-19/statistik-och-analyser/antalet-testade-for-covid-19/"
    regex = {
        # Literal heading text; it is matched as a substring, not as a pattern.
        "title": r"Antalet testade individer och genomförda test per",
        # Captures the week number from e.g. "vecka 12" / "Vecka 12".
        "week": r"[vV]ecka (\d+)",
    }

    def read(self) -> pd.DataFrame:
        """Reads data from source.

        Returns:
            Dataframe with one row per day of the latest reported week.
        """
        soup = get_soup(self.source_url)
        data = self._parse_data(soup)
        return self._build_df(data)

    def _build_df(self, data: dict) -> pd.DataFrame:
        """Builds dataframe from data.

        Args:
            data: Record with keys "Date" (week end date, "%Y-%m-%d") and
                "Daily change in cumulative total".

        Returns:
            Dataframe covering the 7 days ending on the record's date, each
            carrying the same daily value and the source URL.

        Raises:
            ValueError: If the scraped week is neither exactly one week after
                the last stored date nor the same as the last stored date.
        """
        # Create dataframe
        df = pd.DataFrame([data])
        # Create date range (check week distance)
        dt_min = self._load_last_date() + timedelta(days=1)
        dt = clean_date(df.Date.max(), "%Y-%m-%d", as_datetime=True)
        days_diff = (dt - dt_min).days
        # 6  -> scraped week ends exactly 7 days after the last stored date.
        # -1 -> scraped week end equals the last stored date (already stored).
        if days_diff not in (6, -1):
            raise ValueError(f"Date distance is no longer a week ({days_diff})! Please check.")
        ds = pd.Series(pd.date_range(dt - timedelta(days=6), dt).astype(str), name="Date")
        # Distribute week value over 7 days: after the right merge only the
        # last day holds a value, so back-fill it onto the preceding six days.
        df = df.merge(ds, how="right")
        df = df.assign(
            **{
                "Source URL": self.source_url,
                "Daily change in cumulative total": round(df["Daily change in cumulative total"].bfill()),
            }
        )
        return df

    def _parse_data(self, soup: BeautifulSoup) -> dict:
        """Gets data from the source page.

        Args:
            soup: Parsed source page.

        Returns:
            Record with the week end date and the weekly figure divided by 7
            (rounded) as "Daily change in cumulative total".
        """
        # Extract the relevant element
        elem = self._get_relevant_element(soup)
        # Get the week number
        week_num = self._get_week_num_from_element(elem)
        # Extract the text from the element
        text = self._get_text_from_element(elem)
        # Extract the metrics
        weekly_change = self._parse_metrics(text, week_num)
        # Parse date
        date = self._parse_date(week_num)
        return {
            "Date": date,
            "Daily change in cumulative total": round(weekly_change / 7),
        }

    def _get_relevant_element(self, soup: BeautifulSoup) -> element.Tag:
        """Gets element from the soup.

        Returns:
            The first <h2> whose text contains the configured title.

        Raises:
            TypeError: If no such heading exists on the page.
        """
        matches = [title for title in soup.find_all("h2") if self.regex["title"] in title.text]
        # Check before indexing: indexing an empty list would raise IndexError
        # and hide the intended, more descriptive error.
        if not matches:
            raise TypeError("Website Structure Changed, please update the script")
        return matches[0]

    def _get_week_num_from_element(self, elem: element.Tag) -> int:
        """Gets week number from element.

        Raises:
            ValueError: If the heading holds no week number, or if the week is
                1 (the hard-coded default year must be updated manually).
        """
        match = re.search(self.regex["week"], elem.text)
        if match is None:
            raise ValueError("Week number not found in the title, please update the script")
        week_num = int(match.group(1))
        if week_num == 1:
            raise ValueError("Week 1: New Year, please update date in the script")
        return week_num

    def _get_text_from_element(self, elem: element.Tag) -> str:
        """Gets text from element.

        Returns:
            Text of the first <table> sibling following the heading, with all
            spaces removed and newlines turned into single spaces, so that
            cells end up space-separated.

        Raises:
            TypeError: If no table follows the heading.
        """
        table = elem.find_next_sibling("table")
        if table is None:
            raise TypeError("Website Structure Changed, please update the script")
        return table.text.replace(" ", "").replace("\n", " ")

    def _parse_metrics(self, text: str, week_num) -> int:
        """Gets metrics from the text.

        Args:
            text: Normalised table text from `_get_text_from_element`.
            week_num: Week number whose row should be read.

        Returns:
            The second numeric cell of the row labelled with the given week.
            NOTE(review): given the heading this is presumably the number of
            performed tests - confirm against the live table.

        Raises:
            ValueError: If the row for the given week is not found.
        """
        match = re.search(fr"[vV]ecka{week_num} \d+ (\d+)", text)
        if match is None:
            raise ValueError(f"Metrics for week {week_num} not found, please update the script")
        return clean_count(match.group(1))

    def _parse_date(self, week_num, year: int = 2022) -> str:
        """Parses the date from the week number.

        Args:
            week_num: ISO week number.
            year: ISO year the week belongs to (defaults to the year this
                scraper was written for).

        Returns:
            Last day of that ISO week, formatted by `clean_date`.
        """
        date = Week(year, week_num, system="iso").enddate()
        return clean_date(date)

    def _load_last_date(self) -> datetime:
        """Loads the last date from the datafile.

        Returns:
            Latest "Date" value in the stored datafile, parsed from
            "%Y-%m-%d" into a datetime.
        """
        df_current = self.load_datafile()
        date = df_current.Date.max()
        return clean_date(date, "%Y-%m-%d", as_datetime=True)

    def export(self):
        """Exports data to csv."""
        df = self.read().pipe(self.pipe_metadata)
        # attach=True is forwarded to the inherited exporter (defined outside this file).
        self.export_datafile(df, attach=True)
def main():
    """Entry point: build the Sweden scraper and export its data."""
    scraper = Sweden()
    scraper.export()