Source code for cowidev.testing.batch.ireland

"""Constructs daily time series of COVID-19 testing data for Ireland.

Dashboard: https://covid19ireland-geohive.hub.arcgis.com/pages/hospitals-icu--testing

"""

import json
import requests
import datetime
import pandas as pd

from cowidev.testing import CountryTestBase


class Ireland(CountryTestBase):
    """Build the daily COVID-19 testing time series for Ireland.

    Records are fetched from the ArcGIS feature service referenced by
    `source_url`, reshaped into the `Date` / `Cumulative total` /
    `Positive rate` columns and exported through the helpers inherited from
    `CountryTestBase`.
    """

    location = "Ireland"
    units = "tests performed"
    TESTING_TYPE = "PCR only"
    source_label = "Government of Ireland"
    source_url_ref = "https://covid19ireland-geohive.hub.arcgis.com/pages/hospitals-icu--testing"
    source_url = "https://services-eu1.arcgis.com/z6bHNio59iTqqSUY/arcgis/rest/services/LaboratoryLocalTimeSeriesHistoricView/FeatureServer/0/query"
    # Maps the raw field names returned by the API to the output column names.
    rename_columns = {
        "Date_HPSC": "Date",
        "Test24": "Daily change in cumulative total",
        "TotalLabs": "Cumulative total",
        "PosR7": "Positive rate",
    }
    # Seconds to wait for the feature service before giving up, so a stalled
    # connection cannot hang the whole run.
    _REQUEST_TIMEOUT = 60

    def read(self):
        """Query the feature service and return its records as a DataFrame.

        Returns:
            pd.DataFrame: one row per feature, with the raw API field names
            (`Date_HPSC`, `TotalLabs`, `Test24`, `PosR7`) as columns.

        Raises:
            requests.HTTPError: if the service answers with an error status.
            requests.Timeout: if no answer arrives within `_REQUEST_TIMEOUT`.
            KeyError: if the JSON payload has no `features` entry.
        """
        DATE_COL = "Date_HPSC"
        params = {
            "f": "json",
            "where": f"{DATE_COL}>'2020-01-01 00:00:00'",
            "returnGeometry": False,
            "spatialRel": "esriSpatialRelIntersects",
            "outFields": f"{DATE_COL},TotalLabs,Test24,PosR7",
            "orderByFields": f"{DATE_COL} asc",
            "resultOffset": 0,
            "resultRecordCount": 32000,
            "resultType": "standard",
        }
        res = requests.get(self.source_url, params=params, timeout=self._REQUEST_TIMEOUT)
        # Fail loudly on HTTP errors instead of trying to parse an error page.
        res.raise_for_status()
        json_data = res.json()
        return pd.DataFrame([feature["attributes"] for feature in json_data["features"]])

    def pipe_date(self, df: pd.DataFrame):
        """Convert `Date` from epoch milliseconds (UTC) to `YYYY-MM-DD` strings.

        The column is overwritten on the frame that is passed in, and that
        same frame is returned.
        """
        # Vectorised conversion; the result is a naive timestamp in UTC, the
        # same calendar day the per-row `utcfromtimestamp(ms / 1000)` produced.
        timestamps = pd.to_datetime(df["Date"].astype(int), unit="ms")
        df["Date"] = timestamps.dt.strftime("%Y-%m-%d")
        return df

    def pipeline(self, df: pd.DataFrame):
        """Turn the raw API frame into the final output frame.

        Steps: rename columns, format dates, keep the three output columns,
        sort by date, drop incomplete rows, fix dtypes and attach metadata
        (via the inherited `pipe_rename_columns` / `pipe_metadata` helpers).

        Returns:
            pd.DataFrame: the frame returned by `pipe_metadata`.
        """
        columns = ["Date", "Cumulative total", "Positive rate"]
        df = df.pipe(self.pipe_rename_columns).pipe(self.pipe_date)
        # Duplicate dates are not removed here; `sanity_checks` (called from
        # `export`) fails if any remain.
        df = df[columns].sort_values("Date").dropna(subset=columns, how="any")
        df = df.assign(
            **{
                "Cumulative total": df["Cumulative total"].astype(int),
                # The source value is a percentage. It is kept as a float so
                # fractional percentages are not truncated before scaling to
                # a 0-1 share; rounding removes float-division noise.
                "Positive rate": df["Positive rate"].astype(float).div(100).round(4),
            }
        )
        return df.pipe(self.pipe_metadata)

    def export(self) -> None:
        """Fetch, process, validate and export the data."""
        df = self.read().pipe(self.pipeline)
        sanity_checks(df)
        self.export_datafile(df)
        return None
def sanity_checks(df: pd.DataFrame) -> None:
    """Check that there are no obvious errors in the scraped data.

    The frame is expected to be ordered by date already; it is not sorted or
    modified here.

    Args:
        df: frame with a `Date` column of `YYYY-MM-DD` strings and either a
            `Cumulative total` column or a `Daily change in cumulative total`
            column (from which the cumulative total is derived).

    Raises:
        AssertionError: if the frame is empty, the latest date is not before
            tomorrow (UTC), a date appears more than once, or the cumulative
            total decreases from one row to the next.
    """
    # Raised explicitly rather than via `assert` so the checks still run when
    # Python is started with -O.
    if df.empty:
        raise AssertionError("The scraped data contains no rows.")

    # The max date must be earlier than tomorrow's date (naive UTC).
    latest = datetime.datetime.strptime(df["Date"].max(), "%Y-%m-%d")
    utc_now = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)
    if not latest < utc_now + datetime.timedelta(days=1):
        raise AssertionError("The most recent date lies in the future.")

    # There must be no duplicate dates.
    if df["Date"].duplicated().sum() != 0:
        raise AssertionError("One or more rows share the same date.")

    if "Cumulative total" in df.columns:
        cumulative = df["Cumulative total"]
    else:
        cumulative = df["Daily change in cumulative total"].cumsum()

    # The cumulative number of tests on date t must never be below the figure
    # for t-1 (the first row has no predecessor and is skipped).
    if not (cumulative.iloc[1:] >= cumulative.shift(1).iloc[1:]).all():
        raise AssertionError("On one or more dates, `Cumulative total` is greater on date t-1.")
    return None
def main():
    """Entry point: build the Ireland scraper and run its export."""
    scraper = Ireland()
    scraper.export()