Source code for cowidev.testing.batch.armenia
import re
import json
import pandas as pd
from cowidev.testing import CountryTestBase
from cowidev.utils.web import get_soup
from cowidev.utils.clean import clean_date_series, clean_count
[docs]class Armenia(CountryTestBase):
location: str = "Armenia"
units: str = "tests performed"
source_label: str = "National Center for Disease Control"
source_url_ref: str = "https://ncdc.am/coronavirus/confirmed-cases-by-days/"
source_url: str = "https://e.infogram.com/"
regex: dict = {
"entity": "f5b6e83c-39b1-47c6-a84f-cd7ebaa3b7b1",
"element": r"window\.infographicData=({.*})",
}
rename_columns: dict = {
"": "Date",
"Հաստատված դեպքեր": "positive",
"Բացասական թեստերի արդյունքներ": "negative",
}
[docs] def read(self) -> pd.DataFrame:
"""Read data from source"""
data_id = self._get_data_id_from_source(self.source_url_ref)
data = self._load_data(data_id)
df = self._build_df(data)
return df
[docs] def _get_data_id_from_source(self, source_url: str) -> str:
"""Get Data ID from source"""
soup = get_soup(source_url)
data_id = soup.find(class_="infogram-embed")["data-id"]
return data_id
[docs] def _load_data(self, data_id):
"""Load data from source"""
url = f"{self.source_url}{data_id}"
soup = get_soup(url)
match = re.search(self.regex["element"], str(soup))
if not match:
raise ValueError("Website Structure Changed, please update the script")
data = json.loads(match.group(1))
return data
[docs] def _build_df(self, data: dict) -> pd.DataFrame:
"""Create df from raw data"""
data = data["elements"]["content"]["content"]["entities"][self.regex["entity"]]["props"]["chartData"]["data"][
0
]
df = pd.DataFrame(data[1:], columns=data[0])
return df
[docs] def pipe_date(self, df: pd.DataFrame) -> pd.DataFrame:
"""Clean date"""
df["Date"] = df.Date.apply(lambda x: re.sub(r"\D", "", x))
return df.assign(Date=clean_date_series(df["Date"], "%d%m%Y")).sort_values("Date")
[docs] def pipe_metrics(self, df: pd.DataFrame) -> pd.DataFrame:
"""Process metrics"""
return df.assign(
**{"Daily change in cumulative total": df.positive.apply(clean_count) + df.negative.apply(clean_count)}
)
[docs] def pipe_pr(self, df: pd.DataFrame) -> pd.DataFrame:
"""Calculate Positive Rate"""
return df.assign(
**{
"Positive rate": df.positive.rolling(7)
.sum()
.div(df["Daily change in cumulative total"].rolling(7).sum())
.round(3)
.fillna(0)
}
)
[docs] def pipeline(self, df: pd.DataFrame) -> pd.DataFrame:
"""Pipeline for data processing"""
return (
df.pipe(self.pipe_rename_columns)
.pipe(self.pipe_date)
.pipe(self.pipe_metrics)
.pipe(self.pipe_pr)
.pipe(self.pipe_metadata)
)
[docs] def export(self):
"""Export data to csv"""
df = self.read().pipe(self.pipeline)
self.export_datafile(df)