cowidev.vax.utils#

cowidev.vax.utils.orgs
- cowidev.vax.utils.orgs._config_loader

cowidev.vax.utils.base#

class cowidev.vax.utils.base.CountryVaxBase[source]#

Bases: object

_check_attributes(mapping)[source]#

_check_metadata(metadata)[source]#

_export_datafile_age(df, metadata, filename, attach)[source]#: Export age data.

_export_datafile_main(df, filename, attach=False, reset_index=False, valid_cols_only=False, **kwargs)[source]#: Export main data.

_export_datafile_manufacturer(df, metadata, filename, attach)[source]#: Export manufacturer data.

_export_datafile_secondary(df, metadata, output_path, output_path_meta)[source]#: Export secondary data.

_postprocessing(df, valid_cols_only)[source]#

Minor post processing after all transformations.

Basically sort by date, ensure correct column order, correct type for metrics.

_postprocessing_age(df)[source]#

Minor post processing after all transformations.

Basically sort by date, ensure correct column order, correct type for metrics.

_postprocessing_manufacturer(df)[source]#

Minor post processing after all transformations.

Basically sort by date, ensure correct column order, correct type for metrics.

check_column_values(df: DataFrame, col_name: str, values_accepted: list) → DataFrame[source]#

export_datafile(df=None, df_age=None, df_manufacturer=None, meta_age=None, meta_manufacturer=None, filename=None, attach=False, attach_age=False, attach_manufacturer=False, reset_index=False, valid_cols_only=False, **kwargs)[source]#

Export country data.

Parameters:

df (pd.DataFrame) – Main country data.
df_age (pd.DataFrame, optional) – Country data by age group. Defaults to None.
df_manufacturer (pd.DataFrame, optional) – Country data by manufacturer. Defaults to None.
meta_age (dict, optional) – Country metadata by age. Defaults to None.
meta_manufacturer (dict, optional) – Country metadata by manufacturer. Defaults to None.
filename (str, optional) – Name of output file. If None, defaults to country name.
attach (bool, optional) – Set to True to attach to already existing data. Defaults to False.
attach_age (bool, optional) – Set to True to attach to already existing age-group data. Defaults to False.
attach_manufacturer (bool, optional) – Set to True to attach to already existing manufacturer data. Defaults to False.
valid_cols_only (bool, optional) – Export only valid columns. Defaults to False.
reset_index (bool, optional) – Bring index back as a column. Defaults to False.

force_monotonic()[source]#

from_ice()[source]#: Loads single CSV location.csv from S3 as DataFrame.

get_output_path(filename=None, age=False, manufacturer=False)[source]#

last_update(**kwargs)[source]#

load_datafile(**kwargs)[source]#

location: str = None#

make_monotonic(df, group_cols=None, max_removed_rows=10, strict=False)[source]#

property output_path#: Country output file.

property output_path_age#: Country output file for age-group data.

property output_path_manufacturer#: Country output file for manufacturer data.

pipe_age_per_capita(df: DataFrame) → DataFrame[source]#

pipe_check_vaccine(df: DataFrame, vaccines_accepted=None) → DataFrame[source]#

pipe_merge_with_current(df, filename=None)[source]#

pipe_metadata(df: DataFrame) → DataFrame[source]#

pipe_rename_columns(df: DataFrame) → DataFrame[source]#

cowidev.vax.utils.base._build_population_age_group_df(location, df)[source]#

cowidev.vax.utils.base._check_last_update(path, country)[source]#

cowidev.vax.utils.base.merge_with_current_data(df: DataFrame, filepath: str) → DataFrame[source]#

cowidev.vax.utils.checks#

class cowidev.vax.utils.checks.CountryChecker(df: DataFrame, allow_extra_cols: bool = True, monotonic_check_skip: list = [], anomalies: bool = True, anomaly_check_skip: list = [])[source]#

Bases: object

_check_anomalies(df, metric, th=6)[source]#

_check_metrics_anomalies(df)[source]#

_check_metrics_inequalities(df: DataFrame)[source]#

_check_metrics_monotonic(df: DataFrame)[source]#

_get_location(df)[source]#

_skip_check_ids(check_skip)[source]#

check_column_names()[source]#

check_date()[source]#

check_location()[source]#

check_metrics()[source]#

check_source_url()[source]#

check_vaccine()[source]#

property metrics_present#

run()[source]#

cowidev.vax.utils.checks.country_df_sanity_checks(df: DataFrame, monotonic_check_skip: list = [], anomalies: bool = True, anomaly_check_skip: list = []) → DataFrame[source]#

cowidev.vax.utils.checks.validate_vaccines(df, vaccines_accepted, vaccines_raw=None)[source]#

cowidev.vax.utils.extra_source#

cowidev.vax.utils.extra_source.add_latest_from_acdc(df: DataFrame, metrics: list, priority: bool = False)[source]#

cowidev.vax.utils.files#

cowidev.vax.utils.files.export_metadata(df: DataFrame, source_name: str, source_url: str, output_path: str)[source]#

cowidev.vax.utils.files.get_file_encoding(file_path)[source]#

cowidev.vax.utils.files.load_data(data_filename: str, file_ext: str = 'csv')[source]#

Load data from a file in the vax._static folder.

Parameters:

data_filename (str) – Name of the data file. If no extension is provided, {data_filename}.{file_ext} will be loaded.
file_ext (str, optional) – Extension of the file. Defaults to “csv”.

Raises:

FileNotFoundError – If no file is found
ValueError – If non-supported format is provided

Returns:

Loaded data.

Return type:

dict

cowidev.vax.utils.files.load_query(query_filename: str, file_ext: str = 'json', to_str: bool = True)[source]#

Load a query from a file in vax._static folder.

Parameters:

query_filename (str) – Name of the query file. If no extension is provided, {query_filename}.{file_ext} will be loaded
file_ext (str, optional) – Extension of the file. Defaults to “json”.

Raises:

FileNotFoundError – If no file is found
ValueError – If non-supported format is provided

Returns:

Loaded query as a string

Return type:

dict

cowidev.vax.utils.incremental#

cowidev.vax.utils.incremental._build_df(location, total_vaccinations, date, vaccine, source_url, people_vaccinated=None, people_partly_vaccinated=None, people_fully_vaccinated=None, total_boosters=None)[source]#

cowidev.vax.utils.incremental._check_fields(location, source_url, vaccine, date, total_vaccinations, people_vaccinated, people_partly_vaccinated, people_fully_vaccinated, total_boosters)[source]#

cowidev.vax.utils.incremental._from_gh_to_scripts(location)[source]#

cowidev.vax.utils.incremental._increment(filepath, location, total_vaccinations, date, vaccine, source_url, people_vaccinated=None, people_partly_vaccinated=None, people_fully_vaccinated=None, total_boosters=None)[source]#

cowidev.vax.utils.incremental.enrich_data(ds: Series, row, value) → Series[source]#

cowidev.vax.utils.incremental.increment(location, total_vaccinations, date, vaccine, source_url, people_vaccinated=None, people_partly_vaccinated=None, people_fully_vaccinated=None, total_boosters=None, make_series_monotonic=False)[source]#

cowidev.vax.utils.incremental.merge_with_current_data(df: DataFrame, filepath: str) → DataFrame[source]#

cowidev.vax.utils.utils#

cowidev.vax.utils.utils.add_latest_who_values(df: DataFrame, who_location_name: str, metrics: list)[source]#: Inserts the latest data available from the WHO vaccination dataset into the existing dataframe. metrics: list of metrics to be used from the WHO dataset. Other metrics that aren’t listed will be automatically set to pd.NA for this specific row.

cowidev.vax.utils.utils.build_vaccine_timeline(df: DataFrame, vaccine_timeline: dict) → DataFrame[source]#

Add vaccines to the data based on each vaccine's administration start date.

vaccine_timeline: dictionary of “vaccine” -> “start_date”

Example:

{"Pfizer/BioNTech": "2021-02-24", "Sinovac": "2021-03-03", "Oxford/AstraZeneca": "2021-05-03", "CanSino": "2021-05-09", "Sinopharm": "2021-09-18"}

cowidev.vax.utils.utils.get_latest_file(path, extension)[source]#

cowidev.vax.utils.utils.make_monotonic(df: DataFrame, max_removed_rows=10, new_version=False) → DataFrame[source]#