cowidev.utils#

cowidev.utils.annotations#

class cowidev.utils.annotations.AnnotatorInternal(config: dict)[source]#

Bases: object

Adds annotations column.

Uses the config attribute to add annotations. Its format should be as follows:

```
{
    "vaccinations": [{
        'annotation_text': 'Data for China added on Jun 10',
        'location': ['World', 'Asia', 'Upper middle income'],
        'date': '2020-06-10'
    }],
    "case-tests": [{
        'annotation_text': 'something',
        'location': ['World', 'Asia', 'Upper middle income'],
        'date': '2020-06-11'
    }],
}
```

Keys in config should match those in internal_files_columns.

_add_annotations(df: DataFrame, stream: str) → DataFrame[source]#

add_annotations(df: DataFrame, stream: str) → DataFrame[source]#

classmethod from_yaml(path)[source]#

property streams#

cowidev.utils.exceptions#

exception cowidev.utils.exceptions.ConfigFileError[source]#: Bases: Exception

exception cowidev.utils.exceptions.EnvironmentError[source]#: Bases: Exception

exception cowidev.utils.exceptions.SecretsFileError[source]#: Bases: Exception

cowidev.utils.io#

cowidev.utils.io.extract_zip(input_path, output_folder, **kwargs)[source]#

cowidev.utils.log#

cowidev.utils.log.get_logger(mode='info')[source]#

cowidev.utils.log.normalize_country_name(country_name: str)[source]#

cowidev.utils.log.print_eoe()[source]#

cowidev.utils.log.system_details()[source]#

cowidev.utils.paths#

cowidev.utils.paths.CONFIG_DIR = '/home/docs/.config/owid'#: Where temporary files are stored.

cowidev.utils.paths.CONFIG_FILE = '/home/docs/checkouts/readthedocs.org/user_builds/owidcovid-19-data/checkouts/latest/scripts/config.yaml'#: YAML with pipeline & execution configuration. Obtained from env var $OWID_COVID_CONFIG.

cowidev.utils.paths.SECRETS_FILE = '\'""\''#: YAML with secrets, links and credentials. Not shared publicly. Obtained from env var $OWID_COVID_SECRETS.

cowidev.utils.paths._get_project_dir_from_env(err: bool = False)[source]#

cowidev.utils.paths.out_vax(country: str, public=False, age=False, manufacturer=False, proposal=False)[source]#

cowidev.utils.s3#

Most logic from: https://github.com/owid/walden/blob/master/owid/walden/owid_cache.py

class cowidev.utils.s3.S3(profile_name='default')[source]#

Bases: object

check_for_default_profile()[source]#

connect(profile_name='default')[source]#: Return a connection to Walden’s DigitalOcean space.

download_from_s3(s3_path: Union[str, list], local_path: Union[str, list]) → Optional[str][source]#

Download file from S3.

Parameters:

s3_path (Union[str, list]) – File location to load object from.
local_path (Union[str, list]) – Path where to save file locally.

get_metadata(s3_path)[source]#

Get metadata from the file at s3_path.

Parameters: s3_path (str) – Path to S3 file.
Returns: Metadata
Return type: dict

obj_from_s3(s3_path, **kwargs)[source]#

Load object from s3 location.

Parameters: s3_path (str) – File location to load object from.
Returns: File loaded as object. Currently JSON -> dict, CSV/XLS/XLSX -> pd.DataFrame, general -> str
Return type: object

obj_to_s3(obj, s3_path, public=False, **kwargs)[source]#

Upload an object to S3, as a file.

Parameters:

obj (object) – Object to upload to S3. Currently: - dict -> JSON - str -> text - DataFrame -> CSV/XLSX/XLS/ZIP depending on s3_path value.
s3_path (str) – Object S3 file destination.
public (bool, optional) – Set to True if file is to be publicly accessed. Defaults to False.

Raises:

ValueError – If file format is not supported.

spaces_endpoint = 'https://nyc3.digitaloceanspaces.com'#

upload_to_s3(local_path: Union[str, list], s3_path: Union[str, list], public: bool = False) → Optional[str][source]#

Upload file to Walden.

Parameters:

local_path (Union[str, list]) – Local path to file. It can be a list of paths, should match s3_path’s length.
s3_path (Union[str, list]) – S3 location to upload the file to. It can be a list of paths, should match local_path’s length.
public (bool) – Set to True to expose the file to the public (read only). Defaults to False.

exception cowidev.utils.s3.UploadError[source]#: Bases: Exception

cowidev.utils.s3._check_s3_local_files(local_file, s3_path)[source]#

cowidev.utils.s3._url_to_path_and_bucket(s3_path)[source]#: Check if S3 path format is correct

cowidev.utils.s3._url_to_path_and_bucket_mult(s3_path)[source]#

cowidev.utils.s3.df_from_s3(s3_path: Union[str, list], **kwargs) → Optional[str][source]#: Deprecated. Use obj_from_s3 instead

cowidev.utils.s3.df_to_s3(df: DataFrame, s3_path: Optional[str] = None, public: bool = False, **kwargs) → Optional[str][source]#: Deprecated. Use obj_to_s3 instead

cowidev.utils.s3.dict_from_s3(s3_path: Union[str, list], **kwargs) → dict[source]#: Deprecated. Use obj_from_s3 instead

cowidev.utils.s3.dict_to_s3(data: dict, s3_path: Optional[str] = None, public: bool = False, **kwargs) → Optional[str][source]#: Deprecated. Use obj_to_s3 instead

cowidev.utils.s3.obj_from_s3(s3_path: Union[str, list], **kwargs) → dict[source]#

cowidev.utils.s3.obj_to_s3(data: dict, s3_path: Optional[str] = None, public: bool = False, **kwargs) → Optional[str][source]#

cowidev.utils.s3.str_to_s3(text: str, s3_path: Optional[str] = None, public: bool = False, **kwargs) → Optional[str][source]#: Deprecated. Use obj_to_s3 instead

cowidev.utils.slackapi#

class cowidev.utils.slackapi.SlackAPI[source]#

Bases: object

_load_client()[source]#

_send_msg(channel, title, message_color, title_header=None, message='', trace=None)[source]#

send_error(channel, title, message='', trace=None)[source]#

send_success(channel, title, message='', trace=None)[source]#

send_warning(channel, title, message='', trace=None)[source]#

cowidev.utils.utils#

cowidev.utils.utils.check_known_columns(df: DataFrame, known_cols: list) → None[source]#

cowidev.utils.utils.dict_to_compact_json(d: dict)[source]#: Encodes a Python dict into valid, minified JSON.

cowidev.utils.utils.export_timestamp(timestamp_filename: str, force_directory: Optional[str] = None, timestamp=None)[source]#

cowidev.utils.utils.get_filename(filepath: str, remove_extension: bool = True)[source]#

cowidev.utils.utils.get_traceback(e)[source]#

cowidev.utils.utils.make_monotonic(df: DataFrame, column_date: str, column_metrics: list, max_removed_rows=10, strict=False, new=False) → DataFrame[source]#

cowidev.utils.utils.make_monotonic_new(df: DataFrame, column_date: str, column_metrics: list, max_removed_rows_per_chunk=10) → DataFrame[source]#

cowidev.utils.utils.pd_series_diff_values(a, b)[source]#

cowidev.utils.utils.series_monotonic(ds)[source]#

cowidev.utils.utils.time_str_grapher()[source]#

cowidev.utils.utils.xlsx2csv(filename_xlsx: str, filename_csv: str)[source]#

cowidev.utils.clean_count(count)[source]#

cowidev.utils.clean_date(date_or_text: Union[str, datetime, date], fmt: Optional[str] = None, lang: str = 'en', loc: str = '', minus_days: int = 0, unicode_norm: bool = True, output_fmt: str = '%Y-%m-%d', as_datetime: bool = False)[source]#

Extract a date from a text.

The date from text is extracted using locale loc. Alternatively, you can provide language lang instead.

By default, system default locale is used.

Parameters:

date_or_text (Union[str, datetime, date]) – Input text or date.
fmt (str, optional) – Text format. More details at https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes.
lang (str, optional) – Language two-letter code, e.g. ‘da’ (dansk). If given, loc will be ignored and redefined based on lang. Defaults to ‘en’.
loc (str, optional) – Locale, e.g. es_ES. Get the list of available locales with locale.locale_alias, or locale.windows_locale on Windows. Defaults to “” (system default).
minus_days (int, optional) – Number of days to subtract. Defaults to 0.
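unicode_norm (bool, optional) – Presumably whether to apply Unicode normalization to the input text before parsing (TODO: confirm against the implementation). Defaults to True.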
output_fmt (str, optional) – Format of the output date. By default, uses DATE_FORMAT (‘%Y-%m-%d’).
as_datetime (bool, optional) – Set to True to return the date as a datetime.

Returns:

Extracted date, formatted according to output_fmt (%Y-%m-%d by default).

Return type:

str

cowidev.utils.clean_date_series(ds: Union[Series, list], format_input: Optional[str] = None, format_output: str = '%Y-%m-%d', as_datetime: bool = False, **kwargs) → Union[Series, list][source]#

cowidev.utils.get_soup(source: str, from_encoding: Optional[str] = None, parser='lxml', request_method: str = 'get', use_proxy: bool = False, **kwargs) → BeautifulSoup[source]#

Get soup from website.

Parameters:

source (str) – Website url.
from_encoding (str, optional) – Encoding to use. Defaults to None.
parser (str, optional) – HTML parser. Read https://www.crummy.com/software/BeautifulSoup/bs4/doc/#installing-a-parser. Defaults to ‘lxml’.
request_method (str, optional) – Request method. Options are ‘get’ and ‘post’. Defaults to GET method. For POST method, make sure to specify a header (default one does not work).
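use_proxy (bool) – Presumably whether to route the request through a proxy (TODO: confirm against the implementation). Defaults to False.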
kwargs (dict) – Extra arguments passed to requests.get method. Default values for headers, verify and timeout are used.

Returns:

Website soup.

Return type:

BeautifulSoup