cowidev.megafile.export#

cowidev.megafile.export.annotations#

class cowidev.megafile.export.annotations.AnnotatorInternal(config: dict, logger=None)[source]#

Bases: object

Adds annotations column.

Uses the config attribute to add annotations. Its format should be as follows:

{
    "vaccinations": [{
        'annotation_text': 'Data for China added on Jun 10',
        'location': ['World', 'Asia', 'Upper middle income'],
        'date': '2020-06-10'
    }],
    "case-tests": [{
        'annotation_text': 'something',
        'location': ['World', 'Asia', 'Upper middle income'],
        'date': '2020-06-11'
    }],
}

Keys in config should match those in internal_files_columns.

_add_annotations(df: DataFrame, stream: str) → DataFrame[source]#

_remove_config_duplicates()[source]#

add_annotations(df: DataFrame, stream: str) → DataFrame[source]#

property config#

config_flat_to_nested(df_config)[source]#

Converts flattened config dataframe to class instance format.

Parameters: df_config (pd.DataFrame) – Flattened config.
Returns: Dictionary with original data.
Return type: dict

config_nested_to_flat(config)[source]#

Convert class attribute config to a flattened dataframe.

Each row in the dataframe contains [stream, annotation_text, location, date]. Essentially, what gets flattened is the location field, which originally contains a list of locations.

Parameters: config (dict) – Dictionary with original class config.
Returns: Table with the config in a flattened version.
Return type: pd.DataFrame

classmethod from_yaml(path, logger=None)[source]#

insert_annotation(stream: str, annotation: dict)[source]#

property streams#

to_yaml()[source]#

cowidev.megafile.export.annotations.add_annotations_countries_100_percentage(df, annotator)[source]#

cowidev.megafile.export.html#

cowidev.megafile.export.html.generate_htmls()[source]#

cowidev.megafile.export.html.pipe_vax_locations_to_html(df: DataFrame) → DataFrame[source]#

cowidev.megafile.export.internal#

cowidev.megafile.export.internal.add_fully_vaccinated_no_boosters(df)[source]#

cowidev.megafile.export.internal.add_partially_vaccinated(df: DataFrame, country_data: str)[source]#

cowidev.megafile.export.internal.add_total_vaccinations_no_boosters(df)[source]#

cowidev.megafile.export.internal.country_vax_data_partly(country_data)[source]#

cowidev.megafile.export.internal.create_internal(df: DataFrame, output_dir: str, annotations_path: str, country_data: str, logger)[source]#

cowidev.megafile.export.internal.df_to_columnar_json(complete_dataset, output_path)[source]#

Writes a columnar JSON version of the complete dataset.

NA values are dropped from the output.

In columnar JSON, the table headers are keys, and the values are lists of all cells for a column.

Example:

{
    "iso_code": ["AFG", "AFG", ...],
    "date": ["2020-03-01", "2020-03-02", ...]
}

cowidev.megafile.export.internal.fillna_boosters_till_valid(df)[source]#

cowidev.megafile.export.public#

cowidev.megafile.export.public.create_dataset(df, macro_variables, logger)[source]#: Export dataset as CSV, XLSX and JSON (complete time series).

cowidev.megafile.export.public.create_latest(df, logger)[source]#: Export dataset as CSV, XLSX and JSON (latest data points).

cowidev.megafile.export.public.df_to_dict(complete_dataset, static_columns, valid_json=False)[source]#: Writes a JSON version of the complete dataset, with the ISO code at the root. NA values are dropped from the output. Macro variables are normalized by appearing only once, at the root of each ISO code.

cowidev.megafile.export.public.df_to_json(complete_dataset, output_path, static_columns)[source]#: Writes a JSON version of the complete dataset, with the ISO code at the root. NA values are dropped from the output. Macro variables are normalized by appearing only once, at the root of each ISO code.

cowidev.megafile.export.readme#

cowidev.megafile.export.readme._generate_category_notes(df_notes, category)[source]#

cowidev.megafile.export.readme._generate_category_notes_top(df_notes, category)[source]#

cowidev.megafile.export.readme.generate_readme(readme_template: str, readme_output: str)[source]#

cowidev.megafile.export.readme.get_excluded_locations()[source]#

cowidev.megafile.export.readme.get_num_countries_by_iso(iso_code_colname, csv_filepath=None, df=None)[source]#

cowidev.megafile.export.readme.get_num_countries_by_location(csv_filepath, location_colname, low_memory=True)[source]#

cowidev.megafile.export.readme.get_num_countries_jhu(csv_filepath)[source]#

cowidev.megafile.export.readme.get_placeholder()[source]#

cowidev.megafile.export.readme.get_variable_section()[source]#

cowidev.megafile.export.readme.load_macro_df()[source]#

cowidev.megafile.export.status#

cowidev.megafile.export.status.generate_status(template: str, output: str)[source]#

cowidev.megafile.export.status.get_placeholder()[source]#

cowidev.megafile.export.status.load_status_get(path, path_ts)[source]#

cowidev.megafile.export.status.load_status_process(path, path_ts)[source]#

cowidev.megafile.export.create_dataset(df, macro_variables, logger)[source]#: Export dataset as CSV, XLSX and JSON (complete time series).

cowidev.megafile.export.create_internal(df: DataFrame, output_dir: str, annotations_path: str, country_data: str, logger)[source]#

cowidev.megafile.export.create_latest(df, logger)[source]#: Export dataset as CSV, XLSX and JSON (latest data points).

cowidev.megafile.export.generate_htmls()[source]#

cowidev.megafile.export.generate_readme(readme_template: str, readme_output: str)[source]#

cowidev.megafile.export.generate_status(template: str, output: str)[source]#