Source code for cowidev.utils.annotations
import yaml
import pandas as pd
class AnnotatorInternal:
    """Add an ``annotations`` column to a DataFrame based on a config mapping.

    The attribute ``config`` maps a stream name to a list of annotation
    entries. Each entry requires ``annotation_text`` and ``location`` (a single
    name or a collection of names) and may carry an optional ``date``; when
    present, only rows with ``date`` greater than or equal to it are annotated.

    Expected format of ``config``:

    .. code-block:: python

        {
            "vaccinations": [{
                "annotation_text": "Data for China added on Jun 10",
                "location": ["World", "Asia", "Upper middle income"],
                "date": "2020-06-10",
            }],
            "case-tests": [{
                "annotation_text": "something",
                "location": ["World", "Asia", "Upper middle income"],
                "date": "2020-06-11",
            }],
        }

    Keys in config should match those in `internal_files_columns`.
    """

    def __init__(self, config: dict):
        """Store the annotation configuration.

        Args:
            config: Mapping from stream name to a list of annotation entries.
        """
        self.config = config

    @classmethod
    def from_yaml(cls, path):
        """Build an annotator from a YAML file.

        Args:
            path: Path to a YAML file whose content follows the ``config``
                format described in the class docstring.

        Returns:
            A new instance configured from the file. An empty file results in
            an annotator with no streams.
        """
        with open(path, "r", encoding="utf-8") as f:
            dix = yaml.safe_load(f)
        # An empty YAML document loads as None; fall back to an empty mapping
        # so that `streams` and `add_annotations` keep working.
        return cls(dix or {})

    @property
    def streams(self) -> list:
        """Names of the streams that have annotations configured."""
        return list(self.config)

    def add_annotations(self, df: pd.DataFrame, stream: str) -> pd.DataFrame:
        """Annotate ``df`` if ``stream`` is configured, else return it untouched.

        Args:
            df: Data to annotate. Must expose a ``location`` column, and a
                ``date`` column if any entry of the stream defines ``date``.
            stream: Name of the stream; looked up among the config keys.

        Returns:
            A copy of ``df`` with an ``annotations`` column when ``stream`` is
            configured; otherwise the very same ``df`` object, unmodified.

        Raises:
            ValueError: If an entry of the stream is malformed.
        """
        if stream in self.streams:
            print(f"Adding annotation for {stream}")
            return self._add_annotations(df, stream)
        return df

    def _add_annotations(self, df: pd.DataFrame, stream: str) -> pd.DataFrame:
        """Return a copy of ``df`` with the ``annotations`` column filled in.

        The column starts as ``pd.NA`` everywhere. Entries are applied in
        config order, so a later entry overwrites an earlier one on the rows
        that both of them select.

        Args:
            df: Data to annotate.
            stream: Key of ``self.config`` whose entries are applied.

        Returns:
            New DataFrame including the ``annotations`` column.

        Raises:
            ValueError: If an entry lacks ``location`` or ``annotation_text``,
                or if ``location`` is neither a string nor a list/tuple/set.
        """
        # `assign` returns a new frame, so the caller's DataFrame is not mutated.
        df = df.assign(annotations=pd.NA)
        # A stream key with a null value simply yields no annotations.
        for entry in self.config[stream] or []:
            if not ("location" in entry and "annotation_text" in entry):
                raise ValueError(
                    f"Missing field in {stream} (`location` and `annotation_text` are required)."
                )
            location = entry["location"]
            if isinstance(location, str):
                mask = df.location == location
            elif isinstance(location, (list, tuple, set)):
                mask = df.location.isin(location)
            else:
                # Without this branch `mask` would be undefined on the first
                # entry, or silently reuse the previous entry's mask.
                raise ValueError(
                    f"Invalid `location` in {stream}: expected a string or a list of "
                    f"strings, got {type(location).__name__}."
                )
            if "date" in entry:
                # The annotation applies from the given date onwards (inclusive).
                mask = mask & (df.date >= entry["date"])
            df.loc[mask, "annotations"] = entry["annotation_text"]
        return df