Source code for cowidev.vax.manual.twitter.paraguay

import re

import pandas as pd

from cowidev.utils.clean import clean_count, clean_date
from cowidev.vax.manual.twitter.base import TwitterCollectorBase


class Paraguay(TwitterCollectorBase):
    """Twitter collector for Paraguay vaccination reports posted by ``msaludpy``.

    Report tweets are recognised by their "VACUNACIÓN #COVID19 | Reporte del
    <date> - <time>" header; the dose and people counts are then parsed from
    the tweet text.
    """

    # Compiled once at class creation instead of being rebuilt for every tweet.
    _REGEX_REPORT = re.compile(r"VACUNACIÓN #COVID19 \| Reporte del (\d{1,2}\.\d{1,2}\.202\d) - \d{1,2}:\d{1,2}")
    _REGEX_DOSES = re.compile(r"Total Dosis Administradas: ([\d\.]+)")
    _REGEX_PEOPLE = re.compile(r"Total personas vacunadas: ([\d\.]+)")

    def __init__(self, api, **kwargs):
        """Initialise the base collector for user ``msaludpy`` and location ``Paraguay``.

        Args:
            api: Twitter API handle, forwarded unchanged to the base class.
            **kwargs: Extra keyword arguments forwarded to the base class.
        """
        super().__init__(api=api, username="msaludpy", location="Paraguay", add_metrics_nan=True, **kwargs)

    @staticmethod
    def _extract_count(pattern, text):
        """Return the cleaned count captured by ``pattern`` in ``text``, or ``pd.NA`` when absent."""
        found = pattern.search(text)
        return clean_count(found.group(1)) if found else pd.NA

    @staticmethod
    def _first_media_url(tweet):
        """Return the HTTPS URL of the tweet's first media item, or ``pd.NA`` if it has none.

        A report tweet without attached media must not abort the whole
        collection, so every lookup failure on the media path maps to ``pd.NA``.
        """
        try:
            return tweet.extended_entities["media"][0]["media_url_https"]
        except (AttributeError, KeyError, IndexError, TypeError):
            return pd.NA

    def _propose_df(self):
        """Build a DataFrame with one row per vaccination-report tweet.

        Iterates ``self.tweets`` in order, skips tweets whose text lacks the
        report header, and stops at the first report whose date makes
        ``self.stop_search`` return a truthy value.

        Returns:
            pandas.DataFrame: rows with the keys ``date``, ``total_vaccinations``,
            ``people_vaccinated``, ``people_fully_vaccinated``, ``text``,
            ``source_url`` and ``media_url``. Empty when no tweet qualifies.
        """
        records = []
        for tweet in self.tweets:
            text = tweet.full_text
            report = self._REGEX_REPORT.search(text)
            if not report:
                continue

            # Check the stop condition before parsing counts: a tweet that ends
            # the search contributes no row, so its figures are never needed.
            dt = clean_date(report.group(1), "%d.%m.%Y")
            if self.stop_search(dt):
                break

            total_vaccinations = self._extract_count(self._REGEX_DOSES, text)
            people_vaccinated = self._extract_count(self._REGEX_PEOPLE, text)
            records.append(
                {
                    "date": dt,
                    "total_vaccinations": total_vaccinations,
                    "people_vaccinated": people_vaccinated,
                    # Derived as doses minus people; pd.NA propagates through
                    # the subtraction when either figure is missing.
                    "people_fully_vaccinated": total_vaccinations - people_vaccinated,
                    "text": text,
                    # TODO: placeholder value kept from the original, where the
                    # intended call (build_post_url(tweet.id)) was commented out.
                    "source_url": 1,
                    "media_url": self._first_media_url(tweet),
                }
            )
        return pd.DataFrame(records)
def main(api):
    """Instantiate the ``Paraguay`` collector with ``api`` and call its ``to_csv`` method."""
    collector = Paraguay(api)
    collector.to_csv()