# Source code for cowidev.vax.manual.twitter.paraguay

import re

import pandas as pd

from cowidev.utils.clean import clean_count, clean_date
from cowidev.vax.manual.twitter.base import TwitterCollectorBase


class Paraguay(TwitterCollectorBase):
    """Twitter collector that proposes Paraguay vaccination data points.

    Scans tweets for the "VACUNACIÓN #COVID19 | Reporte del <date>" report and
    extracts the dose and people counts quoted in the tweet text.
    """

    def __init__(self, api, **kwargs):
        """Configure the base collector for the ``msaludpy`` account.

        Args:
            api: API client object, forwarded as-is to ``TwitterCollectorBase``.
            **kwargs: Additional keyword arguments forwarded unchanged to the
                base class constructor.
        """
        super().__init__(api=api, username="msaludpy", location="Paraguay", add_metrics_nan=True, **kwargs)

    def _propose_df(self):
        """Build a DataFrame with one row per matching report tweet.

        Iterates over ``self.tweets`` (presumably populated by the base class)
        and, for every tweet whose text matches the report header, extracts
        the report date, the total doses administered and the number of
        people vaccinated. Iteration stops at the first matching tweet for
        which ``self.stop_search(date)`` is truthy.

        Returns:
            pandas.DataFrame: Columns ``date``, ``total_vaccinations``,
            ``people_vaccinated``, ``people_fully_vaccinated``, ``text``,
            ``source_url`` and ``media_url``. Empty (no columns) when no tweet
            matches. Counts missing from a tweet are ``pd.NA``.
        """
        # Compile the loop-invariant patterns once instead of on every tweet.
        regex_report = re.compile(
            r"VACUNACIÓN #COVID19 \| Reporte del (\d{1,2}\.\d{1,2}\.202\d) - \d{1,2}:\d{1,2}"
        )
        regex_doses = re.compile(r"Total Dosis Administradas: ([\d\.]+)")
        regex_people = re.compile(r"Total personas vacunadas: ([\d\.]+)")

        data = []
        for tweet in self.tweets:
            text = tweet.full_text
            match = regex_report.search(text)
            if not match:
                continue

            # Check the stop condition before doing any further parsing work.
            dt = clean_date(match.group(1), "%d.%m.%Y")
            if self.stop_search(dt):
                break

            doses_match = regex_doses.search(text)
            total_vaccinations = clean_count(doses_match.group(1)) if doses_match else pd.NA

            people_match = regex_people.search(text)
            people_vaccinated = clean_count(people_match.group(1)) if people_match else pd.NA

            # Derived as doses minus people; pd.NA on either side propagates
            # through the subtraction, so a missing count yields pd.NA here.
            people_fully_vaccinated = total_vaccinations - people_vaccinated

            # A tweet without attached media may not expose
            # ``extended_entities`` (or may expose an empty media list);
            # record a missing value instead of aborting the whole collection.
            try:
                media_url = tweet.extended_entities["media"][0]["media_url_https"]
            except (AttributeError, KeyError, IndexError, TypeError):
                media_url = pd.NA

            data.append(
                {
                    "date": dt,
                    "total_vaccinations": total_vaccinations,
                    "people_vaccinated": people_vaccinated,
                    "people_fully_vaccinated": people_fully_vaccinated,
                    "text": text,
                    # NOTE(review): placeholder value kept as-is; the original
                    # left a commented-out `pan.build_post_url(tweet.id)` here.
                    # Replace with the real post URL once the helper to use is
                    # confirmed.
                    "source_url": 1,
                    "media_url": media_url,
                }
            )
        return pd.DataFrame(data)


def main(api):
    """Run the Paraguay collector with the given API client and export it to CSV.

    Args:
        api: API client object passed to the ``Paraguay`` constructor.
    """
    collector = Paraguay(api)
    collector.to_csv()