from datetime import datetime, timedelta, date
from contextlib import contextmanager
import locale
import threading
from sys import platform
from typing import Union
import pytz
import re
import pandas as pd
import epiweeks
from cowidev.utils.clean.strings import clean_string
# Lock acquired by `_setlocale` so that only one thread at a time changes the process locale.
LOCALE_LOCK = threading.Lock()
# NOTE(review): no active code in the visible part of this file reads this constant -- confirm it is still needed.
DEFAULT_LOCALE = "C" # "en_US.ISO8859-1"
# Default date format (YYYY-MM-DD) used as the default input/output format by the helpers in this module.
DATE_FORMAT = "%Y-%m-%d"
def week_to_date(year: int, week: int, output_fmt: str = DATE_FORMAT):
    """Return the end date of week number `week` of `year`.

    The end date is the one reported by `epiweeks.Week(year, week).enddate()`.

    Args:
        year (int): Year of the week.
        week (int): Week number within `year`.
        output_fmt (str, optional): Format of the returned date. Defaults to `DATE_FORMAT`.

    Returns:
        str: End date of the week, formatted with `output_fmt`.
    """
    return clean_date(epiweeks.Week(year, week).enddate(), output_fmt=output_fmt)
# Capital "O" runs that are NOT adjacent to a Latin letter, i.e. typos for the digit zero ("2O21", "1O").
# An "O" that belongs to a word ("October", "OCT", "MON") is deliberately left untouched.
_LETTER_O_AS_ZERO = re.compile(r"(?<![A-Za-z\u00C0-\u024F])O+(?![A-Za-z\u00C0-\u024F])")


def clean_date(
    date_or_text: Union[str, datetime, date],
    fmt: str = None,
    lang: str = "en",
    loc: str = "",
    minus_days: int = 0,
    unicode_norm: bool = True,
    output_fmt: str = DATE_FORMAT,
    as_datetime: bool = False,
):
    """Extract a date from a text, or re-format an existing date object.

    Text is parsed with `datetime.strptime` while locale `loc` is active. If `loc` is left as "" and
    `lang` is a key of `locale.locale_alias`, the locale is derived from `lang` instead.

    Args:
        date_or_text (Union[str, datetime, date]): Input text or date.
        fmt (str, optional): Format of the input text. Required when `date_or_text` is text. More details at
            https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes.
        lang (str, optional): Language code, e.g. 'da' (dansk). Only used when `loc` is "". Defaults to "en".
        loc (str, optional): Locale, e.g es_ES. Get list of available locales with `locale.locale_alias` or
            `locale.windows_locale` in windows. Takes precedence over `lang`. Defaults to "".
        minus_days (int, optional): Number of days to subtract. Defaults to 0.
        unicode_norm (bool, optional): If True, the text is passed through `clean_string` before parsing.
            Defaults to True.
        output_fmt (str, optional): Format of the output date. By default, uses `DATE_FORMAT`.
        as_datetime (bool, optional): Set to True to return the date as a datetime.

    Returns:
        Union[str, datetime]: Date formatted with `output_fmt`, or a datetime if `as_datetime` is True.

    Raises:
        ValueError: If `date_or_text` is text and `fmt` is None, or if the text does not match `fmt`.
    """
    # Date objects need no parsing: only shift / convert / format them.
    if isinstance(date_or_text, (datetime, date)):
        # Skip the arithmetic for the default so that the default path returns exactly what it always did.
        dt = date_or_text - timedelta(days=minus_days) if minus_days else date_or_text
        if not as_datetime:
            return dt.strftime(output_fmt)
        if not isinstance(dt, datetime):
            # Plain `date`: promote to a datetime at midnight, as a datetime was requested.
            dt = datetime(dt.year, dt.month, dt.day)
        return dt
    if fmt is None:
        raise ValueError("Input date format is required!")
    # Map the language to a locale only when no explicit locale was requested.
    if loc == "" and lang is not None:
        loc = locale.locale_alias.get(lang, loc)
    # Windows spells locale names with a dash instead of an underscore.
    if platform == "win32" and loc is not None:
        loc = loc.replace("_", "-")
    # Unicode
    if unicode_norm:
        date_or_text = clean_string(date_or_text)
    # Fix letter "O" typed in place of digit "0", without corrupting words such as "October".
    date_or_text = _LETTER_O_AS_ZERO.sub(lambda match: "0" * len(match.group()), date_or_text)
    # Thread-safe extract date
    with _setlocale(loc):
        dt = datetime.strptime(date_or_text, fmt) - timedelta(days=minus_days)
        if not as_datetime:
            return dt.strftime(output_fmt)
        return dt
def _replace_date_fields(date_raw: str, replace_fields: dict = None, date_format: str = DATE_FORMAT):
    """Replace individual fields (year, month, day, ...) of a date given as text.

    Args:
        date_raw (str): Input date, formatted as `date_format`.
        replace_fields (dict, optional): Keyword arguments forwarded to `datetime.replace`, in the form
            dict(field, value), e.g. {"year": 2021}. Defaults to None (nothing is replaced).
        date_format (str, optional): Date format of `date_raw`. Defaults to DATE_FORMAT.

    Returns:
        str: Modified date, in standard format %Y-%m-%d (regardless of `date_format`).
    """
    dt = datetime.strptime(date_raw, date_format)
    # None / empty dict: nothing to replace.
    if replace_fields:
        dt = dt.replace(**replace_fields)
    return dt.strftime(DATE_FORMAT)
def list_timezones():
    """Return the timezone names exposed by pytz as `pytz.all_timezones`."""
    timezone_names = pytz.all_timezones
    return timezone_names
def localdatenow(tz: str = "utc", **kwargs):
    """Get the current local date in timezone `tz`.

    Thin wrapper around `localdate` called with `force_today=True`.

    Args:
        tz (str, optional): Timezone name. Defaults to "utc".
        **kwargs: Additional keyword arguments, forwarded unchanged to `localdate`.

    Returns:
        Whatever `localdate` returns for the given arguments.
    """
    return localdate(tz=tz, force_today=True, **kwargs)
def localdate(
    tz: str = "utc",
    force_today: bool = False,
    hour_limit: int = None,
    date_format: str = DATE_FORMAT,
    plus_days: int = None,
    as_datetime: bool = False,
    minus_days: int = 0,
):
    """Get the local date in a timezone, optionally shifted by a number of days.

    With default arguments the returned date is the day before the current local date.

    Args:
        tz (str, optional): Timezone name. If None, the naive system time is used. Defaults to "utc".
        force_today (bool, optional): If True, the implicit one-day rollback is never applied, regardless of
            `hour_limit`.
        hour_limit (int, optional): The implicit one-day rollback is applied only if the local hour is lower
            than this value. If None, the rollback does not depend on the hour. Defaults to None.
        date_format (str, optional): Format of the output date. Defaults to `DATE_FORMAT`.
        plus_days (int, optional): Number of days to add to the local date.
        as_datetime (bool, optional): Set to True to return the date as a datetime.
        minus_days (int, optional): Number of days to subtract. A non-zero value disables the implicit
            one-day rollback. Defaults to 0.

    Returns:
        Union[str, datetime]: Date formatted with `date_format`, or a datetime if `as_datetime` is True.
    """
    now = datetime.now() if tz is None else datetime.now(tz=pytz.timezone(tz))

    # Accumulate every day shift into a single offset and apply it once.
    offset = timedelta(days=0)
    if minus_days == 0 and not force_today and (hour_limit is None or now.hour < hour_limit):
        # Implicit rollback to the previous day.
        offset -= timedelta(days=1)
    offset -= timedelta(days=minus_days)
    if plus_days:
        offset += timedelta(days=plus_days)

    shifted = now + offset
    return shifted if as_datetime else shifted.strftime(date_format)
def clean_date_series(
    ds: Union[pd.Series, list],
    format_input: str = None,
    format_output: str = DATE_FORMAT,
    as_datetime: bool = False,
    **kwargs
) -> Union[pd.Series, list]:
    """Parse a collection of dates with `pd.to_datetime` and optionally format them as text.

    Args:
        ds (Union[pd.Series, list]): Dates to parse.
        format_input (str, optional): Format of the input dates, passed as `format` to `pd.to_datetime`.
        format_output (str, optional): Format of the output dates. None falls back to `DATE_FORMAT`.
        as_datetime (bool, optional): Set to True to keep the parsed datetimes instead of formatting them.
        **kwargs: Additional keyword arguments for `pd.to_datetime`.

    Returns:
        Union[pd.Series, list]: A list if `ds` was a list, otherwise the object produced by pandas.
    """
    input_was_list = isinstance(ds, list)
    out_fmt = DATE_FORMAT if format_output is None else format_output

    parsed = pd.to_datetime(ds, format=format_input, **kwargs)
    if input_was_list:
        # Wrap the parsed list in a Series so that the `.dt` accessor is available.
        parsed = pd.Series(parsed)

    result = parsed if as_datetime else parsed.dt.strftime(out_fmt)
    return result.tolist() if input_was_list else result
@contextmanager
def _setlocale(name: str):
    """Temporarily switch the process locale (LC_ALL) to `name`.

    `LOCALE_LOCK` is held for the whole lifetime of the context. The locale that was active on entry is
    restored on exit, also when the body (or the locale switch itself) raises.

    REF: https://stackoverflow.com/questions/18593661/how-do-i-strftime-a-date-object-in-a-different-locale

    Args:
        name (str): Locale to activate, as accepted by `locale.setlocale`.

    Yields:
        str: Value returned by `locale.setlocale(locale.LC_ALL, name)`.
    """
    with LOCALE_LOCK:
        # Calling setlocale without a new value only queries the current setting.
        previous = locale.setlocale(locale.LC_ALL)
        try:
            active = locale.setlocale(locale.LC_ALL, name)
            yield active
        finally:
            locale.setlocale(locale.LC_ALL, previous)
def from_tz_to_tz(dt: datetime, from_tz: str = "UTC", to_tz: str = None):
    """Interpret the wall-clock time of `dt` in timezone `from_tz` and convert it to timezone `to_tz`.

    Args:
        dt (datetime): Input datetime. Any tzinfo it carries is discarded; only its wall-clock value is used.
        from_tz (str, optional): Name of the timezone `dt` is expressed in. Defaults to "UTC".
        to_tz (str, optional): Name of the target timezone. It is passed as-is to `pytz.timezone`.
            NOTE(review): the default None is not a timezone name, so callers presumably always
            provide a value -- confirm.

    Returns:
        datetime: Timezone-aware datetime expressed in `to_tz`.
    """
    source_tz = pytz.timezone(from_tz)
    # pytz timezones must be attached with `localize`: passing them through `replace(tzinfo=...)`
    # attaches the zone's default (LMT) offset for zones that are not fixed-offset.
    # `localize` only accepts naive datetimes, so any existing tzinfo is dropped first.
    dt = source_tz.localize(dt.replace(tzinfo=None))
    return dt.astimezone(pytz.timezone(to_tz))