Source code for cowidev.vax.incremental.africacdc

from datetime import datetime

import pandas as pd

from cowidev.utils.clean import clean_date
from cowidev.utils.web import request_json
from cowidev.vax.utils.incremental import increment
from cowidev.vax.utils.orgs import WHO_VACCINES, ACDC_COUNTRIES, ACDC_VACCINES


class AfricaCDC:
    """Incremental vaccination importer built on the Africa CDC ArcGIS feature layer.

    The class downloads the per-country attribute table, normalises it through
    a chain of ``pipe_*`` steps and finally hands every country row to
    ``increment``.

    Args:
        skip_who: When True, the WHO vaccine lookup is skipped and the
            pipeline output carries no ``vaccine`` column.
    """

    _base_url = (
        "https://services8.arcgis.com/vWozsma9VzGndzx7/ArcGIS/rest/services/"
        "Admin_Boundaries_Africa_corr_Go_Vaccine_DB_JOIN/FeatureServer/0"
    )
    # Public page recorded as ``source_url`` on every exported row.
    source_url_ref = "https://africacdc.org/covid-19-vaccination/"
    # WHO CSV used to look up the vaccines in use per ISO3 country code.
    _who_vaccines_url = "https://covid19.who.int/who-data/vaccination-data.csv"
    # Raw attribute name -> output column name.
    columns_rename = {
        "ADM0_SOVRN": "location",
        "TotAmtAdmi": "total_vaccinations",
        "FullyVacc": "people_fully_vaccinated",
        "VacAd1Dose": "people_vaccinated",
        "Booster": "total_boosters",
    }
    # Raw columns kept by ``pipe_filter_columns``: the renamed ones plus the
    # helper columns needed by later pipeline steps.
    columns_use = [
        *columns_rename,
        "ISO_3_CODE",
        "VacAd2Dose",
        "VaccApprov",
    ]

    def __init__(self, skip_who: bool = False) -> None:
        self.skip_who = skip_who

    @property
    def source_url(self):
        """URL of the layer query returning all features with all fields as JSON."""
        return f"{self._base_url}/query?f=json&where=1=1&outFields=*"

    @property
    def source_url_date(self):
        """URL of the layer metadata (JSON) from which the last edit date is read."""
        return f"{self._base_url}?f=pjson"

    def read(self) -> pd.DataFrame:
        """Download the layer and return one row per feature's ``attributes``."""
        data = request_json(self.source_url)
        records = [feature["attributes"] for feature in data["features"]]
        return pd.DataFrame(records)

    def pipe_filter_columns(self, df: pd.DataFrame) -> pd.DataFrame:
        """Keep only the raw columns listed in ``columns_use``."""
        return df[self.columns_use]

    def pipe_rename(self, df: pd.DataFrame) -> pd.DataFrame:
        """Rename raw columns according to ``columns_rename``."""
        return df.rename(columns=self.columns_rename)

    def pipe_filter_countries(self, df: pd.DataFrame, countries: dict) -> pd.DataFrame:
        """Get rows from selected countries.

        Args:
            df: Frame with a ``location`` column.
            countries: Mapping from the source's country name to the name to
                use in the output. Rows whose location is not a key are dropped.
        """
        df = df[df.location.isin(countries.keys())]
        return df.assign(location=df.location.replace(countries))

    def pipe_one_dose_correction(self, df: pd.DataFrame) -> pd.DataFrame:
        """Add the fully-vaccinated people not counted in ``VacAd2Dose`` to ``people_vaccinated``."""
        single_shot = df.people_fully_vaccinated - df.VacAd2Dose
        return df.assign(people_vaccinated=df.people_vaccinated + single_shot)

    def pipe_vaccine(self, df: pd.DataFrame) -> pd.DataFrame:
        """Derive the ``vaccine`` column from the raw ``VaccApprov`` text."""
        return df.assign(vaccine=df.VaccApprov.apply(self._map_vaccines))

    def _map_vaccines(self, vaccine_raw: str):
        """Translate a raw vaccine string into a sorted, comma-separated list.

        Every key of ``ACDC_VACCINES`` found in the text is replaced by its
        mapped name and removed from the text.

        Raises:
            ValueError: If any text is left over after removing all known names.
        """
        remaining = vaccine_raw.strip()
        # A set so that several raw names mapping to the same vaccine are
        # reported once, matching the de-duplication done for WHO data.
        vaccines = set()
        for vax_old, vax_new in ACDC_VACCINES.items():
            if vax_old in remaining:
                vaccines.add(vax_new)
                remaining = remaining.replace(vax_old, "").strip()
            if remaining == "":
                break
        if remaining != "":
            raise ValueError(f"Some vaccines were unknown {remaining}")
        return ", ".join(sorted(vaccines))

    def pipe_vaccine_who(self, df: pd.DataFrame) -> pd.DataFrame:
        """Attach the ``vaccine`` column using the WHO vaccination CSV.

        Returns ``df`` untouched when ``skip_who`` is set. Otherwise the WHO
        table is joined on the ISO3 code with a default (inner) merge, so rows
        without a WHO vaccine entry are dropped.
        """
        if self.skip_who:
            return df
        df_who = (
            pd.read_csv(self._who_vaccines_url, usecols=["ISO3", "VACCINES_USED"])
            .rename(columns={"VACCINES_USED": "vaccine"})
            .dropna(subset=["vaccine"])
        )
        df = df.merge(df_who, left_on="ISO_3_CODE", right_on="ISO3")
        return df.assign(vaccine=df.vaccine.apply(self._map_vaccines_who))

    @staticmethod
    def _map_vaccines_who(vaccines_raw: str) -> str:
        """Map a comma-separated WHO vaccine list to sorted, de-duplicated names.

        Raises:
            KeyError: If a name is not a key of ``WHO_VACCINES``.
        """
        names = {WHO_VACCINES[name.strip()] for name in vaccines_raw.split(",")}
        return ", ".join(sorted(names))

    def pipe_source(self, df: pd.DataFrame) -> pd.DataFrame:
        """Set ``source_url`` to the public Africa CDC page on every row."""
        return df.assign(source_url=self.source_url_ref)

    def pipe_date(self, df: pd.DataFrame) -> pd.DataFrame:
        """Set ``date`` on every row to the layer's last edit date."""
        return df.assign(date=self._parse_date())

    def _parse_date(self):
        """Fetch the layer metadata and return its last edit date via ``clean_date``."""
        res = request_json(self.source_url_date)
        edit_ts = res["editingInfo"]["lastEditDate"]
        # The value is divided by 1000, i.e. treated as milliseconds since epoch.
        # NOTE(review): fromtimestamp() without tz converts to the host's local
        # time, so the resulting date can depend on where this runs - confirm
        # whether UTC is intended.
        return clean_date(datetime.fromtimestamp(edit_ts / 1000))

    def pipe_select_out_cols(self, df: pd.DataFrame) -> pd.DataFrame:
        """Select the output columns; ``vaccine`` is included unless ``skip_who`` is set."""
        cols = [
            "location",
            "date",
            "source_url",
            "total_vaccinations",
            "people_vaccinated",
            "people_fully_vaccinated",
            "total_boosters",
        ]
        if not self.skip_who:
            cols.append("vaccine")
        return df[cols]

    def pipe_exclude_observations(self, df: pd.DataFrame) -> pd.DataFrame:
        """Drop rows that look like data errors."""
        # Exclude observations where people_fully_vaccinated == 0, as they always seem to be
        # data errors rather than countries without any full vaccination.
        df = df[df.people_fully_vaccinated > 0]
        # Exclude observations where people_fully_vaccinated > people_vaccinated
        df = df[df.people_fully_vaccinated <= df.people_vaccinated]
        return df

    def pipeline(self, df: pd.DataFrame, countries: dict = None, exclude=True) -> pd.DataFrame:
        """Run all processing steps on the raw frame returned by ``read``.

        Args:
            df: Raw frame as returned by ``read``.
            countries: Mapping of source country name to output name. Defaults
                to ``ACDC_COUNTRIES`` when omitted (resolved at call time so
                the shared dict is not bound as a default argument).
            exclude: When True, also apply ``pipe_exclude_observations``.
        """
        if countries is None:
            countries = ACDC_COUNTRIES
        df = (
            df.pipe(self.pipe_filter_columns)
            .pipe(self.pipe_rename)
            .pipe(self.pipe_filter_countries, countries)
            .pipe(self.pipe_one_dose_correction)
            .pipe(self.pipe_vaccine_who)
            .pipe(self.pipe_source)
            .pipe(self.pipe_date)
            .pipe(self.pipe_select_out_cols)
        )
        if exclude:
            df = df.pipe(self.pipe_exclude_observations)
        return df

    def increment_countries(self, df: pd.DataFrame):
        """Call ``increment`` once per row, in ``location`` order.

        The frame must contain a ``vaccine`` column, which the pipeline only
        produces when ``skip_who`` is False.

        Raises:
            KeyError: If a row lacks one of the columns read below.
        """
        for _, row in df.sort_values("location").iterrows():
            increment(
                location=row["location"],
                total_vaccinations=row["total_vaccinations"],
                people_vaccinated=row["people_vaccinated"],
                people_fully_vaccinated=row["people_fully_vaccinated"],
                total_boosters=row["total_boosters"],
                date=row["date"],
                vaccine=row["vaccine"],
                source_url=row["source_url"],
            )

    def export(self):
        """Read the source, run the pipeline and increment every country."""
        df = self.read().pipe(self.pipeline)
        self.increment_countries(df)
def main():
    """Module entry point: build an ``AfricaCDC`` with default settings and export it."""
    scraper = AfricaCDC()
    scraper.export()