def read(self) -> pd.DataFrame:
    """Fetch the source page and turn it into a DataFrame.

    The page is obtained by passing ``self.source_url_ref`` to ``get_soup``;
    the result is then handed to ``self._parse_data``.

    Returns:
        Whatever ``self._parse_data`` produces for the fetched page.
    """
    page = get_soup(self.source_url_ref)
    return self._parse_data(page)
def _parse_data(self, soup: BeautifulSoup) -> pd.DataFrame:
    """Run the three parsing stages on the parsed page.

    Args:
        soup: Parsed source page.

    Returns:
        The result of ``self._parse_metrics`` applied to the text that
        ``self._get_text_from_element`` extracts from the element located by
        ``self._get_relevant_element``.
    """
    # Stage 1: locate the element that carries the data.
    node = self._get_relevant_element(soup)
    # Stage 2: reduce that element to a single text string.
    flattened = self._get_text_from_element(node)
    # Stage 3: pull the metrics out of the text.
    return self._parse_metrics(flattened)
def _get_relevant_element(self, soup: BeautifulSoup) -> element.Tag:
    """Locate the ``<script>`` tag whose content matches ``self.regex["script"]``.

    Args:
        soup: Parsed source page to search.

    Returns:
        The first ``<script>`` tag whose string matches the pattern.

    Raises:
        ValueError: If no ``<script>`` tag matches the pattern.
    """
    pattern = re.compile(self.regex["script"])
    # `string=` is the current name of the keyword formerly called `text=`
    # (renamed in Beautiful Soup 4.4.0; `text=` is deprecated).
    elem = soup.find("script", string=pattern)
    # `find` returns None when nothing matches; test for that explicitly
    # rather than relying on the truthiness of a Tag.
    if elem is None:
        raise ValueError("No element found, please update the script")
    return elem
def _get_text_from_element(self, elem: element.Tag) -> str:
    """Serialise the element to a string with whitespace runs collapsed.

    Args:
        elem: Element to serialise (converted with ``str``).

    Returns:
        ``str(elem)`` with every run of whitespace replaced by one space.
    """
    whitespace_run = re.compile(r"\s+")
    serialised = str(elem)
    return whitespace_run.sub(" ", serialised)
[docs]def_parse_metrics(self,text:str)->pd.DataFrame:"""Get metrics from text."""df_pcr=self._df_builder("pcr",text)df_tma=self._df_builder("tma",text)df=pd.merge(df_pcr,df_tma)returndf
[docs]def_df_builder(self,regex_key:str,text:str)->pd.DataFrame:"""Builds Dataframe"""match=re.search(self.regex[regex_key],text)ifnotmatch:raiseValueError("No match found, please update the regex")df=pd.DataFrame([json.loads(match.group(1)),json.loads(match.group(2))],index=["Date",f"{regex_key}"]).Treturndf
def pipe_date(self, df: pd.DataFrame) -> pd.DataFrame:
    """Replace the ``Date`` column with its cleaned version.

    Args:
        df: Frame with a ``Date`` column.

    Returns:
        A new frame (via ``DataFrame.assign``) whose ``Date`` column is the
        result of ``clean_date_series(df.Date, "%d/%m/%y")``.
    """
    # NOTE(review): "%d/%m/%y" is presumably the input format of the raw
    # dates — confirm against clean_date_series.
    cleaned = clean_date_series(df.Date, "%d/%m/%y")
    return df.assign(Date=cleaned)
def pipe_correct_dp(self, df: pd.DataFrame):
    """Overwrite the "Cumulative total" value for 2021-03-22.

    Rows whose ``Date`` equals "2021-03-22" get "Cumulative total" set to
    164665. The frame is modified in place and the same object is returned.

    Args:
        df: Frame with ``Date`` and "Cumulative total" columns.

    Returns:
        The input frame, after the in-place replacement.
    """
    is_target_day = df.Date == "2021-03-22"
    df.loc[is_target_day, "Cumulative total"] = 164665
    return df
def pipeline(self, df: pd.DataFrame) -> pd.DataFrame:
    """Apply every processing step in order and sort the result by date.

    Steps, in order: ``pipe_date``, ``pipe_metrics``, ``pipe_correct_dp``,
    ``pipe_metadata``; the final frame is sorted by the ``Date`` column.

    Args:
        df: Frame to process.

    Returns:
        The processed frame, sorted by ``Date``.
    """
    dated = df.pipe(self.pipe_date)
    with_metrics = dated.pipe(self.pipe_metrics)
    corrected = with_metrics.pipe(self.pipe_correct_dp)
    with_metadata = corrected.pipe(self.pipe_metadata)
    return with_metadata.sort_values("Date")
def export(self):
    """Read the data, process it, and pass the result to ``export_datafile``.

    The frame returned by ``self.read()`` is run through ``self.pipeline``
    and the outcome is given to ``self.export_datafile``.
    """
    raw = self.read()
    processed = self.pipeline(raw)
    self.export_datafile(processed)