[docs]defload_query(query_filename:str,file_ext:str="json",to_str:bool=True):"""Load a query from a file in vax._static folder. Args: query_filename (str): Name of the query file. If no extension is provided, {query_filename}.{file_ext} will be loaded file_ext (str, optional): Extension of the file. Defaults to "json". Raises: FileNotFoundError: If no file is found ValueError: If non-supported format is provided Returns: dict: Loaded query as a string """filename_path=os.path.join(QUERIES_DIR,query_filename)ifnotos.path.isfile(filename_path):query_filename=f"{Path(query_filename).stem}.{file_ext}"filename_path=os.path.join(QUERIES_DIR,query_filename)ifnotos.path.isfile(f"{filename_path}"):raiseFileNotFoundError(f"File {filename_path} not found")iffile_ext=="json":withopen(filename_path)asf:data=json.load(f)else:raiseValueError("Only JSON format supported")ifto_str:returnstr(data)returndata
[docs]defload_data(data_filename:str,file_ext:str="csv"):"""Load a data from a file in vax._static folder. Args: data_filename (str): Name of the data file. If no extension is provided, {query_filename}.{file_ext} will be loaded file_ext (str, optional): Extension of the file. Defaults to "csv". Raises: FileNotFoundError: If no file is found ValueError: If non-supported format is provided Returns: dict: Loaded query as a string """filename_path=os.path.join(DATA_DIR,data_filename)ifnotos.path.isfile(filename_path):data_filename=f"{Path(data_filename).stem}.{file_ext}"filename_path=os.path.join(DATA_DIR,data_filename)ifnotos.path.isfile(f"{filename_path}"):raiseFileNotFoundError(f"File {filename_path} not found")iffile_ext=="csv":df=pd.read_csv(filename_path)else:raiseValueError("Only CSV format supported")returndf
[docs]defexport_metadata(df:pd.DataFrame,source_name:str,source_url:str,output_path:str):if"location"notindfor"date"notindf:raiseValueError("df must have columns `location` and `date`.")df=(df.sort_values("date")[["location","date"]].drop_duplicates(subset=["location"],keep="last").rename(columns={"date":"last_observation_date"}).assign(source_name=source_name,source_url=source_url,))assertpd.api.types.is_string_dtype(df.last_observation_date),"`date` is not of type string in the dataframe"ifos.path.isfile(output_path):df_current=pd.read_csv(output_path)df_current=df_current.loc[~df_current.location.isin(df.location)]df=pd.concat([df_current,df])df.sort_values("location")[["location","last_observation_date","source_name","source_url"]].to_csv(output_path,index=False)