I have installed Airflow on Ubuntu 20.04 under WSL on Windows 10, with Anaconda.
I am trying to build an ETL process with parameters using the TaskFlow API.
But the first task fails.
Do you have an explanation or a solution? Thanks a lot.
import json
import pandas as pd
from airflow.decorators import dag, task
from airflow.utils.dates import days_ago
# These args will get passed on to each operator
# You can override them on a per-task basis during operator initialization
default_args = {
    'owner': 'airflow',
}

path = '/mnt/c/data/'
file = 'cpta_cr.csv'
out = 'output.xlsx'
@dag(default_args=default_args, schedule_interval=None, start_date=days_ago(2))
def tutorial_taskflow_api_etl():
    @task()
    def extract(path: str, file: str):
        df = pd.read_csv(path + file, sep='|', encoding='Windows-1252')
        return df

    @task()
    def load(path: str, df: pd.DataFrame, out: str):
        path = '/mnt/c/data/'
        out = 'output.xlsx'
        df.to_excel(path + out)

    oplus_data = extract(path, file)
    load(path, oplus_data, out)
tutorial_etl_dag = tutorial_taskflow_api_etl()
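For context, a common cause of this kind of failure is that the default XCom backend in Airflow 2 serializes task return values as JSON, and a pandas DataFrame is not JSON-serializable, so extract would fail when Airflow tries to push its result. A minimal sketch of a workaround, assuming that is the error here (the function and variable names with the _json suffix are illustrative, not from the original code), passes the data between tasks as a JSON string instead:

from io import StringIO

import pandas as pd
from airflow.decorators import dag, task
from airflow.utils.dates import days_ago

@dag(default_args={'owner': 'airflow'}, schedule_interval=None, start_date=days_ago(2))
def tutorial_taskflow_api_etl_json():

    @task()
    def extract(path: str, file: str) -> str:
        df = pd.read_csv(path + file, sep='|', encoding='Windows-1252')
        # Return a JSON string instead of the raw DataFrame so the
        # default (JSON) XCom backend can serialize the result
        return df.to_json()

    @task()
    def load(df_json: str, path: str, out: str):
        # Rebuild the DataFrame from the JSON string pushed by extract
        df = pd.read_json(StringIO(df_json))
        df.to_excel(path + out)

    data = extract('/mnt/c/data/', 'cpta_cr.csv')
    load(data, '/mnt/c/data/', 'output.xlsx')

tutorial_etl_json_dag = tutorial_taskflow_api_etl_json()

Alternatively, setting enable_xcom_pickling = True in the [core] section of airflow.cfg lets tasks return a DataFrame directly, at the cost of pickling arbitrary objects through the metadata database. Checking the log of the failed extract task run would confirm whether the error is in fact a serialization error.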