Hello, I have trouble refreshing the dataset. It used to take a lot of time to refresh, about over 45 mins and now it only failed to refresh. However I already tried by best by dividing into 2 tables fact table and dim table to increase its speed. I don’t know i could i do else to fix this. Thanks in advance!
And this is my SQL query:
with cte1 as (
select *
from LandingZone_Datalake.affiliate.publishers
where _at_site_no <> ‘’ and status <> -1
),
iv as (select * from LandingZone_Datalake.koc.influencer_viral_data where viral_property_id in (21,6,14,17)),
icc as (select * from LandingZone_Datalake.koc.influencer_channels),
ic as (select * from LandingZone_Datalake.koc.influencer_category),
clicks_summary as (
select publisher_name, campaign_id, dt, count_clicks, qualified_click
from LandingZone_Datalake.affiliate.clicks_summary_temp t1
union
select publisher_name, campaign_id, dt, count_clicks, qualified_click
from LandingZone_Datalake.affiliate.clicks_summary t2
where concat(publisher_name, campaign_id, dt) not in (select concat(publisher_name, campaign_id, dt) from LandingZone_Datalake.affiliate.clicks_summary_temp) and t2.dt >= ‘2021-01-01’)
select u.id as influencer_id,
u.login_name,
i.full_name,
u.email,
i.description as i_description,
ic.performance_category_id,
icc.viral_name,
icc.viral_url,
iv.value as follower,
icc.description as c_description,
cte1._at_site_no,
cte1.follower as publishers_follower,
cast(cte1.ctime as date) as publishers_ctime,
t.publisher as transactions_publisher,
t.campaign_id as transactions_campaign_id,
cast(t.sales_time as date) as transaction_sales_date,
sum(at_commission) as sum_occ_at_com,
sum(pub_commission) as summ_occ_pub_com,
sum(case when t.status = 1 then at_commission end) as sum_app_at_com,
sum(case when t.status = 1 then pub_commission end) as sum_app_pub_com,
count(seq_no) as conversion,
count(case when t.status = 1 then seq_no end) as occ_conversion,
count(distinct ref_id) as occ_order,
count(distinct case when t.status = 1 then ref_id end) as app_order,
sum(amount) as occ_amount,
sum(case when t.status = 1 then amount end) as app_amount,
null as count_clicks
from LandingZone_Datalake.koc.users u
left join LandingZone_Datalake.koc.influencers i on i.user_id = u.id
left join cte1 on u.login_name = cte1.login_name
left join LandingZone_Datalake.affiliate.transactions t on u.login_name = t.publisher
left join ic on i.id = ic.influencer_id
left join icc on i.id = icc.influencer_id
left join iv on icc.id = iv.influencer_channel_id
where t.dt >= ‘2022-01-01’
group by u.id,
u.login_name,
i.full_name,
u.email,
i.description,
ic.performance_category_id,
icc.viral_name,
icc.viral_url,
iv.value,
icc.description,
cte1._at_site_no,
cte1.follower,
cast(cte1.ctime as date),
t.publisher,
t.campaign_id,
cast(t.sales_time as date)
UNION
select u.id as influencer_id,
u.login_name,
i.full_name,
u.email,
i.description as i_description,
ic.performance_category_id,
icc.viral_name,
icc.viral_url,
iv.value as follower,
icc.description as c_description,
cte1._at_site_no,
cte1.follower,
cast(cte1.ctime as date) as publishers_ctime,
cs.publisher_name as clicks_publisher,
cs.campaign_id as clicks_campaign_id,
date_parse(cs.dt, ‘%Y-%m-%d’) as click_time,
null as sum_occ_at_com,
null as summ_occ_pub_com,
null as sum_app_at_com,
null as sum_app_pub_com,
null as conversion,
null as occ_conversion,
null as occ_order,
null as app_order,
null as occ_amount,
null as app_amount,
count_clicks
from LandingZone_Datalake.koc.users u
left join LandingZone_Datalake.koc.influencers i on i.user_id = u.id
left join cte1 on u.login_name = cte1.login_name
left join clicks_summary cs on u.login_name = cs.publisher_name
left join ic on i.id = ic.influencer_id
left join icc on i.id = icc.influencer_id
left join iv on icc.id = iv.influencer_channel_id