我得到了这样一个表:
<表类>
group_id
start_date
end_date
tbody><<tr>19335 20220613 20220714 19527 20220620 20220719 19339 20220614 20220720 19436 20220616 20220715 20095 20220711 20220809 表类>
我能想到两种方法。
- 连接所有表,然后执行查询。
- 为每个用户创建动态查询
两种方法都需要search_from和search_to事先可用,即你需要在做任何事情之前计算每个用户的搜索范围。
,
WITH users_per_group AS (
SELECT
user_id, group_id
,DATE_SUB(parse_date("%Y%m%d", start_date), INTERVAL 4 DAY)search_from
,DATE_SUB(parse_date("%Y%m%d", start_date), INTERVAL 1 DAY)search_to
FROM TableName
)
一旦你有了这样的表,你就可以使用上述任何一种方法。
因为我没有你的数据,不知道你的表名,我给了一个例子,使用公共数据集方式1。
-- consider this your main table which contains user,grp,start_date,end_date
with maintable as (
select 'India' visit_from, '20161115' as start_date, '20161202' end_date
union all select 'Sweden' , '20161201', '20161202'
),
--then calculate search from-to date for every user and group
user_per_grp as(
select *, DATE_SUB(parse_date("%Y%m%d", start_date), INTERVAL 4 DAY)search_from --change interval as per your need
,DATE_SUB(parse_date("%Y%m%d", start_date), INTERVAL 1 DAY)search_to
from maintable
)
select visit_from,_TABLE_SUFFIX date,count(visitId) total_visits from
user_per_grp ug
left join `bigquery-public-data.google_analytics_sample.ga_sessions_*` as pub on pub.geoNetwork.country = ug.visit_from
where _TABLE_SUFFIX between format_date("%Y%m%d",ug.search_from) and format_date("%Y%m%d",ug.search_to)
group by 1,2
方法2
declare queries array<string> default [];
create temp table maintable as (
select 'India' visit_from, '20161115' as start_date, '20161202' end_date
union all select 'Sweden' , '20161201', '20161202'
);
create temp table user_per_grp as(
select *, DATE_SUB(parse_date("%Y%m%d", start_date), INTERVAL 4 DAY)search_from
,DATE_SUB(parse_date("%Y%m%d", start_date), INTERVAL 1 DAY)search_to
from maintable
);
-- for each user create a seperate query here
FOR record IN (SELECT * from user_per_grp)
DO
set queries = queries || [format('select "%s" Visit_From,_TABLE_SUFFIX Date,count(visitId) total_visits from `bigquery-public-data.google_analytics_sample.ga_sessions_*` where _TABLE_SUFFIX between format_date("%%Y%%m%%d","%t") and format_date("%%Y%%m%%d","%t") and geoNetwork.country="%s" group by 1,2',record.visit_from,record.search_from,record.search_to,record.visit_from)];
--replace your query here.
END FOR;
--aggregating all the queries and executing it
execute immediate (select string_agg(query, ' union all ') from unnest(queries) query);
这里,第二种方法处理的数据(~750 KB)比第一种方法(~17 MB)少得多。但对于你的数据集来说,这可能不一样,因为两个用户的日期范围可能重叠,这将导致读取同一个表两次。