让我们构建这个可视化:
这样的查询就可以了——这个查询获取/r/AmITheAsshole上的帖子计数,按最典型的判断顺序排列:
SELECT link_flair_text, COUNT(*) c
FROM `fh-bigquery.reddit_posts.2019_08`
WHERE subreddit = 'AmItheAsshole'
GROUP BY 1
ORDER BY 2 DESC
LIMIT 1000
要查询几个月的数据,您可以使用
*
上
FROM
:
SELECT link_flair_text, COUNT(*) c
FROM `fh-bigquery.reddit_posts.2019_*`
WHERE subreddit = 'AmItheAsshole'
GROUP BY 1
ORDER BY 2 DESC
LIMIT 1000
查看每月百分比的演变:
SELECT link_flair_text, ARRAY_AGG(STRUCT(month, ROUND(100*c/total_month,1) AS perc, c) ORDER BY month) AS arr
FROM (
SELECT *, SUM(c) OVER(PARTITION BY month) total_month
FROM (
SELECT link_flair_text, DATE(TIMESTAMP_TRUNC(TIMESTAMP_SECONDS(created_utc), MONTH)) month, COUNT(*) c
FROM `fh-bigquery.reddit_posts.2019_*`
WHERE subreddit = 'AmItheAsshole'
AND link_flair_text IS NOT NULL
GROUP BY 1,2
HAVING c > 100
)
)
GROUP BY 1
ORDER BY SUM(c) DESC
上面所示的viz的SQL:
CREATE OR REPLACE TABLE `temp.assholes`
AS
SELECT *, c/total_month AS perc
FROM (
SELECT *, SUM(c) OVER(PARTITION BY month) total_month, SUM(c) OVER(PARTITION BY link_flair_text) total_flair
FROM (
SELECT LOWER(link_flair_text) link_flair_text, DATE(TIMESTAMP_TRUNC(TIMESTAMP_SECONDS(created_utc), MONTH)) month, COUNT(*) c
FROM `fh-bigquery.reddit_posts.201*`
WHERE subreddit = 'AmItheAsshole'
AND link_flair_text IS NOT NULL
AND _table_suffix >= '8_03'
GROUP BY 1,2
)
)
WHERE c/total_month > 0.01
AND total_flair > 1000
ORDER BY month
如何获取2016年之前的数据?
几乎相同的查询,但它包含了一个不同的表,其中包含了2016年之前的所有历史内容:
WITH data AS (
SELECT * FROM `fh-bigquery.reddit_posts.201*` WHERE _table_suffix >= '5_12'
UNION ALL
SELECT * FROM `fh-bigquery.reddit_posts.full_corpus_201512`
)
SELECT *, c/total_month AS perc
FROM (
SELECT *, SUM(c) OVER(PARTITION BY month) total_month, SUM(c) OVER(PARTITION BY link_flair_text) total_flair
FROM (
SELECT LOWER(link_flair_text) link_flair_text, DATE(TIMESTAMP_TRUNC(TIMESTAMP_SECONDS(created_utc), MONTH)) month, COUNT(*) c
FROM data
WHERE subreddit = 'AmItheAsshole'
AND link_flair_text IS NOT NULL
GROUP BY 1,2
)
)
WHERE c/total_month > 0.01
AND total_flair > 1000
ORDER BY month