【无标题】数仓实战之亚马逊全球跨境电商分析平台
一、Sqoop框架
?--1. 驗證sqoop
cd /export/server/sqoop-1.4.7
bin/sqoop list-databases \
--connect jdbc:mysql://node1:3306/ \
--username root --password 123456
二、拉鏈表-Demo
-- 1、MySQL創建商品表
-- 創建數據庫?
CREATE DATABASE demo; -- 創建商品表?
create table if not exists demo.product(?
goods_id varchar(50), -- 商品編號?
goods_status varchar(50), -- 商品狀態?
createtime varchar(50), -- 商品創建時間?
modifytime varchar(50) -- 商品修改時間?
) ;?
--2、Hive-ODS層建表
-- 創建表
create database if not exists demo;
-- 創建ods層表
create table if not exists demo.ods_product(
? goods_id string, ? ? ? ?-- 商品編號
? goods_status string, ? ?-- 商品狀態
? createtime string, ? ? ?-- 商品創建時間
? modifytime string ? ? ? -- 商品修改時間
)
partitioned by (dt string) ? --按照天分區
row format delimited fields terminated by '\t';
--3、Hive dw層創建拉鏈表
-- 創建拉鏈表
create table if not exists demo.dw_product(
? goods_id string, ? ? ? ?-- 商品編號
? goods_status string, ? ?-- 商品狀態
? createtime string, ? ? ?-- 商品創建時間
? modifytime string, ? ? ? -- 商品修改時間
? dw_start_date string, ? -- 生效日期
? dw_end_date string ? ? ?-- 失效日期
)
row format delimited fields terminated by '\t';
--4、MySQL數據庫導入12月20日數據(4條數據)
insert into demo.product(goods_id, goods_status, createtime, modifytime) values
('001', '待審核', '2020-12-18', '2020-12-20'),
('002', '待售', '2020-12-19', '2020-12-20'),
('003', '在售', '2020-12-20', '2020-12-20'),
('004', '已刪除', '2020-12-15', '2020-12-20');
--5、使用sqoop進行全量同步MySQL數據到Hive ods層表,創建Hive分區,并導入mysql的數據
-- 創建分區
alter table demo.ods_product ?add ?partition (dt='2020-12-20');
--將mysql數據導入Hive
/export/server/sqoop-1.4.7/bin/sqoop import \
--connect jdbc:mysql://node1:3306/demo \
--username root \
--password 123456 \
--table product \
--m 1 ?\
--delete-target-dir \
--fields-terminated-by '\t' \
--target-dir /user/hive/warehouse/demo.db/ods_product/dt=2020-12-20
--6 編寫SQL從ods導入dw當天最新的數據
-- 從ods層導入dw當天最新數據
insert overwrite table demo.dw_product
select
? goods_id, ? ? ? ? ? ? ? ?-- 商品編號
? goods_status, ? ? ? ? ? ?-- 商品狀態
? createtime, ? ? ? ? ? ? ?-- 商品創建時間
? modifytime, ? ? ? ? ? ? ?-- 商品修改時間
? modifytime as dw_start_date, ? ?-- 生效日期
? ?'9999-12-31' as dw_end_date ? ? -- 失效日期
from
? demo.ods_product
where
? dt = '2020-12-20';
--7、增量導入2019年12月21日數據- MySQL數據庫導入12月21日數據(6條數據)
--模擬更新數據
UPDATE demo.product SET goods_status = '待售', modifytime = '2020-12-21' WHERE goods_id = '001';
INSERT INTO demo.product(goods_id, goods_status, createtime, modifytime) VALUES
('005', '待審核', '2020-12-21', '2020-12-21'),
('006', '待審核', '2020-12-21', '2020-12-21');
--8、使用sqoop開發增量同步MySQL數據到Hive ods層表
--Hive創建分區
alter table demo.ods_product add ?partition (dt='2020-12-21');
--增量導入
/export/server/sqoop-1.4.7/bin/sqoop import \
--connect jdbc:mysql://node1:3306/demo \
--username root \
--password 123456 \
--target-dir /user/hive/warehouse/demo.db/ods_product/dt=2020-12-21 \
--query "select * from product where modifytime = '2020-12-21' and \$CONDITIONS" \
--delete-target-dir \
--fields-terminated-by '\t' \
--m 1?
--8、編寫SQL處理dw層歷史數據,重新計算之前的dw_end_date
?app 層
?dw ?層 ?(2020-12-20) ? dw_product
?ods 層 ?(2020-12-20 、 2020-12-21)
-- 重新計算dw層拉鏈表中的失效時間
select
? t1.goods_id, ? ? ? ? ? ? ? ?-- 商品編號
? t1.goods_status, ? ? ? ? ? ?-- 商品狀態
? t1.createtime, ? ? ? ? ? ? ?-- 商品創建時間
? t1.modifytime, ? ? ? ? ? ? ?-- 商品修改時間
? t1.dw_start_date, ? ? ? ? ? -- 生效日期(生效日期無需重新計算)
? case when (t2.goods_id is not null and t1.dw_end_date > '2020-12-21')
? then '2020-12-21'
? else t1.dw_end_date
? end as dw_end_date ? ? ? -- 更新生效日期(需要重新計算)
from
? demo.dw_product t1
? left join
? (select * from demo.ods_product where dt='2020-12-21') t2
? ?on t1.goods_id = t2.goods_id
--9、合并當天最新的數據和歷史數據
insert overwrite table demo.dw_product
select
? t1.goods_id, ? ? ? ? ? ? ? ?-- 商品編號
? t1.goods_status, ? ? ? ? ? ?-- 商品狀態
? t1.createtime, ? ? ? ? ? ? ?-- 商品創建時間
? t1.modifytime, ? ? ? ? ? ? ?-- 商品修改時間
? t1.dw_start_date, ? ? ? ? ? -- 生效日期(生效日期無需重新計算)
? case when (t2.goods_id is not null and t1.dw_end_date > '2020-12-21')
? then '2020-12-21'
? else t1.dw_end_date
? end as dw_end_date ? ? ? -- 更新生效日期(需要重新計算)
from
? demo.dw_product t1
? left join
? (select * from demo.ods_product where dt='2020-12-21') t2
? ?on t1.goods_id = t2.goods_id
union all
select?
? goods_id, ? ? ? ? ? ? ? ?-- 商品編號
? goods_status, ? ? ? ? ? ?-- 商品狀態
? createtime, ? ? ? ? ? ? ?-- 商品創建時間
? modifytime, ? ? ? ? ? ? ?-- 商品修改時間
? modifytime as dw_start_date, ?-- 生效日期
? ?'9999-12-31' as dw_end_date ? -- 失效日期
from
? demo.ods_product where dt='2020-12-21'
order by dw_start_date, goods_id;
--10、拉鏈表查詢
-- 查詢2020-12-20數據
?select * from dw_product where dw_start_date <= ?'2020-12-20' and dw_end_date >= '2020-12-20' order by goods_id;
-- 查詢當前訂單的最新狀態
?select * from dw_product where dw_end_date = '9999-12-31' order by goods_id ;
三、業務數據分析
--1、項目環境初始化-導入Mysql業務數據
mysql -uroot –p
mysql>source /export/data/hivedatas/10tables.sql;
--2、項目環境初始化-Hive創建分層數據庫和表
--使用zeppelin連接Hive,并執行以下語句
create database if not exists itcast_ods;
create database if not exists itcast_dw;
create database if not exists itcast_app;
--3、創建ods層數據表
hive -f /export/data/hive_sql/ods_create_table.hql
--4、?在訂單表中創建?2021-01-09分區
alter table itcast_ods.itcast_orders ?add partition(dt='2021-01-09');
--5、導入itcast_orders表數據,這里我們先只導入修改時間為2021-01-09日以及之前的數據,后期我們再增量導入其他數據
/export/server/sqoop-1.4.7/bin/sqoop import \
--connect jdbc:mysql://node1:3306/itcast_shop \
--username root \
--password 123456 \
--target-dir /user/hive/warehouse/itcast_ods.db/itcast_orders/dt=2021-01-09 \
--query "select * from itcast_orders WHERE modifiedTime <= '2021-01-09 23:59:59' and \$CONDITIONS" \
--delete-target-dir \
--fields-terminated-by '\t' \
--m 1?
--6、ODS層數據采集-其他事實表采集
--?? ?訂單明細事實表采集
/export/server/sqoop-1.4.7/bin/sqoop import ?\
--connect jdbc:mysql://node1:3306/itcast_shop \
--username root \
--password 123456 \
--table itcast_order_goods ?\
--target-dir /user/hive/warehouse/itcast_ods.db/itcast_order_goods/ \
--delete-target-dir \
--fields-terminated-by '\t' \
--m 1
--訂單退貨表數據采集
/export/server/sqoop-1.4.7/bin/sqoop import ?\
--connect jdbc:mysql://node1:3306/itcast_shop \
--username root \
--password 123456 \
--table itcast_order_refunds \
--target-dir /user/hive/warehouse/itcast_ods.db/itcast_order_refunds \
--delete-target-dir \
--fields-terminated-by '\t' \
--m 1
--7、ODS層數據采集-維度表數據采集
/export/server/sqoop-1.4.7/bin/sqoop import \
--connect jdbc:mysql://node1:3306/itcast_shop \
--username root \
--password 123456 \
--table itcast_goods \
--target-dir /user/hive/warehouse/itcast_ods.db/itcast_goods \
--delete-target-dir \
--fields-terminated-by '\t' \
--m 1
/export/server/sqoop-1.4.7/bin/sqoop import \
--connect jdbc:mysql://node1:3306/itcast_shop \
--username root \
--password 123456 \
--table itcast_goods_cats \
--delete-target-dir \
--fields-terminated-by '\t' \
--target-dir /user/hive/warehouse/itcast_ods.db/itcast_goods_cats \
--m 1 ?
/export/server/sqoop-1.4.7/bin/sqoop import \
--connect jdbc:mysql://node1:3306/itcast_shop \
--username root \
--password 123456 \
--table itcast_org \
--delete-target-dir \
--fields-terminated-by '\t' \
--target-dir /user/hive/warehouse/itcast_ods.db/itcast_org \
--m 1 ?
/export/server/sqoop-1.4.7/bin/sqoop import \
--connect jdbc:mysql://node1:3306/itcast_shop \
--username root \
--password 123456 \
--table itcast_payments \
--delete-target-dir \
--fields-terminated-by '\t' \
--target-dir /user/hive/warehouse/itcast_ods.db/itcast_payments \
--m 1 ?
/export/server/sqoop-1.4.7/bin/sqoop import \
--connect jdbc:mysql://node1:3306/itcast_shop \
--username root \
--password 123456 \
--table itcast_shops \
--delete-target-dir \
--fields-terminated-by '\t' \
--target-dir /user/hive/warehouse/itcast_ods.db/itcast_shops \
--m 1 ?
/export/server/sqoop-1.4.7/bin/sqoop import \
--connect jdbc:mysql://node1:3306/itcast_shop \
--username root \
--password 123456 \
--table itcast_user_address \
--delete-target-dir \
--fields-terminated-by '\t' \
--target-dir /user/hive/warehouse/itcast_ods.db/itcast_user_address \
--m 1 ?
/export/server/sqoop-1.4.7/bin/sqoop import \
--connect jdbc:mysql://node1:3306/itcast_shop \
--username root \
--password 123456 \
--table itcast_users \
--delete-target-dir \
--fields-terminated-by '\t' \
--target-dir /user/hive/warehouse/itcast_ods.db/itcast_users \
--m 1
--8、測試數據是否都正確被加載
select * from itcast_ods.itcast_orders limit 10;
select * from itcast_ods.itcast_goods limit 10;
select * from itcast_ods.itcast_order_goods limit 10;
select * from itcast_ods.itcast_shops limit 10;
select * from itcast_ods.itcast_goods_cats limit 10;
select * from itcast_ods.itcast_org limit 10;
select * from itcast_ods.itcast_order_refunds limit 10;
select * from itcast_ods.itcast_users limit 10;
select * from itcast_ods.itcast_user_address limit 10;
select * from itcast_ods.itcast_payments limit 10;
-----------------------------------業務數據分析-DW層數據加載-------------------------------
--訂單事實表數據加載-拉鏈表
--1、創建dw層訂單拉鏈表
-- 創建dw層訂單事實表--帶有分區字段
DROP TABLE IF EXISTS itcast_dw.dw_fact_orders;
create ?table itcast_dw.dw_fact_orders (
? orderId ? ? ? ? ? ?bigint,
? orderNo ? ? ? ? ? string,
? userId ? ? ? ? ? ? bigint,
? orderStatus ? ? ? ?bigint,
? goodsMoney ? ? ? ? double,
? deliverType ? ? ? ?bigint,
? deliverMoney ? ? ? double,
? totalMoney ? ? ? ? double,
? realTotalMoney ? ? double,
? payType ? ? ? ? ? ?bigint,
? isPay ? ? ? ? ? ? ?bigint,
? areaId ? ? ? ? ? ? bigint,
? userAddressId ? ? ?bigint,
? areaIdPath ? ? ? ? string,
? userName ? ? ? ? ? string,
? userAddress ? ? ? string,
? userPhone ? ? ? ? string,
? orderScore ? ? ? ? bigint,
? isInvoice ? ? ? ? ?bigint,
? invoiceClient ? ? string,
? orderRemarks ? ? ? string,
? orderSrc ? ? ? ? ? bigint,
? needPay ? ? ? ? ? ?double,
? payRand ? ? ? ? ? ?bigint,
? orderType ? ? ? ? ?bigint,
? isRefund ? ? ? ? ? bigint,
? isAppraise ? ? ? ? bigint,
? cancelReason ? ? ? bigint,
? rejectReason ? ? ? bigint,
? rejectOtherReason string,
? isClosed ? ? ? ? ? bigint,
? goodsSearchKeys ? string,
? orderunique ? ? ? string,
? isFromCart ? ? ? ?, ?
? receiveTime ? ? ? string,
? deliveryTime ? ? ? string,
? tradeNo ? ? ? ? ? string,
? dataFlag ? ? ? ? ? bigint,
? createTime ? ? ? ? string,
? settlementId ? ? ? bigint,
? commissionFee ? ? ?double,
? scoreMoney ? ? ? ? double,
? useScore ? ? ? ? ? bigint,
? orderCode ? ? ? ? string,
? extraJson ? ? ? ? string,
? orderCodeTargetId ?bigint,
? noticeDeliver ? ? ?bigint,
? invoiceJson ? ? ? string,
? lockCashMoney ? ? ?double,
? payTime ? ? ? ? ? string,
? isBatch ? ? ? ? ? ?bigint,
? totalPayFee ? ? ? ?bigint,
? modifiedTime ? ? ? string,
? dw_start_date ? ? ? string,
? dw_end_date ? ? ? ? string
)
partitioned by (dt string) --按照天分區
row format delimited fields terminated by '\t';
--2、第一次全量數據導入dw層拉鏈表
?--訂單表數據:ods層導入dw層
insert overwrite table itcast_dw.dw_fact_orders ?partition(dt='2021-01-09')
select
orderId, ? ? ? ? ?
orderNo, ? ? ? ? ?
userId, ? ? ? ? ??
orderStatus, ? ? ?
goodsMoney, ? ? ??
deliverType, ? ? ?
deliverMoney, ? ??
totalMoney, ? ? ??
realTotalMoney, ??
payType, ? ? ? ? ?
isPay, ? ? ? ? ? ?
areaId, ? ? ? ? ??
userAddressId, ? ?
areaIdPath, ? ? ??
userName, ? ? ? ??
userAddress, ? ? ?
userPhone, ? ? ? ?
orderScore, ? ? ??
isInvoice, ? ? ? ?
invoiceClient, ? ?
orderRemarks, ? ??
orderSrc, ? ? ? ??
needPay, ? ? ? ? ?
payRand, ? ? ? ? ?
orderType, ? ? ? ?
isRefund, ? ? ? ??
isAppraise, ? ? ??
cancelReason, ? ??
rejectReason, ? ??
rejectOtherReason,
isClosed, ? ? ? ??
goodsSearchKeys, ?
orderunique, ? ? ?
isFromCart, ? ? ??
receiveTime, ? ? ?
deliveryTime, ? ??
tradeNo, ? ? ? ? ?
dataFlag, ? ? ? ??
createTime, ? ? ??
settlementId, ? ??
commissionFee, ? ?
scoreMoney, ? ? ??
useScore, ? ? ? ??
orderCode, ? ? ? ?
extraJson, ? ? ? ?
orderCodeTargetId,
noticeDeliver, ? ?
invoiceJson, ? ? ?
lockCashMoney, ? ?
payTime, ? ? ? ? ?
isBatch, ? ? ? ? ?
totalPayFee, ? ? ?
modifiedTime,
--增加開始時間
date_format(modifiedTime,'yyyy-MM-dd') as dw_start_date,
--增加結束時間
'9999-12-31' as dw_end_date
from itcast_ods.itcast_orders where dt="2021-01-09";
--3、模擬新增和修改數據
--在mysql中添加模擬增量數據,修改orderId=1的訂單狀態,添加新訂單數據
--將資料中的itcast_orders_append_update.sql腳本上傳到/export/data/hive_sql/目錄
mysql -uroot -p
source /export/data/hive_sql/itcast_orders_append_update.sql
--4、Sqoop抽取增量數據導入ods層
-- 在訂單表中創建 2020-09-10分區
alter table itcast_ods.itcast_orders ?add partition(dt='2021-01-10');
/export/server/sqoop-1.4.7/bin/sqoop import \
--connect jdbc:mysql://node1:3306/itcast_shop \
--username root \
--password 123456 \
--table itcast_orders \
--target-dir /user/hive/warehouse/itcast_ods.db/itcast_orders/dt=2021-01-10 \
--check-column modifiedTime \
--incremental lastmodified \
--last-value "2021-01-10 00:00:00" \
--fields-terminated-by '\t' \
--m 1 \
--append?
--5、ods層數據合并到dw層拉鏈表中
--開啟動態分區,解釋一下動態分區,比如我需要將A表查詢到的數據插入到B表,而A表是分區表,如果開啟了自動分區功能,則查詢到的A表數據原來在哪個分區,則添加到B表字后,B表會自動創建對應的分區。
set hive.exec.dynamici.partition=true; ?
set hive.exec.dynamic.partition.mode=nonstrict;
--合并歷史數據和最新數據到拉鏈表sql
insert overwrite ?table itcast_dw.dw_fact_orders
select
dw.orderId ? ? ? ? ? ?,
dw.orderNo ? ? ? ? ? ?,
dw.userId ? ? ? ? ? ? ,
dw.orderStatus ? ? ? ?,
dw.goodsMoney ? ? ? ? ,
dw.deliverType ? ? ? ?,
dw.deliverMoney ? ? ? ,
dw.totalMoney ? ? ? ? ,
dw.realTotalMoney ? ? ,
dw.payType ? ? ? ? ? ?,
dw.isPay ? ? ? ? ? ? ?,
dw.areaId ? ? ? ? ? ? ,
dw.userAddressId ? ? ?,
dw.areaIdPath ? ? ? ? ,
dw.userName ? ? ? ? ? ,
dw.userAddress ? ? ? ?,
dw.userPhone ? ? ? ? ?,
dw.orderScore ? ? ? ? ,
dw.isInvoice ? ? ? ? ?,
dw.invoiceClient ? ? ?,
dw.orderRemarks ? ? ? ,
dw.orderSrc ? ? ? ? ? ,
dw.needPay ? ? ? ? ? ?,
dw.payRand ? ? ? ? ? ?,
dw.orderType ? ? ? ? ?,
dw.isRefund ? ? ? ? ? ,
dw.isAppraise ? ? ? ? ,
dw.cancelReason ? ? ? ,
dw.rejectReason ? ? ? ,
dw.rejectOtherReason ?,
dw.isClosed ? ? ? ? ? ,
dw.goodsSearchKeys ? ?,
dw.orderunique ? ? ? ?,
dw.isFromCart ? ? ? ? ,
dw.receiveTime ? ? ? ?,
dw.deliveryTime ? ? ? ,
dw.tradeNo ? ? ? ? ? ?,
dw.dataFlag ? ? ? ? ? ,
dw.createTime ? ? ? ? ,
dw.settlementId ? ? ? ,
dw.commissionFee ? ? ?,
dw.scoreMoney ? ? ? ? ,
dw.useScore ? ? ? ? ? ,
dw.orderCode ? ? ? ? ?,
dw.extraJson ? ? ? ? ?,
dw.orderCodeTargetId ?,
dw.noticeDeliver ? ? ?,
dw.invoiceJson ? ? ? ?,
dw.lockCashMoney ? ? ?,
dw.payTime ? ? ? ? ? ?,
dw.isBatch ? ? ? ? ? ?,
dw.totalPayFee ? ? ? ?,
dw.modifiedTime ,
dw.dw_start_date,
--修改end_date
case when ods.orderid is not null and dw.dw_end_date ='9999-12-31'
then '2021-01-10'
else dw.dw_end_date
end as dw_end_date,
--動態分區需要的字段
dw.dt
from?
itcast_dw.dw_fact_orders ?dw?
left join?
(select * from itcast_ods.itcast_orders where dt ='2021-01-10') ods
on dw.orderid=ods.orderid?
union all
--今天新增數據的插入動作
select
orderId ? ? ? ? ? ?,
orderNo ? ? ? ? ? ?,
userId ? ? ? ? ? ? ,
orderStatus ? ? ? ?,
goodsMoney ? ? ? ? ,
deliverType ? ? ? ?,
deliverMoney ? ? ? ,
totalMoney ? ? ? ? ,
realTotalMoney ? ? ,
payType ? ? ? ? ? ?,
isPay ? ? ? ? ? ? ?,
areaId ? ? ? ? ? ? ,
userAddressId ? ? ?,
areaIdPath ? ? ? ? ,
userName ? ? ? ? ? ,
userAddress ? ? ? ?,
userPhone ? ? ? ? ?,
orderScore ? ? ? ? ,
isInvoice ? ? ? ? ?,
invoiceClient ? ? ?,
orderRemarks ? ? ? ,
orderSrc ? ? ? ? ? ,
needPay ? ? ? ? ? ?,
payRand ? ? ? ? ? ?,
orderType ? ? ? ? ?,
isRefund ? ? ? ? ? ,
isAppraise ? ? ? ? ,
cancelReason ? ? ? ,
rejectReason ? ? ? ,
rejectOtherReason ?,
isClosed ? ? ? ? ? ,
goodsSearchKeys ? ?,
orderunique ? ? ? ?,
isFromCart ? ? ? ? ,
receiveTime ? ? ? ?,
deliveryTime ? ? ? ,
tradeNo ? ? ? ? ? ?,
dataFlag ? ? ? ? ? ,
createTime ? ? ? ? ,
settlementId ? ? ? ,
commissionFee ? ? ?,
scoreMoney ? ? ? ? ,
useScore ? ? ? ? ? ,
orderCode ? ? ? ? ?,
extraJson ? ? ? ? ?,
orderCodeTargetId ?,
noticeDeliver ? ? ?,
invoiceJson ? ? ? ?,
lockCashMoney ? ? ?,
payTime ? ? ? ? ? ?,
isBatch ? ? ? ? ? ?,
totalPayFee ? ? ? ?,
modifiedTime ? ? ? ,
--增加開始時間
date_format(modifiedTime,'yyyy-MM-dd') as dw_start_date,
--增加結束時間
'9999-12-31' as dw_end_date,
--指定動態分區使用的字段,動態分區的用法:就是查詢字段的最后一個字段hive表進行解析然后存入指定分區
--此次數據分區按照訂單的修改時間
date_format(modifiedTime,'yyyy-MM-dd')?
from itcast_ods.itcast_orders where dt="2021-01-10";
--6、驗證數據查詢拉鏈表數據
select * from itcast_dw.dw_fact_orders limit 15;
--7、其他事實表的加載
-- 創建dw層訂單明細表
drop table if exists itcast_dw.dw_fact_order_goods;
create ?table itcast_dw.dw_fact_order_goods(
? ? ogId ? ? ? ? ? ?bigint,
? ? orderId ? ? ? ? bigint,
? ? goodsId ? ? ? ? bigint,
? ? goodsNum ? ? ? ?bigint,
? ? goodsPrice ? ? ?double,
? ? payPrice ? ? ? ?double,
? ? goodsSpecId ? ? bigint,
? ? goodsSpecNames ?string,
? ? goodsName ? ? ? string,
? ? goodsImg ? ? ? ?string,
? ? extraJson ? ? ? string,
? ? goodsType ? ? ? bigint,
? ? commissionRate ?double,
? ? goodsCode ? ? ? string,
? ? promotionJson ? string,
? ? createtime ? ? ?string
)
row format delimited fields terminated by '\t';
--將ods層的數據全部加載到dw層
insert overwrite table itcast_dw.dw_fact_order_goods select * from itcast_ods.itcast_order_goods;
-- 創建dw層訂單退貨表
drop table if exists itcast_dw.itcast_order_refunds;
create ?table itcast_dw.dw_fact_order_refunds(
? ? id ? ? ? ? ? ? ? ?bigint,
? ? orderId ? ? ? ? ? bigint,
? ? goodsId ? ? ? ? ? bigint,
? ? refundTo ? ? ? ? ?bigint,
? ? refundReson ? ? ? bigint,
? ? refundOtherReson ?string,
? ? backMoney ? ? ? ? double,
? ? refundTradeNo ? ? string,
? ? refundRemark ? ? ?string,
? ? refundTime ? ? ? ?string,
? ? shopRejectReason ?string,
? ? refundStatus ? ? ?bigint,
? ? createTime ? ? ? ?string,
? ? modifiedTime ? ? ? ?string
)
row format delimited fields terminated by '\t';
--將ods層的數據全部加載到dw層
insert overwrite table itcast_dw.dw_fact_order_refunds select * from itcast_ods.itcast_order_refunds;
------------APP層指標開發-指標開發1-時間維度分析業務開發------------------------------
--1、創建app層數據表
drop table if exists itcast_app.app_order_date;
create table itcast_app.app_order_date(
? date_type string, ? -- 時間維度類型
? date_val string, ? ?-- 時間維度值
? order_count bigint, -- 訂單筆數
? order_money double ?-- 訂單交易額
)
row format delimited fields terminated by '\t' ;
--2、創建日期維度表
DROP TABLE IF EXISTS itcast_dw.dw_dim_date;
CREATE TABLE itcast_dw.dw_dim_date(
? date_key string,
? date_value string,
? day_in_year string,
? day_in_month string,
? is_first_day_in_month string,
? is_last_day_in_month string,
? weekday string,
? week_in_month string,
? is_first_day_in_week string,
? is_dayoff string,
? is_workday string,
? is_holiday string,
? date_type string,
? month_number string, ? ? ? ? ? ? ? ? ? ? ? ?
? year string,
? year_month_number string,
? quarter_name string,
? quarter_number string,
? year_quarter string
)
row format delimited fields terminated by '\t';
--給表加載數據
load data local inpath '/export/data/hivedatas/dim_holiday.txt' into table itcast_dw.dw_dim_date;
--3、統計2020年期間每個季度的銷售訂單筆數、訂單總額
insert overwrite table itcast_app.app_order_date?
select
?'1' as date_type, ? ? ? ? ? ? ?-- 時間維度類型,1表示季度
? t1.year_quarter as date_val, ?-- 季度名稱
? count(orderid), ? ? ? ? ? ? ? -- 訂單總數
? sum(totalMoney) ? ? ? ? ? ? ? -- 訂單總額
from?
? itcast_dw.dw_dim_date t1
? left join
? (select * from itcast_dw.dw_fact_orders ?where dt >='2020-01-01' and dw_end_date='9999-12-31') t2
? ?on t1.date_value = date_format(createtime, 'yyyy-MM-dd')
group by t1.year_quarter
order by t1.year_quarter;
-- 測試
select * from itcast_app.app_order_date where dt='2021-01-09' and date_type = '1';
--4、統計2020年期間每個月的銷售訂單筆數、訂單總額
insert into table itcast_app.app_order_date
select
? ?'2' as date_type, --2表示月類型
? t1.year_month_number as date_val,
? ?count(orderid), ? ? ? ? ? ? ? ? ?-- 訂單總數
? ?if(sum(totalMoney) is NULL,0,sum(totalMoney)) ? ? ? ? ? ? ? ? ? ? ? ? -- 訂單總額
from
? itcast_dw.dw_dim_date t1
? left join
? ? (select * from itcast_dw.dw_fact_orders ?where dt >='2020-01-01' and dw_end_date='9999-12-31') t2
? ? on t1.date_value = date_format(createtime, 'yyyy-MM-dd')
group by?
? t1.year_month_number
order by t1.year_month_number;
---- 測試
select * from itcast_app.app_order_date where date_type = '2';
--5、統計2020年期間每周一到周日的銷售訂單筆數、訂單總額
insert into table itcast_app.app_order_date
select
? ?'3' as date_type,
? ?t1.weekday as date_val,
? ? count(orderid), ? ? ? ? ? ? ? ? ?-- 訂單總數
?if(sum(totalMoney) is NULL,0,sum(totalMoney)) ? ? ? ? ? ? ? ? ? ? ? ? ? ?-- 訂單總額
from?
? itcast_dw.dw_dim_date t1
? left join
? (select * from itcast_dw.dw_fact_orders ?where dt >='2020-01-01' and dw_end_date='9999-12-31') t2
? ? on t1.date_value = date_format(createtime, 'yyyy-MM-dd')
group by
? t1.weekday
order by t1.weekday;
-- 測試
select * from itcast_app.app_order_date where ?date_type = '3';
-- 統計2020年期間國家法定節假日、休息日、工作日的訂單筆數、訂單總額
insert into table itcast_app.app_order_date?
select
? ?'4' as date_type,
? t1.date_type as date_val,
? ?count(orderid) as order_count, ? ? ? ? ? ? ? ? ?-- 訂單總數
?if(sum(totalMoney) is NULL,0,sum(totalMoney)) ?-- 訂單總額
from?
? itcast_dw.dw_dim_date t1
? left join
? (select * from itcast_dw.dw_fact_orders ?where dt >='2020-01-01' and dw_end_date='9999-12-31') t2
? ? on t1.date_value = date_format(createtime, 'yyyy-MM-dd')
group by
? t1.date_type
order by t1.date_type;
-- 測試
select * from itcast_app.app_order_date where date_type = '4';
------------APP層指標開發-指標開發2-訂單分析地域、分類維度分析業務開發------------------------------
--1、創建app(數據集市層)訂單分析表
DROP TABLE IF EXISTS itcast_app.app_trade_order;
create table if not exists itcast_app.app_trade_order(
? area_type string, ? ? ? ? ? ? ? -- 區域范圍:區域類型(全國、大區、城市)
? region_name string, ? ? ? ? ? ? -- 區域名稱
? city_name string, ? ? ? ? ? ? ? -- 城市名稱
? category_type string, ? ? ? ? ? --分類類型(一級、二級)
? category_1st_name string, ? ? ? -- 一級分類名稱
? category_2d_name string, ? ? ? ?-- 二級分類名稱
? payment_name string, ? ? ? ? ? ?-- 支付方式(所有、微信、支付寶、...)
? total_count bigint, ? ? ? ? ? ? -- 訂單數量
? total_goods_num bigint, ? ? ? ? -- 商品數量
? total_money double ? ? ? ? ? ? ?-- 支付金額
)
row format delimited fields terminated by '\t';
--2、商品分類維度數據表操作
? -- 1:表創建
? DROP TABLE IF EXISTS itcast_dw.dw_dim_goods_cats;
? create table if not exists itcast_dw.dw_dim_goods_cats(
? ? cat_3d_id string, ? ? ? ? ? ? ? ? ? -- 三級商品分類id
? ? cat_3d_name string, ? ? ? ? ? ? ? ? -- 三級商品分類名稱
? ? cat_2d_id string, ? ? ? ? ? ? ? ? ? ?-- 二級商品分類Id
? ? cat_2d_name string, ? ? ? ? ? ? ? ? ?-- 二級商品分類名稱
? ? cat_1t_id string, ? ? ? ? ? ? ? ? ? -- 一級商品分類id
? ? cat_1t_name string ? ? ? ? ? ? ? ? ?-- 一級商品分類名稱
? )
? row format delimited fields terminated by '\t' ;
-- 2. 加載商品分類維度表數據
insert overwrite table itcast_dw.dw_dim_goods_cats
select?
? t3.catid as cat_3d_id, ? ? ? ? ?-- 三級分類id
? t3.catname as cat_3d_name, ? ? ?-- 三級分類名稱
? t2.catid as cat_2d_id, ? ? ? ? ?-- 二級分類id
? t2.catname as cat_2d_name, ? ? ?-- 二級分類名稱
? t1.catid as cat_1t_id, ? ? ? ? ?-- 一級分類id
? t1.catname as cat_1t_name ? ? ? -- 一級分類名稱
from?
? (select catid, catname, parentid from itcast_ods.itcast_goods_cats where cat_level=3) t3 -- 商品三級分類數據
? left join
? (select catid, catname, parentid from itcast_ods.itcast_goods_cats where cat_level=2) t2 -- 商品二級分類數據
? ?on t3.parentid = t2.catid
? left join
? (select catid, catname, parentid from itcast_ods.itcast_goods_cats where cat_level=1) t1 -- 商品一級分類數據
? ?on t2.parentid = t1.catid;
? ?
-- 測試數據
select * from itcast_dw.dw_dim_goods_cats limit 5;
--3、支付方式維度數據表操作
-- 1. 創建支付方式維度表
drop table if exists itcast_dw.dw_dim_payments;
create table if not exists itcast_dw.dw_dim_payments(
? payment_id string, ? ? ? ? ?-- 支付方式id
? payment_name string ? ? ? ? -- 支付方式名稱
)
row format delimited fields terminated by '\t' ;
?
-- 2. 加載支付方式維度數據
-- 需要額外添加一行數據 0 -> 其他
insert overwrite table itcast_dw.dw_dim_payments?
select?
? t1.id as payment_id, ? ? ? ?-- 支付方式id
? t1.payName as payment_name ?-- 支付方式名稱
from?
? itcast_ods.itcast_payments ?t1 ;
?
? ?-- 測試查詢支付方式維度數據
select * from itcast_dw.dw_dim_payments limit 5;
--4、訂單明細事實表拉寬
-- 4.1. 創建訂單明細事實表
DROP TABLE IF EXISTS itcast_dw.dw_fact_order_goods_wide;
create table if not exists itcast_dw.dw_fact_order_goods_wide(
? order_id string, ? ? ? ? ? ?-- 訂單id
? goods_cat_3d_id string, ? ? -- 商品三級分類id
? shop_id string, ? ? ? ? ? ? -- 店鋪id
? payment_id string, ? ? ? ? ?-- 訂單支付方式
? goods_num bigint, ? ? ? ? ? -- 商品數量
? pay_money double, ? ? ? ? ? -- 訂單明細金額
? paytime string ? ? ? ? ? ? ?-- 訂單時間
)
row format delimited fields terminated by '\t' ;
-- 4.2、加載數據
insert overwrite table itcast_dw.dw_fact_order_goods_wide?
select
? t1.orderid as order_id,
? t3.goodscatid as goods_cat_3d_id,
? t3.shopid as shop_id,
? t1.paytype as payment_id,
? t2.goodsnum as goods_num,
? t2.goodsnum*t2.payprice ?as pay_money,
? t1.paytime as paytime
from
? (select orderid, paytype, paytime from itcast_dw.dw_fact_orders ?) t1 ? ? ? ? ? ? ? ? ? ? -- 訂單表數據
?join
? (select orderid, goodsid, goodsnum, payprice from itcast_ods.itcast_order_goods) t2 ? ? -- 訂單明細數
? ?on t1.orderid = t2.orderid
?join
? (select goodsid, shopid, goodscatid from itcast_ods.itcast_goods ) t3 ? ? ? ? ? -- 商品數量
? ?on t2.goodsid = t3.goodsid;
? ?
-- 測試數據
select * from itcast_dw.dw_fact_order_goods_wide limit 5;
--5、指標計算匯總計算-全國、一級商品分類維度交易信息
-- 獲取全國、一級商品分類維度的分支付類型數據
-- 5.1 獲取全國、一級商品分類維度的不分支付類型數據
insert into table itcast_app.app_trade_order?
select
? ?'全國' as area_type,
? ?'' as region_name,
? ?'' as city_name,
? ?'一級分類' as category_type,
? t1.cat_1t_name as category_1st_name,
? ?'' as category_2d_name,
? ?'所有' as payment_name,
? ?count(distinct t2.order_id) as total_count,
? sum(case when t2.goods_num is null
? ? ? then 0
? ? ? else t2.goods_num
? ? ? end
? ? ? ) as total_goods_num,
? sum(case when t2.pay_money is null
? ? ? then 0
? ? ? else t2.pay_money
? ? ? end
? ? ? ) as total_money
from
? (select * from itcast_dw.dw_dim_goods_cats) t1
? left join?
? (select * from itcast_dw.dw_fact_order_goods_wide) t2
? ?on t1.cat_3d_id = t2.goods_cat_3d_id
group by t1.cat_1t_name;
-- 測試
select * from itcast_app.app_trade_order where category_type = '一級分類' and payment_name = '所有';
-- 5、全國、二級商品分類維度的分支付類型數據
insert into table itcast_app.app_trade_order
select
? ?'全國' as area_type,
? ?'' as region_name,
? ?'' as city_name,
? ?'二級分類' as category_type,
? t3.cat_1t_name as category_1st_name,
? t3.cat_2d_name as category_2d_name,
? t1.payment_name as payment_name,
? ?count(distinct t2.order_id) as total_count,
? sum(t2.goods_num) as total_goods_num,
? sum(t2.pay_money) as total_money
from
? (select * from itcast_dw.dw_dim_payments ) t1
? left join
? (select * from itcast_dw.dw_fact_order_goods_wide ) t2
? ?on t1.payment_id = t2.payment_id
? left join
? (select * from itcast_dw.dw_dim_goods_cats) t3
? ?on t2.goods_cat_3d_id = t3.cat_3d_id
group by t1.payment_name, t3.cat_1t_name, t3.cat_2d_name;
?
-- 測試查詢數據
select * from itcast_app.app_trade_order where ?area_type = '全國' and category_type = '二級分類' limit 10;
------------APP層指標開發-指標開發3-用戶訂單指標業務開發------------------------------
--1、創建APP表
drop table if exists itcast_app.user_order_measure;
create table itcast_app.user_order_measure(
? userid string, ? ? ? ? ? ? ? ? ? ? ? ? ?-- 用戶id
? username string, ? ? ? ? ? ? ? ? ? ? ? ?-- 用戶名稱
? first_paytime string, ? ? ? ? ? ? ? ? ? -- 第一次消費時間
? lastest_paytime string, ? ? ? ? ? ? ? ? -- 最近一次消費時間
? first_day_during_days bigint, ? ? ? ? ? -- 首單距今時間
? lastest_day_durning_days bigint, ? ? ? ?-- 尾單距今時間
? min_paymoney double, ? ? ? ? ? ? ? ? ? ?-- 最小消費金額
? max_paymoney double, ? ? ? ? ? ? ? ? ? ?-- 最大消費金額
? total_count_without_back bigint, ? ? ? ?-- 累計消費次數(不含退拒)
? total_money_without_back double, ? ? ? ?-- 累計消費金額(不含退拒)
? total_count_without_back_30 bigint, ? ? -- 近30天累計消費次數(不含退拒)
? total_money_without_back_30 double, ? ? -- 近30天累計消費金額(不含退拒)
? total_count_30 bigint, ? ? ? ? ? ? ? ? ?-- 近30天累計消費次數(含退拒)
? total_money_30 double, ? ? ? ? ? ? ? ? ?-- 近30天累計消費金額(含退拒)
? atv double, ? ? ? ? ? ? ? ? ? ? ? ? ? ? -- 客單價(含退拒)
? atv_withoutback double, ? ? ? ? ? ? ? ? -- 客單價(不含退拒)
? atv_60 double, ? ? ? ? ? ? ? ? ? ? ? ? ?-- 近60天客單價(含退拒)
? atv_60_withoutback double, ? ? ? ? ? ? ?-- 近60天客單價(不含退拒)
? school_order_count bigint, ? ? ? ? ? ? ?-- 學校下單總數
? company_order_count bigint, ? ? ? ? ? ? -- 單位下單總數
? home_order_count bigint, ? ? ? ? ? ? ? ?-- 家里下單總數
? am_order_count bigint, ? ? ? ? ? ? ? ? ?-- 上午下單總數
? pm_order_count bigint, ? ? ? ? ? ? ? ? ?-- 下午下單總數
? night_order_count bigint, ? ? ? ? ? ? ? -- 晚上下單總數
? most_usual_address string, ? ? ? ? ? ? ?-- 常用收貨地址
? most_usual_paytype string ? ? ? ? ? ? ? -- 常用支付方式
)
row format delimited fields terminated by '\t';
--2、創建用戶維度表:itcast_dw.dw_dim_user
drop table if exists itcast_dw.dw_dim_users;
create table itcast_dw.dw_dim_users(
? userId ? ? ? ? ?bigint,
? loginName ? ? ? string,
? userSex ? ? ? ? bigint
)
row format delimited fields terminated by '\t';
--3、加載數據
insert overwrite table itcast_dw.dw_dim_users
select?
? userId, ??
? loginName,
? userSex ?
from
? itcast_ods.itcast_users ;
? ?
--驗證
select * from itcast_dw.dw_dim_users limit 10;
--4、創建dw層dim_user_address表
drop table if exists itcast_dw.dw_dim_user_address;
create table itcast_dw.dw_dim_user_address(
? addressId ? ?bigint,
? userId ? ? ? bigint,
? userName ? ? string,
? otherName ? string,
? userPhone ? string,
? areaIdPath ? string,
? areaId ? ? ? bigint,
? userAddress string,
? isDefault ? ?bigint,
? dataFlag ? ? bigint,
? createTime ? string
)
row format delimited fields terminated by '\t';
--5、從ods層itcast_user_address導出數據到dim_user_address表
insert overwrite table itcast_dw.dw_dim_user_address?
select?
addressId, --用戶地址id
userId, --用戶id
userName, --用戶名
otherName, --地址類型
userPhone, --用戶聯系方式
areaIdPath, --地址id路徑
areaId, ? ? --區域ID
userAddress, --用戶地址
isDefault, ?--是否默認地址
dataFlag, ? --數據狀態
createTime ?--創建時間
from itcast_ods.itcast_user_address;
--6、創建訂單時間標志、地址標志寬表 fact_order_wide
drop table if exists itcast_dw.dw_fact_order_wide;
create table itcast_dw.dw_fact_order_wide(
? orderId bigint, --訂單id
? orderStatus bigint, --訂單狀態
? payType bigint, --支付類型
? userId bigint, --用戶id
? userAddressId bigint, --用戶地址id
? payTime string, --支付時間
? payMoney double, --支付金額
? createtime string, --創建時間
? flag30 bigint, --最近30天標記
? flag60 bigint, --最近60天標記
? flag90 bigint, --最近90天筆記
? flag180 bigint,--最近180天筆記
? flagTimeBucket string, --一天時間段標志(凌晨、早晨、上午.
? othername string --地址標志(家里、學校、工作單位..)
)
row format delimited fields terminated by '\t';
--7、加載數據
insert overwrite table itcast_dw.dw_fact_order_wide
select
? t1.orderId,
? t1.orderStatus,
? t1.payType,
? t1.userId,
? t1.userAddressId,
? t1.payTime,
? t1.totalMoney,
? t1.createtime,
? ?--近30天
? case when datediff(current_timestamp, t1.createtime) <= 30
? ? ? then 1
? else 0
? end as flag_30,
? ? --近60天
? case when datediff(current_timestamp, t1.createtime) <= 60
? ? ? then 1
? else 0
? end as flag_60,
? ? --近90天
? case when datediff(current_timestamp, t1.createtime) <= 90
? ? ? then 1
? else 0
? end as flag_90,?
? ? --近180天
? case when datediff(current_timestamp, t1.createtime) >= 180
? then 1
? else 0
? end as flag_180,
? ?--所屬時間段
? case when hour(t1.createtime) >= 0 and hour(t1.createtime) < 6
? ? ? then '凌晨'
? when hour(t1.createtime) >= 6 and hour(t1.createtime) < 12
? ? ? then '上午'
? when hour(t1.createtime) >= 12 and hour(t1.createtime) < 14
? ? ? then '中午'
? when hour(t1.createtime) >= 14 and hour(t1.createtime) < 18
? ? ? then '下午'
? else '晚上'
? end as flag_time_bucket,
? ?--家里、單位、學校
? t2.othername
from?
? (select orderId,orderStatus,payType,userId,userAddressId,payTime,totalMoney,createtime from itcast_dw.dw_fact_orders?
where dw_end_date ='9999-12-31') t1 ?
left join
? (select * from itcast_dw.dw_dim_user_address) t2
? ?on t1.userAddressId = t2.addressId;
? ? ? ?
-- 測試語句
select * from itcast_dw.dw_fact_order_wide limit 5;?
--1、指標開發一
select
? t1.userid, --用戶id
? t1.loginname, --登錄名字
? MIN(t2.payTime) as first_paytime, --首次下單時間
? MAX(t2.payTime) as lastest_paytime, --尾單時間
? DATEDIFF(CURRENT_TIMESTAMP, MIN(t2.payTime)) as first_day_during_days,--首單距今
? DATEDIFF(CURRENT_TIMESTAMP, MAX(t2.payTime)) as lastest_day_durning_days, --尾單距今
? MIN(t2.paymoney) as min_paymoney, --最小支付金額
? MAX(t2.paymoney) as max_paymoney ?--最大支付金額
from
? (select * from itcast_dw.dw_fact_order_wide) as t2
? left join
? ? (select * from itcast_dw.dw_dim_users) as t1
? ?on t1.userId = t2.userId
group by t1.userid,t1.loginname;
--2、指標開發二
select
? t2.userid,
? t2.loginname,
? MIN(t1.payTime) as first_paytime,
? MAX(t1.payTime) as lastest_paytime,
? DATEDIFF(CURRENT_TIMESTAMP, MIN(t1.payTime)) as first_day_during_days,
? DATEDIFF(CURRENT_TIMESTAMP, MAX(t1.payTime)) as lastest_day_durning_days,
? MIN(t1.paymoney) as min_paymoney,
? MAX(t1.paymoney) as max_paymoney,
sum(
case when t1.orderstatus !=10 and t1.orderstatus !=11
? then 1
? else 0
? end
) as total_count_without_back,--累計消費次數不含退拒,
sum(case when t1.orderstatus !=10 and t1.orderstatus !=11
? then t1.paymoney
? else 0
? end
? ) as total_money_without_back, --累計消費金額不含退拒
sum(case when t1.flag30 =1 and t1.orderstatus !=10 and t1.orderstatus !=11
? then 1
? ? else 0
? ? end
? ) as total_count_without_back_30, ? --累計近30天消費次數不含退拒
? sum(case when t1.flag30 =1 and t1.orderstatus !=10 and t1.orderstatus !=11
? then t1.paymoney
? ? else 0
? ? end
? ) as total_money_without_back_30,-- 累計近30天消費金額不含退拒
sum(case when t1.flag30 =1?
? then 1
? ? else 0
? ? end
? ) as total_count_without_30, ? ?--累計近30天消費次數含退拒
? sum(case when t1.flag30 =1?
? then t1.paymoney
? ? else 0
? ? end
? ) as total_money_with_back_30 ? ?--累計近30天消費金額含退拒
? ??
from?
(select * from itcast_dw.dw_fact_order_wide) t1?
left join ?
(select * from itcast_dw.dw_dim_users) t2 on?
t1.userid=t2.userid
group by t2.userid,t2.loginname limit 5;
---------------------------------------------------
select
? t2.userid,
? t2.loginname,
? MIN(t1.payTime) as first_paytime,
? MAX(t1.payTime) as lastest_paytime,
? DATEDIFF(CURRENT_TIMESTAMP, MIN(t1.payTime)) as first_day_during_days,
? DATEDIFF(CURRENT_TIMESTAMP, MAX(t1.payTime)) as lastest_day_durning_days,
? MIN(t1.paymoney) as min_paymoney,
? MAX(t1.paymoney) as max_paymoney,
sum(
case when t1.orderstatus !=10 and t1.orderstatus !=11
? then 1
? else 0
? end
) as total_count_without_back,--累計消費次數不含退拒,
sum(case when t1.orderstatus !=10 and t1.orderstatus !=11
? then t1.paymoney
? else 0
? end
? ) as total_money_without_back, --累計消費金額不含退拒
sum(case when t1.flag180 =1 and t1.orderstatus !=10 and t1.orderstatus !=11
? then 1
? ? else 0
? ? end
? ) as total_count_without_back_180, ? --累計近30天消費次數不含退拒
??
? sum(case when t1.flag180 =1 and t1.orderstatus !=10 and t1.orderstatus !=11
? then t1.paymoney
? ? else 0
? ? end
? ) as total_money_without_back_180,-- 累計近30天消費金額不含退拒
sum(case when t1.flag180 =1?
? then 1
? ? else 0
? ? end
? ) as total_count_without_180, ? ?--累計近30天消費次數含退拒
? sum(case when t1.flag180 =1?
? then t1.paymoney
? ? else 0
? ? end
? ) as total_money_with_back_180 ? ?--累計近30天消費金額含退拒
? ??
from?
(select * from itcast_dw.dw_fact_order_wide) t1?
left join ?
(select * from itcast_dw.dw_dim_users) t2 on?
t1.userid=t2.userid
group by t2.userid,t2.loginname limit 5
--3、指標開發三
select
? t2.userid,
? t2.loginname,
SUM(case when t1.otherName = '學校'
? ? ? then 1
? ? ? else 0
? ? ? end) as school_order_count, ? ? ?-- 學校下單總數
? SUM(case when t1.otherName = '單位'
? ? ? then 1
? ? ? else 0
? ? ? end) as company_order_count, ? ?-- 單位下單總數
? SUM(case when t1.otherName = '家里'
? ? ? then 1
? ? ? else 0
? ? ? end) as home_order_count, ? ? ? ?-- 家里下單總數
? SUM(case when t1.flagTimeBucket = '上午'
? ? ? then 1
? ? ? else 0
? ? ? end) as am_order_count, ? ? ? ? ?-- 上午下單總數
? SUM(case when t1.flagTimeBucket = '下午'
? ? ? then 1
? ? ? else 0
? ? ? end) as pm_order_count, ? ? ? ? ?-- 下午下單總數
? SUM(case when t1.flagTimeBucket = '晚上'
? ? ? then 1
? ? ? else 0
? ? ? end) as night_order_count-- 晚上下單總數
from
(select * from itcast_dw.dw_fact_order_wide) t1?
left join ?
(select * from itcast_dw.dw_dim_users) t2 on?
t1.userid=t2.userid
group by t2.userid,t2.loginname limit 5;
--4、指標數據加載
insert overwrite table itcast_app.user_order_measure?
select
?t2.userid,
? t2.loginname,
? MIN(t1.payTime) as first_paytime,
? MAX(t1.payTime) as lastest_paytime,
? DATEDIFF(CURRENT_TIMESTAMP, MIN(t1.payTime)) as first_day_during_days,
? DATEDIFF(CURRENT_TIMESTAMP, MAX(t1.payTime)) as lastest_day_durning_days,
? MIN(t1.paymoney) as min_paymoney,
? MAX(t1.paymoney) as max_paymoney,
sum(
case when t1.orderstatus !=10 and t1.orderstatus !=11
? then 1
? else 0
? end
) as total_count_without_back,--累計消費次數不含退拒,
sum(case when t1.orderstatus !=10 and t1.orderstatus !=11
? then t1.paymoney
? else 0
? end
? ) as total_money_without_back, --累計消費金額不含退拒
sum(case when t1.flag30 =1 and t1.orderstatus !=10 and t1.orderstatus !=11
? then 1
? ? else 0
? ? end
? ) as total_count_without_back_30, ? --累計近30天消費次數不含退拒
? sum(case when t1.flag30 =1 and t1.orderstatus !=10 and t1.orderstatus !=11
? then t1.paymoney
? ? else 0
? ? end
? ) as total_money_without_back_30,-- 累計近30天消費金額不含退拒
sum(case when t1.flag30 =1?
? then 1
? ? else 0
? ? end
? ) as total_count_without_30, ? ?--累計近30天消費次數含退拒
? sum(case when t1.flag30 =1?
? then t1.paymoney
? ? else 0
? ? end
? ) as total_money_with_back_30, ? ?--累計近30天消費金額含退拒
? ? ? 0 as ?atv,
? ? ? 0 as atv_withoutback,
? ? ? 0 as total_count_without_60,
? ? ? 0 as total_money_with_back_60,
? ? SUM(case when t1.otherName = '學校'
? ? then 1
? ? else 0
? ? end) as school_order_count, ? ? ?-- 學校下單總數
SUM(case when t1.otherName = '單位'
? ? then 1
? ? else 0
? ? end) as company_order_count, ? ?-- 單位下單總數
SUM(case when t1.otherName = '家里'
? ? then 1
? ? else 0
? ? end) as home_order_count, ? ? ? ?-- 家里下單總數
SUM(case when t1.flagTimeBucket = '上午'
? ? then 1
? ? else 0
? ? end) as am_order_count, ? ? ? ? ?-- 上午下單總數
SUM(case when t1.flagTimeBucket = '下午'
? ? then 1
? ? else 0
? ? end) as pm_order_count, ? ? ? ? ?-- 下午下單總數
SUM(case when t1.flagTimeBucket = '晚上'
? ? then 1
? ? else 0
? ? end) as night_order_count,-- 晚上下單總數
? ? ? --最常用地址
? ? '' most_usual_address,
--常用的支付方式
? ? '' most_usual_paytype
from?
(select * from itcast_dw.dw_fact_order_wide) t1?
left join ?
(select * from itcast_dw.dw_dim_users) t2 on?
t1.userid=t2.userid
group by t2.userid,t2.loginname
-------------------------結果表導出----------------------------------
--1、在Mysql中創建目標數據庫
create database hive_app_result;
--2、導出時間維度分析業務指標數據、
?--2.1 創建Mysql目標表
? create table hive_app_result.app_order_date(
? ? date_type varchar(20), ? -- 時間維度類型
? ? date_val varchar(20), ? ?-- 時間維度值
? ? order_count bigint, -- 訂單筆數
? ? order_money double ?-- 訂單交易額
? );
--2.2 數據導出
? /export/server/sqoop-1.4.7/bin/sqoop export \
? --connect jdbc:mysql://192.168.88.100:3306/hive_app_result \
? --username root \
? --password 123456 \
? --table app_order_date \
? --input-fields-terminated-by '\t' \
? --export-dir /user/hive/warehouse/itcast_app.db/app_order_date
--3、導出地域和商品分類維度指標數據
?--3.1 創建Mysql目標表
? DROP TABLE IF EXISTS hive_app_result.app_trade_order;
? CREATE TABLE IF NOT EXISTS hive_app_result.app_trade_order(
? ? area_type VARCHAR(20), ? ? ? ? ? ? ? -- 區域范圍:區域類型(全國、大區、城市)
? ? region_name VARCHAR(20), ? ? ? ? ? ? -- 區域名稱
? ? city_name VARCHAR(20), ? ? ? ? ? ? ? -- 城市名稱
? ? category_type VARCHAR(20), ? ? ? ? ? --
? ? category_1st_name VARCHAR(20), ? ? ? -- 一級分類名稱
? ? category_2d_name VARCHAR(20), ? ? ? ?-- 二級分類名稱
? ? payment_name VARCHAR(20), ? ? ? ? ? ?-- 支付方式(所有、微信、支付寶、...)
? ? total_count BIGINT, ? ? ? ? ? ? -- 訂單數量
? ? total_goods_num BIGINT, ? ? ? ? -- 商品數量
? ? total_money DOUBLE ? ? ? ? ? ? ?-- 支付金額
? );
--3.2 數據導出
? /export/server/sqoop-1.4.7/bin/sqoop export \
? --connect jdbc:mysql://192.168.88.100:3306/hive_app_result \
? --username root \
? --password 123456 \
? --table app_trade_order \
? --input-fields-terminated-by '\t' \
? --input-null-string '\\N' --input-null-non-string '\\N' \
? --export-dir /user/hive/warehouse/itcast_app.db/app_trade_order
? --4、導出用戶訂單指標數據
? --4.1 創建Mysql目標表
? CREATE TABLE hive_app_result.user_order_measure(
? ? userid VARCHAR(50), ? ? ? ? ? ? ? ? ? ? ? ? ?-- 用戶id
? ? username VARCHAR(50), ? ? ? ? ? ? ? ? ? ? ? ?-- 用戶名稱
? ? first_paytime VARCHAR(50), ? ? ? ? ? ? ? ? ? -- 第一次消費時間
? ? lastest_paytime VARCHAR(50), ? ? ? ? ? ? ? ? -- 最近一次消費時間
? ? first_day_during_days BIGINT, ? ? ? ? ? -- 首單距今時間
? ? lastest_day_durning_days BIGINT, ? ? ? ?-- 尾單距今時間
? ? min_paymoney DOUBLE, ? ? ? ? ? ? ? ? ? ?-- 最小消費金額
? ? max_paymoney DOUBLE, ? ? ? ? ? ? ? ? ? ?-- 最大消費金額
? ? total_count_without_back BIGINT, ? ? ? ?-- 累計消費次數(不含退拒)
? ? total_money_without_back DOUBLE, ? ? ? ?-- 累計消費金額(不含退拒)
? ? total_count_without_back_30 BIGINT, ? ? -- 近30天累計消費次數(不含退拒)
? ? total_money_without_back_30 DOUBLE, ? ? -- 近30天累計消費金額(不含退拒)
? ? total_count_30 BIGINT, ? ? ? ? ? ? ? ? ?-- 近30天累計消費次數(含退拒)
? ? total_money_30 DOUBLE, ? ? ? ? ? ? ? ? ?-- 近30天累計消費金額(含退拒)
? ? atv DOUBLE, ? ? ? ? ? ? ? ? ? ? ? ? ? ? -- 客單價(含退拒)
? ? atv_withoutback DOUBLE, ? ? ? ? ? ? ? ? -- 客單價(不含退拒)
? ? atv_60 DOUBLE, ? ? ? ? ? ? ? ? ? ? ? ? ?-- 近60天客單價(含退拒)
? ? atv_60_withoutback DOUBLE, ? ? ? ? ? ? ?-- 近60天客單價(不含退拒)
? ? school_order_count BIGINT, ? ? ? ? ? ? ?-- 學校下單總數
? ? company_order_count BIGINT, ? ? ? ? ? ? -- 單位下單總數
? ? home_order_count BIGINT, ? ? ? ? ? ? ? ?-- 家里下單總數
? ? am_order_count BIGINT, ? ? ? ? ? ? ? ? ?-- 上午下單總數
? ? pm_order_count BIGINT, ? ? ? ? ? ? ? ? ?-- 下午下單總數
? ? night_order_count BIGINT, ? ? ? ? ? ? ? -- 晚上下單總數
? ? most_usual_address VARCHAR(50), ? ? ? ? ? ? ?-- 常用收貨地址
? ? most_usual_paytype VARCHAR(50) ? ? ? ? ? ? ? -- 常用支付方式
? );
?--4.2 數據導出
?/export/server/sqoop-1.4.7/bin/sqoop export \
--connect jdbc:mysql://192.168.88.100:3306/hive_app_result \
--username root \
--password 123456 \
--table user_order_measure \
--input-fields-terminated-by '\t' \
--export-dir /user/hive/warehouse/itcast_app.db/user_order_measure
---------------數據可視化------------------------------------------------------
--1、啟動superset
superset run -h 192.168.88.100 -p 8099 --with-threads --reload --debugger
--2、訪問superset
?http://192.168.88.100:8099
? 用戶名:admin
? 密碼: 123456
--3、連接Mysql數據庫
?mysql+pymysql://root:123456@192.168.88.100/hive_app_result?charset=utf8
--4、指標可視化?
?--4.1、2020年期間每個季度的銷售訂單筆數、訂單總額可視化
? ?select * from hive_app_result.app_order_date where date_type = '1';
--4.2、2020年期間每個月的銷售訂單筆數可視化
-- 2020-01-1 00:00:00
?SELECT STR_TO_DATE(concat(date_val,"-01"),'%Y-%m-%d') AS TIMESTAMP, order_count,order_money?
?FROM hive_app_result.app_order_date?
?WHERE ?date_type = '2'
--4.3、2020年期間周一到周日的訂單金額可視化
SELECT?
date_val,
CASE WHEN ?date_val = '1' THEN '周一'
?WHEN ?date_val = '2' THEN '周二'
?WHEN ?date_val = '3' THEN '周三'
?WHEN ?date_val = '4' THEN '周四'
?WHEN ?date_val = '5' THEN '周五'
?WHEN ?date_val = '6' THEN '周六'
?WHEN ?date_val = '7' THEN '周日'
END AS week_val,
order_count,
order_money
FROM `app_order_date`
WHERE date_type = '3'
ORDER BY date_val ASC?
--4.4、全國一級商品分類維度的不分支付類型數據可視化
select * from hive_app_result.app_trade_order where category_type = '一級分類' and payment_name = '所有';
--4.5、全國二級商品分類維度的不分支付類型數據可視化
SELECT * FROM hive_app_result.app_trade_order WHERE ?area_type = '全國' AND category_type = '二級分類';
四、業務數據分析
--1、現將日志數據采集到HDFS上.
# Name the components on this agent
a1.sources = r1
a1.sinks = k1
a1.channels = c1
a1.sources.r1.type = TAILDIR
a1.sources.r1.positionFile = /var/log/flume/taildir_position.json
a1.sources.r1.filegroups = f1
a1.sources.r1.filegroups.f1 = /usr/local/nginx/logs/access*.log
# Describe the sink
a1.sinks.k1.type = hdfs
a1.sinks.k1.hdfs.path = /flume/web_log/2021-02-03
a1.sinks.k1.hdfs.filePrefix = events-
a1.sinks.k1.hdfs.round = true
a1.sinks.k1.hdfs.roundValue = 10
a1.sinks.k1.hdfs.roundUnit = minute
a1.sinks.k1.hdfs.rollInterval = 0
a1.sinks.k1.hdfs.rollSize = 4194304
a1.sinks.k1.hdfs.rollCount = 1000000
a1.sinks.k1.hdfs.idleTimeout= 10
a1.sinks.k1.hdfs.batchSize = 100
a1.sinks.k1.hdfs.useLocalTimeStamp = true
#生成的文件類型,默認是Sequencefile,可用DataStream,則為普通文本
a1.sinks.k1.hdfs.fileType = DataStream
# Use a channel which buffers events in memory
a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000
a1.channels.c1.transactionCapacity = 100
# Bind the source and sink to the channel
a1.sources.r1.channels = c1
a1.sinks.k1.channel = c1
--2、啟動flume采集
?/export/server/flume-1.8.0/bin/flume-ng agent -c /export/server/flume-1.8.0/conf ?-f /export/server/flume-1.8.0/conf/web_log.conf -n a1 ?-Dflume.root.logger=INFO,console
--3、數據預處理?
?hadoop jar /export/data/mapreduce/web_log.jar cn.itcast.bigdata.weblog.pre.WeblogPreProcess ??
?源文件路徑:/flume/web_log/2021-02-03
?目標路徑:/output/web_log/pre_web_log
--原始日志數據
163.177.71.12 - - [01/Nov/2020:08:31:39 +0000] "HEAD / HTTP/1.1" 200 20 "-" "DNSPod-Monitor/1.0"
163.177.71.12 - - [01/Nov/2020:08:31:42 +0000] "HEAD / HTTP/1.1" 200 20 "-" "DNSPod-Monitor/1.0"
60.247.77.253 - - [01/Nov/2020:08:31:41 +0000] "GET /wp-content/uploads/2020/09/Hadoop-mahout-roadmap.png HTTP/1.1" 200 178536 "-" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.89 Safari/537.1"
--預處理之后的日志數據
true163.177.71.12-2020-11-01 08:28:43/20020"-""DNSPod-Monitor/1.0"
true163.177.71.12-2020-11-01 08:28:46/20020"-""DNSPod-Monitor/1.0"
true60.247.77.253-2020-11-01 08:31:34/wp-content/uploads/2020/09/Hadoop-mahout-roadmap.png200178528"-""Mozilla/5.0(WindowsNT6.1;WOW64)AppleWebKit/537.1(KHTML,likeGecko)Chrome/21.0.1180.89Safari/537.1"
--pageviews模型
4e7f9897-fc23-4307-93cf-94c97ce1feea163.177.71.12-2020-11-01 08:28:43/13"-""DNSPod-Monitor/1.0"20200
4e7f9897-fc23-4307-93cf-94c97ce1feea163.177.71.12-2020-11-01 08:28:46/2173"-""DNSPod-Monitor/1.0"20200
4e7f9897-fc23-4307-93cf-94c97ce1aaaa163.177.71.12-2020-11-01 15:28:46/14"-""DNSPod-Monitor/1.0"20200
4e7f9897-fc23-4307-93cf-94c97ce1aaaa163.177.71.12-2020-11-01 15:28:50/260"-""DNSPod-Monitor/1.0"20200
0772cabd-5da2-47f7-95fc-484dce095ca360.247.77.253-2020-11-01 08:30:09/category/%e6%95%b0%e6%8d%ae%e6%8c%96%e6%8e%98/18"http://blog.fens.me/category/%e6%95%b0%e6%8d%ae%e6%8c%96%e6%8e%98/""Mozilla/5.0(WindowsNT6.1;WOW64)AppleWebKit/537.1(KHTML,likeGecko)Chrome/21.0.1180.89Safari/537.1"17504200
0772cabd-5da2-47f7-95fc-484dce095ca360.247.77.253-2020-11-01 08:30:17/hadoop-mahout-roadmap/21"http://blog.fens.me/category/%e6%95%b0%e6%8d%ae%e6%8c%96%e6%8e%98/""Mozilla/5.0(WindowsNT6.1;WOW64)AppleWebKit/537.1(KHTML,likeGecko)Chrome/21.0.1180.89Safari/537.1"10336200
--得到pageviews模型
hadoop jar /export/data/mapreduce/web_log.jar ?cn.itcast.bigdata.weblog.clickstream.ClickStreamPageView ??
--visits模型
00ebcd4c-9443-4112-828b-582b2e4e3c03183.60.9.2172020-11-01 07:10:572018-11-01 07:10:57/hadoop-hive-intro//hadoop-hive-intro/"-"12
0c371b1b-ccc7-4658-9c71-496e0ffc0f94115.236.11.1942020-11-01 12:25:572018-11-01 12:25:57/hadoop-mahout-roadmap//hadoop-mahout-roadmap/"http://f.dataguru.cn/thread-177375-1-1.html"8
--得到visits模型
hadoop jar /export/data/mapreduce/web_log.jar cn.itcast.bigdata.weblog.clickstream.ClickStreamVisit ? ?
---4、數據庫創建
create database if not exists ?web_log_ods;
create database if not exists ?web_log_dw;
create database if not exists ?web_log_app;
--5、創建ods層表
? --創建清洗后的原始日志表
? drop table if exists web_log_ods.ods_weblog_origin;
? create table web_log_ods.ods_weblog_origin(
? valid string , --有效標記
? remote_addr string, --訪客ip
? remote_user string, --訪客用戶信息
? time_local string, --請求時間
? request string, ?--請求url
? status string, --響應狀態碼
? body_bytes_sent string, --響應字節數
? http_referer string, --來源url
? http_user_agent string --訪客終端信息
? )?
? partitioned by (dt string)
? row format delimited fields terminated by '\001';
? --創建點擊流模型pageviews表
drop table if exists web_log_ods.ods_click_pageviews;
create table ?web_log_ods.ods_click_pageviews(
session string, --會話id
remote_addr string, --訪客ip
remote_user string, --訪客用戶信息
time_local string, --請求時間
request string, --請求url
visit_step string, --訪問步長
page_staylong string, --頁面停留時間(秒)
http_referer string, --來源url
http_user_agent string,--訪客終端信息
body_bytes_sent string,--響應字節數
status string --響應狀態碼
)
partitioned by (dt string)
row format delimited fields terminated by '\001';
--創建點擊流visits模型表
drop table if exists web_log_ods.ods_click_stream_visits;
create table web_log_ods.ods_click_stream_visits(
session ? ? string, --會話id
remote_addr string, --訪客ip
inTime ? ? ?string, --會話訪問起始時間
outTime ? ? string, --會話訪問離開時間
inPage ? ? ?string, --會話訪問起始頁面
outPage ? ? string, --會話訪問離開頁面
referal ? ? string, --來源url
pageVisits ?int --會話頁面訪問數量
)
partitioned by (dt string)
row format delimited fields terminated by '\001';
--表數據加載
load data inpath '/output/web_log/pre_web_log' overwrite into table ?web_log_ods.ods_weblog_origin partition(dt='2021-02-01');
?
load data inpath '/output/web_log/pageviews'overwrite into table web_log_ods.ods_click_pageviews partition(dt='2021-02-01');
?
load data inpath '/output/web_log/visits' overwrite into table web_log_ods.ods_click_stream_visits partition(dt='2021-02-01');
--明細表(寬表)實現
drop table web_log_dw.dw_weblog_detail;
create table web_log_dw.dw_weblog_detail(
valid ? ? ? ? ? string, --有效標識
remote_addr ? ? string, --來源IP
remote_user ? ? string, --用戶標識
time_local ? ? ?string, --訪問完整時間
daystr ? ? ? ? ?string, --訪問日期
timestr ? ? ? ? string, --訪問時間
month ? ? ? ? ? string, --訪問月
day ? ? ? ? ? ? string, --訪問日
hour ? ? ? ? ? ?string, --訪問時
request ? ? ? ? string, --請求的url
status ? ? ? ? ?string, --響應碼
body_bytes_sent string, --傳輸字節數
http_referer ? ?string, --來源url
ref_host ? ? ? ?string, --來源的host
ref_path ? ? ? ?string, --來源的路徑
ref_query ? ? ? string, --來源參數query
ref_query_id ? ?string, --來源參數query的值
http_user_agent string --客戶終端標識
)
partitioned by(dt string)
row format delimited fields terminated by '\001';
--明細表加載數據
2020-11-01 08:28:43
insert into table web_log_dw.dw_weblog_detail partition(dt='2021-02-01')
select c.valid,c.remote_addr,c.remote_user,c.time_local,
substring(c.time_local,1,10) as daystr, --2020-11-01
substring(c.time_local,12) as tmstr, --08:28:43
substring(c.time_local,6,2) as month, --11
substring(c.time_local,9,2) as day, ?---01
substring(c.time_local,12,2) as hour, --08
c.request,c.status,c.body_bytes_sent,c.http_referer,c.ref_host,c.ref_path,c.ref_query,c.ref_query_id,c.http_user_agent
from
(
? ? SELECT
? a.valid,a.remote_addr,a.remote_user,a.time_local,
? a.request,a.status,a.body_bytes_sent,a.http_referer,a.http_user_agent,b.ref_host,b.ref_path,b.ref_query,b.ref_query_id
? FROM web_log_ods.ods_weblog_origin a LATERAL VIEW
? parse_url_tuple(regexp_replace(http_referer, "\"", ""), 'HOST', 'PATH','QUERY', 'QUERY:id') b as ref_host, ref_path, ref_query,
? ref_query_id
?) c;
-- http://www.baidu.com/email/info/a.jpg?id=val1&key2=val2 ? www.baidu.com ? /email/info/a.jpg id=val1&key2=val2 ?val1
--執行sql
hive -f '/export/data/hive_sql/web_log_detail.sql'1
--6、基礎指標分析
192.168.88.5:
? 8:00 ?a.html
? 8:15 ?b.html
? 8:50 ?c.html
? 18:15 d.html
192.168.88.6:
? 8:00 ?c.html
? 8:15 ?d.html
? 8:50 ?e.html
? 18:15 f.html
?web_log_dw.dw_weblog_detail
?select * from ?web_log_dw.dw_weblog_detail limit 10
?--瀏覽頁面次數(pv) ?--->6
?select count(*) as pvs from ?web_log_dw.dw_weblog_detail where valid = true and dt='2021-02-01';
?--獨立訪客(uv) ----->2
?select count(distinct remote_addr) as uvs from ?web_log_dw.dw_weblog_detail where valid = true and dt='2021-02-01';
?--訪問次數(vv) ----->6
select count(session) from ods_click_stream_visits where ?dt='2021-02-01'?
--基礎指標入庫
drop table if exists web_log_app.app_webflow_basic_info;
create table web_log_app.app_webflow_basic_info(date_val string,pvs bigint,uvs bigint,vvs bigint) partitioned by(dt string);
?
--允許笛卡爾積
set spark.sql.crossJoin.enabled=true;
?
insert into table web_log_app.app_webflow_basic_info partition(dt='2021-02-01')
select '2021-02-01',a.*,b.* from
(?
? ?select count(*) as pvs,count(distinct remote_addr) as uvs from web_log_dw.dw_weblog_detail ?where dt='2021-02-01'
) a?
join?
(
?select count(session) as vvs from web_log_ods.ods_click_stream_visits where dt='2021-02-01'
) b;
--基礎指標多維度統計分析:注意gruop by語句的語法
?
1、多維度統計PV總量
--多維度統計分析:注意gruop by語句的語法
?
1.1. 多維度統計PV總量
--第一種方式:直接在web_log_dw.dw_weblog_detail 單表上進行查詢
1.1.1 計算該處理批次(一天)中的各小時pvs
drop table if exists web_log_app.app_pvs_everyhour_oneday;
create table web_log_app.app_pvs_everyhour_oneday(month string,day string,hour string,pvs bigint) partitioned by(dt string);
?
insert into table web_log_app.app_pvs_everyhour_oneday partition(dt='2021-02-01')
select a.month as month,a.day as day,a.hour as hour,count(*) as pvs from web_log_dw.dw_weblog_detail ?a
where ?a.dt='2021-02-01' group by a.month,a.day,a.hour;
?
--計算每天的pvs
drop table if exists web_log_app.app_pvs_everyday;
create table web_log_app.app_pvs_everyday(pvs bigint,month string,day string);
?
insert into table web_log_app.app_pvs_everyday
select count(*) as pvs,a.month as month,a.day as day from web_log_dw.dw_weblog_detail ?a
group by a.month,a.day;
--7、符合指標分析」
--復合指標統計分析
?
--人均瀏覽頁數(平均訪問深度)
?--需求描述:統計今日所有來訪者平均請求的頁面數。
?--總頁面請求數pv/去重總人數uv
?
drop table if exists web_log_app.app_avgpv_user_everydayy;
create table web_log_app.app_avgpv_user_everyday(day string, avgpv string);
? ? ? ??
?
--方式一:
insert into table web_log_app.app_avgpv_user_everyday
select '2021-02-01',pvs/uvs from web_log_app.app_webflow_basic_info;
?
--方式二:
?
insert ?into table web_log_app.app_avgpv_user_everyday
select '2021-02-01',sum(b.pvs)/count(b.remote_addr) from
(select remote_addr,count(*) as pvs from web_log_dw.dw_weblog_detail where dt='2021-02-01' group by remote_addr) b;
--平均訪問時長
?
? ? --平均每次訪問(會話)在網站上的停留時間。
? ? --體現網站對訪客的吸引程度。
? ? --平均訪問時長=訪問總時長/訪問次數。
?
--先計算每次會話的停留時長
?
select session, sum(page_staylong) as web_staylong from web_log_ods.ods_click_pageviews where dt='2021-02-01'
group by session;
?
?
--計算平均訪問時長
select
sum(a.web_staylong)/count(a.session)
from?
(select session, sum(page_staylong) as web_staylong from web_log_ods.ods_click_pageviews where dt='2021-02-01'
group by session) a;
--跳出率
? ? --跳出率是指用戶到網站上僅瀏覽了一個頁面就離開的訪問次數與所有訪問次數的百分比。
? ? --是評價網站性能的重要指標。
?
--/hadoop-mahout-roadmap/ 頁面跳出率
?
--總的訪問次數vv
select vvs from app_webflow_basic_info where dt='2021-02-01';
?
--瀏覽/hadoop-mahout-roadmap/一個頁面 并且離開的人數
select count(*) from web_log_ods.ods_click_stream_visits where dt='2021-02-01'
and pagevisits = 1 and outpage = "/hadoop-mahout-roadmap/";
?
?
--合并計算結果:
select
(b.nums/a.vvs)*100
from web_log_app.app_webflow_basic_info a join (select count(*) as nums from web_log_ods.ods_click_stream_visits where dt='2021-02-01'
and pagevisits = 1 and outpage = "/hadoop-mahout-roadmap/") b;
--熱門頁面統計
--統計最熱門的頁面top10
?
drop table if exists web_log_app.app_hotpages_everyday;
create table web_log_app.app_hotpages_everyday(day string,url string,pvs string);
?
--方式1
insert into table web_log_app.app_hotpages_everyday
select '2021-02-01',a.request,a.request_counts from
(select request as request,count(request) as request_counts?
from web_log_dw.dw_weblog_detail where dt='2021-02-01' group by request having request is not null
) a
order by a.request_counts desc limit 10;
--方式2
insert into table web_log_app.app_hotpages_everyday
select * from
(
SELECT?
? '2021-02-01',a.request,a.request_counts,
? RANK() OVER( ORDER BY a.request_counts desc) AS rn?
? FROM?
? (
? ? select request as request,count(request) as request_counts?
? ? from web_log_dw.dw_weblog_detail where dt='2021-02-01' group by request having request is not null
? )a
)b
where b.rn <= 10
?;
--漏斗模型
--添加模擬數據
load data local inpath '/export/data/hivedatas/click-part-r-00000' overwrite into table web_log_ods.ods_click_pageviews partition(dt='2021-02-01');
?
----------------------------------------------------------
---1、查詢每一個步驟的總訪問人數
--UNION All將多個SELECT語句的結果集合并為一個獨立的結果集
?
create table web_log_app.app_oute_numbs as?
select 'step1' as step,count(distinct remote_addr) ?as numbs from web_log_ods.ods_click_pageviews where dt='2021-02-01' and request like '/item%'
union all
select 'step2' as step,count(distinct remote_addr) ?as numbs from ?web_log_ods.ods_click_pageviews where dt='2021-02-01' and request like '/category%'
union all
select 'step3' as step,count(distinct remote_addr) ?as numbs from ?web_log_ods.ods_click_pageviews where dt='2021-02-01' and request like '/order%'
union all
select 'step4' as step,count(distinct remote_addr) ?as numbs from ?web_log_ods.ods_click_pageviews where dt='2021-02-01' and request like '/index%';
--2、查詢每一步驟相對于路徑起點人數的比例
--級聯查詢,自己跟自己join
?
select rn.step as rnstep,rn.numbs as rnnumbs,rr.step as rrstep,rr.numbs as rrnumbs ?from web_log_app.app_oute_numbs rn
inner join?
web_log_app.app_oute_numbs rr;
--每一步的人數/第一步的人數==每一步相對起點人數比例
select tmp.rnstep,tmp.rnnumbs/tmp.rrnumbs * 100 as abs_rate
from
(
select rn.step as rnstep,rn.numbs as rnnumbs,rr.step as rrstep,rr.numbs as rrnumbs ?from web_log_app.app_oute_numbs rn
inner join?
web_log_app.app_oute_numbs rr) tmp
where tmp.rrstep='step1'
--3、查詢每一步驟相對于上一步驟的漏出率
--首先通過自join表過濾出每一步跟上一步的記錄
select rn.step as rnstep,rn.numbs as rnnumbs,rr.step as rrstep,rr.numbs as rrnumbs ?from web_log_app.app_oute_numbs rn
inner join?
web_log_app.app_oute_numbs rr
where cast(substr(rn.step,5,1) as int)=cast(substr(rr.step,5,1) as int)-1;
?
--然后就可以非常簡單的計算出每一步相對上一步的漏出率
select tmp.rrstep as step,tmp.rrnumbs/tmp.rnnumbs as leakage_rate
from
(
select rn.step as rnstep,rn.numbs as rnnumbs,rr.step as rrstep,rr.numbs as rrnumbs ?from web_log_app.app_oute_numbs rn
inner join?
web_log_app.app_oute_numbs rr) tmp
where cast(substr(tmp.rnstep,5,1) as int)=cast(substr(tmp.rrstep,5,1) as int)-1;
--4、匯總以上兩種指標
drop table if exists web_log_app.app_bounce_rate;
create table web_log_app.app_bounce_rate?
(
?step_num string,
?numbs bigint,
?abs_rate double,
?leakage_rate double
);
insert into table web_log_app.app_bounce_rate?
select abs.step,abs.numbs,abs.rate as abs_rate,rel.leakage_rate as leakage_rate
from?
(
select tmp.rnstep as step,tmp.rnnumbs as numbs,tmp.rnnumbs/tmp.rrnumbs * 100 as rate
from
(
select rn.step as rnstep,rn.numbs as rnnumbs,rr.step as rrstep,rr.numbs as rrnumbs ?from web_log_app.app_oute_numbs rn
inner join?
web_log_app.app_oute_numbs rr) tmp
where tmp.rrstep='step1'
) abs
left outer join
(
select tmp.rrstep as step,tmp.rrnumbs/tmp.rnnumbs * 100 as leakage_rate
from
(
select rn.step as rnstep,rn.numbs as rnnumbs,rr.step as rrstep,rr.numbs as rrnumbs ?from web_log_app.app_oute_numbs rn
inner join?
web_log_app.app_oute_numbs rr) tmp
where cast(substr(tmp.rnstep,5,1) as int)=cast(substr(tmp.rrstep,5,1) as int)-1
) rel
on abs.step=rel.step;
--8、結果表導出------------------------
--創建mysql結果表
CREATE DATABASE web_log_result;
drop table if exists web_log_result.app_webflow_basic_info;
CREATE TABLE web_log_result.app_webflow_basic_info(MONTH VARCHAR(50),DAY VARCHAR(50),pv BIGINT,uv BIGINT,ip BIGINT,vv BIGINT);
/export/server/sqoop-1.4.7/bin/sqoop export \
--connect jdbc:mysql://192.168.88.100:3306/web_log_result \
--username root \
--password 123456 \
--table app_webflow_basic_info \
--input-fields-terminated-by '\001' \
--export-dir /user/hive/warehouse/web_log_app.db/app_webflow_basic_info/dt=2021-02-01
CREATE TABLE web_log_result.app_avgpv_user_everyday(
DAY VARCHAR(50),
avgpv VARCHAR(50)
);
/export/server/sqoop-1.4.7/bin/sqoop export \
--connect jdbc:mysql://192.168.88.100:3306/web_log_result \
--username root \
--password 123456 \
--table app_avgpv_user_everyday \
--input-fields-terminated-by '\001' \
--export-dir /user/hive/warehouse/web_log_app.db/app_avgpv_user_everyday
DROP TABLE IF EXISTS web_log_result.app_hotpages_everyday;
CREATE TABLE web_log_result.app_hotpages_everyday(
?day_value VARCHAR(20),
?url VARCHAR(100),
?pvs BIGINT
);
/export/server/sqoop-1.4.7/bin/sqoop export \
--connect jdbc:mysql://192.168.88.100:3306/web_log_result \
--username root \
--password 123456 \
--table app_hotpages_everyday \
--input-fields-terminated-by '\001' \
--export-dir /user/hive/warehouse/web_log_app.db/app_hotpages_everyday \
--input-null-string '\\N' --input-null-non-string '\\N'
drop table web_log_result.app_pvs_everyhour_oneday;
create table web_log_result.app_pvs_everyhour_oneday
(
month varchar(20),
day varchar(20),
hour varchar(20),
pvs bigint
);
/export/server/sqoop-1.4.7/bin/sqoop export \
--connect jdbc:mysql://192.168.88.100:3306/web_log_result \
--username root \
--password 123456 \
--table app_pvs_everyhour_oneday \
--input-fields-terminated-by '\001' \
--export-dir /user/hive/warehouse/web_log_app.db/app_pvs_everyhour_oneday/dt=2021-02-01 \
--input-null-string '\\N' --input-null-non-string '\\N'
drop table if exists web_log_result.app_bounce_rate;
create table web_log_result.app_bounce_rate?
(
?step_num varchar(50),
?numbs bigint,
?abs_rate double,
?leakage_rate double
);
/export/server/sqoop-1.4.7/bin/sqoop export \
--connect jdbc:mysql://192.168.88.100:3306/web_log_result \
--username root \
--password 123456 \
--table app_bounce_rate \
--input-fields-terminated-by '\001' \
--export-dir /user/hive/warehouse/web_log_app.db/app_bounce_rate \
--input-null-string '\\N' --input-null-non-string '\\N'
--9、數據可視化
--啟動superset
superset run -h 192.168.88.100 -p 8099 --with-threads --reload --debugger
--mysql的url地
? mysql+pymysql://root:123456@192.168.88.100/web_log_result?charset=utf8
--總pv量
? SELECT pv FROM web_log_result.app_webflow_basic_info
--一天各小時pvs的趨勢
SELECT STR_TO_DATE(CONCAT('2020-',MONTH,'-',DAY,HOUR,':00:00'),'%Y-%m-%d %H:%i:%s') AS date_val ,pvs?
FROM app_pvs_everyhour_oneday WHERE MONTH='11' AND DAY = '01';
--統計每日最熱門的頁面top10
SELECT * FROM web_log_result.app_hotpages_everyday;
--漏斗模型
SELECT * FROM web_log_result.app_bounce_rate ORDER BY step_num;
總結
以上是生活随笔為你收集整理的【无标题】数仓实战之亚马逊全球跨境电商分析平台的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: 六轴机械臂DIY(三)开源项目介绍
- 下一篇: 北京大学 计算机 排名2015年,北京大