# Configuration
## This credential must be set when using the gdpi.sada_gdpi_click database
set /user/gdpi/public/sada_gdpi_click.password=GWDPI-SH;
## Work around the problem of too many splits
set mapreduce.job.max.split.locations = 10000;
## Set the number of reduce tasks
SET mapred.reduce.tasks = 80;
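Instead of pinning a fixed count, Hive can also derive the reducer count from the input size; a minimal sketch using the standard properties (the values are illustrative):
SET mapred.reduce.tasks=-1; -- -1 lets Hive choose the count automatically
SET hive.exec.reducers.bytes.per.reducer=256000000;
SET hive.exec.reducers.max=999;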
## Set container memory for map and reduce tasks
SET mapreduce.map.memory.mb=3048;
SET mapreduce.reduce.memory.mb=7000;
## Set JVM heap memory (each heap must fit inside its container size configured above, e.g. roughly 80% of it)
SET mapreduce.map.java.opts='-Xmx2400m';
SET mapreduce.reduce.java.opts='-Xmx5600m';
## Configure map joins (disable automatic conversion; honor explicit MAPJOIN hints)
SET hive.auto.convert.join=false;
SET hive.ignore.mapjoin.hint=false;
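With hive.ignore.mapjoin.hint=false the optimizer honors explicit hints, so a small table can be forced onto the map side; a hedged example with hypothetical tables fact and dim:
-- fact and dim are placeholder tables; the hint streams dim to every mapper
SELECT /*+ MAPJOIN(d) */ f.srcip, d.label
FROM fact f JOIN dim d ON (f.id = d.id);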
## Enable or disable output compression
SET hive.exec.compress.output=true;
## Set the compression codec (see the Hive configuration reference)
SET mapred.output.compression.codec=org.apache.hadoop.io.compress.SnappyCodec;
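To confirm these settings took effect, SET with a bare property name prints its current value:
SET hive.exec.compress.output;
SET mapred.output.compression.codec;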
## Load a custom UDF JAR
ADD JAR ${udf_path};
## Register a custom UDF (see the custom UDF library)
CREATE TEMPORARY FUNCTION my_dev AS 'udf.tag.ua.DeviceTypeUDF';
For Hive's built-in UDFs, see: <https://cwiki.apache.org/confluence/display/Hive/LanguageManual+UDF>
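Once registered, a temporary function is called like any built-in; a minimal sketch, assuming a hypothetical table web_log with a ua column:
-- web_log is a placeholder table name
SELECT my_dev(ua) AS device_type FROM web_log LIMIT 10;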
# Basic commands
## Enter the Hive CLI (run from the shell prompt)
hive
## Create a database
create database e_pingan WITH DBPROPERTIES ('creator' = 'yangbl', 'date' = '20150430');
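The DBPROPERTIES set above can be verified afterwards:
-- prints the creator/date properties along with the database location
DESCRIBE DATABASE EXTENDED e_pingan;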
## List databases
SHOW DATABASES;
## Switch to a database
use e_pingan;
## List tables
show tables;
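To inspect a single table's columns, storage format, and HDFS location, for example the touna source table used below:
DESCRIBE FORMATTED touna;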
## Create a usable data table (load the UDF JAR, register the functions, then CTAS)
add jar /home/xuji/yang/hive/udf.jar;
create temporary function my_decode as 'udf.FullDecodeUDF';
create temporary function my_dev as 'udf.DeviceUDF';
create table zhangwj.touna_used
row format delimited fields terminated by '\t'
location '/user/xuji/yang/test/touna'
as
select srcip, datelabel, loadstamp,
  my_decode(url) as url,
  my_decode(cast(unbase64(ref) as string)) as ref,
  case
    when cast(unbase64(ref) as string) like '%.baidu.%/%wd=%' then my_decode(parse_url(cast(unbase64(ref) as string), 'QUERY', 'wd'))
    when cast(unbase64(ref) as string) like '%.baidu.%/%word=%' then my_decode(parse_url(cast(unbase64(ref) as string), 'QUERY', 'word'))
    when cast(unbase64(ref) as string) like '%.baidu.%/%kw=%' then my_decode(parse_url(cast(unbase64(ref) as string), 'QUERY', 'kw'))
    when cast(unbase64(ref) as string) like '%.haosou.%/%q=%' then my_decode(parse_url(cast(unbase64(ref) as string), 'QUERY', 'q'))
    when cast(unbase64(ref) as string) like '%.sogou.%/%query=%' then my_decode(parse_url(cast(unbase64(ref) as string), 'QUERY', 'query'))
    when cast(unbase64(ref) as string) like '%.bing.%/%q=%' then my_decode(parse_url(cast(unbase64(ref) as string), 'QUERY', 'q'))
    when cast(unbase64(ref) as string) like '%.google.%/%q=%' then my_decode(parse_url(cast(unbase64(ref) as string), 'QUERY', 'q'))
    else null
  end as kw,
  my_dev(cast(unbase64(ua) as string)) as ua
from touna;
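A quick sanity check once the CTAS above finishes:
-- spot-checks the decoded url, extracted keyword, and device type
SELECT srcip, url, kw, ua FROM zhangwj.touna_used LIMIT 10;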
## Save query results to a local directory
insert overwrite local directory '/home/xuji/zhangwj/task' row format delimited fields terminated by '\t'
select search_keyword,sum(pv) as kpv from yangbl.pingan_data_kw_20150302 group by search_keyword order by kpv desc limit 20000;
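Dropping the LOCAL keyword writes the result to an HDFS directory instead; the output path here is an assumption, and without a ROW FORMAT clause Hive's default ^A field delimiter applies:
insert overwrite directory '/user/xuji/zhangwj/task'
select search_keyword,sum(pv) as kpv from yangbl.pingan_data_kw_20150302 group by search_keyword order by kpv desc limit 20000;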
# Using Hive scripts
See the individual scripts; common invocation patterns follow for reference.
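A script is typically run with hive -f, an inline query with hive -e, and a variable such as ${udf_path} above can be supplied with --hivevar; the paths are illustrative:
# task.hql is a placeholder script path
hive -f /home/xuji/yang/hive/task.hql
hive -e 'show tables;'
hive --hivevar udf_path=/home/xuji/yang/hive/udf.jar -f task.hql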
# Using shell scripts
See the individual scripts.
# Writing and running MapReduce programs
See the individual programs; a typical launch command follows.
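A compiled MapReduce job is typically launched with hadoop jar; the jar path, main class, and input/output paths here are placeholders:
hadoop jar /home/xuji/yang/mr/myjob.jar com.example.MyJob /user/xuji/yang/input /user/xuji/yang/output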