HBase 1

RDB优化

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
【大数据不强调一致性,只需要最终一致即可】

-- Mysql优化:表分区,分表,分库,主从复制读写分离,集群
-- 分表
-- Mysql
-- Table_A 自增列 start=1, step=2
-- Table_B 自增列 start=2, step=2

-- 读写分离
-- 主从
-- MyCat
-- 增删改:写 => 主库 强一致性:InnoDB
-- 查询:读 => 从库 弱一致性:MyISAM
-- 主从库之间 => 配置 binlog 实现主从复制,实现主从库的数据同步

-- 【缓存数据库:哨兵,集群】
-- Redis
-- 【定期】将关系型数据库中的数据【同步】至 Redis : 实时性(一致性)
-- 后端查询都从 Redis 走

-- 【HBase】
-- 吞(写)吐(读)
-- 吞:HLog,WAL
-- 吐:MS(MemStore),StoreFile,BlockCache,BloomFilter

Master

1
2
3
4
5
6
-- 1、是HBase集群中的主节点,可以配置多个,用来实现HA
-- 2、处理元数据的变更
-- 3、监控RegionServer(心跳)
-- 4、负责RegionServer的负载均衡
-- 5、处理RegionServer故障转移(根据HLog)
-- 6、通过ZooKeeper发布自己的位置给客户端

RegionServer

1
2
3
4
5
6
7
8
9
10
11
12
13
·RegionServer辅助管理维护Region,负责存储HBase实际数据
·一个RegionServer包含一个WAL,一个BlockCache(读缓存)和多个Region
·一个Region由多个存储区(Store)组成,每个Store包含一个MemStore
·一个StoreFile对应于一个HFile和一个列族
·HFile和WAL作为序列化文件保存在HDFS上
·Client和RegionServer交互
功能:
·负责管理HBase的实际数据
·处理分配给他的Region
·刷新缓存到HDFS
·维护HLog
·执行Compaction
·负责处理Region分片

Region和Table关系

1
2
3
4
·单个Table(表)被分区成大小大致相同的Region
·Region是HBase集群分布数据的最小单位
·Region被分配给集群中的RegionServer
·一个Region只能分配给一个RegionServer

Hbase DDL操作

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
# List the namespaces
list_namespace
# Example output (presumably from the author's cluster; note it includes the built-in default and hbase namespaces):
# default
# hbase
# hbase_test

# List all tables
list

# List the tables in the given namespace (the counterpart of a database)
list_namespace_tables "hbase_test"

# Create the table. The two forms below are alternatives: the same table cannot be created twice.
# BLOOMFILTER: ROW, ROWCOL or NONE
# TTL: expressed in seconds
# Short form, every column family keeps its default attributes:
# create "hbase_test:student_info","base","score"
# Explicit form. The trailing comma after the first family hash continues the statement,
# so both column families are passed to a single create call.
create "hbase_test:student_info",{NAME => 'base',BLOOMFILTER => 'ROW', IN_MEMORY => 'false', VERSIONS => '1', KEEP_DELETED_CELLS => 'TRUE', DATA_BLOCK_ENCODING => 'NONE', COMPRESSION => 'NONE', TTL => 'FOREVER', MIN_VERSIONS => '0', BLOCKCACHE => 'true', BLOCKSIZE => '65536', REPLICATION_SCOPE => '0'},
{NAME => 'score',BLOOMFILTER => 'ROWCOL', IN_MEMORY => 'false', VERSIONS => '3', KEEP_DELETED_CELLS => 'FALSE', DATA_BLOCK_ENCODING => 'NONE', COMPRESSION => 'NONE', TTL => '86400', MIN_VERSIONS => '0', BLOCKCACHE => 'true', BLOCKSIZE => '655360', REPLICATION_SCOPE => '0'}

# Show the table definition (`desc` is the short alias of `describe`)
describe "hbase_test:student_info"
# Table hbase_test:student_info is ENABLED
# hbase_test:student_info
# COLUMN FAMILIES DESCRIPTION
# {NAME => 'base', BLOOMFILTER => 'ROW', IN_MEMORY => 'false', VERSIONS => '1', KEEP_DELETED_CELLS => 'TRUE', DATA_BLOCK_ENCODING => 'NONE', COMPRESSION => 'NONE
', TTL => 'FOREVER', MIN_VERSIONS => '0', BLOCKCACHE => 'true', BLOCKSIZE => '65536', REPLICATION_SCOPE => '0'}
# {NAME => 'score', BLOOMFILTER => 'ROWCOL', IN_MEMORY => 'false', VERSIONS => '3', KEEP_DELETED_CELLS => 'FALSE', DATA_BLOCK_ENCODING => 'NONE', COMPRESSION =>
'NONE', TTL => '86400 SECONDS (1 DAY)', MIN_VERSIONS => '0', BLOCKCACHE => 'true', BLOCKSIZE => '655360', REPLICATION_SCOPE => '0'}

# Inspect or change the table state
is_enabled "hbase_test:student_info" # is the table enabled?
is_disabled "hbase_test:student_info" # is the table disabled?
enable "hbase_test:student_info" # enable the table
disable "hbase_test:student_info" # disable the table

# Drop the table: only a disabled table can be dropped, so disable it first
disable "hbase_test:student_info"
drop "hbase_test:student_info"

Hbase DML操作

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
# Insert data
# General form: table, row key, column family:qualifier, value
put "ns:tn","rowkey","cf:cn","value"
# Sample rows 1-3: base holds name/age/gender, score holds the hive/hbase marks
put "hbase_test:student_info","1","base:name","张老三"
put "hbase_test:student_info","1","base:age",22
put "hbase_test:student_info","1","base:gender","男"
put "hbase_test:student_info","1","score:hive",72
put "hbase_test:student_info","1","score:hbase",88

put "hbase_test:student_info","2","base:name","张老二"
put "hbase_test:student_info","2","base:age",26
put "hbase_test:student_info","2","base:gender","女"
put "hbase_test:student_info","2","score:hive",66
put "hbase_test:student_info","2","score:hbase",35
put "hbase_test:student_info","3","base:name","张老大"
put "hbase_test:student_info","3","base:age",35
put "hbase_test:student_info","3","base:gender","男"
put "hbase_test:student_info","3","score:hive",89
put "hbase_test:student_info","3","score:hbase",90

# Delete data
delete "hbase_test:student_info","1","base:name" # delete a single cell
deleteall "hbase_test:student_info","2" # delete an entire row
# ROWPREFIXFILTER deletes every row whose key starts with the given prefix;
# CACHE sets how many deletes are batched per request to the server
deleteall "hbase_test:student_info",{ROWPREFIXFILTER=>"TS(时间戳)|STR",CACHE=>100}
truncate "hbase_test:student_info" # remove all rows; the table itself is recreated empty

# Scan the whole table
scan "hbase_test:student_info"
# Read the value of one column (general forms of get and scan)
get "ns:tn","rowkey","cf:cn"
scan "ns:tn"
get "hbase_test:student_info","1" # fetch one row
get "hbase_test:student_info","1","base" # fetch one column family of a row
get "hbase_test:student_info","1","base:age" # fetch a single column (family:qualifier) of a row

# Counters: increment a cell by N
incr "[namespace:]TABLE","ROW_KEY","sc:sn",N
# incr "hbase_test:student_info","4","score:count",2
# Read the current value of a counter cell
get_counter "[namespace:]TABLE","ROW_KEY","cf:cn"
# get_counter "hbase_test:student_info","4","score:count"

# Import data with ImportTsv
# General form; the quoted placeholders stand for, in order:
# field separator, column mapping, namespace:table name, input file path
hbase org.apache.hadoop.hbase.mapreduce.ImportTsv \
-Dimporttsv.separator="分隔符" \
-Dimporttsv.columns="字段映射关系" \
"命名空间:表名字" \
"文件路径"

# Example: load a comma-separated local file; HBASE_ROW_KEY marks the field used as the row key
hbase org.apache.hadoop.hbase.mapreduce.ImportTsv \
-Dimporttsv.separator="," \
-Dimporttsv.columns=HBASE_ROW_KEY,base:name,base:age,score:hive,score:hbase \
hbase_test:student_info \
file:///root/hbase/students_for_import.csv

# hive 表映射 hbase 表
-- Hive external table backed by the existing HBase table hbase_test:student_info.
-- Columns pair up positionally with "hbase.columns.mapping": stu_id takes the
-- row key (:key); the rest map to base:name, base:age, score:hive, score:hbase.
CREATE EXTERNAL TABLE yb12211_2.student_from_hbase (
    stu_id      INT,
    stu_name    STRING,
    stu_age     INT,
    score_hive  INT,
    score_hbase INT
)
STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
WITH SERDEPROPERTIES (
    "hbase.columns.mapping" = ":key,base:name,base:age,score:hive,score:hbase"
)
TBLPROPERTIES (
    "hbase.table.name" = "hbase_test:student_info"
);

HBase 1
https://leaf-domain.gitee.io/2025/03/22/bigdata/hbase/RDB优化/
作者
叶域
发布于
2025年3月22日
许可协议