当前位置: 代码迷 >> 综合 >> python数据分析十五:pandas矩阵数据的删除重复数据和重命名(duplicated、rename)
  详细解决方案

python数据分析十五:pandas矩阵数据的删除重复数据和重命名(duplicated、rename)

热度:6   发布时间:2023-12-27 05:59:55.0
# -*- coding: utf-8 -*-
"""pandas tutorial: dropping duplicates, mapping values, replacing values
and renaming index/column labels.

The script is a flat walkthrough; every ``print`` is followed by the output
it is expected to produce, kept as comments.
"""
import pandas as pd
from pandas import Series, DataFrame
import numpy as np

# ---------------------------------------------------------------------------
# Removing duplicate rows
# ---------------------------------------------------------------------------
data = DataFrame({'k1': ['one'] * 4 + ['two'] * 3,
                  'k2': [1, 1, 2, 3, 3, 4, 4]})
print(data)
#   k1  k2
# 0  one   1
# 1  one   1
# 2  one   2
# 3  one   3
# 4  two   3
# 5  two   4
# 6  two   4

# Flag every row that is a duplicate of an earlier row.
print(data.duplicated())
# 0    False
# 1     True
# 2    False
# 3    False
# 4    False
# 5    False
# 6     True
# dtype: bool

# Return a frame with the duplicate rows dropped.
print(data.drop_duplicates())
#   k1  k2
# 0  one   1
# 2  one   2
# 3  one   3
# 4  two   3
# 5  two   4

# Add a new column.
data['v1'] = range(7)
print(data)
#   k1  k2  v1
# 0  one   1   0
# 1  one   1   1
# 2  one   2   2
# 3  one   3   3
# 4  two   3   4
# 5  two   4   5
# 6  two   4   6

# Deduplicate on a subset of columns only.
print(data.drop_duplicates(['k1']))
#     k1  k2  v1
# 0  one   1   0
# 4  two   3   4

# keep='last' retains the last occurrence of each duplicate group
# (this replaces the old ``take_last=True`` argument).
print(data.drop_duplicates(['k1', 'k2'], keep='last'))
#   k1  k2  v1
# 1  one   1   1
# 2  one   2   2
# 3  one   3   3
# 4  two   3   4
# 6  two   4   6

# ---------------------------------------------------------------------------
# Transforming data with a mapping or a function
# ---------------------------------------------------------------------------
data = DataFrame({'key': ['北京', '上海', '广州', '深圳', '上海', '广州', '深圳'],
                  'value': [11, 22, 33, 44, 66, 77, 88]})

# Lookup table used to derive the new column from 'key'.
ys = {'北京': '烤鸭', '上海': '娘娘腔', '广州': '叶问', '深圳': '腾讯'}
data['logo'] = data['key'].map(ys)
print(data)
#  key  value logo
# 0  北京     11   烤鸭
# 1  上海     22  娘娘腔
# 2  广州     33   叶问
# 3  深圳     44   腾讯
# 4  上海     66  娘娘腔
# 5  广州     77   叶问
# 6  深圳     88   腾讯

# Same result using a lambda instead of passing the dict directly.
data['logo'] = data['key'].map(lambda x: ys[x])
print(data)
#  key  value logo
# 0  北京     11   烤鸭
# 1  上海     22  娘娘腔
# 2  广州     33   叶问
# 3  深圳     44   腾讯
# 4  上海     66  娘娘腔
# 5  广州     77   叶问
# 6  深圳     88   腾讯

# ---------------------------------------------------------------------------
# Replacing values with replace
# ---------------------------------------------------------------------------
data2 = Series([1, 2, 3, 5, 4, 6, 7])
print(data2)
# One value -> one replacement.
print(data2.replace([1], '换1'))
# List of values -> list of replacements (matched by position).
print(data2.replace([1, 2], ['换1', '换2']))
# Dict of {old: new} pairs.
print(data2.replace({2: 'huan', 3: 0}))

# ---------------------------------------------------------------------------
# Renaming index and column labels
# ---------------------------------------------------------------------------
data = DataFrame(np.arange(12).reshape(3, 4),
                 index=['Hao', 'Haong', 'Bo'],
                 columns=list('name'))
print(data)

# Map a function over the index labels; pass ``str.upper`` itself
# (no parentheses) so that it is called once per label.
print(data.index.map(str.upper))
# Index(['HAO', 'HAONG', 'BO'], dtype='object')

# Assign the mapped labels back to the index (modifies ``data`` in place).
data.index = data.index.map(str.upper)
print(data)
#    n  a   m   e
# HAO    0  1   2   3
# HAONG  4  5   6   7
# BO     8  9  10  11

# rename returns a new frame; functions are applied to every label.
print(data.rename(index=str.title, columns=str.upper))
#      N  A   M   E
# Hao    0  1   2   3
# Haong  4  5   6   7
# Bo     8  9  10  11

# rename with dicts changes only the listed labels.
print(data.rename(index={'HAO': '郝'}, columns={'n': 'NN'}))
#    NN  a   m   e
# 郝       0  1   2   3
# HAONG   4  5   6   7
# BO      8  9  10  11