title: 数据去重
date: 2022-04-16 00:00:00
tags: Python SQL EXCEL
description: 数据去重 Python SQL EXCEL
整行去重
Python
简单列表数据,推荐使用集合特性。
data = [1, 3, 3, '3', '2', '3', 1, 3, 2, '1', '2', '3', 1, 2, 3, '1', '2', '3']
def fun1():
"""循环去重"""
new_data = list()
for item in data:
if item not in new_data:
new_data.append(item)
return new_data
def fun2():
"""集合特性set()"""
return list({item for item in data})
def fun3():
"""集合特性set()"""
return list(set(data))
def fun4():
"""字典keys()方式"""
return list({}.fromkeys(data).keys())
if __name__ == '__main__':
print(fun1())
print(fun2())
print(fun3())
print(fun4())
执行结果
[1, 3, '3', '2', 2, '1']
[1, 2, 3, '2', '1', '3']
[1, 2, 3, '2', '1', '3']
[1, 3, '3', '2', 2, '1']
"""字典keys()方式"""
print({}.fromkeys([1,2,3]))
{1: None, 2: None, 3: None}
print({1: None, 2: None, 3: None}.keys())
dict_keys([1, 2, 3])
print({1: None, 2: None, 3: None}.values())
dict_values([None, None, None])
print(list({1: None, 2: None, 3: None}.values()))
[None, None, None]
复杂列表数据,常见为数据库数据查询结果。
data = [
{'a': 'a'},
{'a': 'a'},
{'b': 'a'},
{'a': 'a'},
{'c': 'd'},
]
def fun1():
"""循环去重"""
new_data = list()
for item in data:
if item not in new_data:
new_data.append(item)
return new_data
# 结果 [{'a': 'a'}, {'b': 'a'}, {'c': 'd'}]
def fun2():
"""集合特性set()"""
return list(set(data))
# 报错 TypeError: unhashable type: 'dict'
def fun3():
"""字典keys()方式"""
return list({}.fromkeys(data).keys())
# 报错 TypeError: unhashable type: 'dict'
SQL
DISTINCT
SELECT DISTINCT t1.AlbumId, t1.Title,
FROM Album t1
left join Artist t2
order by t1.AlbumId ;
-- 注意:排序必须使用 DISTINCT 内的字段
SELECT DISTINCT t1.AlbumId, t1.Title,
FROM Album t1
left join Artist t2
order by t1.Title ;
SQL 错误 [1]: [SQLITE_ERROR] SQL error or missing database (near "FROM": syntax error)
EXCEL
数据–>删除重复值
按照某一列去重
或者是自定义筛选数据
Python
"""key 'a' 相同则取 key 'b' 最大的"""
data = [
{'a': 1, 'b': 2, 'c': 3},
{'a': 3, 'b': 2, 'c': 3},
{'a': 2, 'b': 2, 'c': 3},
{'a': 1, 'b': 5, 'c': 8},
{'a': 1, 'b': 0, 'c': 8},
]
def fun1():
l1 = list()
for da in data:
if not l1:
l1.append(da)
l2 = list({line['a'] for line in l1})
if da['a'] in l2:
for line in l1:
if da['a'] == line['a'] and da['b'] > line['b']:
l1.remove(line)
l1.append(da)
else:
l1.append(da)
return l1
if __name__ == '__main__':
for item in fun1():
print(item)
执行结果
{'a': 3, 'b': 2, 'c': 3}
{'a': 2, 'b': 2, 'c': 3}
{'a': 1, 'b': 5, 'c': 8}
发表回复
要发表评论,您必须先登录。