页面加载中...
集合(set)是 Python 中的一种数据类型,它是一个无序的、元素不重复的容器(注意:集合不是序列,不支持索引和切片)。集合中的元素必须是不可变类型(如数字、字符串、元组),但集合本身是可变的。
集合的主要特点:无序性(集合中的元素没有固定的顺序)、唯一性(集合中不能有重复的元素)、可变性(可以添加或删除元素)、元素不可变(集合中的元素必须是不可变类型)。
Python 提供了多种创建集合的方法:
# Ways to create a set. Printed element order may differ from the
# comments because sets are unordered.
# 1. Create a set with a brace literal
my_set = {1, 2, 3, 4, 5}
print(my_set) # Output: {1, 2, 3, 4, 5}
# 2. Create an empty set with set()
empty_set = set()
print(empty_set) # Output: set()
# 3. Build a set from a list (duplicates are dropped automatically)
list_data = [1, 2, 2, 3, 3, 4]
set_from_list = set(list_data)
print(set_from_list) # Output: {1, 2, 3, 4}
# 4. Build a set from a string (one element per distinct character)
string_set = set("hello")
print(string_set) # Output: {'h', 'e', 'l', 'o'}
# 5. Build a set from a tuple
tuple_set = set((1, 2, 3, 4))
print(tuple_set) # 输出: {1, 2, 3, 4}
注意:不能使用 {} 创建空集合,因为 {} 会创建一个空字典。要创建空集合必须使用 set()。
# Adding elements: add() for a single element, update() for many.
# Add a single element with add()
fruits = {"apple", "banana"}
fruits.add("orange")
print(fruits) # Output: {'apple', 'banana', 'orange'}
# Adding an element that is already present has no effect
fruits.add("apple")
print(fruits) # Output: {'apple', 'banana', 'orange'}
# Add several elements with update()
numbers = {1, 2, 3}
numbers.update([4, 5, 6])
print(numbers) # Output: {1, 2, 3, 4, 5, 6}
# update() accepts several iterables at once; the string "ab" contributes
# its characters 'a' and 'b'
numbers.update([7, 8], {9, 10}, "ab")
print(numbers) # Output: {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 'a', 'b'}
# Remove a specific element with remove()
# Removing elements: remove(), discard(), pop() and clear().
colors = {"red", "green", "blue", "yellow"}
colors.remove("blue")
print(colors)  # Output: {'red', 'green', 'yellow'}
# remove() raises KeyError when the element is absent
try:
    colors.remove("purple")
except KeyError:
    print("元素不存在")
# Remove a specific element with discard()
colors = {"red", "green", "blue", "yellow"}
colors.discard("blue")
print(colors)  # Output: {'red', 'green', 'yellow'}
# discard() does nothing when the element is absent
colors.discard("purple")  # no error
print(colors)  # Output: {'red', 'green', 'yellow'}
# pop() removes and returns an arbitrary element. Which one is an
# implementation detail; it is not a random choice.
numbers = {1, 2, 3, 4, 5}
removed = numbers.pop()
print(f"删除的元素: {removed}")
print(f"剩余元素: {numbers}")
# pop() on an empty set raises KeyError
empty_set = set()
try:
    empty_set.pop()
except KeyError:
    print("空集合无法 pop")
# Empty a set with clear()
animals = {"cat", "dog", "bird"}
print(f"清空前: {animals}")
animals.clear()
print(f"清空后: {animals}")  # Output: set()
# Check membership with in / not in
# Membership tests, plus a rough list-vs-set lookup timing.
fruits = {"apple", "banana", "orange"}
print("apple" in fruits)  # Output: True
print("grape" in fruits)  # Output: False
print("grape" not in fruits)  # Output: True
# Membership tests are faster on a set than on a list: a list is scanned
# element by element, while a set does a hash lookup.
import time
# Build large containers holding the same values
large_list = list(range(100000))
large_set = set(range(100000))
# Time a list lookup (worst case: the target is the last element).
# perf_counter() is used because time.time() can be too coarse to
# register a single lookup and then reports 0.
start = time.perf_counter()
found_in_list = 99999 in large_list
list_time = time.perf_counter() - start
# Time the same lookup on the set
start = time.perf_counter()
found_in_set = 99999 in large_set
set_time = time.perf_counter() - start
print(f"列表查找时间: {list_time:.6f}秒")
print(f"集合查找时间: {set_time:.6f}秒")
# Get the number of elements in a set
# Set size with len() and copying with copy().
numbers = {1, 2, 3, 4, 5}
print(len(numbers)) # Output: 5
# Copy a set
original = {1, 2, 3}
copied = original.copy()
print(copied) # Output: {1, 2, 3}
# The copy is a distinct object, so changing it leaves the original alone
print(original is copied) # Output: False
copied.add(4)
print(f"原集合: {original}") # Output: {1, 2, 3}
print(f"复制集合: {copied}") # 输出: {1, 2, 3, 4}
集合支持数学上的集合运算,这是集合最强大的功能之一。
# Union and intersection, each shown in method form and operator form.
# Union with the union() method
set1 = {1, 2, 3}
set2 = {3, 4, 5}
set3 = {5, 6, 7}
# Union of two sets
union_result = set1.union(set2)
print(union_result) # Output: {1, 2, 3, 4, 5}
# Union of several sets
union_multiple = set1.union(set2, set3)
print(union_multiple) # Output: {1, 2, 3, 4, 5, 6, 7}
# Union with the | operator
set1 = {1, 2, 3}
set2 = {3, 4, 5}
union_result = set1 | set2
print(union_result) # Output: {1, 2, 3, 4, 5}
# Union of several sets
set3 = {5, 6, 7}
union_multiple = set1 | set2 | set3
print(union_multiple) # Output: {1, 2, 3, 4, 5, 6, 7}
# Intersection with the intersection() method
set1 = {1, 2, 3, 4}
set2 = {3, 4, 5, 6}
set3 = {4, 5, 6, 7}
# Intersection of two sets
intersection_result = set1.intersection(set2)
print(intersection_result) # Output: {3, 4}
# Intersection of several sets
intersection_multiple = set1.intersection(set2, set3)
print(intersection_multiple) # Output: {4}
# Intersection with the & operator
set1 = {1, 2, 3, 4}
set2 = {3, 4, 5, 6}
intersection_result = set1 & set2
print(intersection_result) # Output: {3, 4}
# Intersection of several sets
set3 = {4, 5, 6, 7}
intersection_multiple = set1 & set2 & set3
print(intersection_multiple) # Output: {4}
# Difference with the difference() method
# Difference and symmetric difference, in method form and operator form.
set1 = {1, 2, 3, 4, 5}
set2 = {4, 5, 6, 7}
# set1 - set2: elements in set1 that are not in set2
diff_result = set1.difference(set2)
print(diff_result) # Output: {1, 2, 3}
# set2 - set1: elements in set2 that are not in set1
diff_result2 = set2.difference(set1)
print(diff_result2) # Output: {6, 7}
# Difference with the - operator
set1 = {1, 2, 3, 4, 5}
set2 = {4, 5, 6, 7}
diff_result = set1 - set2
print(diff_result) # Output: {1, 2, 3}
diff_result2 = set2 - set1
print(diff_result2) # Output: {6, 7}
# Symmetric difference with the symmetric_difference() method
set1 = {1, 2, 3, 4}
set2 = {3, 4, 5, 6}
# Symmetric difference: elements that are in exactly one of the two sets
sym_diff = set1.symmetric_difference(set2)
print(sym_diff) # Output: {1, 2, 5, 6}
# Symmetric difference with the ^ operator
set1 = {1, 2, 3, 4}
set2 = {3, 4, 5, 6}
sym_diff = set1 ^ set2
print(sym_diff) # Output: {1, 2, 5, 6}
# Subset tests
# Subset, superset and disjointness checks.
set1 = {1, 2}
set2 = {1, 2, 3, 4}
# issubset() method or the <= operator
print(set1.issubset(set2)) # Output: True
print(set1 <= set2) # Output: True
# Proper (strict) subset test
print(set1 < set2) # Output: True
print(set2 < set2) # Output: False (a set is not a proper subset of itself)
# Superset test
print(set2.issuperset(set1)) # Output: True
print(set2 >= set1) # Output: True
# Proper superset test
print(set2 > set1) # Output: True
# Check whether two sets are disjoint (share no elements)
set1 = {1, 2, 3}
set2 = {4, 5, 6}
set3 = {3, 4, 5}
print(set1.isdisjoint(set2)) # Output: True (disjoint)
print(set1.isdisjoint(set3)) # 输出: False(有交集)
类似于列表推导式,Python 也支持集合推导式:
# Set comprehensions. Printed element order may differ from the comments
# because sets are unordered.
# Basic set comprehension
squares = {x**2 for x in range(1, 6)}
print(squares) # Output: {1, 4, 9, 16, 25}
# Set comprehension with a condition
even_squares = {x**2 for x in range(1, 11) if x % 2 == 0}
print(even_squares) # Output: {4, 16, 36, 64, 100}
# Collect the distinct letters of a string (the space is filtered out by isalpha())
text = "hello world"
unique_chars = {char.lower() for char in text if char.isalpha()}
print(unique_chars) # Output: {'h', 'e', 'l', 'o', 'w', 'r', 'd'}
# Nested set comprehension: flattens the rows of a matrix into one set
matrix = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
all_elements = {element for row in matrix for element in row}
print(all_elements) # 输出: {1, 2, 3, 4, 5, 6, 7, 8, 9}
冻结集合(frozenset)是不可变的集合类型,可以作为字典的键或其他集合的元素:
# frozenset: an immutable set, usable as a dict key or as a set element.
# Create frozensets
frozen1 = frozenset([1, 2, 3, 4])
frozen2 = frozenset("hello")
print(frozen1) # Output: frozenset({1, 2, 3, 4})
print(frozen2) # Output: frozenset({'h', 'e', 'l', 'o'})
# frozensets support set operations such as & and |; the result is a frozenset
frozen3 = frozenset([3, 4, 5, 6])
print(frozen1 & frozen3) # Output: frozenset({3, 4})
print(frozen1 | frozen3) # Output: frozenset({1, 2, 3, 4, 5, 6})
# A frozenset can be used as a dictionary key
dict_with_frozen_keys = {
frozenset([1, 2]): "first",
frozenset([3, 4]): "second"
}
print(dict_with_frozen_keys)
# A frozenset can be an element of another set
set_of_frozensets = {
frozenset([1, 2]),
frozenset([3, 4]),
frozenset([5, 6])
}
print(set_of_frozensets)
# De-duplicate a list
def remove_duplicates(lst):
    """Return the distinct items of ``lst`` as a new list.

    The items pass through a set, so they must be hashable and the order
    of the returned list is unspecified.
    """
    return list(set(lst))
# Demo input with repeated values
numbers = [1, 2, 2, 3, 3, 4, 5, 5]
unique_numbers = remove_duplicates(numbers)
print(unique_numbers) # Output: [1, 2, 3, 4, 5] (order is not guaranteed: the items pass through a set)
# De-duplicate while keeping the original order
def remove_duplicates_ordered(lst):
    """Return the distinct items of ``lst`` in first-seen order.

    Items must be hashable. A set records what has already been emitted,
    so each membership check is a hash lookup instead of a list scan.
    """
    seen = set()
    result = []
    for item in lst:
        if item not in seen:
            seen.add(item)
            result.append(item)
    return result
# Demo: order-preserving de-duplication, then interests shared between users.
ordered_unique = remove_duplicates_ordered(numbers)
print(ordered_unique) # Output: [1, 2, 3, 4, 5]
# Find the interests that users have in common
user1_interests = {"编程", "音乐", "电影", "旅行"}
user2_interests = {"音乐", "电影", "读书", "运动"}
user3_interests = {"编程", "电影", "游戏", "旅行"}
# Interests shared by all three users
common_interests = user1_interests & user2_interests & user3_interests
print(f"共同兴趣: {common_interests}") # Output: {'电影'}
# Interests shared by at least two users: union of the pairwise intersections
shared_by_two = (user1_interests & user2_interests) | (user1_interests & user3_interests) | (user2_interests & user3_interests)
print(f"至少两人共享: {shared_by_two}") # Output: {'编程', '音乐', '电影', '旅行'}
# User permission management system
class User:
    """A named user holding a set of permissions."""

    def __init__(self, name, permissions):
        """Store ``name`` and copy the iterable ``permissions`` into a set."""
        self.name = name
        self.permissions = set(permissions)

    def has_permission(self, permission):
        """Return True if ``permission`` is in this user's permission set."""
        return permission in self.permissions

    def add_permission(self, permission):
        """Grant ``permission``; granting one already held changes nothing."""
        self.permissions.add(permission)

    def remove_permission(self, permission):
        """Revoke ``permission``.

        Uses ``discard()``, so revoking a permission the user does not
        hold is silently ignored rather than raising ``KeyError``.
        """
        self.permissions.discard(permission)

    def get_common_permissions(self, other_user):
        """Return a new set of the permissions held by both users."""
        return self.permissions & other_user.permissions
# Demo of the User class: create users, check and compare permissions.
# Create users
admin = User("Admin", ["read", "write", "delete", "execute"])
editor = User("Editor", ["read", "write"])
viewer = User("Viewer", ["read"])
# Check permissions
print(admin.has_permission("delete")) # Output: True
print(editor.has_permission("delete")) # Output: False
# Find the permissions two users have in common
common = admin.get_common_permissions(editor)
print(f"Admin和Editor的共同权限: {common}") # Output: {'read', 'write'}
# Analyze website visit data
def analyze_visitors(daily_visitors):
    """Print a summary of per-day visitor data.

    Args:
        daily_visitors: Mapping of a day label to an iterable of hashable
            visitor ids seen on that day.

    Prints three lines: the number of distinct visitors, the visitors seen
    on every day, and the visitors seen on exactly one day. For an empty
    mapping the two visitor sets are printed as empty sets. Returns None.
    """
    from collections import Counter

    # One set per day; repeat visits within a day collapse to one entry.
    day_sets = [set(visitors) for visitors in daily_visitors.values()]

    all_visitors = set().union(*day_sets)
    print(f"总访客数: {len(all_visitors)}")

    # set.intersection() builds a NEW set. Starting from day_sets[0] and
    # applying ``&=`` would shrink the first day's set in place and corrupt
    # the single-day computation below.
    loyal_visitors = set.intersection(*day_sets) if day_sets else set()
    print(f"忠实访客: {loyal_visitors}")

    # A visitor is "single-day" when they appear in exactly one day's set.
    days_seen = Counter(
        visitor for day_set in day_sets for visitor in day_set
    )
    single_day_visitors = {
        visitor for visitor, days in days_seen.items() if days == 1
    }
    print(f"单日访客: {single_day_visitors}")
# Sample data: day name -> list of visitor ids seen on that day
visitor_data = {
"Monday": ["user1", "user2", "user3", "user4"],
"Tuesday": ["user2", "user3", "user5", "user6"],
"Wednesday": ["user1", "user3", "user4", "user7"]
}
analyze_visitors(visitor_data)
# Demonstration of the time complexity of set operations
import time
import random
def time_operation(operation, data_size):
    """Time a single membership test against a list or a set.

    Args:
        operation: ``"membership_list"`` to search a list, or
            ``"membership_set"`` to search a set.
        data_size: Number of consecutive integers, starting at 0, stored
            in the container. Must be at least 1.

    Returns:
        Elapsed time of the lookup in seconds, as a float.

    Raises:
        ValueError: If ``operation`` is not one of the two names above, or
            (from ``random.randint``) if ``data_size`` is less than 1.
    """
    # Build only the container that is actually searched.
    if operation == "membership_list":
        container = list(range(data_size))
    elif operation == "membership_set":
        container = set(range(data_size))
    else:
        raise ValueError(f"unknown operation: {operation!r}")

    # Pick a random element to look up; it is always present.
    target = random.randint(0, data_size - 1)

    # perf_counter() has the resolution needed to time one lookup;
    # time.time() can be too coarse and report 0.
    start = time.perf_counter()
    _ = target in container
    return time.perf_counter() - start
# Compare list and set lookup times across several container sizes
sizes = [1000, 10000, 100000]
for size in sizes:
    list_time = time_operation("membership_list", size)
    set_time = time_operation("membership_set", size)
    print(f"数据大小: {size}")
    print(f"列表查找时间: {list_time:.6f}秒")
    print(f"集合查找时间: {set_time:.6f}秒")
    # A set lookup can finish within one clock tick and be measured as 0;
    # report that instead of raising ZeroDivisionError.
    if set_time > 0:
        print(f"性能提升: {list_time/set_time:.1f}倍")
    else:
        print("性能提升: 无法计算(集合查找耗时过短)")
    print("-" * 30)
# 1. When duplicates must be removed
def get_unique_words(text):
    """Return the set of distinct whitespace-separated words in ``text``.

    The text is lowercased first, so words differing only in case count
    as the same word.
    """
    words = text.lower().split()
    return set(words)
# 2. When fast membership tests are needed
def has_common_elements(list1, list2):
    """Return True if ``list1`` and ``list2`` share at least one item.

    ``list1`` is converted to a set once so that each test against an item
    of ``list2`` is a hash lookup. Items must be hashable.
    """
    set1 = set(list1)
    return any(item in set1 for item in list2)
# 3. When set algebra is needed
def find_missing_permissions(required, current):
    """Return the set of items in ``required`` that are not in ``current``.

    Both arguments may be any iterables of hashable items.
    """
    return set(required) - set(current)
# 4. When the number of distinct elements is needed
def count_unique_visitors(log_entries):
    """Return the number of distinct ``'user_id'`` values in ``log_entries``.

    Each entry must support ``entry['user_id']``; an entry without that
    key raises ``KeyError``.
    """
    visitors = {entry['user_id'] for entry in log_entries}
    return len(visitors)
# 1. Set elements must be immutable types
# Common pitfalls when working with sets.
# Elements must be immutable: numbers, strings, tuples and frozensets are fine
valid_set = {1, "hello", (1, 2), frozenset([3, 4])}
# The following would raise an error:
# invalid_set = {[1, 2], {3, 4}} # lists and sets are mutable
# 2. Sets are unordered; do not rely on element order
numbers = {3, 1, 4, 1, 5, 9, 2, 6}
print(numbers) # Output order may differ
# 3. Creating an empty set
empty_set = set() # Correct
# empty_set = {} # Wrong: this creates an empty dict
# 4. Sets do not support indexing
my_set = {1, 2, 3}
# print(my_set[0]) # Error: sets do not support indexing
# To get an element out of a set, convert it to a list or iterate over it
first_element = next(iter(my_set)) # Grab an arbitrary element
print(first_element)
Python 集合是一种强大的数据结构,具有以下特点:无序性、唯一性、可变性,且元素必须是不可变类型。
通过掌握集合的这些特性和用法,你可以编写更高效、更简洁的 Python 代码。