"""BeautifulSoup: removing attributes from an HTML document.

Blog post published 2017-07-14.  The post presents three helpers:

1. ``_remove_all_attrs`` -- remove all attributes from the document.
2. ``_remove_all_attrs_except`` -- remove all attributes except those on
   certain tags (``<a>`` and ``<img>`` by default).
3. ``_remove_all_attrs_except_saving`` -- as above, but on those tags keep
   only specific attributes (``href`` and ``src`` by default).

Every helper mutates the document it is given and returns that same object.
"""

from typing import TYPE_CHECKING

if TYPE_CHECKING:
    # Used for annotations only: the helpers rely solely on ``find_all`` of
    # the document and on ``name`` / ``attrs`` of the tags it yields.
    from bs4 import BeautifulSoup


def _as_name_set(names):
    """Return *names* as a frozenset, treating a lone string as one name."""
    if isinstance(names, str):
        # Guard against frozenset("img") silently becoming {"i", "m", "g"}.
        return frozenset((names,))
    return frozenset(names)


# --- Remove all attributes from an HTML document ---------------------------

def _remove_all_attrs(soup: "BeautifulSoup") -> "BeautifulSoup":
    """Remove every attribute from every tag in *soup*.

    Args:
        soup: Parsed document; modified in place.

    Returns:
        The same *soup* object.
    """
    for tag in soup.find_all(True):  # ``True`` selects every tag
        tag.attrs = {}
    return soup


# --- Remove all attributes except on some tags (<a>, <img>) ----------------

def _remove_all_attrs_except(
    soup: "BeautifulSoup", *, whitelist=("a", "img")
) -> "BeautifulSoup":
    """Remove all attributes except those on whitelisted tags.

    Args:
        soup: Parsed document; modified in place.
        whitelist: Tag names whose attributes are left untouched.  Either an
            iterable of names or a single name.  Defaults to ``a`` and
            ``img``.

    Returns:
        The same *soup* object.
    """
    protected = _as_name_set(whitelist)  # built once, outside the loop
    for tag in soup.find_all(True):
        if tag.name not in protected:
            tag.attrs = {}
    return soup


# --- Remove all attributes except specific attributes (href, src) ----------
# --- on some tags (<a>, <img>) ---------------------------------------------

def _remove_all_attrs_except_saving(
    soup: "BeautifulSoup",
    *,
    whitelist=("a", "img"),
    keep_attrs=("src", "href"),
) -> "BeautifulSoup":
    """Remove all attributes, saving only *keep_attrs* on whitelisted tags.

    Tags whose name is not in *whitelist* lose every attribute.  Tags whose
    name is in *whitelist* keep only the attributes named in *keep_attrs*.

    Args:
        soup: Parsed document; modified in place.
        whitelist: Tag names that may keep attributes.  Either an iterable
            of names or a single name.  Defaults to ``a`` and ``img``.
        keep_attrs: Attribute names preserved on whitelisted tags.  Either
            an iterable of names or a single name.  Defaults to ``src`` and
            ``href``.

    Returns:
        The same *soup* object.
    """
    protected = _as_name_set(whitelist)
    kept = _as_name_set(keep_attrs)
    for tag in soup.find_all(True):
        if tag.name not in protected:
            tag.attrs = {}
            continue
        # Iterate over a snapshot of the keys: deleting from a dict while
        # iterating it directly raises RuntimeError.  Deleting in place
        # (rather than rebinding) keeps the tag's original attrs mapping.
        for attr in list(tag.attrs):
            if attr not in kept:
                del tag.attrs[attr]
    return soup


# Author's note from the original post: readers are welcome to follow the
# WeChat official account "Python大师兄".