python比较两个二进制文件_python三种方法判断文件是否为二进制文件
本文介紹三種判斷文件是否為二進制文件的方法;其中較準確的做法是把前兩種方法結合起來使用。
(1)方法一:
它首先檢查文件是否以BOM開頭;如果不是,則在文件最初的8192字節內查找零字節,找到零字節即視為二進制文件:
import codecs
# Sample path passed to is_binary_file() by the __main__ demo below.
file_path = "/home/ubuntu/zgd/ztest/_gs418_510txp_v6.6.2.7.stk.extracted/test"
#: BOMs to indicate that a file is a text file even if it contains zero bytes.
#: (UTF-16/UTF-32 encoded text legitimately contains NUL bytes.)
_TEXT_BOMS = (
    codecs.BOM_UTF16_BE,
    codecs.BOM_UTF16_LE,
    codecs.BOM_UTF32_BE,
    codecs.BOM_UTF32_LE,
    codecs.BOM_UTF8,
)
def is_binary_file(file_path):
    """Return True if the file at *file_path* looks like a binary file.

    Only the first 8192 bytes are inspected. The file is reported as binary
    when that prefix contains a NUL byte and does not start with one of the
    BOMs listed in ``_TEXT_BOMS``.

    :param file_path: path of the file to inspect
    :return: True if the file is considered binary, otherwise False
    """
    # The with-block closes the handle; no explicit close() is needed.
    with open(file_path, 'rb') as fh:
        initial_bytes = fh.read(8192)
    # startswith() accepts a tuple of prefixes, so one call checks every BOM.
    if initial_bytes.startswith(_TEXT_BOMS):
        return False
    return b'\0' in initial_bytes
if __name__ == "__main__":
    # print(...) with a single argument behaves the same on Python 2 and 3.
    print(is_binary_file(file_path))
上面is_binary_file()函數也可以改成下面的方式:
def is_binary_file(file_path):
    """Return True if the file at *file_path* looks like a binary file.

    Loop-based variant: only the first 8192 bytes are inspected. A file that
    starts with any BOM from ``_TEXT_BOMS`` is treated as text; otherwise it
    is binary when the inspected prefix contains a NUL byte.

    :param file_path: path of the file to inspect
    :return: True if the file is considered binary, otherwise False
    """
    with open(file_path, 'rb') as fh:
        initial_bytes = fh.read(8192)
    for bom in _TEXT_BOMS:
        if initial_bytes.startswith(bom):
            # A matching BOM settles the question. Merely skipping to the next
            # BOM would let a later, non-matching BOM fall through to the NUL
            # check and misreport BOM-prefixed text as binary.
            return False
    return b'\0' in initial_bytes
(2)方法二:
使用python-magic庫,根據文件的MIME類型判斷(下面的代碼需要先 import magic 和 import re):
pip install python-magic
def getFileType(ff):
    """Report whether the MIME type of *ff* marks it as a binary file.

    :param ff: file name (including path)
    :return: True if the MIME type returned by ``magic.from_file`` contains
        one of the keywords in ``mime_kw`` (case-insensitive); False if it
        does not, or if the lookup raised (the error is printed).
    """
    # executables, shared libraries, octet streams, object files
    mime_kw = 'x-executable|x-sharedlib|octet-stream|x-object'
    try:
        magic_mime = magic.from_file(ff, mime=True)
        return re.search(mime_kw, magic_mime, re.I) is not None
    except Exception as e:
        # "except X as e" is valid on Python 2.6+ and 3; e.message no longer
        # exists on Python 3, so print the exception text instead.
        print(str(e))
        # Return an explicit falsy result instead of falling off the end.
        return False
較好的方法是同時使用上述兩種方法,只要其中任一方法判定為二進制,就將文件視為二進制文件:
# -*- coding:utf-8 -*-
# @Author:zgd
# @time:2019/6/21
# @File:operateSystem.py
import magic
import re
import codecs
def is_binary_file_1(ff):
    '''
    Decide whether a file is binary by inspecting its leading bytes.

    Only the first 8192 bytes are read. The file is reported as binary when
    that prefix contains a NUL byte and does not start with a Unicode BOM.

    :param ff: file name (including path)
    :return: True if the file is considered binary, otherwise False
    '''
    #: BOMs to indicate that a file is a text file even if it contains zero bytes.
    TEXT_BOMS = (
        codecs.BOM_UTF16_BE,
        codecs.BOM_UTF16_LE,
        codecs.BOM_UTF32_BE,
        codecs.BOM_UTF32_LE,
        codecs.BOM_UTF8,
    )
    CHUNKSIZE = 8192
    # Open the path that was passed in (ff), not a module-level file_path
    # global; the with-block closes the handle on exit.
    with open(ff, 'rb') as fh:
        initial_bytes = fh.read(CHUNKSIZE)
    # startswith() accepts a tuple of prefixes, so one call checks every BOM.
    if initial_bytes.startswith(TEXT_BOMS):
        return False
    return b'\0' in initial_bytes
def is_binwary_file_2(ff):
    '''
    Decide whether a file is binary from the MIME type reported by magic.

    :param ff: file name (including path)
    :return: True if the MIME type returned by ``magic.from_file`` contains
        one of the keywords in ``mime_kw`` (case-insensitive); False if it
        does not, or if the lookup raised
    '''
    # executables, shared libraries, octet streams, object files
    mime_kw = 'x-executable|x-sharedlib|octet-stream|x-object'
    try:
        magic_mime = magic.from_file(ff, mime=True)
        return re.search(mime_kw, magic_mime, re.I) is not None
    except Exception:
        # Deliberate best-effort: any failure while probing the file is
        # reported as "not binary". (The Python 2-only "except Exception, e"
        # form is replaced; the bound name was never used.)
        return False
if __name__ == "__main__":
    file_path = "/home/ubuntu/zgd/ztest/_gs418_510txp_v6.6.2.7.stk.extracted/D0"
    # Run each detector once and reuse the results for the combined verdict.
    by_content = is_binary_file_1(file_path)
    by_magic = is_binwary_file_2(file_path)
    # print(...) with a single argument behaves the same on Python 2 and 3.
    print(by_content)
    print(by_magic)
    # The file counts as binary if either detector says so.
    print(any((by_content, by_magic)))
(3)方法三:
獲取ELF標識信息。
根據文件開頭是否為ELF頭(魔數為字節0x7F後跟"ELF")來判斷文件是否為ELF格式的二進制文件(下面的代碼需要先 import os 和 import stat,並準備好 logger):
# 判斷文件是否是elf文件
def is_ELFfile(filepath):
    """Return True if *filepath* is a regular file that starts with the ELF magic.

    :param filepath: path of the file to inspect
    :return: True when the first four bytes are 0x7F followed by ``ELF``;
        False when the path does not exist, is not a regular file, or starts
        with anything else (including files shorter than four bytes)
    :raises OSError, IOError: errors from ``os.stat`` / ``open`` are not
        caught and propagate to the caller
    """
    if not os.path.exists(filepath):
        logger.info('file path {} doesnot exits'.format(filepath))
        return False
    # os.stat() follows symlinks, so the mode describes the link target and
    # S_ISLNK can never be true here; testing S_ISREG alone is equivalent.
    if not stat.S_ISREG(os.stat(filepath).st_mode):
        return False
    with open(filepath, 'rb') as f:
        # Compare the raw bytes, including the leading 0x7F. Decoding bytes
        # 1-3 as text would accept headers such as b'XELF' and could raise
        # UnicodeDecodeError on arbitrary data.
        return f.read(4) == b'\x7fELF'
參考鏈接:
https://code.lotpdf.com/code-examples.net/zh-CN/q/db66d.html
https://oomake.com/question/153346
http://blog.donews.com/limodou/archive/2004/08/30/83538.aspx
https://blog.csdn.net/xc_zhou/article/details/86544346
總結
以上是生活随笔為你收集整理的python比较两个二进制文件_python三种方法判断文件是否为二进制文件的全部內容,希望文章能夠幫你解決所遇到的問題。
 
                            
                        - 上一篇: python 3.6.0新语法_详解Py
- 下一篇: sum 去重_Excel函数,用到什么学
