我需要读取一个文本文件,从中提取出现最频繁的 IP 地址,并统计它们各自出现的次数。
def anaylse_log(parameter):
    """Read the sample log file and return its lines.

    Args:
        parameter: Not used by this function; kept so existing calls that
            pass an argument keep working.

    Returns:
        list[str]: Every line of ``sample_log_1 test.txt`` in file order,
        line endings included.

    Raises:
        OSError: If the log file cannot be opened.
    """
    # The with-block closes the handle even if reading fails.
    with open("sample_log_1 test.txt", "r") as myfile:
        iPdata = myfile.readlines()
    # Hand the lines back so the caller can actually use them.
    return iPdata
# Module-level containers used by the log-analysis code.
mydict = dict()  # tally that extract_log fills in
ipAddress = list()
item_list = list()  # later rebuilt from mydict.items()
result_file = list()
counter = tuple()  # starts out as an empty tuple
def extract_log(myfile):
    """Tally how many times each IP address occurs in the log lines.

    The IP address is taken to be the first whitespace-separated field of
    each line. Counts are accumulated in the module-level ``mydict``.

    Args:
        myfile: An iterable of text lines (an open file or a list of
            strings). It is not closed here; that is left to whoever
            opened it.

    Returns:
        dict: ``mydict`` itself, mapping IP address -> occurrence count.
    """
    for line in myfile:
        fields = line.split()
        if not fields:
            # Blank line: nothing to count.
            continue
        ip = fields[0]
        # Key the tally on the IP address (first field). Use a separate
        # local name so the string never shadows a list and gets .append()ed.
        mydict[ip] = mydict.get(ip, 0) + 1
    # Return only after every line has been counted.
    return mydict
def find_most_frequent(maximum, iPdata):
    """Search the log file for ``maximum``; record it if no line contains it.

    Args:
        maximum: Value to look for. It is compared as ``str(maximum)``
            against each line of the file ``sample_log_1 text``.
        iPdata: Not used by this function; kept so existing calls keep
            working.

    Returns:
        ``maximum`` as soon as a line containing it is found. Otherwise
        ``str(maximum)`` is written to ``resultss.csv`` and None is returned.

    Raises:
        OSError: If either file cannot be opened.

    NOTE(review): the file name read here ("sample_log_1 text") differs from
    the "sample_log_1 test.txt" opened in anaylse_log -- confirm which one is
    intended.
    NOTE(review): the result file is written only when no line matches;
    confirm that this is the intended condition.
    """
    needle = str(maximum)
    with open("sample_log_1 text", "r") as myfile:
        for text in myfile:
            if needle in text:
                return maximum
    with open("resultss.csv", "w") as out:
        # Write the text form of the value; ``maximum`` is data, not a
        # callable, so it must not be invoked.
        out.write(needle)
    return None
# Turn the tally into (key, count) pairs ordered by count, highest first.
# sorted() is stable, so keys with equal counts keep their insertion order.
item_list = sorted(mydict.items(), key=lambda pair: pair[1], reverse=True)
# The most frequent key heads the sorted list; None when nothing was counted.
# (A dict is not callable, so ``mydict()`` would raise TypeError here.)
maximum = item_list[0][0] if item_list else None
def main(myfile=None, mydict=None, iPdata=None):
    """Create (or truncate) the results file ``resultss.csv``.

    Args:
        myfile: Not used by this function.
        mydict: Not used by this function.
        iPdata: Not used by this function.

    All three parameters default to None so that the bare ``main()`` call
    below no longer fails with a missing-argument TypeError.

    Raises:
        OSError: If the results file cannot be opened for writing.
    """
    # Opening in "w" mode creates/empties the file; the with-block makes
    # sure the handle is closed again instead of being leaked.
    with open("resultss.csv", "w"):
        pass


# Run only when executed as a script, not when the module is imported.
if __name__ == "__main__":
    main()
为了能在这里编辑代码,我不得不调整了代码的缩进,希望这样没问题,你可以直接运行它。我已经卡在这个问题上一段时间了,我认为我调用的函数太多了。
假设您的日志文件内容类似于:
15.25.7.3
25.25.2.5
25.25.2.5
115.25.7.3
215.25.7.3
25.25.2.5
下面是一个统计每个 IP 出现次数的简单方法:
# Tally how many times each IP address appears in ip.log.
ip_count_dict = {}

# The with-block closes the file as soon as its contents have been read.
with open('ip.log', 'r') as f:
    ip_file = f.read()

# If the addresses are separated by commas, split on commas instead:
# ip_list = ip_file.split(',')
# Here they are separated by newlines ("\n"), one address per line.
ip_list = ip_file.splitlines()

for ip in ip_list:
    ip = ip.strip()
    if not ip:
        # Skip blank lines so they are not counted as the address "".
        continue
    # get() supplies 0 the first time an address is seen.
    ip_count_dict[ip] = ip_count_dict.get(ip, 0) + 1

print(ip_count_dict)
输出:{'15.25.7.3':1,'25.25.2.5':3,'115.25.7.3':1,'215.25.7.3':1}
与其在日志循环时手动计数IP,不如尝试以下操作:
from collections import Counter

# Read the CSV log; the with-block closes the file once it has been read.
with open("resultss.csv") as log_file:
    # splitlines() breaks on real line endings. Splitting on the letter "n"
    # would cut entries apart wherever an "n" occurs and never at a newline.
    log_entries = log_file.read().splitlines()

# The IP address is the first comma-separated column; blank lines are skipped
# so they do not show up as an empty address.
ip_list = [log.split(",")[0] for log in log_entries if log.strip()]
counts = Counter(ip_list)
print(counts)
这适用于CSV文件格式,如:
10.10.10.1,asdf,31
5.9.7.11,aajbczxz,54
5.9.7.11,zzzzz,2