Python脚本,以突出显示两个CSV文件中的差异(而不是添加)



我有一个基本脚本,可以比较两个CSV文件,并将更改记录在一个新文件中(daily-diff.CSV(:

CSV(导入1(:

价格
地址
101 Main Street 50000英镑
102 Main Street 100000英镑

这是我发现在数据库中最有效/最简单的方法。使用Python内置数据库sqlite3

cat import_1.csv 
Address,Price
101 Main Street,"£50,000"
102 Main Street,"£100,000"
cat import_2.csv 
Address,Price
101 Main Street,"£55,000"
102 Main Street,"£100,000"
103 Main Street,"£200,000"

import csv
import sqlite3
con_sqlite = sqlite3.connect(":memory:")
cur_sqlite = con_sqlite.cursor()
cur_sqlite.execute("create table import_1 (address varchar, price varchar)")
cur_sqlite.execute("create table import_2 (address varchar, price varchar)")
with open("import_1.csv") as csv_1:
reader = csv.reader(csv_1)
next(reader)
for row in reader:
cur_sqlite.execute("insert into import_1 values(?, ?)", row)

with open("import_2.csv") as csv_2:
reader = csv.reader(csv_2)
next(reader)
for row in reader:
cur_sqlite.execute("insert into import_2 values(?, ?)", row)
with open('daily-diff.csv', 'w') as out_file:
cur_sqlite.execute("select i1.address, i1.price AS old_price, i2.price AS new_price from import_1 AS i1 join import_2 AS i2 on i1.address = i2.address where i1.price != i2.price")
diff_list = cur_sqlite.fetchall()
writer = csv.writer(out_file)
writer.writerow(["Address", "Old Price", "New Price"])
writer.writerows(diff_list)
cat daily-diff.csv 
Address,Old Price,New Price
101 Main Street,"£50,000","£55,000"

下面是一个如何使用标准库工具实现这一点的工作示例:

import csv
from pathlib import Path
from typing import Any, TypeAlias

AnyDict: TypeAlias = dict[str, Any]
FIRST_COLUMN_NAME = "Address"

def load_dict_from_csv(file_path: Path, **reader_kwargs: Any) -> AnyDict:
"""
Reads CSV file and returns contents as a dictionary.
Keys are the contents of the first column,
values are lists of contents in the remaining columns.
"""
with file_path.open("r") as file:
reader = csv.reader(file, **reader_kwargs)
return {row[0]: row[1:] for row in reader}

def dict_intersect_diff(dict_1: AnyDict, dict_2: AnyDict) -> AnyDict:
"""
Returns the difference of two dictionaries as a new dictionary.
Only keeps identical keys. (Discards keys not present in both.)
"""
common_keys = tuple(key for key in dict_1.keys() if key in dict_2.keys())
return {
key: (dict_1[key], dict_2[key])
for key in common_keys
if dict_1[key] != dict_2[key]
}

def write_dict_diff_to_csv(
dict_diff: AnyDict,
title_row: list[str],
file_path: Path,
**writer_kwargs: Any,
) -> None:
"""
Writes a "difference dictionary" to a csv file.

Creates two columns for each column in the title row,
one for the "old" values and one for the "new" values.
"""
diff_title = [title_row[0]]
for col in title_row[1:]:
diff_title.append(f"{col} old")
diff_title.append(f"{col} new")
with file_path.open("w") as file:
writer = csv.writer(file, **writer_kwargs)
writer.writerow(diff_title)
for key, (values_old, values_new) in dict_diff.items():
writer.writerow([key] + values_old + values_new)

def main() -> None:
file_path_old = Path("01.csv")
file_path_new = Path("02.csv")
file_path_diff = Path("diff.csv")
dict_old = load_dict_from_csv(file_path_old)
dict_new = load_dict_from_csv(file_path_new)
title_row = [FIRST_COLUMN_NAME] + dict_old[FIRST_COLUMN_NAME]
dict_diff = dict_intersect_diff(dict_old, dict_new)
write_dict_diff_to_csv(dict_diff, title_row, file_path_diff)

if __name__ == '__main__':
main()

将其与示例CSV输入文件一起运行会得到所需的结果。

注意:这假设第一列具有唯一的值(即,在一个CSV文件中没有两行具有相同的"地址"。

这也适用于附加列(除了"价格"(。


如果你要这样做的话,可能建议使用特定的工具来达到Pandas这样的目的(如评论中所建议的(;"按比例";。但是对于这个简单的用例,类似上面的脚本应该是可以的。

希望这能有所帮助。

相关内容

  • 没有找到相关文章

最新更新