Python csv 导出的内容带有二进制标志(b 前缀),如何删除?



我试图将一个测试表格导出为 csv。下面的代码可以运行,但是当我打开 test1.csv 文件时,有些行带有 b 标志(看起来像二进制标志)。即使我删除了 encode('utf8'),仍然会得到 b 标志。如何删除这些 b 标志,得到一个干净的 csv 文件?

这是整个代码:

from urllib.request import urlopen as uReq
from bs4 import BeautifulSoup as soup
import csv
# Download the page that contains the table to export.
my_url = 'http://www.igobychad.com/test_table.html'
uClient = uReq(my_url)
page_html = uClient.read()
uClient.close()
page_soup = soup(page_html, "html.parser")
# NOTE: the result of this first lookup is discarded; the next line repeats
# the same lookup and keeps the result.
page_soup.find("table", { "id" : "Emp_sum" })
table = page_soup.find("table", { "id" : "Emp_sum" })
# NOTE: `cells` is reassigned on every iteration and never read afterwards.
for row in table.findAll("tr"):
   cells = row.findAll("td")
# Text of every <th> in the table; written below as a single CSV header row.
headers = [header.text for header in table.find_all('th')]
rows = []
for row in table.find_all('tr'):
    # .encode('utf8') turns each cell's text into a bytes object. csv.writer
    # stringifies non-string values with str(), and str() of bytes is its
    # repr -- this is where the b'...' values in the output come from.
    rows.append([val.text.encode('utf8') for val in row.find_all('td')])
with open('test1.csv', 'w') as f:
       writer = csv.writer(f)
       writer.writerow(headers)
       # Rows that produced no <td> cells are empty lists and are skipped.
       writer.writerows(row for row in rows if row)

结果如下所示:

Category,June2016,Apr.2017,May2017,June2017,Change from:May2017-June2017,Estatus,CN pop,Clf,Prate,Em,Ep ratio,Unem,Un rate
b''
"b'253,397'","b'254,588'","b'254,767'","b'254,957'",b'190'
"b'158,889'","b'160,213'","b'159,784'","b'160,145'",b'361'

我冒昧地对您的代码做了一些修改,使 csv 输出尽量符合原表格的格式。由于 csv.writer 每次只能写入一行数据,我不得不逐行拼出每一行的内容来匹配表格格式。这段代码并不完整,但您可以在此基础上调整成您需要的格式。

from urllib.request import urlopen as uReq
from bs4 import BeautifulSoup as soup
import csv
# Export the "Emp_sum" HTML table to test1.csv without b'...' artifacts.

# Number of leading <th> cells that are real column headers.
COLUMN_HEADER_COUNT = 6
# The <th> cells from this position on are the labels of the data rows
# (the one <th> between the column headers and these labels is skipped).
FIRST_ROW_LABEL = 7
# rows[0] and rows[1] are not paired with any row label, so data starts here.
FIRST_DATA_ROW = 2


def _to_number(text):
    """Convert a cell such as '253,397' or '62.7' to float.

    The comma in this table is a thousands separator (254,957 - 254,767 is
    reported as 190), so it is removed rather than turned into a decimal
    point. Cells that are not numeric are returned unchanged as text.
    """
    try:
        return float(text.replace(',', ''))
    except ValueError:
        return text


my_url = 'http://www.igobychad.com/test_table.html'
# The context manager closes the connection even if read() raises.
with uReq(my_url) as uClient:
    page_html = uClient.read()

page_soup = soup(page_html, "html.parser")
table = page_soup.find("table", {"id": "Emp_sum"})

headers = [header.text for header in table.find_all('th')]

# Keep the cell text as str (no .encode()): csv.writer stringifies bytes with
# str(), which is what produced the b'...' values.
rows = []
for row in table.find_all('tr'):
    rows.append([val.text for val in row.find_all('td') if val])

# newline='' lets the csv module control line endings; the encoding belongs
# on the file object, not on the individual cells.
with open('test1.csv', 'w', newline='', encoding='utf-8') as f:
    writer = csv.writer(f)
    # Your table headers are only the first 6 elements of "headers" so we write them
    writer.writerow(headers[:COLUMN_HEADER_COUNT])
    # Next we compose each remaining row: its label comes from "headers" and
    # its values from the matching entry of "rows".
    for label, cells in zip(headers[FIRST_ROW_LABEL:], rows[FIRST_DATA_ROW:]):
        writer.writerow([label] + [_to_number(cell) for cell in cells])

最新更新