**我试图解析这个XML链接,但每当我试图通过使用getroot()函数获得根时,它说'NoneType'对象不可调用。我还想知道如何遍历这个XML并打印entry标记内的所有字符串标记。**
import pandas as pd
import urllib.request
from urllib.request import urlopen
import json
import xml.etree.ElementTree as ET
from bs4 import BeautifulSoup
import requests
# Reproduction script for the question: fetch the dataset endpoint and try
# to treat the response as XML.
url = 'https://www.omicsdi.org/ws/dataset/pride/PXD002885?debug=false'  # this is the XML url you can refer to
print(url)
# getting some output as: b'{"database":"Pride","file_versions":[{"files":{"Mgf":["ftp://ftp.pride.ebi.ac.uk/pride/da...........
response = requests.get(url)
soup = BeautifulSoup(response.content, 'lxml')
# getting output as: <html><body><p>{"database":"Pride","file_versions":[{"files"................
print(soup)
# getting error: 'NoneType' object is not callable
# NOTE: getroot() is an xml.etree.ElementTree.ElementTree method, not a
# BeautifulSoup one; presumably bs4 resolves the unknown attribute as a tag
# lookup that yields None, which is then called.
myroot = soup.getroot()
响应内容 `b'{"database":"Pride"...` 是JSON,不是XML,所以你不需要BeautifulSoup,直接用 `response.json()` 解析即可。
# The endpoint returns JSON, so decode it directly instead of parsing markup.
# A timeout keeps the script from hanging forever on an unresponsive server.
response = requests.get(url, timeout=30)
# Fail loudly on HTTP errors rather than trying to decode an error page.
response.raise_for_status()
data = response.json()
print(data["database"])  # 'Pride'
# "file_versions" is a list of objects, each carrying a "files" mapping.
for fv in data["file_versions"]:
    print(fv["files"])