我正在尝试为从 setlist.fm 抓取的每场演出的各个段落(set1、set2、encore)分别生成一组键值对,而不只是得到一个没有分段的歌曲列表。我弄不明白的是:如何访问标示演出段落的元素,然后把它后面的歌曲依次追加到该段落下,直到遇到下一个段落为止。这是我正在访问的 HTML:来自 setlist.fm 的 HTML 代码
目前,我的 JSON 文件看起来像这样:
{
"artist": "Sample Artist",
"day": 20,
"month": 1,
"songs": ["Song A","Song B","Song C"
],
"tour": "2000 U.S. Tour",
"venue": "Sample Venue, Atlanta, GA, USA",
"year": 2000
},
而我想让它看起来像这样:
"artist": "Sample Artist",
"day": 20,
"month": 1,
"songs": ["Song A","Song B","Song C"
],
"set1": ["Song A"],
"set2": ["Song B"],
"encore":["Song C"],
"tour": "2000 U.S. Tour",
"venue": "Sample Venue, Atlanta, GA, USA",
"year": 2000
},
这是我用来生成 JSON 中歌曲列表的代码,但我不确定如何分别获取各个段落(set1、set2、encore)的歌曲:
# Scrape the concert page at `url` and store the collected fields in concerts[i].
# NOTE(review): the indentation of this snippet was lost when it was pasted, and
# the `try:` below has no visible `except`/`finally` -- the handler was presumably
# removed together with the other code that was cut for the question.
def getConcertData(i, url, concerts):
try:
# getSoup is defined elsewhere; presumably it returns a parsed BeautifulSoup
# document for the page -- confirm against its definition.
soup = getSoup(url)
# Each lookup below takes the first matching <div>; the [0] index fails when
# the page has no element with that class.
dateBlock = soup.find_all("div", {"class": "dateBlock"})[0]
infoContainer = soup.find_all("div", {"class": "infoContainer"})[0]
headLineDiv = infoContainer.find_all("div", {"class": "setlistHeadline"})[0]
setlistDiv = soup.find_all("div", {"class": "setlistList"})[0]
#removed unrelated code for question
# NOTE(review): artist, year, month, day, venue and tour are not assigned in the
# visible code; presumably the removed section derives them -- verify.
# Collect every song title in page order, with no separation between sets.
songs = []
for a in setlistDiv.find_all("a", {"class": "songLabel"}):
songs.append(a.getText().strip())
# Progress line in the form YYYY.MM.DD: venue (month and day zero-padded to 2).
print(str(year)+"."+str(month).zfill(2)+"."+str(day).zfill(2)+": "+venue)
data = dict()
data["artist"] = artist
data["year"] = year
data["month"] = month
data["day"] = day
data["venue"] = venue
data["tour"] = tour
data["songs"] = songs
# Placeholders for the per-set song lists; they are not populated yet.
# data["set1"] = 0
# data["set2"] = 0
# data["encore"] = 0
# The result is stored in the caller-supplied container under key/index i.
concerts[i] = data
如果我没理解错的话,你是想按段落(Set 1、Set 2、Encore)对歌曲进行"分组":
import requests
from bs4 import BeautifulSoup
# Scrape one setlist.fm page and group its songs by the section header
# ("Set 1", "Set 2", "Encore", ...) that precedes them in the setlist.
url = "https://www.setlist.fm/setlist/phish/2022/ruoff-home-mortgage-music-center-noblesville-in-3b4e5a7.html"

# Without a timeout, requests waits indefinitely on an unresponsive server.
soup = BeautifulSoup(requests.get(url, timeout=30).content, "html.parser")

out = {}
out["artist"] = soup.h1.a.get_text(strip=True)
out["month"] = soup.select_one(".month").text
out["day"] = soup.select_one(".day").text
out["year"] = soup.select_one(".year").text
out["venue"] = soup.select_one('a[href*="/venue/"]').text

for li in soup.select(".setlistList li.song"):
    song_name = li.a.get_text(strip=True)

    # Every song goes into the flat list, whether or not it has a section.
    out.setdefault("songs", []).append(song_name)

    # The nearest preceding highlighted <li> is the header of the section this
    # song belongs to. find_previous returns None when no such header exists
    # before the song, so guard before reading its text.
    header = li.find_previous("li", class_="highlight")
    if header is None:
        continue

    # Header text looks like "Set 1:"; drop the trailing colon and spaces.
    section = header.get_text(strip=True).strip(" :")
    out.setdefault(section, []).append(song_name)

print(out)
打印:
{
"artist": "Phish",
"month": "Jun",
"day": "5",
"year": "2022",
"venue": "Ruoff Home Mortgage Music Center, Noblesville, IN, USA",
"songs": [
"While My Guitar Gently Weeps",
"My Soul",
"Rift",
"Horn",
"Wombat",
"Evolve",
"Guyute",
"Limb by Limb",
"Mercury",
"The Moma Dance",
"Sand",
"Sigma Oasis",
"Twenty Years Later",
"The Mango Song",
"Rise/Come Together",
"Free",
"Grind",
"Slave to the Traffic Light",
],
"Set 1": [
"While My Guitar Gently Weeps",
"My Soul",
"Rift",
"Horn",
"Wombat",
"Evolve",
"Guyute",
"Limb by Limb",
"Mercury",
"The Moma Dance",
],
"Set 2": [
"Sand",
"Sigma Oasis",
"Twenty Years Later",
"The Mango Song",
"Rise/Come Together",
"Free",
],
"Encore": ["Grind", "Slave to the Traffic Light"],
}