我想提取传给 Dygraph 构造函数的参数(主要是那一长行日期数据),因为它们就是图表上的数据点。到目前为止,我抓取的都是其他类型的标签,用 findAll 函数很容易就能拿到;但这一次,我似乎需要更深入地处理。
<script type="text/javascript">
  // Render the "Daily Subs" chart into the #DailySubscribers element.
  // The data is passed inline as CSV text: a header row followed by one
  // "date,count" row per day, each row terminated by a "\n" escape.
  g = new Dygraph(
    // containing div
    document.getElementById('DailySubscribers'),
    // CSV or path to a CSV file.
    // Each piece must end in "\n"; without it the concatenation yields one
    // long line instead of newline-separated rows. The pieces stay on a
    // single source line, separated by `" + "`.
    "Date,Daily Subs\n" + "2016-07-31,1\n" + "2016-08-01,1\n" + "2016-08-02,0\n" + "2016-08-03,1\n" + "2016-08-04,0\n" + "2016-08-05,2\n" + "2016-08-06,10\n" + "2016-08-07,5\n" + "2016-08-08,1\n" + "2016-08-09,1\n" + "2016-08-10,2\n" + "2016-08-11,0\n" + "2016-08-12,0\n" + "2016-08-13,0\n" + "2016-08-14,0\n" + "2016-08-15,1\n" + "2016-08-16,1\n" + "2016-08-17,0\n" + "2016-08-18,0\n" + "2016-08-19,1\n" + "2016-08-20,0\n" + "2016-08-21,1\n" + "2016-08-22,0\n" + "2016-08-23,0\n" + "2016-08-24,7\n" + "2016-08-25,2\n" + "2016-08-26,0\n" + "2016-08-27,1\n" + "2016-08-28,1\n" + "2016-08-29,0\n" + "2016-08-30,0\n" + "2016-08-31,0\n" + "2016-09-01,0\n" + "2016-09-02,0\n" + "2016-09-03,0\n" + "2016-09-04,0\n" + "2016-09-05,1\n" + "2016-09-06,0\n" + "2016-09-07,0\n" + "2016-09-08,0\n", {
      // Chart options.
      title: 'Daily Subs Gained for UCZx2vmIsQQLwzqwGWUbfqQA ',
      legend: 'always',
      ylabel: 'Daily Subs',
      titleHeight: 20,
      labelsDivStyles: {
        'background': 'none',
        'margin-top': '-10px',
        'text-align': 'right',
      },
      strokeWidth: 1,
      colors: ["#dd2323",
               "#dd2323",
               "#dd2323",
               "#dd2323"],
      labelsKMB: true,
      maxNumberWidth: 10
    }
  );
</script>
下面是一种快速的解决办法(比较暴力,但确实可行):
# Parse the page and print it so the embedded <script> text can be inspected.
bs = BeautifulSoup(data, 'html.parser')
print(bs)

# Brute-force extraction of the "date,count" rows from the Dygraph CSV argument.
# In the page source the row terminator is the two-character JavaScript escape
# (backslash + "n"), not a real newline, so every pattern below spells it "\\n".
values = (
    str(bs)
    .split('"Date,Daily Subs\\n" +')[1]  # drop everything up to and including the CSV header piece
    .split(', {')[0]                      # stop where the options object begins
    .replace('\\n" + "', " ")             # join adjacent string pieces with a single space
    .replace('\\n', " ")                  # terminator of the last row
    .replace('"', "")                     # remaining opening/closing quotes
    .split(" ")
)[1:-1]  # the first and last items are empty strings left by the surrounding spaces
print(values)
输出:
<script type="text/javascript">
g = new Dygraph(
// containing div
document.getElementById('DailySubscribers'),
// CSV or path to a CSV file.
"Date,Daily Subs\n" + "2016-07-31,1\n" + "2016-08-01,1\n" + "2016-08-02,0\n" + "2016-08-03,1\n" + "2016-08-04,0\n" + "2016-08-05,2\n" + "2016-08-06,10\n" + "2016-08-07,5\n" + "2016-08-08,1\n" + "2016-08-09,1\n" + "2016-08-10,2\n" + "2016-08-11,0\n" + "2016-08-12,0\n" + "2016-08-13,0\n" + "2016-08-14,0\n" + "2016-08-15,1\n" + "2016-08-16,1\n" + "2016-08-17,0\n" + "2016-08-18,0\n" + "2016-08-19,1\n" + "2016-08-20,0\n" + "2016-08-21,1\n" + "2016-08-22,0\n" + "2016-08-23,0\n" + "2016-08-24,7\n" + "2016-08-25,2\n" + "2016-08-26,0\n" + "2016-08-27,1\n" + "2016-08-28,1\n" + "2016-08-29,0\n" + "2016-08-30,0\n" + "2016-08-31,0\n" + "2016-09-01,0\n" + "2016-09-02,0\n" + "2016-09-03,0\n" + "2016-09-04,0\n" + "2016-09-05,1\n" + "2016-09-06,0\n" + "2016-09-07,0\n" + "2016-09-08,0\n", {
title: 'Daily Subs Gained for UCZx2vmIsQQLwzqwGWUbfqQA ',
legend: 'always',
ylabel: 'Daily Subs',
titleHeight: 20,
labelsDivStyles: {
'background': 'none',
'margin-top': '-10px',
'text-align': 'right',
},
strokeWidth: 1,
colors: ["#dd2323",
"#dd2323",
"#dd2323",
"#dd2323"],
labelsKMB: true,
maxNumberWidth: 10
}
);
</script>
['2016-07-31,1', '2016-08-01,1', '2016-08-02,0', '2016-08-03,1', '2016-08-04,0', '2016-08-05,2', '2016-08-06,10', '2016-08-07,5', '2016-08-08,1', '2016-08-09,1', '2016-08-10,2', '2016-08-11,0', '2016-08-12,0', '2016-08-13,0', '2016-08-14,0', '2016-08-15,1', '2016-08-16,1', '2016-08-17,0', '2016-08-18,0', '2016-08-19,1', '2016-08-20,0', '2016-08-21,1', '2016-08-22,0', '2016-08-23,0', '2016-08-24,7', '2016-08-25,2', '2016-08-26,0', '2016-08-27,1', '2016-08-28,1', '2016-08-29,0', '2016-08-30,0', '2016-08-31,0', '2016-09-01,0', '2016-09-02,0', '2016-09-03,0', '2016-09-04,0', '2016-09-05,1', '2016-09-06,0', '2016-09-07,0', '2016-09-08,0']