正则表达式解析json对象的第一个匹配



我有很多具有不同结构的javascript代码,但在每个js代码中,有多个具有相似结构的json。

我只想解析那些包含键值对 "@context": "https://schema.org" 的对象。

我已经写了下面这个模式,但它还会匹配到一些多余的javascript代码。我只想抓取json对象,仅此而已。有人能帮我吗?

({\s\S@context":\s"https://schema.org",[\s\S]+)

示例代码:

);
jQuery(function ($) {
$('.cst_CERTIFIED_DEALER .cstBtn').append('<img src="//pictures.dealer.com/p/pellegrinoautosalesllc/0869/88d3d9a6608ebd537372b5db5fdde7b1x.jpg" alt="" />').css({'background':'none'});
});
jQuery(function ($) {
var $ddcValueStatementHeader = $('.value-statement-header [data-widget-id="template-header1"]');
if($ddcValueStatementHeader.length) {
$ddcValueStatementHeader.append($(".cst_CARFAX")).find(".cstBtn").css('z-index','950');
} else {
$('.header-default[data-widget-id="template-header1"]').append($(".cst_CARFAX")).find(".cstBtn").css('z-index','950');
}
$(".cst_CARFAX .cstBtn").css('cursor','auto');
});
jQuery(function ($) {
$('.cst_CARFAX .cstBtn').append('<img src="/sites/p/pellegrinoautosalesllc/images/carfax-logo.png" alt="CARFAX" />').css({'background':'none'});
});
jQuery(function ($) {
var $ddcValueStatementHeader = $('.value-statement-header [data-widget-id="template-header1"]');
if($ddcValueStatementHeader.length) {
$ddcValueStatementHeader.append($(".cst_EDMUNDS_AWARD")).find(".cstBtn").css('z-index','950');
} else {
$('.header-default[data-widget-id="template-header1"]').append($(".cst_EDMUNDS_AWARD")).find(".cstBtn").css('z-index','950');
}
$(".cst_EDMUNDS_AWARD .cstBtn").css('cursor','auto');
});
jQuery(function ($) {
$('.cst_EDMUNDS_AWARD .cstBtn').append('<img src="//pictures.dealer.com/p/pellegrinoautosalesllc/0002/6f1c3979ac0afac9b8d12ffb90e5af73x.jpg" alt="" />').css({'background':'none'});
});
var el = document.createElement('script');
el.type = 'application/ld+json';
el.id = 'ddc-schemaorg-integration';
tpsSchemaJson = ( typeof tpsSchemaJson != 'undefined' && tpsSchemaJson instanceof Array ) ? tpsSchemaJson : [];
// Car specific data
var vehicleSchema = {
"@context": "https://schema.org",
"@type": "Car",
"description": document.head.querySelector("[name=description]") ? document.head.querySelector("[name=description]").content : "",
"vehicleModelDate": "2015",
"manufacturer": "Ram",
"model": "1500",
"sku": "08c765ea0a0e0a922cefdf66496c54cd",
"bodyType": "Truck Crew Cab",
"itemCondition": "used",
"url": location.origin + location.pathname,
"vehicleIdentificationNumber": "3C6RR7LT1FG710130",
"fuelEfficiency": ["16","23"],
"driveWheelConfiguration":"4x4",
"vehicleEngine": "V-8 cyl",
"color": "Bright White",
"vehicleInteriorColor": "Diesel Gray/Black",
"fuelType": "Regular Unleaded",
"mileageFromOdometer": "60455",
"vehicleTransmission": "8 speed automatic",
"name": "Ram 1500 Truck Crew Cab",
"image": "https://images.dealer.com/autodata/us/large_stockphoto-color/2015/USC50RMT11CB0/PW7.jpg",
"offers": {
"@type": "Offer",
"priceCurrency": "USD",
"price": "31000.0",
"availability": "http://schema.org/InStock"
}
};
tpsSchemaJson.push(vehicleSchema);
var hours = "[09:00 to 7:00pm-${pmtime}=Tu Mo Th, 09:00 to 4:00pm-${pmtime}=Sa, 09:00 to 5:00pm-${pmtime}=Fr We]".trim().replace(/]/g," ").replace(/[[][]/g,"").replace(/=/g, " ").replace(/, /g, ",").trim().split(",");
var social = [];
var social = "https://www.facebook.com/PellegrinoAuto,https://www.youtube.com/channel/UCsVaRr3q6TVBeiIfgByQo7g".replace(/"/g, "").trim().split(",");
tpsSchemaJson = ( typeof tpsSchemaJson != 'undefined' && tpsSchemaJson instanceof Array ) ? tpsSchemaJson : [];
var autodealer = {
"@context" : "http://schema.org",
"@type" : "AutoDealer",
"openingHours" : hours,
"name" : "Pellegrino Auto Sales",
"url" : location.origin,
"address": {
"@type": "PostalAddress",
"addressLocality": "Batavia",
"addressRegion": "NY",
"postalCode": "14020",
"streetAddress": "4060 Pearl St Rd"
},
"image": "https://pictures.dealer.com/p/pellegrinoautosalesllc/1186/6c3181b62e95f47569cab0f5772980ddx.jpg",
"hasMap": "https://www.google.com/maps/place/Pellegrino+Auto+Sales/@42.9944888,-78.2148906,17z/data=!3m1!4b1!4m5!3m4!1s0x89d3edd202106ad7:0xf37ec17084302960!8m2!3d42.9944888!4d-78.2126966",
"description": "Used car dealership in Batavia, NY carries a wide variety of quality and affordable pre-owned vehicles from top makers like Chevrolet, Ford, Nissan, Toyota and more. Apply online for car loans or browse inventory now!",
"logo": "https://pictures.dealer.com/p/pellegrinoautosalesllc/1627/907d4e642e3374952183d6026dc0d492x.jpg",
"sameAs" : social,
"geo" : {
"@type" : "GeoCoordinates",
"latitude" : "42.994680",
"longitude" : "-78.212698"
},
"contactPoint": {
"@type": "ContactPoint",
"contactType": "Customer Service",
"telephone": "+15853442658"
},
"telephone": "+15853442658",
"priceRange": "Call for quote",
"areaServed": ["Batavia", " Le Roy", " Medina NY", " Buffalo", " Rochester", " NY"]
};
tpsSchemaJson.push(autodealer);
el.text = JSON.stringify(tpsSchemaJson);
console.log("DDC Schema.org code loaded.")
jQuery(function($) {
$('body').append(el);
});
$('[data-widget-id="template-header1"]').append($('.socialheader-header-container').removeClass('hidden').removeClass('hide'));
window.DDC = window.DDC || {};
DDC.dataLayer = (DDC.dataLayer || {});
DDC.dataLayer.site = (DDC.dataLayer.site || {});
DDC.dataLayer.site.siteInfo = (DDC.dataLayer.site.siteInfo || {});
DDC.dataLayer.site.siteInfo.vinLensAccountId = 19634;
window.DDC = window.DDC || {};
var trackerNames = [];
trackerNames.push('UA1436281301');
ga('create', {trackingId: 'UA-143628130-1', cookieDomain: 'auto', name: 'UA1436281301'});
ga(function() {
for (var i=0; i < trackerNames.length; ++i) {
var name = trackerNames[i];
ga(name+'.send', 'pageview');
}
});
if( (Math.random() * 100) < 5 ) {
DDC.getScripts({ js: ['/v9/media/js/web-vitals-tracking/google-analytics/index.js'] });
}
if (jQuery.cookie !== undefined && (!jQuery.cookie('ddc_abc_cache') || jQuery.cookie('ddc_abc_cache') === '[object Object]' || !jQuery.cookie('ddc_abcamm_cache'))) {
$(function() {
jQuery.ajax({
url: "//pixall.esm1.net/cookie",
xhrFields: {
withCredentials: true
},
success: function (data) {
var eoCookieExists = !!jQuery.cookie('ddc_abc_cache');
var adCookies = ['abc', 'abcc', 'abcamm', 'abcg'];
for (var i = adCookies.length - 1; i >= 0; i--) {
var cookie = adCookies[i];
if (typeof data[cookie] !== undefined) {
var expirationDays = (data[cookie] === "") ? 1 : 7;
jQuery.cookie('ddc_' + cookie + '_cache', data[cookie], { expires: expirationDays, path: '/' });
}
}
if (data['abc']) {
sessionStorage.setItem("pixallCookieIsSet", true);
}
if (!eoCookieExists && jQuery.cookie('ddc_abc_cache')) {
window.DDC = window.DDC || {};
window.DDC.tracking = window.DDC.tracking || {};
window.DDC.tracking.ddc_abc_cache = data['abc'];
jQuery.publish('ddc-eo-cookies-set');
}
}
});
});
}
window.DDC = window.DDC || {};
window.DDC.i18n = window.DDC.i18n || {};
window.DDC.i18n.labels = Object.assign(window.DDC.i18n.labels || {}, {
'NO': 'No',
'OOPS_EXCLAMATION_YOU_MISSED_THIS_ONE': 'Oopsx21x20Youx20missedx20thisx20one.',
'PLEASE_CORRECT_THIS_VALUE': 'Pleasex20correctx20thisx20value.',
'PLEASE_ENTER_A_NUMERIC_VALUE': 'Pleasex20enterx20ax20numericx20value.',
'PLEASE_ENTER_A_VALID_EMAIL_ADDRESS': 'Pleasex20enterx20ax20validx20emailx20address.',
'PLEASE_ENTER_A_VALID_URL': 'Pleasex20enterx20ax20validx20URL.',
'PLEASE_ENTER_A_VALUE_LARGER_THAN_ONE_DOLLAR': 'Pleasex20enterx20ax20valuex20largerx20thanx20x241.',
'PLEASE_ENTER_A_VALUE_SMALLER_THAN_ONE_DOLLAR': 'Pleasex20enterx20ax20valuex20smallerx20thanx20x241.',
'YOU_HAVE_XX_INVALID_ENTRIES_IN_THE_FORM': 'Youx20havex20XXx20invalidx20entriesx20inx20thex20form',
'TCPA_CONSENT_ERROR_VERBIAGE': 'Yourx20consentx20isx20requiredx20tox20completex20thisx20action.x20Ifx20youx20choosex20tox20notx20optx2Din,x20pleasex20selectx20ax20differentx20contactx20method.x20',
'VIDEOPLAYER_CAPTIONS_TITLE': 'Captions',
'VIDEOPLAYER_CAPTIONS_ON': 'On',
'VIDEOPLAYER_CAPTIONS_OFF': 'Off',
'VIDEOPLAYER_FULLSCREEN': 'Fullscreen',
'VIDEOPLAYER_QUALITY_TITLE': 'Quality',
'VIDEOPLAYER_QUALITY_HIGH': 'High',
'VIDEOPLAYER_QUALITY_LOW': 'Low',
'INVALID_DATE': 'Invalidx20date'
});
};
jQuery(scripts);
/*]]>*/
</script>
<div data-location="page-fo

它应该像这样匹配所有的对象:

{
"@context": "https://schema.org",
"@type": "Car",
"description": document.head.querySelector("[name=description]") ? document.head.querySelector("[name=description]").content : "",
"vehicleModelDate": "2015",
"manufacturer": "Ram",
"model": "1500",
"sku": "08c765ea0a0e0a922cefdf66496c54cd",
"bodyType": "Truck Crew Cab",
"itemCondition": "used",
"url": location.origin + location.pathname,
"vehicleIdentificationNumber": "3C6RR7LT1FG710130",
"fuelEfficiency": ["16","23"],
"driveWheelConfiguration":"4x4",
"vehicleEngine": "V-8 cyl",
"color": "Bright White",
"vehicleInteriorColor": "Diesel Gray/Black",
"fuelType": "Regular Unleaded",
"mileageFromOdometer": "60455",
"vehicleTransmission": "8 speed automatic",
"name": "Ram 1500 Truck Crew Cab",
"image": "https://images.dealer.com/autodata/us/large_stockphoto-color/2015/USC50RMT11CB0/PW7.jpg",
"offers": {
"@type": "Offer",
"priceCurrency": "USD",
"price": "31000.0",
"availability": "http://schema.org/InStock"
}
};

代码中会有多个包含相同字符串的对象,我想匹配所有这些对象。有人可以帮忙吗?

与大多数正则表达式问题一样,您必须进行一些猜测(尽管是有根据的)。假设:

  1. json对象以分号结尾
  2. 中间没有悬空的分号(几乎永远不会出现这种情况)

这应该可以工作:

r".*({.*?['\"]@context['\"]\s*:\s*['\"]https://schema.org['\"].*?}(?=[\n\r\s]*;\s*[\n\r])).*"

用法如下:

# src = the entire input
regex = r".*({.*?['\"]@context['\"]\s*:\s*['\"]https://schema.org['\"].*?}(?=[\n\r\s]*;\s*[\n\r])).*"
print(re.findall(regex, src, flags=re.DOTALL)[0])

这给了我:

{
"@context": "https://schema.org",
"@type": "Car",
"description": document.head.querySelector("[name=description]") ? document.head.querySelector("[name=description]").content : "",     
"vehicleModelDate": "2015",
"manufacturer": "Ram",
"model": "1500",
"sku": "08c765ea0a0e0a922cefdf66496c54cd",
"bodyType": "Truck Crew Cab",
"itemCondition": "used",
"url": location.origin + location.pathname,
"vehicleIdentificationNumber": "3C6RR7LT1FG710130",
"fuelEfficiency": ["16","23"],
"driveWheelConfiguration":"4x4",
"vehicleEngine": "V-8 cyl",
"color": "Bright White",
"vehicleInteriorColor": "Diesel Gray/Black",
"fuelType": "Regular Unleaded",
"mileageFromOdometer": "60455",
"vehicleTransmission": "8 speed automatic",
"name": "Ram 1500 Truck Crew Cab",
"image": "https://images.dealer.com/autodata/us/large_stockphoto-color/2015/USC50RMT11CB0/PW7.jpg",
"offers": {
"@type": "Offer",
"priceCurrency": "USD",
"price": "31000.0",
"availability": "http://schema.org/InStock"
}
}
<标题>

由于逐个标记地解释这个正则表达式过于繁琐,而且实际上也没有太大帮助,所以我在这里只解释其中重要的部分:

['"]负责使用任何类型的引号
(?=[\n\r\s]*;\s*[\n\r])确保对象后面跟着任意数量的换行或空白字符,然后是分号,分号之后再跟一个换行(基本上,它应该是一条JS表达式的结尾)
re.DOTALL确保点号(.)匹配包括换行符在内的所有字符

最新更新