用SAS分析Json文件



我第一次尝试使用SAS解析Json文件。我在互联网上找到了一些脚本,并在SAS上尝试了这个CODE,但无法在我的JSON数据文件上运行。

data parsed; 
infile data lrecl=32000 truncover scanover dlm=",";
input @'"driver_id":{' driver_id $255.
  @'"publication_site_id":{' publication_site_id $255.
  @'"confirmed_seat_count":{' confirmed_seat_count $255.
  @'"published_date":{' published_date $255.
  @'"trip_date":{' trip_date $255.
  @'"first_publication_date":{' first_publication_date $255.
  @'"profile_picture_added_date":{' profile_picture_added_date $255.;
driver_id=scan(driver_id, 1, '",');
published_date=scan(published_date, 1, '",');   
trip_date   =scan(trip_date, 1, '",');
publication_site_id=scan(publication_site_id, 1, '",');
confirmed_seat_count    =scan(confirmed_seat_count, 1, '",');
first_publication_date  =scan(first_publication_date, 1, '",');
profile_picture_added_date  =scan(profile_picture_added_date, 1, '",');
run;

这是我的Json文件的概述

{
"driver_id": {"0":11450650,"1":4705453,"2":18873210,"3":4892472,"4":47572650,"5":5242675}, 
"publication_site_id":{"0":1056,"1":1056,"2":1056,"3":5056,"4":1056,"5":1056},
"confirmed_seat_count":{"0":2.0,"1":0.0,"2":1.0,"3":0.0,"4":0.0,"5":0.0},
"published_date":{"0":"2015-06-16 00:00:00","1":"2015-06-16 00:00:00","2":"2015-06-16 00:00:00","3":"2015-06-16 00:00:00","4":"2015-06-16 00:00:00","5":"2015-06-16 00:00:00"},
"trip_date":{"0":"2015-06-21 19:00:00","1":"2015-06-21 12:00:00","2":"2015-06-19 17:00:00","3":"2015-06-17 15:30:23","4":"2015-06-21 15:00:00","5":"2015-06-21 17:00:00"},
"first_publication_date":{"0":"2015-01-10 00:00:00","1":"2014-01-25 00:00:00","2":"2014-12-22 00:00:00","3":"2014-10-22 00:00:00","4":"2015-06-16 00:00:00","5":"2014-11-20 00:00:00"},
"profile_picture_added_date":{"0":null,"1":null,"2":"2015-09-22 00:00:00","3":"2013-11-19 00:00:00","4":"2015-06-17 00:00:00","5":null}
}

我非常感谢你的帮助。

问候SM

这应该会对您有所帮助:

options noquotelenmax;
filename data temp lrecl=32000;
data _null_;
file data;
put '{
"driver_id": {"0":11450650,"1":4705453,"2":18873210,"3":4892472,"4":47572650,"5":5242675}, 
"publication_site_id":{"0":1056,"1":1056,"2":1056,"3":5056,"4":1056,"5":1056},
"confirmed_seat_count":{"0":2.0,"1":0.0,"2":1.0,"3":0.0,"4":0.0,"5":0.0},
"published_date":{"0":"2015-06-16 00:00:00","1":"2015-06-16 00:00:00","2":"2015-06-16 00:00:00","3":"2015-06-16 00:00:00","4":"2015-06-16 00:00:00","5":"2015-06-16 00:00:00"},
"trip_date":{"0":"2015-06-21 19:00:00","1":"2015-06-21 12:00:00","2":"2015-06-19 17:00:00","3":"2015-06-17 15:30:23","4":"2015-06-21 15:00:00","5":"2015-06-21 17:00:00"},
"first_publication_date":{"0":"2015-01-10 00:00:00","1":"2014-01-25 00:00:00","2":"2014-12-22 00:00:00","3":"2014-10-22 00:00:00","4":"2015-06-16 00:00:00","5":"2014-11-20 00:00:00"},
"profile_picture_added_date":{"0":null,"1":null,"2":"2015-09-22 00:00:00","3":"2013-11-19 00:00:00","4":"2015-06-17 00:00:00","5":null}
}';
run;
data parsed; 
infile data lrecl=32000 truncover scanover dlm='}' ;
input @'"driver_id": {' driver_id: $255.
  @'"publication_site_id":{' publication_site_id: $255.
  @'"confirmed_seat_count":{' confirmed_seat_count: $255.
  @'"published_date":{' published_date: $255.
  @'"trip_date":{' trip_date: $255.
  @'"first_publication_date":{' first_publication_date: $255.
  @'"profile_picture_added_date":{' profile_picture_added_date: $255.@@;
run;

我更新了delimeter(允许您保留内部对象,这些对象本身是定界的),添加了冒号输入修饰符(允许您通过说"从下一个非空白列开始读取"来读取非标准值),并使用@@trailing列指针来保存记录,以输出后续观察结果(您的json很可能会作为一个长流到达)。哦,我还为@'"driver_id": {'添加了一个空格——这是打字错误吗?

我省略了扫描部分,以为你已经控制住了!

这不是我最好的作品,但它会给你带来你想要的!。需要重新组织列和一些dq,但您的JSON将看起来像SAS数据集。我建议您查看python pandas包。使用pandas,您可以用一行代码对其进行排序。

data parsed; 
infile "/your_path/test.json" lrecl=32500 truncover scanover dlm="," firstobs=2;
input   @'"driver_id"' _driver_id $255.
        @'"publication_site_id"' _publication_site_id $255.
        @'"confirmed_seat_count"' _confirmed_seat_count $255.
        @'"published_date"' _published_date $255.
        @'"trip_date"' _trip_date $255.
        @'"first_publication_date"' _first_publication_date $255.
        @'"profile_picture_added_date"' _profile_picture_added_date $255.;
array v{*} _:;
do i=1 to dim(v);
    v[i]=prxchange('s/"d+"//',-1,v[i]);
    v[i]=compress(v[i],'{}"');
    v[i]=substr(v[i],3); 
end;
run;
proc transpose data=parsed out=t_parsed ;
var _:;
run;
data want(drop=i);
set t_parsed;
array split(6) $50.;
do i=1 to dim(split);
 split[i] = scan(col1,i,',');
end;
run;
proc sort data=want; by _name_;run;
proc transpose data=want out=want_t(drop=_name_);
var split:;
id _name_;
run;

最新更新