我不知道如何正确查找(SAS)



在Base SAS中,我有一个带有哈希对象的脚本来执行表查找。情况如下。表A是原始的主表,它将使用表B进行查找。查找键为AssetName and Voltage(资产名称和电压(。电压始终为33或11。

因此,现在我们可以想象,通过使用AssetName和Voltage从表B中获取一些数据,这是表A与表B的查找。

让我们来看看我的示例代码。

data ncpdm.ncp_load_re (drop=excp_code re_state re_supply_zone)
work.excp_ncp_load_re;
length excp_code $50 re_state re_supply_zone $30;
if _n_=1 then do;
declare hash pmu_list(dataset:"ncpdm.ncp_asset_pmu");
pmu_list.definekey('assetname','voltage');
pmu_list.definedata('region','zone','state_code','state',
'business_area_code','business_area',
'supply_zone_code','supply_zone',
'sub_supply_zone_code','sub_supply_zone',
'pmu_name','substation_name_tnbt','functional_location');
pmu_list.definedone();
call missing(region,zone,state_code,state,
business_area_code,business_area,
supply_zone_code,supply_zone,
sub_supply_zone_code,sub_supply_zone,
pmu_name,substation_name_tnbt,functional_location);
end;
set asset_re (RENAME=(pmu=assetname voltage=voltage_));
data_dttm=datetime();
voltage_=strip(voltage_);
voltage=cats('132/',voltage_);
mnemonic_tnbt=strip(mnemonic_tnbt);
assetname=mnemonic_tnbt;

rc=pmu_list.find();
if (rc^=0) then do;
excp_code='Exception: Mnemonic_tnbt and Voltage not mapped to PMU master list';
output work.excp_ncp_load_re;
end;
else do;
output ncpdm.ncp_load_re;
end;
keep mnemonic_tnbt excp_code re_state re_supply_zone
region zone state_code state business_area_code business_area
supply_zone_code supply_zone sub_supply_zone_code sub_supply_zone
pmu_name substation_name_tnbt functional_location voltage
re_state
re_station
re_ca_no
re_customer_name
re_capacity
re_commission_date
re_technology
pmu
ppu
ssu_pe
re_switch_no
voltage
period
data_dttm
active_flag                    
program             
scod_date               
kick_off_date       
iom_date
geo_longitude
geo_latitude;
run;

从上面的代码中,我将那些无法映射/查找到输出的设置为excp表。然后,我使用相同的散列对象代码,但使用excp作为数据源,再次查找相同的表,代码如下。(i将电压更改为33或11(与现有电压相反(。

/*2nd round lookup for failed record*/
data ncpdm.ncp_load_rev2 (drop=excp_code re_state re_supply_zone)
work.excp_ncp_load_re (drop=excp_code re_state re_supply_zone);
length excp_code $50 re_state re_supply_zone $30;
if _n_=1 then do;
declare hash pmu_list(dataset:"ncpdm.ncp_asset_pmu");
pmu_list.definekey('assetname','voltage');
pmu_list.definedata('region','zone','state_code','state',
'business_area_code','business_area',
'supply_zone_code','supply_zone',
'sub_supply_zone_code','sub_supply_zone',
'pmu_name','substation_name_tnbt','functional_location');
pmu_list.definedone();
call missing(region,zone,state_code,state,
business_area_code,business_area,
supply_zone_code,supply_zone,
sub_supply_zone_code,sub_supply_zone,
pmu_name,substation_name_tnbt,functional_location);
end;
set work.excp_ncp_load_re;
data_dttm=datetime();
if voltage='132/11' then voltage = '132/33';
else if voltage='132/33' then voltage='132/11';
mnemonic_tnbt=strip(mnemonic_tnbt);
assetname=mnemonic_tnbt;
re_state=state;
re_station=station;
re_ca_no=ca_no;
re_customer_name=applicant_name;
re_capacity=capacity;
re_commission_date=commission_date;
re_technology=technology;
geo_latitude=lat;
geo_longitude=lng;
rc=pmu_list.find();
if (rc^=0) then do;
excp_code='Exception: Mnemonic_tnbt and Voltage not mapped to PMU master list';
output work.excp_ncp_load_re;
end;
else do;
output ncpdm.ncp_load_rev2;
end;
keep mnemonic_tnbt excp_code re_state re_supply_zone
region zone state_code state business_area_code business_area
supply_zone_code supply_zone sub_supply_zone_code sub_supply_zone
pmu_name substation_name_tnbt functional_location voltage
re_state
re_station
re_ca_no
re_customer_name
re_capacity
re_commission_date
re_technology
pmu
ppu
ssu_pe
re_switch_no
voltage
period
data_dttm
active_flag                    
program             
scod_date               
kick_off_date       
iom_date
geo_longitude
geo_latitude;
run;

问题是,对于那些在我的第一个哈希对象中没有匹配电压的对象,我设法在第二个哈希对象代码中查找,但我仍然得到未映射的记录。一旦我附加了从第一个散列对象和第二个散列对象生成的表,我仍然会得到比我想要的结果更少的记录。

我想不出如何运用更好的逻辑。不知怎的,我觉得我使用第二个散列对象查找的方法是不必要的,但我只是不知道什么是更好的方法。

有更好的方法吗?

您可以在一个步骤中执行这两个查找。在第一个find()失败后,将key2值更改为其备用值并执行另一个find()。注意:对于未检测到匹配的情况,查找附属变量应明确设置为缺失,以便输出数据集准确无误——如果未重置,附属查找变量将包含最近一次匹配的值。

样本代码:

此示例生成主表和查找表的数据。每个表都有一些要携带到输出数据集中的卫星变量。还会创建一个状态变量来指示发生的匹配或不匹配的类型。

data have(keep=key1 key2 have:);
length key1 $6 key2 $2;
do _n_ = 1 to 5000;
key1 = repeat(byte(26*ranuni(123)+rank('A')),5);
key2 = ifc(ranuni(123) < 0.35, '11', '33');
if ranuni(123) < 0.02 then key2 = '22';
array have(3); * satellite (non-key) variables in have;
do i = 1 to 3;
have(i) = _n_ * 10000 + i;
end;
output;
end;
format have: 8.;
run;
data lookup(keep=key1 key2 look:);
length key1 $6 key2 $2;
array look(5); * satellite (non-key) variables in lookup;
do i = 1 to 26;
key1 = repeat(byte(i-1+rank('A')),5);
array key2s(2) $2 _temporary_ ('11','33');
do j = 1 to 2;      
key2 = key2s(j);
do k = 1 to 5;
look(k) = i * 1000 + j * 100 + k;
end;
if ranuni(123) < 0.40 then output;
end;
end;
format look: 8.;
run;
data match no_match;
length key1 $6 key2 $2 match_status $30;
if _n_ = 1 then do;
if 0 then set lookup; * prep pdv at compilation time, set is never executed at runtime;
declare hash lookup(dataset:'lookup');
lookup.defineKey('key1', 'key2');
do i = 1 to 5; drop i;
lookup.defineData(cats('look',i));
end;
lookup.defineDone();
end;
set have;
rc = lookup.find();
if rc = 0 then do;
match_status = 'match on actual keys';
output match;
return; * to top of step;
end;
* first lookup failed, try the alternate key2;
if key2 = '11' then 
key2='33';
else
if key2 = '33' then
key2 = '11';
else do;
call missing (of look:); * clear lookup values that find() loaded at last prior match;
match_status = 'no match, key2 invalid';
output no_match;
return; * to top of step;
end;
rc = lookup.find();
if rc = 0 then do;
match_status = 'match after key2 swap';
output match;
return; * to top of step;
end;
else do;
call missing (of look:); * clear lookup values that find() loaded at last prior match;
match_status = 'no match with 11 or 33';
output no_match;
return; * to top of step;
end;
run;

最新更新