如何使用正则表达式分配分类变量



我想创建一个分类变量 type:如果索引的最后一个分量在 01 到 09 之间,则样本标记为"tumor"(肿瘤,即 01-09 = 肿瘤);否则,样本标记为"normal"(正常,即 10-19 = 正常)。

# Last dash-separated component of every index label (e.g. "01A", "11A").
sample_codes = clin_kipan.index.str.split('-').str[-1]

# Boolean mask: True where that component starts with 01-09.
is_tumor = sample_codes.str.contains(r'^0[1-9]', regex=True)

# Attach the mask as column "type", turn the booleans into text labels,
# then drop every column that still contains a missing value.
clin_kipan = clin_kipan.assign(type=is_tumor)
clin_kipan = clin_kipan.replace({'type': {True: 'tumor', False: 'normal'}})
clin_kipan = clin_kipan.dropna(axis=1)

数据(以字典形式给出)。注意:我删除了部分数据,以符合 Stack Overflow 的篇幅限制。

# Export the first column as a dict keyed by the index labels
# (the resulting dict is shown below).
meth_450_10k_kipan.iloc[:, 0].to_dict()

{'TCGA-2K-A9WE-01A': 0.461440642939772,
 'TCGA-2Z-A9J1-01A': 0.595894468074615,
 'TCGA-2Z-A9J2-01A': 0.481304782143526,
 'TCGA-2Z-A9J3-01A': 0.553849599144766,
 'TCGA-2Z-A9J5-01A': 0.184349035247422,
 'TCGA-2Z-A9J6-01A': 0.368324568853624,
 'TCGA-2Z-A9J7-01A': 0.17338171975844,
 'TCGA-2Z-A9J8-01A': 0.805217631945182,
 'TCGA-2Z-A9J9-01A': 0.113336454896178,
 'TCGA-2Z-A9JD-01A': 0.451013557269167,
 'TCGA-2Z-A9JE-01A': 0.0734337852294042,
 'TCGA-2Z-A9JG-01A': 0.452956990020555,
 'TCGA-2Z-A9JI-01A': 0.606187976542221,
 'TCGA-2Z-A9JJ-01A': 0.0960629248660591,
 'TCGA-2Z-A9JK-01A': 0.588377732872447,
 'TCGA-2Z-A9JL-01A': 0.379404812465867,
 'TCGA-2Z-A9JM-01A': 0.33009261875889,
 'TCGA-2Z-A9JN-01A': 0.123562375369795,
 'TCGA-2Z-A9JO-01A': 0.486619960274097,
 'TCGA-2Z-A9JP-01A': 0.612453055148181,
 'TCGA-A4-7997-01A': 0.575299539756208,
 'TCGA-A4-8098-01A': 0.872411944149227,
 'TCGA-A4-8310-01A': 0.620356595911493,
 'TCGA-A4-8311-01A': 0.175836950400384,
 'TCGA-A4-8312-01A': 0.460360548972967,
 'TCGA-AK-3425-01A': 0.553596581023431,
 'TCGA-AK-3428-01A': 0.200576631432299,
 'TCGA-AK-3431-01A': 0.460568866100955,
 'TCGA-AK-3433-01A': 0.0667130360899875,
 'TCGA-AK-3434-01A': 0.468187497808806,
 'TCGA-AK-3440-01A': 0.0574998532505545,
 'TCGA-AK-3445-01A': 0.736033027928183,
 'TCGA-AK-3450-01A': 0.552013269922762,
 'TCGA-AK-3453-01A': 0.80388456484843,
 'TCGA-AK-3454-01A': 0.235022181426789,
 'TCGA-AK-3458-01A': 0.414595807883066,
 'TCGA-AK-3460-01A': 0.545975484391243,
 'TCGA-AK-3461-01A': 0.477282040746989,
 'TCGA-AL-7173-01A': 0.497670872505808,
 'TCGA-AL-A5DJ-01A': 0.414268048210923,
 'TCGA-AT-A5NU-01A': 0.0844509944090884,
 'TCGA-B0-4688-01A': 0.126552246407372,
 'TCGA-B0-4688-11A': 0.371027570539727,
 'TCGA-B0-4690-01A': 0.664167690871162,
 'TCGA-B0-4690-11A': 0.359369329912497,
 'TCGA-B0-4691-01A': 0.726171893496605,
 'TCGA-B0-4691-11A': 0.462636912871508,
 'TCGA-B0-4693-01A': 0.715476004275991,
 'TCGA-B0-4693-11A': 0.391904892559453,
 'TCGA-B0-4694-01A': 0.541144132542648,
 'TCGA-B0-4694-11A': 0.35202102740962,
 'TCGA-B0-4696-01A': 0.0844268180611597,
 'TCGA-B0-4696-11A': 0.450653351366657,
 'TCGA-B0-4697-01A': 0.654457352712322,
 'TCGA-B0-4697-11A': 0.171069365867474,
 'TCGA-B0-4698-01A': 0.357248803549941,
 'TCGA-B0-4698-11A': 0.48351949752012,
 'TCGA-B0-4699-01A': 0.477376612682637,
 'TCGA-B0-4699-11A': 0.456852387157548,
 'TCGA-B0-4700-01A': 0.627663610531787,
 'TCGA-B0-4701-01A': 0.535784042345219,
 'TCGA-B0-4701-11A': 0.443149883166454,
 'TCGA-B0-4703-01A': 0.413705851805378,
 'TCGA-B0-4703-11A': 0.370343129988362,
 'TCGA-B0-4706-01A': 0.427010203528901,
 'TCGA-B0-4706-11A': 0.457528590590756,
 'TCGA-B0-4707-01A': 0.554585932481344,
 'TCGA-B0-4707-11A': 0.494392640852805,
 'TCGA-B0-4710-01A': 0.746479723091997,
 'TCGA-B0-4710-11A': 0.411702603771858,
 'TCGA-B0-4712-01A': 0.473302353424005,
 'TCGA-B0-4712-11A': 0.270372691978926,
 'TCGA-B0-4713-01A': 0.560429143461328,
 'TCGA-B0-4713-11A': 0.40376141612359,
 'TCGA-B0-4714-01A': 0.735945131182954,
 'TCGA-B0-4714-11A': 0.263599330925764,
 'TCGA-B0-4718-01A': 0.833718331254908,
 'TCGA-B0-4718-11A': 0.379442535687981,
 'TCGA-B0-4810-01A': 0.699637311093991,
 'TCGA-B0-4810-11A': 0.479396092302295,
 'TCGA-B0-4811-01A': 0.531909129769779,
 'TCGA-B0-4811-11A': 0.54226596375653,
 'TCGA-B4-5835-01A': 0.717016504186623,
 'TCGA-B4-5836-01A': 0.693534883907721,
 'TCGA-B4-5838-01A': 0.508256362230539,
 'TCGA-B4-5843-01A': 0.487548027975282,
 'TCGA-B4-5844-01A': 0.649681573187955,
 'TCGA-B8-4146-01B': 0.82401932868328,
 'TCGA-B8-4153-01B': 0.460999849292442,
 'TCGA-B8-4621-01A': 0.381802967801001,
 'TCGA-B8-4622-01A': 0.670973304749735,
 'TCGA-B8-5158-01A': 0.467914415050535,
 'TCGA-B8-5159-01A': 0.779251722281253,
 'TCGA-B8-5162-01A': 0.351204810616952,
 'TCGA-B8-5163-01A': 0.327307753655143,
 'TCGA-B8-5164-01A': 0.609486026395931,
 'TCGA-B8-5165-01A': 0.526798777018024,
 'TCGA-B8-5545-01A': 0.465850022943473,
 'TCGA-B8-5546-01A': 0.650085752864622,
 'TCGA-B8-5549-01A': 0.601420278828655,
 'TCGA-B8-5550-01A': 0.36926263565775,
 'TCGA-B8-5551-01A': 0.510539456362485,
 'TCGA-B8-5552-01B': 0.561914284053566,
 'TCGA-B8-5553-01A': 0.721040148269779,
 'TCGA-B8-A54D-01A': 0.569601921914092,
 'TCGA-B8-A54E-01A': 0.287289206223702,
 'TCGA-B8-A54F-01A': 0.757363680231869,
 'TCGA-B8-A54G-01A': 0.835557016199056,
 'TCGA-B8-A54H-01A': 0.495744985734965,
 'TCGA-B8-A54I-01A': 0.395012393085408,
 'TCGA-B8-A54J-01A': 0.406091467819355,
 'TCGA-B8-A54K-01A': 0.833006431093696,
 'TCGA-B8-A7U6-01A': 0.61310478518293,
 'TCGA-B8-A8YJ-01A': 0.364804929088756,
 'TCGA-B9-5155-01A': 0.360402370593871,
 'TCGA-B9-5156-01A': 0.48035755488036,
 'TCGA-B9-7268-01A': 0.188844826257322,
 'TCGA-B9-A44B-01A': 0.362254621945938,
 'TCGA-B9-A5W7-01A': 0.703654140729989,
 'TCGA-B9-A5W8-01A': 0.270043280449106,
 'TCGA-B9-A5W9-01A': 0.824048307585993,
 'TCGA-B9-A69E-01A': 0.61780736246742,
 'TCGA-B9-A8YH-01A': 0.226240912502849,
 'TCGA-B9-A8YI-01A': 0.690382207755967,
 'TCGA-BP-4177-01A': 0.725375025466823,
 'TCGA-BP-4177-11A': 0.422332454108841,
 'TCGA-BP-4760-01A': 0.609359979687519,
 'TCGA-BP-4760-11A': 0.422306044637011,
 'TCGA-BP-4770-01A': 0.121487152318834,
 'TCGA-BP-4770-11A': 0.594633150328805,
 'TCGA-BP-4782-01A': 0.704514842537063,
 'TCGA-BP-4782-11A': 0.471298373336338,
 'TCGA-BP-4795-01A': 0.527854356635493,
 'TCGA-BP-4795-11A': 0.323334566013902,
 'TCGA-BP-4801-01A': 0.616813262608032,
 'TCGA-BP-4801-11A': 0.402769700728329,
 'TCGA-BP-4993-01A': 0.433660406360131,
 'TCGA-BP-4993-11A': 0.464248458487772,
 'TCGA-BP-5010-01A': 0.695455272482929,
 'TCGA-BP-5010-11A': 0.378443420075559,
 'TCGA-BP-5168-01A': 0.721716901812953,
 'TCGA-BP-5168-11A': 0.505147623192572,
 'TCGA-BP-5169-01A': 0.756965491596902,
 'TCGA-BP-5169-11A': 0.513432590250273,
 'TCGA-BP-5170-01A': 0.642932583313587,
 'TCGA-BP-5170-11A': 0.362978215086028,
 'TCGA-BP-5173-01A': 0.560431745976407,
 'TCGA-BP-5173-11A': 0.259258497085793,
 'TCGA-BP-5174-01A': 0.551016395137727,
 'TCGA-BP-5174-11A': 0.456165104612702,
 'TCGA-BP-5175-01A': 0.602399249567945,
 'TCGA-BP-5175-11A': 0.43568311659361,
 'TCGA-BP-5176-01A': 0.467503226665519,
 'TCGA-BP-5176-11A': 0.509898723396126,
 'TCGA-BP-5177-01A': 0.497077578280269,
 'TCGA-BP-5177-11A': 0.468852423144684,
 'TCGA-BP-5178-01A': 0.512204953206687,
 'TCGA-BP-5178-11A': 0.248976112713065,
 'TCGA-BP-5180-01A': 0.763982300295148,
 'TCGA-BP-5180-11A': 0.542525346367913,
 'TCGA-BP-5181-01A': 0.525400685051857,
 'TCGA-BP-5181-11A': 0.3964210852542,
 'TCGA-BP-5182-01A': 0.694192771302357,
 'TCGA-BP-5182-11A': 0.554368990300157,
 'TCGA-BP-5183-01A': 0.684795562770571,
 'TCGA-BP-5183-11A': 0.45631723136511,
 'TCGA-BP-5184-01A': 0.435925608039958,
 'TCGA-BP-5184-11A': 0.553059239606735,
 'TCGA-BP-5185-01A': 0.83564546162613,
 'TCGA-BP-5185-11A': 0.4974584854162,
 'TCGA-BP-5186-01A': 0.53135728380227,
 'TCGA-BP-5186-11A': 0.492131357121141,
 'TCGA-BP-5187-01A': 0.250321959887726,
 'TCGA-BP-5187-11A': 0.412696016372533,
 'TCGA-BP-5189-01A': 0.599134975432647,
 'TCGA-BP-5189-11A': 0.505475863167215,
 'TCGA-BP-5190-01A': 0.78355345451088,
 'TCGA-BP-5190-11A': 0.57824878357057,
 'TCGA-BP-5191-01A': 0.466016114035713,
 'TCGA-BP-5191-11A': 0.294652544602874,
 'TCGA-BP-5192-01A': 0.797804858762056,
 'TCGA-BP-5192-11A': 0.517008791002576,
 'TCGA-BP-5194-01A': 0.485244177965985,
 'TCGA-BP-5194-11A': 0.499490535573446,
 'TCGA-BP-5195-01A': 0.270428601591792,
 'TCGA-BP-5195-11A': 0.355561602589935,
 'TCGA-BP-5196-01A': 0.549048458426934,
 'TCGA-BP-5196-11A': 0.524199657327286,
 'TCGA-BP-5198-01A': 0.558528762182107,
 'TCGA-BP-5198-11A': 0.546715513566714,
 'TCGA-BP-5199-01A': 0.750248801045707,
 'TCGA-BP-5199-11A': 0.585095560159837,
 'TCGA-BP-5200-01A': 0.587103421935098,
 'TCGA-BP-5200-11A': 0.372272400962597,
 'TCGA-BP-5201-01A': 0.713099111515259,
 'TCGA-BP-5201-11A': 0.438646950830249,
 'TCGA-BP-5202-01A': 0.505220777919004,
 'TCGA-BP-5202-11A': 0.385560136158654,
 'TCGA-BQ-5875-01A': 0.89331960011634,
 'TCGA-BQ-5875-11A': 0.57770084470179,
 'TCGA-BQ-5876-01A': 0.411402602354243,
 'TCGA-BQ-5876-11A': 0.411666572925372,
 'TCGA-BQ-5877-01A': 0.807432551658334,
 'TCGA-BQ-5877-11A': 0.658255601141211,
 'TCGA-BQ-5878-01A': 0.407318983929467,
 'TCGA-BQ-5878-11A': 0.552116584037671,
 'TCGA-BQ-5879-01A': 0.704088057170876,
 'TCGA-BQ-5879-11A': 0.495031748602989,
 'TCGA-BQ-5880-01A': 0.466510386832544,
 'TCGA-BQ-5880-11A': 0.346514881483004,
 'TCGA-BQ-5881-01A': 0.31247891319475,
 'TCGA-BQ-5881-11A': 0.548745619575565,
 'TCGA-BQ-5882-01A': 0.831031871910882,
 'TCGA-BQ-5882-11A': 0.396805818601612,
 'TCGA-BQ-5883-01A': 0.195453381864565,
 'TCGA-BQ-5883-11A': 0.576017335308896,
 'TCGA-BQ-5884-01A': 0.809908604656978,
 'TCGA-BQ-5884-11A': 0.524588293635589,
 'TCGA-BQ-5885-01A': 0.387266730372273,
 'TCGA-BQ-5885-11A': 0.535875409243481,
 'TCGA-BQ-5886-01A': 0.301894488273832,
 'TCGA-BQ-5886-11A': 0.480241258675749,
 'TCGA-BQ-5887-01A': 0.697756389517944,
 'TCGA-BQ-5887-11A': 0.41895519894623,
 'TCGA-BQ-5888-01A': 0.563389467606809,
 'TCGA-BQ-5888-11A': 0.577545007792039,
 'TCGA-BQ-5889-01A': 0.510856855773724,
 'TCGA-BQ-5889-11A': 0.51669117347703,
 'TCGA-BQ-5890-01A': 0.6202039578815,
 'TCGA-BQ-5890-11A': 0.507789756430189,
 'TCGA-BQ-5891-01A': 0.361541737638687,
 'TCGA-BQ-5891-11A': 0.362333838079948,
 'TCGA-BQ-5892-01A': 0.643664622306718,
 'TCGA-BQ-5892-11A': 0.518723479644402,
 'TCGA-BQ-5893-01A': 0.87370291580085,
 'TCGA-BQ-5893-11A': 0.497549792828741,
 'TCGA-BQ-5894-01A': 0.39981062259206,
 'TCGA-BQ-5894-11A': 0.483842060046995,
 'TCGA-BQ-7044-01A': 0.706785440794508,
 'TCGA-BQ-7044-11A': 0.423206958977879,
 'TCGA-BQ-7045-01A': 0.312953900509226,
 'TCGA-BQ-7045-11A': 0.43242214931862,
 'TCGA-BQ-7046-01A': 0.361218048753202,
 'TCGA-BQ-7046-11A': 0.520595069371813,
 'TCGA-BQ-7048-01A': 0.776172562995166,
 'TCGA-BQ-7048-11A': 0.506144217162374,
 'TCGA-BQ-7049-01A': 0.509881202262492,
 'TCGA-BQ-7049-11A': 0.48670776234331,
 'TCGA-BQ-7050-01A': 0.638500198979321,
 'TCGA-BQ-7050-11A': 0.607636282163788,
 'TCGA-BQ-7051-01A': 0.554390885406225,
 'TCGA-BQ-7051-11A': 0.523155141317924,
 'TCGA-BQ-7053-01A': 0.480544308909383,
 'TCGA-BQ-7053-11A': 0.502245593128557,
 'TCGA-BQ-7055-01A': 0.482047484138711,
 'TCGA-BQ-7055-11A': 0.524133942110707,
 'TCGA-BQ-7056-01A': 0.147095459553773,
 'TCGA-BQ-7056-11A': 0.481272102150538,
 'TCGA-BQ-7058-01A': 0.411094777994713,
 'TCGA-BQ-7058-11A': 0.437886409988998,
 'TCGA-BQ-7059-01A': 0.265778230947236,
 'TCGA-BQ-7059-11A': 0.436389553055449,
 'TCGA-BQ-7060-01A': 0.389030036861405,
 'TCGA-BQ-7060-11A': 0.452381690712968,
 'TCGA-BQ-7061-01A': 0.609614401788156,
 'TCGA-BQ-7061-11A': 0.253485020805768,
 'TCGA-BQ-7062-01A': 0.326729837157483,
 'TCGA-BQ-7062-11A': 0.492424519548599,
 'TCGA-CJ-4869-01A': 0.507644292544232,
 'TCGA-CJ-4869-11A': 0.482295318152711,
 'TCGA-CJ-4882-01A': 0.50247006824954,
 'TCGA-CJ-4882-11A': 0.511812779848697,
 'TCGA-CJ-4897-01A': 0.573023970345808,
 'TCGA-CJ-4897-11A': 0.600878203441805,
 'TCGA-CJ-4901-01A': 0.603659993962423,
 'TCGA-CJ-4901-11A': 0.582516483453658,
 'TCGA-CJ-4902-01A': 0.556959482342673,
 'TCGA-CJ-4902-11A': 0.279505429158514,
 'TCGA-CJ-4903-01A': 0.769268478984662,
 'TCGA-CJ-4903-11A': 0.416462296625046,
 'TCGA-CJ-4904-01A': 0.336034211570061,
 'TCGA-CJ-4904-11A': 0.600825068380052,
 'TCGA-CJ-4905-01A': 0.531468986357492,
 'TCGA-CJ-4905-11A': 0.347600971468584,
 'TCGA-CJ-4907-01A': 0.632320592076825,
 'TCGA-CJ-4907-11A': 0.397201890422251,
 'TCGA-CJ-4908-01A': 0.579893433246103,
 'TCGA-CJ-4908-11A': 0.349320517229673,
 'TCGA-CJ-4912-01A': 0.272789529653607,
 'TCGA-CJ-4912-11A': 0.396355105598929,
 'TCGA-CJ-4913-01A': 0.814084703950186,
 'TCGA-CJ-4913-11A': 0.460998591869039,
 'TCGA-CJ-4916-01A': 0.65193511108109,
 'TCGA-CJ-4916-11A': 0.477372052398885,
 'TCGA-CJ-4918-01A': 0.68186164105509,
 'TCGA-CJ-4918-11A': 0.577425544329086,
 'TCGA-CJ-4920-01A': 0.726934575112203,
 'TCGA-CJ-4920-11A': 0.273993572622575,
 'TCGA-CJ-4923-01A': 0.419346695829553,
 'TCGA-CJ-4923-11A': 0.520509311134425,
 'TCGA-CJ-5671-01A': 0.506648481033262,
 'TCGA-CJ-5672-01A': 0.603279252130419,
 'TCGA-CJ-5675-01A': 0.796377879483712,
 'TCGA-CJ-5676-01A': 0.635555481824627,
 'TCGA-CJ-5677-01A': 0.348375647953194,
 'TCGA-CJ-5678-01A': 0.650523562351478,
 'TCGA-CJ-5679-01A': 0.736117642401856,
 'TCGA-CJ-5680-01A': 0.692194216848037,
 'TCGA-CJ-5681-01A': 0.323132374788494,
 'TCGA-CJ-5682-01A': 0.817030963666056,
 'TCGA-CJ-5683-01A': 0.178780433415072,
 'TCGA-CJ-5684-01A': 0.534574658649495,
 'TCGA-CJ-5686-01A': 0.624124651982696,
 'TCGA-CJ-5689-01A': 0.589923304849847,
 'TCGA-CJ-6027-01A': 0.581664589821501,
 'TCGA-CJ-6028-01A': 0.653841628026692,
 'TCGA-CJ-6030-01A': 0.60645927708782,
 'TCGA-CJ-6031-01A': 0.420156808105783,
 'TCGA-CJ-6032-01A': 0.699718479011353,
 'TCGA-CJ-6033-01A': 0.535938325316408,
 'TCGA-CW-5580-01A': 0.304056672320722,
 'TCGA-CW-5581-01A': 0.758785185217327,
 'TCGA-CW-5583-01A': 0.595190075885844,
 'TCGA-CW-5584-01A': 0.216972241251929,
 'TCGA-CW-5585-01A': 0.552227997140708,
 'TCGA-CW-5587-01A': 0.769849390134453,
 'TCGA-CW-5588-01A': 0.425550401992947,
 'TCGA-CW-5589-01A': 0.588070601136695,
 'TCGA-CW-5590-01A': 0.629026111191934,
 'TCGA-CW-5591-01A': 0.744669435041416,
 'TCGA-CW-6087-01A': 0.438323838498345,
 'TCGA-CW-6088-01A': 0.648480526354966,
 'TCGA-CW-6090-01A': 0.496941164544484,
 'TCGA-CW-6093-01A': 0.322685649862715,
 'TCGA-CW-6097-01A': 0.63238332642799,
 'TCGA-CZ-4853-01A': 0.844756852226174,
 'TCGA-CZ-4853-11A': 0.324892487021495,
 'TCGA-CZ-4856-01A': 0.668590632795688,
 'TCGA-CZ-4856-11A': 0.216424404863489,
 'TCGA-CZ-4859-01A': 0.801094930689936,
 'TCGA-CZ-4859-11A': 0.433942030040132,
 'TCGA-CZ-4863-01A': 0.598954994898928,
 'TCGA-CZ-4863-11A': 0.232667612910604,
 'TCGA-CZ-4864-01A': 0.875435908326675,
 'TCGA-CZ-4864-11A': 0.474078919309152,
 'TCGA-CZ-4865-01A': 0.573830670002439,
 'TCGA-CZ-4865-11A': 0.231801446526244,
 'TCGA-CZ-4866-01A': 0.180964815279877,
 'TCGA-CZ-4866-11A': 0.316744057807132,
 'TCGA-CZ-5451-01A': 0.828145238318987,
 'TCGA-CZ-5451-11A': 0.526881390879804,
 'TCGA-CZ-5452-01A': 0.667462394585583,
 'TCGA-CZ-5452-11A': 0.254629239249419,
 'TCGA-CZ-5453-01A': 0.440006729995793,
 'TCGA-CZ-5453-11A': 0.259076651505509,
 'TCGA-CZ-5454-01A': 0.897635943332373,
 'TCGA-CZ-5454-11A': 0.374760604510504,
 'TCGA-CZ-5455-01A': 0.805631753647801,
 'TCGA-CZ-5455-11A': 0.365095504674219,
 'TCGA-CZ-5456-01A': 0.869201498524618,
 'TCGA-CZ-5456-11A': 0.378596630373605,
 'TCGA-CZ-5457-01A': 0.72942470578732,
 'TCGA-CZ-5457-11A': 0.271907817017886,
 'TCGA-CZ-5458-01A': 0.552096331396446,
 'TCGA-CZ-5458-11A': 0.40719086930854,
 'TCGA-CZ-5459-01A': 0.492438542518539,
 'TCGA-CZ-5459-11A': 0.402893906444899,
 'TCGA-CZ-5460-01A': 0.662893585420463,
 'TCGA-CZ-5460-11A': 0.565769070301987,
 'TCGA-CZ-5461-01A': 0.806245436468521,
 'TCGA-CZ-5461-11A': 0.506024467387406,
 'TCGA-CZ-5462-01A': 0.817244537373636,
 'TCGA-CZ-5462-11A': 0.3328390030665,
 'TCGA-CZ-5463-01A': 0.645130282240038,
 'TCGA-CZ-5463-11A': 0.357580297534917,
 'TCGA-CZ-5464-01A': 0.383119897258251,
 'TCGA-CZ-5464-11A': 0.417096268341179,
 'TCGA-CZ-5465-01A': 0.787806222730685,
 'TCGA-CZ-5465-11A': 0.299868685561265,
 'TCGA-CZ-5466-01A': 0.480279859002063,
 'TCGA-CZ-5466-11A': 0.621373528278737,
 'TCGA-CZ-5467-01A': 0.515102184406232,
 'TCGA-CZ-5467-11A': 0.404974232422824,
 'TCGA-CZ-5468-01A': 0.344640810759208,
 'TCGA-CZ-5468-11A': 0.373639811934184,
 'TCGA-CZ-5469-01A': 0.292792011753067,
 'TCGA-CZ-5469-11A': 0.295707799748936,
 'TCGA-CZ-5470-01A': 0.691029521746532,
 'TCGA-CZ-5470-11A': 0.430982162816474,
 'TCGA-CZ-5982-01A': 0.752715081264483,
 'TCGA-CZ-5984-01A': 0.716020004408966,
 'TCGA-CZ-5985-01A': 0.749895548222396,
 'TCGA-CZ-5986-01A': 0.477054451772978,
 'TCGA-CZ-5987-01A': 0.506774522252512,
 'TCGA-CZ-5988-01A': 0.763365276222767,
 'TCGA-CZ-5989-01A': 0.580063584183683,
 'TCGA-DV-5565-01A': 0.743229714801937,
 'TCGA-DV-5566-01A': 0.472819446029683,
 'TCGA-DV-5567-01A': 0.80774672206204,
 'TCGA-DV-5568-01A': 0.492355027729216,
 'TCGA-DV-5569-01A': 0.554206474536619,
 'TCGA-DV-5573-01A': 0.479129993301344,
 'TCGA-DV-5574-01A': 0.5454564270562,
 'TCGA-DV-5575-01A': 0.300840312658773,
 'TCGA-DV-5576-01A': 0.396094908623387,
 'TCGA-DV-A4VX-01A': 0.661176523229657,
 'TCGA-DV-A4VZ-01A': 0.424401496828784,
 'TCGA-DV-A4W0-01A': 0.130790352908783,
 'TCGA-DV-A4W0-05A': 0.658764080632588,
 'TCGA-DW-5560-01A': 0.545234936156874,
 'TCGA-DW-5561-01A': 0.701232476980455,
 'TCGA-DW-7834-01A': 0.461061746018902,
 'TCGA-DW-7836-01A': 0.139761867298173,
 'TCGA-DW-7837-01A': 0.376769267651157,
 'TCGA-DW-7838-01A': 0.428999286463751,
 'TCGA-DW-7839-01A': 0.749952976994014,
 'TCGA-DW-7840-01A': 0.860678110896234,
 'TCGA-DW-7841-01A': 0.22950260196733,
 'TCGA-DW-7842-01A': 0.565253780369983,
 'TCGA-DW-7963-01B': 0.355486347483485,
 'TCGA-DZ-6131-01A': 0.697061673189591,
 'TCGA-DZ-6131-11A': 0.477231463838637,
 'TCGA-DZ-6132-01A': 0.405476522027334,
 'TCGA-DZ-6132-11A': 0.491009182333024,
 'TCGA-DZ-6133-01A': 0.566638940237294,
 'TCGA-DZ-6133-11A': 0.471048532832285,
 'TCGA-DZ-6134-01A': 0.57700165991119,
 'TCGA-DZ-6134-11A': 0.520218821332691,
 'TCGA-DZ-6135-01A': 0.166826796117889,
 'TCGA-DZ-6135-11A': 0.582631271405091,
 'TCGA-EU-5904-01A': 0.741672527904383,
 'TCGA-EU-5905-01A': 0.521461094721148,
 'TCGA-EU-5906-01A': 0.678523661229629,
 'TCGA-EU-5907-01A': 0.848592482797222,
 'TCGA-EV-5901-01A': 0.494509553900211,
 'TCGA-EV-5902-01A': 0.675329855147289,
 'TCGA-EV-5903-01A': 0.570757259835872,
 'TCGA-F9-A4JJ-01A': 0.841137471440885,
 'TCGA-F9-A7Q0-01A': 0.124855324694813,
 'TCGA-F9-A7VF-01A': 0.180143654390322,
 'TCGA-F9-A8NY-01A': 0.893383896438037,
 'TCGA-F9-A97G-01A': 0.711437777549736,
 'TCGA-G6-A5PC-01A': 0.686420235667118,
 'TCGA-G6-A8L6-01A': 0.777440702241368,
 'TCGA-G6-A8L7-01A': 0.322348469931076,
 'TCGA-G6-A8L8-01A': 0.765131060851177,
 'TCGA-G7-6789-01A': 0.584660822042182,
 'TCGA-G7-6790-01A': 0.152802914991029,
 'TCGA-G7-6792-01A': 0.327206338827434,
 'TCGA-G7-6793-01A': 0.259453199217205,
 'TCGA-G7-6795-01A': 0.280243611549103,
 'TCGA-G7-6796-01A': 0.502569365309792,
 'TCGA-G7-6797-01A': 0.545020553235576,
 'TCGA-G7-7501-01A': 0.595182584292499,
 'TCGA-G7-7502-01A': 0.459607740592352,
 'TCGA-G7-A4TM-01A': 0.175259587439316,
 'TCGA-G7-A8LB-01A': 0.259248755985059,
 'TCGA-G7-A8LC-01A': 0.555307265340165,
 'TCGA-G7-A8LD-01A': 0.223198243874014,
 'TCGA-G7-A8LE-01A': 0.639609667198732,
 'TCGA-GK-A6C7-01A': 0.399005680602885,
 'TCGA-GL-6846-01A': 0.508008812756347,
 'TCGA-GL-6846-11A': 0.444484549318499,
 'TCGA-GL-7773-01A': 0.132667335709253,
 'TCGA-GL-7966-01A': 0.783256453801296,
 'TCGA-GL-8500-01A': 0.718434345063133,
 'TCGA-GL-A4EM-01A': 0.550036963252564,
 'TCGA-GL-A59R-01A': 0.249018829219539,
 'TCGA-GL-A59T-01A': 0.471791700056051,
 'TCGA-GL-A9DC-01A': 0.622832687116004,
 'TCGA-GL-A9DD-01A': 0.33760710715732,
 'TCGA-GL-A9DE-01A': 0.149805672175538,
 'TCGA-HE-7128-01A': 0.562179856077386,
 'TCGA-HE-7129-01A': 0.606000572271,
 'TCGA-HE-7130-01A': 0.0891240057378008,
 'TCGA-HE-A5NF-01A': 0.395657972794237,
 'TCGA-HE-A5NH-01A': 0.605510296100774,
 'TCGA-HE-A5NI-01A': 0.129166640422305,
 'TCGA-HE-A5NJ-01A': 0.800791176872394,
 'TCGA-HE-A5NK-01A': 0.65393796237111,
 'TCGA-HE-A5NL-01A': 0.382878425593167,
 'TCGA-IA-A40U-01A': 0.569947564388281,
 'TCGA-IA-A40X-01A': 0.475312225589095,
 'TCGA-IA-A40Y-01A': 0.831218735807873,
 'TCGA-IA-A83S-01A': 0.439652525202758,
 'TCGA-IA-A83T-01A': 0.599104160581583,
 'TCGA-IA-A83V-01A': 0.420705729911249,
 'TCGA-IA-A83W-01A': 0.215440226031323,
 'TCGA-IZ-8195-01A': 0.865842169056583,
 'TCGA-IZ-8196-01A': 0.549741475002616,
 'TCGA-IZ-A6M8-01A': 0.25271617160718,
 'TCGA-IZ-A6M9-01A': 0.434438793380535,
 'TCGA-J7-6720-01A': 0.309912563623243,
 'TCGA-J7-8537-01A': 0.494306064475885,
 'TCGA-J7-A8I2-01A': 0.0719731112756232,
 'TCGA-KL-8323-01A': 0.0545366588040571,
 'TCGA-KL-8324-01A': 0.635089205078394,
 'TCGA-KL-8325-01A': 0.0581085373332275,
 'TCGA-KL-8326-01A': 0.449216287451325,
 'TCGA-KL-8327-01A': 0.0581576554660675,
 'TCGA-KL-8328-01A': 0.493482981841145,
 'TCGA-KL-8329-01A': 0.174018608936061,
 'TCGA-KL-8330-01A': 0.57722306359515,
 'TCGA-KL-8331-01A': 0.130370939461255,
 'TCGA-KL-8332-01A': 0.346306766468467,
 'TCGA-KL-8333-01A': 0.489870254682944,
 'TCGA-KL-8334-01A': 0.0788774956955756,
 'TCGA-KL-8335-01A': 0.903571055999083,
 'TCGA-KL-8336-01A': 0.83186971915355,
 'TCGA-KL-8337-01A': 0.497783408175333,
 'TCGA-KL-8338-01A': 0.547517752966926,
 'TCGA-KL-8339-01A': 0.73161502248135,
 'TCGA-KL-8340-01A': 0.787009956661646,
 'TCGA-KL-8341-01A': 0.517926024552244,
 'TCGA-KL-8342-01A': 0.0802441506896805,
 'TCGA-KL-8343-01A': 0.551682673117622,
 'TCGA-KL-8344-01A': 0.562958053887764,
 'TCGA-KL-8345-01A': 0.20209122258019,
 'TCGA-KL-8346-01A': 0.0750517489141825,
 'TCGA-KM-8438-01A': 0.877251033159479,
 'TCGA-KM-8439-01A': 0.885275058217331,
 'TCGA-KM-8440-01A': 0.510676761354819,
 'TCGA-KM-8441-01A': 0.595199753384359,
 'TCGA-KM-8442-01A': 0.203595949676229,
 'TCGA-KM-8443-01A': 0.0469182827420437,
 'TCGA-KM-8476-01A': 0.122291824715389,
 'TCGA-KM-8477-01A': 0.130639411556753,
 'TCGA-KM-8639-01A': 0.782313520995588,
 'TCGA-KN-8418-01A': 0.911754009413901,
 'TCGA-KN-8419-01A': 0.0757960636193063,
 'TCGA-KN-8421-01A': 0.915243409217579,
 'TCGA-KN-8422-01A': 0.611297095548302,
 'TCGA-KN-8423-01A': 0.122411696015503,
 'TCGA-KN-8424-01A': 0.593849772917336,
 'TCGA-KN-8425-01A': 0.161006446846293,
 'TCGA-KN-8426-01A': 0.436382344191302,
 'TCGA-KN-8427-01A': 0.259089647939905,
 'TCGA-KN-8428-01A': 0.455862669424194,
 'TCGA-KN-8429-01A': 0.440006437058149,
 'TCGA-KN-8430-01A': 0.115498932226111,
 'TCGA-KN-8431-01A': 0.453333401483895,
 'TCGA-KN-8432-01A': 0.118506737677759,
 'TCGA-KN-8433-01A': 0.772509080765302,
 'TCGA-KN-8434-01A': 0.821155877422829,
 'TCGA-KN-8435-01A': 0.468940587936739,
 'TCGA-KN-8436-01A': 0.643425935186096,
 'TCGA-KN-8437-01A': 0.818898020320686,
 'TCGA-KO-8403-01A': 0.114272135415219,
 'TCGA-KO-8404-01A': 0.299631093226721,
 'TCGA-KO-8405-01A': 0.344072433901487,
 'TCGA-KO-8406-01A': 0.0800429517817645,
 'TCGA-KO-8407-01A': 0.629472254139722,
 'TCGA-KO-8408-01A': 0.616087518978633,
 'TCGA-KO-8409-01A': 0.553049404632709,
 'TCGA-KO-8410-01A': 0.571084525127336,
 'TCGA-KO-8411-01A': 0.0643343669782179,
 'TCGA-KO-8413-01A': 0.111518527587711,
 'TCGA-KO-8414-01A': 0.830020618565435,
 'TCGA-KO-8415-01A': 0.0710619850971478,
 'TCGA-KO-8416-01A': 0.828248503739966,
 'TCGA-KO-8417-01A': 0.0680841589337828,
 'TCGA-KV-A6GD-01A': 0.242033041588891,
 'TCGA-KV-A6GE-01A': 0.279577294675176,
 'TCGA-KV-A74V-01A': 0.171779325988153,
 'TCGA-MH-A55W-01A': 0.34926441759184,
 'TCGA-MH-A55Z-01A': 0.423600000487898,
 'TCGA-MH-A560-01A': 0.877263345291152,
 'TCGA-MH-A561-01A': 0.745959049022391,
 'TCGA-MH-A562-01A': 0.211110255959455,
 'TCGA-MH-A854-01A': 0.0936361142992902,
 'TCGA-MH-A855-01A': 0.613652768557748,
 'TCGA-MH-A856-01A': 0.163210578219575,
 'TCGA-MH-A857-01A': 0.792663613988801,
 'TCGA-MM-A563-01A': 0.765674651369929,
 'TCGA-MM-A564-01A': 0.638477033104696,
 'TCGA-MM-A84U-01A': 0.635660339239491,
 'TCGA-MW-A4EC-01A': 0.583341333104074,
 'TCGA-O9-A75Z-01A': 0.812312497838463,
 'TCGA-P4-A5E6-01A': 0.604088915496894,
 'TCGA-P4-A5E7-01A': 0.608753326943067,
 'TCGA-P4-A5E8-01A': 0.478395098487952,
 'TCGA-P4-A5EA-01A': 0.343705567580045,
 'TCGA-P4-A5EB-01A': 0.369503358386031,
 'TCGA-P4-A5ED-01A': 0.368635991912998,
 'TCGA-P4-AAVK-01A': 0.702109551775435,
 'TCGA-P4-AAVL-01A': 0.76582886409803,
 'TCGA-P4-AAVM-01A': 0.311579343957395,
 'TCGA-P4-AAVO-01A': 0.76796958095923,
 'TCGA-PJ-A5Z8-01A': 0.579550745972331,
 'TCGA-PJ-A5Z9-01A': 0.331934069481273,
 'TCGA-PJ-A8JU-01A': 0.711798867387763,
 'TCGA-Q2-A5QZ-01A': 0.74268571211292,
 'TCGA-SX-A71R-01A': 0.219816764888758,
 'TCGA-SX-A71S-01A': 0.614145895332858,
 'TCGA-SX-A71U-01A': 0.380309094793006,
 'TCGA-SX-A71V-01A': 0.189714355705623,
 'TCGA-SX-A71W-01A': 0.566560178879027,
 'TCGA-SX-A7SL-01A': 0.112760074787077,
 'TCGA-SX-A7SM-01A': 0.611081980718799,
 'TCGA-SX-A7SN-01A': 0.604814469458766,
 'TCGA-SX-A7SO-01A': 0.704280732297459,
 'TCGA-SX-A7SP-01A': 0.499272945067089,
 'TCGA-SX-A7SQ-01A': 0.27560434372363,
 'TCGA-SX-A7SR-01A': 0.443154954689914,
 'TCGA-SX-A7SS-01A': 0.289455354697542,
 'TCGA-SX-A7SU-01A': 0.562751430143992,
 'TCGA-T7-A92I-01A': 0.776703394225906,
 'TCGA-UN-AAZ9-01A': 0.0985018259152259,
 'TCGA-UZ-A9PJ-01A': 0.446007421131707,
 'TCGA-UZ-A9PK-01A': 0.211136225901134,
 'TCGA-UZ-A9PL-01A': 0.0761019412866727,
 'TCGA-UZ-A9PM-01A': 0.19344757575802,
 'TCGA-UZ-A9PN-01A': 0.43633928752996,
 'TCGA-UZ-A9PO-01A': 0.823181732769592,
 'TCGA-UZ-A9PP-01A': 0.494462534041415,
 'TCGA-UZ-A9PQ-01A': 0.866589129409013,
 'TCGA-UZ-A9PR-01A': 0.403293123399204,
 'TCGA-UZ-A9PS-01A': 0.636564739304163,
 'TCGA-UZ-A9PS-05A': 0.694733799645393,
 'TCGA-UZ-A9PU-01A': 0.19556500021027,
 'TCGA-UZ-A9PV-01A': 0.1365057185543,
 'TCGA-UZ-A9PX-01A': 0.324207614117673,
 'TCGA-UZ-A9PZ-01A': 0.200317023824517,
 'TCGA-UZ-A9Q0-01A': 0.0748411333800872,
 'TCGA-UZ-A9Q1-01A': 0.770655981586349,
 'TCGA-V9-A7HT-01A': 0.400421235791175,
 'TCGA-WN-A9G9-01A': 0.713709756524377,
 'TCGA-WN-AB4C-01A': 0.447091219579516,
 'TCGA-Y8-A894-01A': 0.742301356187307,
 'TCGA-Y8-A895-01A': 0.10049731725205,
 'TCGA-Y8-A896-01A': 0.276383911326578,
 'TCGA-Y8-A897-01A': 0.374250450339407,
 'TCGA-Y8-A898-01A': 0.310842285574522,
 'TCGA-Y8-A8RY-01A': 0.552137271695761,
 'TCGA-Y8-A8RZ-01A': 0.497271965133314,
 'TCGA-Y8-A8S0-01A': 0.264547845506278,
 'TCGA-Y8-A8S1-01A': 0.581694254362101}

预期结果:

行的总数为867,其中662个预期为tumor,其余为normal

# Total number of rows; the line below is the displayed result.
len(meth_450_10k_kipan)
867
# Number of index labels whose last dash-separated component starts with
# 01-09 (the expected tumor count); the line below is the displayed result.
meth_450_10k_kipan.index.str.split('-').str[-1].str.contains(r'^0[1-9]', regex=True).sum()
662

您可以分两步完成,提取标签,然后根据其值进行过滤,以分配所需的输出:

# Build a one-column DataFrame whose index holds the sample IDs.
df = pd.Series(d).to_frame()  # assuming d to be the dict with the data

# Capture the digits that follow the last dash (e.g. "01" in "...-01A").
# The trailing non-digit suffix is matched by \D+ but not captured.
# The backslashes are required: without them the pattern looks for the
# literal letters "d" and "D", matches nothing, and astype(int) fails on NaN.
labels = df.index.str.extract(r"-(\d+)\D+$")[0].astype(int)

# Codes below 10 are labelled "tumor", all others "normal".
df["labels"] = pd.Categorical((labels < 10).map({True: "tumor", False: "normal"}))
>> df["labels"].unique()
['tumor', 'normal']
Categories (2, object): ['normal', 'tumor']
>> df["labels"]
TCGA-2K-A9WE-01A    tumor
TCGA-2Z-A9J1-01A    tumor
TCGA-2Z-A9J2-01A    tumor
TCGA-2Z-A9J3-01A    tumor
TCGA-2Z-A9J5-01A    tumor
                    ...  
TCGA-Y8-A898-01A    tumor
TCGA-Y8-A8RY-01A    tumor
TCGA-Y8-A8RZ-01A    tumor
TCGA-Y8-A8S0-01A    tumor
TCGA-Y8-A8S1-01A    tumor
Name: labels, Length: 637, dtype: category
Categories (2, object): ['normal', 'tumor']

最新更新