博文

WSL 下使用 doc2docx 将 doc 转换为 docx 并进行文本替换

  import os import subprocess import tempfile import shutil from docx import Document from doc2docx import convert # 定义替换对照表 REPLACEMENT_DICT = {     '陕西省' : '甘肃省' ,     '西安' : '兰州' ,     '咸阳' : '天水' ,     'SXCDCSOP' : 'GSCDCSOP' , } def convert_doc_to_docx ( doc_path ):     """使用 Windows PowerShell 调用 Word 来转换文件,并在转换成功后删除原doc文件"""     try :         # 将 Linux 路径转换为 Windows 路径         win_path = subprocess.check_output([ 'wslpath' , '-w' , doc_path]).decode().strip()         docx_path = os.path.splitext(doc_path)[ 0 ] + ".docx"         win_docx_path = subprocess.check_output([ 'wslpath' , '-w' , docx_path]).decode().strip()                 # 检查目标文件是否已存在         if os.path.exists(docx_path):             print ( f "目标文件已存在: { docx_path } " ) ...

polars 长转宽

import polars as pl # pl.show_versions() yg = pl . read_excel ( "/mnt/c/Users/xuefliang/Downloads/乙肝.xlsx" ). with_columns (     pl . col ( "有效证件号" ). str . strip_chars ( "'" ),     pl . col ( '卡片ID' ). str . strip_chars ( "'" ) )   jz = (     jz . sort ( 'jz_sj' )     . with_columns (         pl . col ( 'grda_et_lsh' )         . cum_count ()         . over ( 'grda_et_lsh' )         . alias ( 'jc' )     ) ) #pivot 长转宽 jz = (     pl . read_database_uri ( query = query , uri = uri )     . rename ( lambda col : col . lower ())     . with_columns ( pl . col ( "jz_zc" ). cast ( pl . Int32 ))     . pivot ( index = "zjhm" , on = "jz_zc" , values = "jz_sj" , aggregate_function = "first" ) ) (     yg . select (         pl . concat_str ([ pl . lit ( "'" ), pl . col ( '有效证件号' ), pl . lit ...

detect_encoding

  import chardet def detect_encoding ( filename : str ) -> str :     with open (filename, 'rb' ) as f:         raw_data = f.read()         result = chardet.detect(raw_data)         return result[ 'encoding' ] detect_encoding( 'data/directors.csv' ) directors = pl.read_csv( "data/directors.csv" , encoding = "EUC-JP" )

生成一列条形码,并将条形码图片插入 Excel

  import os from pystrich.code128 import Code128Encoder import polars as pl from PIL import Image import io import openpyxl from openpyxl.utils import get_column_letter def generate_barcode ( value , output_dir ):     """生成条形码图片并返回图片对象"""     if not os.path.exists(output_dir):         os.makedirs(output_dir)         filename = f " { output_dir } / { value } .png"         encoder = Code128Encoder( str (value))     encoder.save(filename)         img = Image.open(filename)     img = img.resize(( 200 , 75 ))         img_byte_arr = io.BytesIO()     img.save(img_byte_arr, format = 'PNG' )     img_byte_arr = img_byte_arr.getvalue()         os.remove(filename)         return img_byte_arr def create_excel_with_barcodes ( df , barcode_values , output_dir , output_file ): ...

polars管道

  def lowercase ( df : pl.DataFrame) -> pl.DataFrame:     """将 DataFrame 所有列名转换为小写"""     return df.rename({col: col.lower() for col in df.columns})     person = (         pl.read_csv(             "/mnt/c/Users/Administrator/Downloads/标准库接种率+v1.0.9-2024-12-27/标准库数据/person_standard.csv" ,         )         .pipe(lowercase)         .with_columns(             pl.col( "birth_weight" ).replace( "" , None )         )         .cast({             "id" : pl.String,             "birth_date" : pl.String,             "hepatitis_mothers" : pl.String,             "current_management_code" : pl.String,             "birth_weight" : pl.F...

免疫史计算

  vaccine_history = (     person.select( 'id_x' ).unique()     .join(         vaccine_tbl         .filter(pl.col( '大类编码' ).cast(pl.UInt32) < 20 )         .select( 'vaccine_name' )         .unique()         .filter(pl.col( 'vaccine_name' ).is_not_null()),         how = 'cross'     )     .join(         person         .group_by([ 'id_x' , 'vaccine_name' ])         .agg(pl.col( 'id_x' ).len().alias( '剂次数' )),         on = [ 'id_x' , 'vaccine_name' ],         how = 'left'     )     .with_columns(pl.col( '剂次数' ).fill_null( 0 ))     .sort([ 'id_x' , 'vaccine_name' ]) )

R 字段加密解密

  library ( tidyverse ) library ( janitor ) library ( hablar ) library ( openssl ) library ( magrittr ) library ( stringi ) # 加密函数 encrypt_column <- function ( data , column_names , key ) {   for ( column_name in column_names ) {     data [[ column_name ]] <- sapply ( data [[ column_name ]] , function ( x ) {       if ( is.na ( x ) || x == "" ) {         return ( NA )       }       tryCatch ({         x_utf8 <- stri_encode ( as.character ( x ) , "" , "UTF-8" )         iv <- rand_bytes ( 16 )         encrypted <- aes_cbc_encrypt ( charToRaw ( x_utf8 ) , key = key , iv = iv )         paste ( openssl :: base64_encode ( iv ) , openssl :: base64_encode ( encrypted ) , sep = ":" )       } , error = function ( e ) {         warning ( paste ( "Error ...