博文

目前显示的是 2023的博文

wsl 安装输入法

一、准备
sudo vim /etc/locale.gen
移除这行的注释: "zh_CN.UTF-8 UTF-8"
下载语言:
sudo locale-gen

二、安装
sudo apt install ibus ibus-libpinyin ibus-gtk* ibus-qt*

三、配置
im-config
select "OK"
select "Yes" to question "Do you explicitly select the user configuration?"
select "ibus"
select "OK"
select "OK"

四、启用 ibus service
dbus-launch ibus-daemon -drx
dbus-launch ibus-setup

五、重启
sudo pkill ibus-daemon
dbus-launch ibus-daemon -drx

六、自启动
To automate the service, create the file /etc/profile.d/ibus.sh with the following content:
export LC_CTYPE="zh_CN.UTF-8"
export XIM=ibus
export XIM_PROGRAM=/usr/bin/ibus
export QT_IM_MODULE=ibus
export GTK_IM_MODULE=ibus
export XMODIFIERS=@im=ibus
export DefaultIMModule=ibus
ibus-daemon -drx

七、移除 fcitx
Edit /etc/profile.d/fcitx.sh to remove fcitx-autostart and fcitx's variables.
sudo rm /etc/profile.d/fcitx*

Dask示例

 import numpy as np import pandas as pd import dask.dataframe as dd from dask.diagnostics import ProgressBar def windows_to_wsl_path(windows_path):     wsl_prefix = '/mnt/'     path = windows_path.replace(':', '').replace('\\', '/').lower()     wsl_path = wsl_prefix  + path     return wsl_path pv=dd.read_csv(windows_to_wsl_path(r"D:\bookbmstdr-main\PV.csv"),encoding='GB18030',dtype={'GRDA_CODE':str,'GRDA_XM':str,'YM_MC':str,'YM_BM':str,'JZ_ZC':int,'GLDW_MC':str, 'GLDW_BM':str}) pv = pv.map_partitions(lambda df: df.rename(columns={col: col.lower() for col in df.columns})) pv['csrq'] = pv['csrq'].map_partitions(pd.to_datetime, errors='coerce') pv['jz_sj'] = pv['jz_sj'].str.split(' ').str[0] pv['jz_sj']=pv['jz_sj'].map_partitions(pd.to_datetime, format='%Y-%m-%d',errors='coerce') pv['age']=

计算年龄且分组

 df <- read.csv(windows_to_wsl_path('C:\\Users\\xuefeng\\Desktop\\个案.csv'),fileEncoding = "GB18030") %>%    clean_names() %>%    filter(ym_mc=='麻腮风疫苗') df %>%    separate(jz_sj, into = c("jz_sj", "rest"), sep = "\\s+", extra = "drop") %>%    mutate(jz_sj=ymd(jz_sj),csrq=ymd(csrq)) %>%    mutate(age = as.numeric(difftime(jz_sj, csrq, units = "days")/365)) %>%    filter(age<10 & age>0 & jz_zc<=2) %>%    mutate(jz_grp = case_when(     year(jz_sj) == 2018 ~ "2018",     year(jz_sj) == 2019 ~ "2019",     year(jz_sj) == 2020 & month(jz_sj) <= 5 ~ "2020(1-5月)",     year(jz_sj) == 2020 & month(jz_sj) >= 6 ~ "2020(6-12月)",     year(jz_sj) == 2021 ~ "2021",     year(jz_sj) == 2022 ~ "2022",     TRUE ~ "其他")) -> test df=pd.read_csv(windows_to_wsl_path( r "C: \U sers \x uefeng\Desktop \个 案.csv

计算构成比

 gs %>%   hablar::convert(num(辖区人口总数)) %>%    group_by(服务周期) %>%   summarise(辖区人口总数 = sum(辖区人口总数,na.rm = T)) %>%   adorn_percentages('col') %>%    adorn_pct_formatting(digits = 2) %>%    adorn_ns(format_func = function(x) format(x,big.mark = " ", decimal.mark = "."))

倍速后台静音播放

 // ==UserScript== // @name         倍速后台静音播放 // @namespace    http://tampermonkey.net/ // @version      0.1 // @description  try to take over the world! // @author       You // @match        https://lzksj.zgzjzj.com/* // @icon         data:image/gif;base64,R0lGODlhAQABAAAAACH5BAEKAAEALAAAAAABAAEAAAICTAEAOw== // @grant        none // ==/UserScript== (function() {     'use strict';     const setVideoHandler = () => {     setInterval(() => {       try {         var video = document.querySelector("video");         if (!!video) {           video.muted = true;           video.playbackRate = 4;           video.play().catch((err) => {             console.log(666, err);           });         }       }       catch (err) {         console.log(666, `视频播放错误: ${err}`)       }     }, 1000)   }   //下面开始运行脚本   console.log(666, "开始执行脚本")   setVideoHandler(); })();

ubuntu 使用 Viu 在终端 Terminal 显示图片

# Install cargo from the distribution packages, then build viu with it.
sudo apt install cargo
cargo install viu

# NOTE(review): the export line is presumably meant to be added to ~/.bashrc
# in the editor opened here, so that ~/.cargo/bin is on PATH in new shells —
# confirm.
vim ~/.bashrc
export PATH="$HOME/.cargo/bin:$PATH"

# Reload the shell configuration in the current session.
source ~/.bashrc

# Display an image in the terminal.
viu myplot.png

The Plots in Rstudio do not display graphics.

# Close the currently active graphics device.
dev.off()

# Make "RStudioGD" the device that is opened for subsequent plots.
options(device = "RStudioGD")
plot(1:5, 1:5)

# Open a fresh device and draw a second plot on it.
dev.new()
plot(mtcars)

关联分析指标

arules::interestMeasure()
support: 前后项同时出现的占比
confidence: 发生前项后出现后项的概率
lift: 前项和后项一起发生的机率提升多少
count: 前后项同时出现的次数
chiSquared: 前项和后项是否独立
jaccard: 前项和后项的相似度是多少
leverage: 前项和后项是否有关联

pandas 计算年龄分组计算非重复计数和年龄小于7岁非重复计数

import pandas as pd import numpy as np import janitor sc = (     pd . read_excel ( "/mnt/c/Users/xuefeng/Downloads/非重卡删除.xlsx" , dtype = { 'SC_GLDW_BM' : 'object' , 'YM_BM' : 'object' }).clean_names() ) (     sc .query( 'ym_mc.str.startswith("新冠")' )     .astype({ 'sc_gldw_bm' : 'string' })     .assign( shi = lambda x : x .sc_gldw_bm.str[ 0 : 4 ])     .groupby( 'shi' )     .agg( count = ( 'shi' , 'count' ))     .reset_index()     .sort_values( 'shi' )     .to_excel( "/mnt/c/Users/xuefeng/Downloads/非重卡删除1.xlsx" ) ) test = (     sc .assign( xian = sc .sc_gldw_bm.str[: 6 ], shi = sc .sc_gldw_bm.str.slice( 0 , 4 ), csrq = sc .zjhm.str[ 6 : 14 ])     .query( "sc_gldw_bm.str.startswith('6211') & csrq.str.len()==8 & csrq.str.slice(0,2) in ('19','20')" )     .assign( age = lambda x :( pd . to_datetime ( x .jz_sj) - pd . to_da

python出生队列接种率

  # -*- coding: utf-8 -*- import math import pandas as pd import numpy as np import janitor shi_bm = pd . DataFrame . from_dict ({ '地区名称' : { 0 : '兰州市' ,           1 : '嘉峪关市' ,           2 : '金昌市' ,           3 : '白银市' ,           4 : '天水市' ,           5 : '武威市' ,           6 : '张掖市' ,           7 : '平凉市' ,           8 : '酒泉市' ,           9 : '庆阳市' ,           10 : '定西市' ,           11 : '陇南市' ,           12 : '临夏回族自治州' ,           13 : '甘南藏族自治州' ,           14 : '兰州新区' },           '地区编码' : { 0 : 6201 ,           1 : 6202 ,           2 : 6203 ,           3 : 6204 ,           4 : 6205 ,           5 : 6206 ,           6 : 6207 ,           7 : 6208 ,           8 : 6209 ,           9 : 6210 ,           10 : 6211 ,           11 : 6212 ,           12 : 6229 ,           13 : 6230 ,           14 : 6232 }} ). astype ({ '地区编码' : 'str' })

openai 代理

  import openai # openai.api_key = "abc" # openai.api_base = "http://172.30.48.1:8085/openai/v1" openai . proxy = {     "http" : "http://172.30.48.1:7890" ,     "https" : "http://172.30.48.1:7890" } openai . api_key = "sk-" #获取模型名称 model_list = [ item [ 'id' ] for item in openai . Model . list ()[ 'data' ]] # response = openai.ChatCompletion.create( #             model="gpt-3.5-turbo", #             messages=[ #                 {"role": "system", "content": "Describe a short paragraph about tamil."}, #             ] #         ) # for solution in response.choices: #     print(solution.message.content) def get_completion ( prompt , model = 'gpt-3.5-turbo' ):     messages = [{ 'role' : "user" , "content" : prompt }]     if model in model_list :         response = openai . ChatCompletion . create (            

pandas 使用np完成条件赋值

import pandas as pd import numpy as np import janitor df = (     pd . read_excel ( r "/mnt/c/users/xuefeng/desktop/rk.xlsx" )     .clean_names()     .astype({ 'xt_rksj' : 'datetime64[ns]' })     .query( "xt_rkjgdm.notnull()" ) ) df = (     df .assign(     gp = np . select (         [             df .sum_x_rksl_ < 10000 ,             ( df .sum_x_rksl_ >= 10000 ) & ( df .sum_x_rksl_ < 20000 ),             df .sum_x_rksl_ >= 20000 ,         ],         [ '低' , '中' , '高' ]         , default = '不详'     ) ) ) (     df     .assign( grp = np . select ([ df .xt_rksj <= '2023-01-17' , df .xt_rksj > '2023-01-17' ],                            [ '前' , '后' ], default = '不详' ))     .groupby([ 'grp' , 'sccj_mc' , 'ym_mc' ], as_index = False )     .agg( s = pd . NamedAgg ( 'sum_x_rksl_' , lambda x : np . sum ( x * df .ymgg_bm)))

pyramid_chart解决中文乱码

library(tidyverse) library(lubridate) library(showtext) library(janitor) library(openxlsx) library(magrittr) library(hablar) library(readxl) library(ggcharts) showtext_auto(enable = TRUE) df <- readxl::read_excel('/mnt/c/users/xuefe/Downloads/grda_fnl_tj.xlsx',skip = 1) %>%    pivot_longer(cols=`0岁`:`100岁`,names_to = 'age',values_to = 'count') %>%    filter(单位=='合计' & age!='0岁')  %>%    mutate(age=str_remove_all(age,'岁')) %>%    convert(num(age)) df_m <- df %>%    mutate(age_group=cut(age, breaks =seq(0,110,by=3),include.lowest = F,right=T)) %>%    group_by(age_group) %>%    summarise(sumn=sum(count,na.rm = T)) %>%    mutate(sex='男性') # right=T 左开右闭 # rihht=F 左闭右开 p_1 <- ggplot(data = df_m) +   geom_bar(aes(age_group,sumn), fill = 'skyblue', stat="identity", position="dodge")+   xlab('年龄组') +   ylab('男性人口数')+   coord_flip()  df <- readxl::read_exce

Quarto使用docx支持中文

---
title: "Test"
format: docx
editor: visual
---

pdf需要指定中文字体,使用 fc-list :lang=zh 查看已安装的字体。可以使用

---
title: "test"
format: pdf
editor: visual
documentclass: scrreprt
pdf-engine: xelatex
CJKmainfont: WenQuanYi Micro Hei
---

这种方法因为仅指定一种字体,效果不太美观。可以更换其他方式

---
title: "测试"
author: "李东风"
date: "2023-04-26"
lang: zh
format:
  html:
    toc: true
    toc-location: body
    toc-depth: 3
    number-sections: true
    html-math-method: katex
  docx:
    toc: true
    toc-depth: 3
    number-sections: true
  pdf:
    documentclass: article
    toc: true
    toc-depth: 3
    include-in-header:
      text: |
        \usepackage{ctex}
        \usepackage{amsthm,mathrsfs}
---

在 Pandas DataFrame 中增加总计行(列)

  def add_total ( df , axis = 0 ):     """     在 Pandas DataFrame 中增加总计行(列)     参数:         df (pandas.DataFrame):需要增加总计行(列)的 DataFrame。         axis (int, optional): 0 表示对列进行求和,增加总计行;1 表示对行进行求和,增加总计列。默认为0。     抛出:         ValueError:如果 axis 参数不为 0 或 1,则报错。     返回:         pandas.DataFrame:增加了总计行(列)的新 DataFrame。     """     df = df .copy()     if axis == 0 :         df .loc[ 'Total' ] = df .sum( axis = 0 , numeric_only = True )     elif axis == 1 :         df [ 'Total' ] = df .sum( axis = 1 , numeric_only = True )     else :         raise ValueError ( 'axis parameter should be 0 or 1' )     return df # 0 增加1行对列进行求和,增加1 列对行进行求和 df = (     df . pipe ( add_total , axis = 1 ) )

pandas 合并列拆分列

  import pandas as pd import re df = (       pd . read_excel ( "/mnt/c/users/xuefe/Downloads/就诊统计.xlsx" , sheet_name = 3 )       . drop ([ 'Unnamed: 0' , 'Unnamed: 4' ], axis = 1 )       ) df [[ 'A' , 'B' , 'C' ]] = df [ '分类诊断名称' ]. str . split ( pat = '[;| |/|;|,|,|!|、|-|\.|?]' ,                                             n = 2 , expand = True ) # n=1 表示分割成2列,n=2 表示分割成3列。 # 当expand=True时,str.split()方法返回DataFrame,并将结果自动拆分为若干列作为新列。 # 当expand=False时,str.split()方法返回一个Series对象,每个元素是一个列表,其中的内容取决于拆分的结果。 df [[ 'A' , 'B' , 'C' ]] = df [ '分类诊断名称' ]. str . split ( pat = '[;| |/|;|,|,|!|、|-|\.|?]' ,                                             n = 2 , expand = True ) df = ( pd . read_excel ( "/mnt/c/users/xuefe/Downloads/就诊统计.xlsx" , sheet_name = 3 ) . drop ([ 'Unnamed: 0' , 'Unnamed: 4' ], axis = 1 ) . assign ( A = lambda x : x [ '分类诊断名称' ].str.sp

xx-net chatgpt 使用

import pandas as pd
import numpy as np
import openai
import re
import time

# Setup notes for reaching the proxy on the host machine:
#   - look up the host IP:                 cat /etc/resolv.conf
#   - allow remote access in the proxy configuration
#   - check that the port is reachable:    tcping 172.20.80.1 8085
#   - add an inbound firewall rule for port 8085

# Read the resolver configuration; the host IP is taken from it below.
with open('/etc/resolv.conf') as file:
    resolv_conf_data = file.read()

# Dotted-quad IPv4 pattern. The raw-string prefix must touch the quote and
# the pattern must not contain stray spaces, otherwise it cannot match.
ip_regex = r'\b(?:\d{1,3}\.){3}\d{1,3}\b'

# The first address found anywhere in the file is used.
# NOTE(review): this assumes the first match is the host's address — confirm
# for resolv.conf files that list several addresses.
ip_match = re.search(ip_regex, resolv_conf_data)
if ip_match is None:
    # Fail with a clear message instead of an AttributeError on None.
    raise RuntimeError('no IPv4 address found in /etc/resolv.conf')
host_ip = ip_match.group(0)

# API key and endpoint used for the requests.
openai.api_key = "abc"
# openai.api_base = "http://172.20.80.1:8085/openai/v1"
openai.api_base = f"http://{host_ip}:8085/openai/v1"

completion = openai.ChatCompletion.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "感冒最可能的ICD10的诊断编码"}])
print(completion.choices[0].message.content)