pandas 操作

# -*- coding: utf-8 -*-
import pandas as pd
data = {'Country': ['Belgium', 'India', 'Brazil'],
           'Capital': ['Brussels', 'New Delhi', 'Brasília'],
           'Population': [11190846, 1303171035, 207847528]}
df = pd.DataFrame(data, columns=['Country', 'Capital', 'Population'])

s = pd.Series([3, -5, 7, 4], index=['a', 'b', 'c', 'd'])

df['Country']

#By Position
# Select single value by row & column
df.iloc[0,0]
df.iat[0,0]

#By Label
#Select single value by row &  column labels
df.loc[0,'Country']
df.at[0, 'Country']

# By Label/Position
# Select single row of subset of rows
df.ix[2]
#Select a single column of  subset of columns
df.ix[:,'Capital']
# Select rows and columns
df.ix[1,'Capital']

#Boolean Indexing
# Series s   where value is not >1
s[~(s > 1)]
#s where value is <-1 or >2
s[(s < -1) | (s > 2)]
# Use filter to adjust DataFrame
df[df['Population']>1200000000]

#Dropping
#Drop values from rows (axis=0)
s.drop(['a', 'c'])
#Drop values from columns(axis=1)
df.drop('Country', axis=1)

#Sort & Rank
# Sort by row or column index
df.sort_index(by='Country')
# Sort a series by its values
s.order()
#Assign ranks to entries
df.rank()

#Retrieving Series/DataFrame Information
#(rows,columns)
df.shape
# Describe index
df.index
# Describe DataFrame columns
df.columns
#Info on DataFrame
df.info()
# Number of non-NA values
 df.count()

# Sum of values
df.sum()
# Cummulative sum of values
df.cumsum()
# Minimum/maximum values
df.min()/df.max()
#Minimum/Maximum index value
df.idxmax
df.idxmin
#Summary statistics
df.describe()
# Mean of values
df.mean()
#Median of values
df.median()

#Applying Functions
f = lambda x: x*2
df.apply(f)
df.applymap(f)

#Internal Data Alignment
s3 = pd.Series([7, -2, 3], index=['a', 'c', 'd'])
s + s3

#Arithmetic Operations with Fill Methods
s.add(s3, fill_value=0)
s.sub(s3, fill_value=2)
s.div(s3, fill_value=4)
s.mul(s3, fill_value=3)

#Read and Write to CSV
pd.read_csv('file.csv', header=None, nrows=5)
pd.to_csv('myDataFrame.csv')

#Read and Write to Excel
pd.read_excel('file.xlsx')
pd.to_excel('dir/myDataFrame.xlsx', sheet_name='Sheet1')
##Read multiple sheets from the same file
xlsx = pd.ExcelFile('file.xls')
df = pd.read_excel(xlsx, 'Sheet1')

#Read and Write to SQL Query or Database Table
from sqlalchemy import create_engine
engine = create_engine('sqlite:///:memory:')
pd.read_sql("SELECT * FROM my_table;", engine)
pd.read_sql_table('my_table', engine)
pd.read_sql_query("SELECT * FROM my_table;", engine)
pd.to_sql('myDf', engine)

评论

此博客中的热门博文

V2ray websocket(ws)+tls+nginx分流

Rstudio 使用代理