import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# One-dimensional Series; the missing entry is supplied as np.nan.
s = pd.Series(data=[1, 3, 5, np.nan, 6, 8])
s
0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64
# Time series index: six consecutive periods starting at 2013-01-01.
dates = pd.date_range(start='20130101', periods=6)
dates
DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04', '2013-01-05', '2013-01-06'], dtype='datetime64[ns]', freq='D')
# Two-dimensional data frame: a 6x4 block of random values, indexed by
# `dates`, with columns labelled A through D.
df = pd.DataFrame(
    data=np.random.randn(6, 4),
    index=dates,
    columns=['A', 'B', 'C', 'D'],
)
df
A | B | C | D | |
---|---|---|---|---|
2013-01-01 | 0.642333 | -0.772970 | -0.823404 | 0.495468 |
2013-01-02 | 0.185380 | -0.350632 | 0.220256 | -0.082338 |
2013-01-03 | -0.437222 | -0.183861 | -0.901640 | 0.518012 |
2013-01-04 | 0.108046 | -1.118019 | 1.342771 | -0.902833 |
2013-01-05 | 2.197607 | 1.352511 | 1.631912 | 0.098881 |
2013-01-06 | 0.421616 | 1.796068 | -0.811890 | 1.328256 |
# Build a frame from a dict of columns. The scalar entries (A, B, F) are
# repeated on every row alongside the four-element columns (C, D, E).
df2 = pd.DataFrame(
    {
        'A': 1.,
        'B': pd.Timestamp('20130102'),
        'C': pd.Series(1, index=list(range(4)), dtype='float32'),
        'D': np.full(4, 3, dtype='int32'),
        'E': pd.Categorical(["test", "train", "test", "train"]),
        'F': 'foo',
    }
)
df2
A | B | C | D | E | F | |
---|---|---|---|---|---|---|
0 | 1.0 | 2013-01-02 | 1.0 | 3 | test | foo |
1 | 1.0 | 2013-01-02 | 1.0 | 3 | train | foo |
2 | 1.0 | 2013-01-02 | 1.0 | 3 | test | foo |
3 | 1.0 | 2013-01-02 | 1.0 | 3 | train | foo |
df2.dtypes # dtype of each column
A           float64
B    datetime64[ns]
C           float32
D             int32
E          category
F            object
dtype: object
df2.head(2) # first two rows
A | B | C | D | E | F | |
---|---|---|---|---|---|---|
0 | 1.0 | 2013-01-02 | 1.0 | 3 | test | foo |
1 | 1.0 | 2013-01-02 | 1.0 | 3 | train | foo |
df2.tail(3) # last three rows
A | B | C | D | E | F | |
---|---|---|---|---|---|---|
1 | 1.0 | 2013-01-02 | 1.0 | 3 | train | foo |
2 | 1.0 | 2013-01-02 | 1.0 | 3 | test | foo |
3 | 1.0 | 2013-01-02 | 1.0 | 3 | train | foo |
# Row index of df (the date range it was built with).
df.index
DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04', '2013-01-05', '2013-01-06'], dtype='datetime64[ns]', freq='D')
# Row index of df2 (integer labels 0-3).
df2.index
Int64Index([0, 1, 2, 3], dtype='int64')
# The frame's data as a NumPy array, without index or column labels.
df.values
array([[ 0.64233272, -0.77297006, -0.82340364, 0.49546812], [ 0.18537978, -0.35063155, 0.22025568, -0.08233841], [-0.4372219 , -0.18386053, -0.9016397 , 0.51801185], [ 0.10804587, -1.11801911, 1.34277136, -0.90283271], [ 2.19760726, 1.35251076, 1.6319119 , 0.09888119], [ 0.42161594, 1.79606758, -0.81189005, 1.32825639]])
df.T # transpose: rows become columns and vice versa
2013-01-01 00:00:00 | 2013-01-02 00:00:00 | 2013-01-03 00:00:00 | 2013-01-04 00:00:00 | 2013-01-05 00:00:00 | 2013-01-06 00:00:00 | |
---|---|---|---|---|---|---|
A | 0.642333 | 0.185380 | -0.437222 | 0.108046 | 2.197607 | 0.421616 |
B | -0.772970 | -0.350632 | -0.183861 | -1.118019 | 1.352511 | 1.796068 |
C | -0.823404 | 0.220256 | -0.901640 | 1.342771 | 1.631912 | -0.811890 |
D | 0.495468 | -0.082338 | 0.518012 | -0.902833 | 0.098881 | 1.328256 |
df.sort_values(by='B') # sort rows by the values in column B (ascending)
A | B | C | D | |
---|---|---|---|---|
2013-01-04 | 0.108046 | -1.118019 | 1.342771 | -0.902833 |
2013-01-01 | 0.642333 | -0.772970 | -0.823404 | 0.495468 |
2013-01-02 | 0.185380 | -0.350632 | 0.220256 | -0.082338 |
2013-01-03 | -0.437222 | -0.183861 | -0.901640 | 0.518012 |
2013-01-05 | 2.197607 | 1.352511 | 1.631912 | 0.098881 |
2013-01-06 | 0.421616 | 1.796068 | -0.811890 | 1.328256 |
df['A'] # projection: select a single column as a Series
2013-01-01    0.642333
2013-01-02    0.185380
2013-01-03   -0.437222
2013-01-04    0.108046
2013-01-05    2.197607
2013-01-06    0.421616
Freq: D, Name: A, dtype: float64
df[0:2] # slice rows by position (first two rows)
A | B | C | D | |
---|---|---|---|---|
2013-01-01 | 0.642333 | -0.772970 | -0.823404 | 0.495468 |
2013-01-02 | 0.185380 | -0.350632 | 0.220256 | -0.082338 |
df['20130102':'20130104'] # slice rows by index label; both endpoints are included
A | B | C | D | |
---|---|---|---|---|
2013-01-02 | 0.185380 | -0.350632 | 0.220256 | -0.082338 |
2013-01-03 | -0.437222 | -0.183861 | -0.901640 | 0.518012 |
2013-01-04 | 0.108046 | -1.118019 | 1.342771 | -0.902833 |
df.loc[:,['A','B']] # all rows, only columns A and B
A | B | |
---|---|---|
2013-01-01 | 0.642333 | -0.772970 |
2013-01-02 | 0.185380 | -0.350632 |
2013-01-03 | -0.437222 | -0.183861 |
2013-01-04 | 0.108046 | -1.118019 |
2013-01-05 | 2.197607 | 1.352511 |
2013-01-06 | 0.421616 | 1.796068 |
df.loc['20130102':'20130104',['A','B']] # slice rows by label and columns by name at once
A | B | |
---|---|---|
2013-01-02 | 0.185380 | -0.350632 |
2013-01-03 | -0.437222 | -0.183861 |
2013-01-04 | 0.108046 | -1.118019 |
# Scalar lookup by row label and column name in a single .loc call.
# This replaces the chained form df.loc[row][col], which first builds an
# intermediate row Series and then indexes into it.
df.loc['20130102', 'A']
0.18537978096442437
# Scalar lookup by position in a single .iloc call: first row, first
# column (column 'A' sits at position 0 in df). This replaces the chained
# form df.iloc[0]['A'], which mixed positional and label indexing.
df.iloc[0, 0]
0.6423327151848488
# Positional slice: rows at positions 3-4 and columns at positions 0-1
# (the end of each slice is excluded).
df.iloc[3:5,0:2]
A | B | |
---|---|---|
2013-01-04 | 0.108046 | -1.118019 |
2013-01-05 | 2.197607 | 1.352511 |
# Positional selection with explicit lists: rows 1, 2, 4 and columns 0, 2.
df.iloc[[1,2,4],[0,2]]
A | C | |
---|---|---|
2013-01-02 | 0.185380 | 0.220256 |
2013-01-03 | -0.437222 | -0.901640 |
2013-01-05 | 2.197607 | 1.631912 |
df[df['A'] > 0] # filter by condition: keep rows where column A is positive
A | B | C | D | |
---|---|---|---|---|
2013-01-01 | 0.642333 | -0.772970 | -0.823404 | 0.495468 |
2013-01-02 | 0.185380 | -0.350632 | 0.220256 | -0.082338 |
2013-01-04 | 0.108046 | -1.118019 | 1.342771 | -0.902833 |
2013-01-05 | 2.197607 | 1.352511 | 1.631912 | 0.098881 |
2013-01-06 | 0.421616 | 1.796068 | -0.811890 | 1.328256 |
# Aggregation: group rows by the value in column A and sum the other columns.
# Every A value here is distinct, so each group holds exactly one row.
df.groupby('A').sum() # aggregate
B | C | D | |
---|---|---|---|
A | |||
-0.437222 | -0.183861 | -0.901640 | 0.518012 |
0.108046 | -1.118019 | 1.342771 | -0.902833 |
0.185380 | -0.350632 | 0.220256 | -0.082338 |
0.421616 | 1.796068 | -0.811890 | 1.328256 |
0.642333 | -0.772970 | -0.823404 | 0.495468 |
2.197607 | 1.352511 | 1.631912 | 0.098881 |
# Group by the (A, B) pair and sum the remaining columns C and D.
df.groupby(['A','B']).sum()
C | D | ||
---|---|---|---|
A | B | ||
-0.437222 | -0.183861 | -0.901640 | 0.518012 |
0.108046 | -1.118019 | 1.342771 | -0.902833 |
0.185380 | -0.350632 | 0.220256 | -0.082338 |
0.421616 | 1.796068 | -0.811890 | 1.328256 |
0.642333 | -0.772970 | -0.823404 | 0.495468 |
2.197607 | 1.352511 | 1.631912 | 0.098881 |
# Load news.csv (path relative to the current working directory) into a
# DataFrame and display it.
news = pd.read_csv("news.csv")
news
ID | score | title | url | |
---|---|---|---|---|
0 | 1 | 1 | Reverse engineering YouTube demonetization al... | https://docs.google.com/document/d/155yNpfR7dG... |
1 | 2 | 9 | Joplin A note-taking and to-do app with build... | https://github.com/laurent22/joplin/ |
2 | 3 | 4 | Coinbase Ordered to Turn Over Identities of 1... | https://motherboard.vice.com/en_us/article/ywn... |
3 | 4 | 8 | Australian uses snack bags as Faraday cage to... | https://arstechnica.com/information-technology... |
4 | 5 | 3 | A blog I started on Neural Networks and Proba... | https://jontysinai.github.io |
5 | 6 | 2 | It Looks Like Nobel Economics Laureates Don't... | https://www.bloomberg.com/news/articles/2017-1... |
6 | 7 | 7 | Seventh RISC-V Workshop: Day Two - LowRISC | http://www.lowrisc.org/blog/2017/11/seventh-ri... |
7 | 8 | 6 | China's Art Factories: Van Gogh from the Swea... | http://www.spiegel.de/international/0,1518,433... |
8 | 9 | 5 | Judge Tells Uber Lawyer: 'It Looks Like You C... | https://www.nytimes.com/2017/11/29/business/wa... |
9 | 10 | 0 | As a solo developer, I decided to offer phone... | http://plumshell.com/2017/11/30/as-a-solo-app-... |
10 | 11 | 1 | Why Did ProtonMail Vanish from Google Search ... | http://techcrunch.com/2016/10/27/why-did-proto... |
11 | 12 | 6 | Weekly Machine Learning Toolset and Library R... | https://blog.pocketcluster.io/2017/11/30/weekl... |
12 | 13 | 8 | 50 Years Ago Jocelyn Bell Discovered Pulsars | https://www.space.com/38912-pulsar-discovery-b... |
13 | 14 | 6 | Advent of Code 2017 | https://adventofcode.com/2017 |
14 | 15 | 9 | SafeButler (YC S17) is hiring employee #2 to ... | item?id=15815913 |
15 | 16 | 4 | BTC addresses whose private keys are from Sha... | https://twitter.com/4Dgifts/status/93622348798... |
16 | 17 | 5 | Review and Teardown of a Cheap GPS Jammer | http://phasenoise.livejournal.com/2017/11/3185... |
17 | 18 | 4 | Coinbase Obtains Partial Victory Over IRS | https://blog.coinbase.com/coinbase-obtains-par... |
18 | 19 | 2 | How to Profit from Bitcoin Bubble | http://www.danielwilczynski.com/2017/11/29/bit... |
19 | 20 | 7 | Linux Vendor Firmware Service | https://fwupd.org/ |
20 | 21 | 8 | Building mindshare in a company | http://www.writethedocs.org/guide/writing/mind... |
21 | 22 | 2 | How Cubism Protected Warships in WorldWar I | https://www.wired.com/story/dazzle-camouflage-... |
22 | 23 | 3 | The Model Book of Calligraphy (1561-1596) | http://publicdomainreview.org/collections/the-... |
23 | 24 | 4 | How do you move out of a smarthome? | https://shkspr.mobi/blog/2017/11/how-do-you-mo... |
24 | 25 | 7 | Software Giant Autodesk to Axe 13% of Global ... | http://www.animationmagazine.net/people/softwa... |
25 | 26 | 8 | AWS EC2 Virtualization 2017: Including Nitro | http://www.brendangregg.com/blog/2017-11-29/aw... |
26 | 27 | 6 | Google faces UK legal action for bypassing iP... | http://www.bbc.co.uk/news/technology-42166089 |
27 | 28 | 3 | OptionPlan, an app for founders looking to de... | https://www.indexventures.com/optionplan |
28 | 29 | 9 | The Peculiarly Quiet Decline and Fall of the KVM | http://www.loper-os.org/?p=1927 |
29 | 30 | 1 | Nvidia has confirmed a driver bug resulting i... | https://www.gamingonlinux.com/articles/nvidia-... |