import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# One-dimensional Series; the missing value (NaN) makes the dtype float64.
s = pd.Series(
    data=[1, 3, 5, np.nan, 6, 8],
)
s
0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64
# Six consecutive calendar days starting at 2013-01-01 (a time series index).
dates = pd.date_range(start='20130101', periods=6)
dates
DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
'2013-01-05', '2013-01-06'],
dtype='datetime64[ns]', freq='D')
# Two-dimensional DataFrame: 6 rows labelled by `dates`, 4 columns named
# A-D, filled with random draws from np.random.randn.
df = pd.DataFrame(
    data=np.random.randn(6, 4),
    index=dates,
    columns=['A', 'B', 'C', 'D'],
)
df
| | A | B | C | D |
|---|---|---|---|---|
| 2013-01-01 | 0.642333 | -0.772970 | -0.823404 | 0.495468 |
| 2013-01-02 | 0.185380 | -0.350632 | 0.220256 | -0.082338 |
| 2013-01-03 | -0.437222 | -0.183861 | -0.901640 | 0.518012 |
| 2013-01-04 | 0.108046 | -1.118019 | 1.342771 | -0.902833 |
| 2013-01-05 | 2.197607 | 1.352511 | 1.631912 | 0.098881 |
| 2013-01-06 | 0.421616 | 1.796068 | -0.811890 | 1.328256 |
# DataFrame built from a dict of columns. The scalar entries ('A', 'B', 'F')
# are repeated for every row, and each column keeps its own dtype.
df2 = pd.DataFrame(
    {
        'A': 1.,
        'B': pd.Timestamp('20130102'),
        'C': pd.Series(1, index=list(range(4)), dtype='float32'),
        'D': np.array([3] * 4, dtype='int32'),
        'E': pd.Categorical(["test", "train", "test", "train"]),
        'F': 'foo',
    }
)
df2
| | A | B | C | D | E | F |
|---|---|---|---|---|---|---|
| 0 | 1.0 | 2013-01-02 | 1.0 | 3 | test | foo |
| 1 | 1.0 | 2013-01-02 | 1.0 | 3 | train | foo |
| 2 | 1.0 | 2013-01-02 | 1.0 | 3 | test | foo |
| 3 | 1.0 | 2013-01-02 | 1.0 | 3 | train | foo |
df2.dtypes # dtype of each column
A           float64
B    datetime64[ns]
C           float32
D             int32
E          category
F            object
dtype: object
df2.head(2) # first two rows
| | A | B | C | D | E | F |
|---|---|---|---|---|---|---|
| 0 | 1.0 | 2013-01-02 | 1.0 | 3 | test | foo |
| 1 | 1.0 | 2013-01-02 | 1.0 | 3 | train | foo |
df2.tail(3) # last three rows
| | A | B | C | D | E | F |
|---|---|---|---|---|---|---|
| 1 | 1.0 | 2013-01-02 | 1.0 | 3 | train | foo |
| 2 | 1.0 | 2013-01-02 | 1.0 | 3 | test | foo |
| 3 | 1.0 | 2013-01-02 | 1.0 | 3 | train | foo |
df.index # row labels of df (the DatetimeIndex built from `dates`)
DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
'2013-01-05', '2013-01-06'],
dtype='datetime64[ns]', freq='D')
df2.index # row labels of df2 (the integers 0-3)
Int64Index([0, 1, 2, 3], dtype='int64')
df.values # the frame's data as a two-dimensional NumPy array
array([[ 0.64233272, -0.77297006, -0.82340364, 0.49546812],
[ 0.18537978, -0.35063155, 0.22025568, -0.08233841],
[-0.4372219 , -0.18386053, -0.9016397 , 0.51801185],
[ 0.10804587, -1.11801911, 1.34277136, -0.90283271],
[ 2.19760726, 1.35251076, 1.6319119 , 0.09888119],
[ 0.42161594, 1.79606758, -0.81189005, 1.32825639]])
df.T # transpose: rows become columns and vice versa
| | 2013-01-01 00:00:00 | 2013-01-02 00:00:00 | 2013-01-03 00:00:00 | 2013-01-04 00:00:00 | 2013-01-05 00:00:00 | 2013-01-06 00:00:00 |
|---|---|---|---|---|---|---|
| A | 0.642333 | 0.185380 | -0.437222 | 0.108046 | 2.197607 | 0.421616 |
| B | -0.772970 | -0.350632 | -0.183861 | -1.118019 | 1.352511 | 1.796068 |
| C | -0.823404 | 0.220256 | -0.901640 | 1.342771 | 1.631912 | -0.811890 |
| D | 0.495468 | -0.082338 | 0.518012 | -0.902833 | 0.098881 | 1.328256 |
df.sort_values(by='B') # sort rows by the values of one column (ascending)
| | A | B | C | D |
|---|---|---|---|---|
| 2013-01-04 | 0.108046 | -1.118019 | 1.342771 | -0.902833 |
| 2013-01-01 | 0.642333 | -0.772970 | -0.823404 | 0.495468 |
| 2013-01-02 | 0.185380 | -0.350632 | 0.220256 | -0.082338 |
| 2013-01-03 | -0.437222 | -0.183861 | -0.901640 | 0.518012 |
| 2013-01-05 | 2.197607 | 1.352511 | 1.631912 | 0.098881 |
| 2013-01-06 | 0.421616 | 1.796068 | -0.811890 | 1.328256 |
df['A'] # projection: select a single column, returned as a Series
2013-01-01    0.642333
2013-01-02    0.185380
2013-01-03   -0.437222
2013-01-04    0.108046
2013-01-05    2.197607
2013-01-06    0.421616
Freq: D, Name: A, dtype: float64
df[0:2] # slice rows by integer position (end-exclusive)
| | A | B | C | D |
|---|---|---|---|---|
| 2013-01-01 | 0.642333 | -0.772970 | -0.823404 | 0.495468 |
| 2013-01-02 | 0.185380 | -0.350632 | 0.220256 | -0.082338 |
df['20130102':'20130104'] # slice rows by index label (both endpoints included)
| | A | B | C | D |
|---|---|---|---|---|
| 2013-01-02 | 0.185380 | -0.350632 | 0.220256 | -0.082338 |
| 2013-01-03 | -0.437222 | -0.183861 | -0.901640 | 0.518012 |
| 2013-01-04 | 0.108046 | -1.118019 | 1.342771 | -0.902833 |
df.loc[:,['A','B']] # select columns by label, keeping all rows
| | A | B |
|---|---|---|
| 2013-01-01 | 0.642333 | -0.772970 |
| 2013-01-02 | 0.185380 | -0.350632 |
| 2013-01-03 | -0.437222 | -0.183861 |
| 2013-01-04 | 0.108046 | -1.118019 |
| 2013-01-05 | 2.197607 | 1.352511 |
| 2013-01-06 | 0.421616 | 1.796068 |
df.loc['20130102':'20130104',['A','B']] # slice rows and columns by label at the same time
| | A | B |
|---|---|---|
| 2013-01-02 | 0.185380 | -0.350632 |
| 2013-01-03 | -0.437222 | -0.183861 |
| 2013-01-04 | 0.108046 | -1.118019 |
# Scalar lookup by row label and column label in a single .loc call.
# This avoids chained indexing, which first builds a whole-row Series and
# only then indexes into it.
df.loc['20130102', 'A']
0.18537978096442437
# Scalar lookup of column 'A' in the first row with a single .iloc call.
# The column label is translated to its integer position so that the lookup
# is purely positional instead of chaining .iloc with label indexing.
df.iloc[0, df.columns.get_loc('A')]
0.6423327151848488
df.iloc[3:5,0:2] # slice rows and columns by integer position (end-exclusive)
| | A | B |
|---|---|---|
| 2013-01-04 | 0.108046 | -1.118019 |
| 2013-01-05 | 2.197607 | 1.352511 |
df.iloc[[1,2,4],[0,2]] # pick specific rows and columns by integer position
| | A | C |
|---|---|---|
| 2013-01-02 | 0.185380 | 0.220256 |
| 2013-01-03 | -0.437222 | -0.901640 |
| 2013-01-05 | 2.197607 | 1.631912 |
df[df['A'] > 0] # filter rows with a boolean condition
| | A | B | C | D |
|---|---|---|---|---|
| 2013-01-01 | 0.642333 | -0.772970 | -0.823404 | 0.495468 |
| 2013-01-02 | 0.185380 | -0.350632 | 0.220256 | -0.082338 |
| 2013-01-04 | 0.108046 | -1.118019 | 1.342771 | -0.902833 |
| 2013-01-05 | 2.197607 | 1.352511 | 1.631912 | 0.098881 |
| 2013-01-06 | 0.421616 | 1.796068 | -0.811890 | 1.328256 |
# NOTE: every value in column A is distinct here, so each group holds one row.
df.groupby('A').sum() # aggregation: group rows by column A and sum the other columns
| A | B | C | D |
|---|---|---|---|
| -0.437222 | -0.183861 | -0.901640 | 0.518012 |
| 0.108046 | -1.118019 | 1.342771 | -0.902833 |
| 0.185380 | -0.350632 | 0.220256 | -0.082338 |
| 0.421616 | 1.796068 | -0.811890 | 1.328256 |
| 0.642333 | -0.772970 | -0.823404 | 0.495468 |
| 2.197607 | 1.352511 | 1.631912 | 0.098881 |
df.groupby(['A','B']).sum() # group rows by columns A and B, then sum the remaining columns
| A | B | C | D |
|---|---|---|---|
| -0.437222 | -0.183861 | -0.901640 | 0.518012 |
| 0.108046 | -1.118019 | 1.342771 | -0.902833 |
| 0.185380 | -0.350632 | 0.220256 | -0.082338 |
| 0.421616 | 1.796068 | -0.811890 | 1.328256 |
| 0.642333 | -0.772970 | -0.823404 | 0.495468 |
| 2.197607 | 1.352511 | 1.631912 | 0.098881 |
news = pd.read_csv("news.csv") # load the CSV file (relative path) into a DataFrame
news
| | ID | score | title | url |
|---|---|---|---|---|
| 0 | 1 | 1 | Reverse engineering YouTube demonetization al... | https://docs.google.com/document/d/155yNpfR7dG... |
| 1 | 2 | 9 | Joplin A note-taking and to-do app with build... | https://github.com/laurent22/joplin/ |
| 2 | 3 | 4 | Coinbase Ordered to Turn Over Identities of 1... | https://motherboard.vice.com/en_us/article/ywn... |
| 3 | 4 | 8 | Australian uses snack bags as Faraday cage to... | https://arstechnica.com/information-technology... |
| 4 | 5 | 3 | A blog I started on Neural Networks and Proba... | https://jontysinai.github.io |
| 5 | 6 | 2 | It Looks Like Nobel Economics Laureates Don't... | https://www.bloomberg.com/news/articles/2017-1... |
| 6 | 7 | 7 | Seventh RISC-V Workshop: Day Two - LowRISC | http://www.lowrisc.org/blog/2017/11/seventh-ri... |
| 7 | 8 | 6 | China's Art Factories: Van Gogh from the Swea... | http://www.spiegel.de/international/0,1518,433... |
| 8 | 9 | 5 | Judge Tells Uber Lawyer: 'It Looks Like You C... | https://www.nytimes.com/2017/11/29/business/wa... |
| 9 | 10 | 0 | As a solo developer, I decided to offer phone... | http://plumshell.com/2017/11/30/as-a-solo-app-... |
| 10 | 11 | 1 | Why Did ProtonMail Vanish from Google Search ... | http://techcrunch.com/2016/10/27/why-did-proto... |
| 11 | 12 | 6 | Weekly Machine Learning Toolset and Library R... | https://blog.pocketcluster.io/2017/11/30/weekl... |
| 12 | 13 | 8 | 50 Years Ago Jocelyn Bell Discovered Pulsars | https://www.space.com/38912-pulsar-discovery-b... |
| 13 | 14 | 6 | Advent of Code 2017 | https://adventofcode.com/2017 |
| 14 | 15 | 9 | SafeButler (YC S17) is hiring employee #2 to ... | item?id=15815913 |
| 15 | 16 | 4 | BTC addresses whose private keys are from Sha... | https://twitter.com/4Dgifts/status/93622348798... |
| 16 | 17 | 5 | Review and Teardown of a Cheap GPS Jammer | http://phasenoise.livejournal.com/2017/11/3185... |
| 17 | 18 | 4 | Coinbase Obtains Partial Victory Over IRS | https://blog.coinbase.com/coinbase-obtains-par... |
| 18 | 19 | 2 | How to Profit from Bitcoin Bubble | http://www.danielwilczynski.com/2017/11/29/bit... |
| 19 | 20 | 7 | Linux Vendor Firmware Service | https://fwupd.org/ |
| 20 | 21 | 8 | Building mindshare in a company | http://www.writethedocs.org/guide/writing/mind... |
| 21 | 22 | 2 | How Cubism Protected Warships in WorldWar I | https://www.wired.com/story/dazzle-camouflage-... |
| 22 | 23 | 3 | The Model Book of Calligraphy (1561-1596) | http://publicdomainreview.org/collections/the-... |
| 23 | 24 | 4 | How do you move out of a smarthome? | https://shkspr.mobi/blog/2017/11/how-do-you-mo... |
| 24 | 25 | 7 | Software Giant Autodesk to Axe 13% of Global ... | http://www.animationmagazine.net/people/softwa... |
| 25 | 26 | 8 | AWS EC2 Virtualization 2017: Including Nitro | http://www.brendangregg.com/blog/2017-11-29/aw... |
| 26 | 27 | 6 | Google faces UK legal action for bypassing iP... | http://www.bbc.co.uk/news/technology-42166089 |
| 27 | 28 | 3 | OptionPlan, an app for founders looking to de... | https://www.indexventures.com/optionplan |
| 28 | 29 | 9 | The Peculiarly Quiet Decline and Fall of the KVM | http://www.loper-os.org/?p=1927 |
| 29 | 30 | 1 | Nvidia has confirmed a driver bug resulting i... | https://www.gamingonlinux.com/articles/nvidia-... |