Skip to content

Commit 6565afa

Browse files
committed
Pandas_compare_columns_in_two_Dataframes
1 parent 1e5744d commit 6565afa

File tree

1 file changed

+327
-5
lines changed

1 file changed

+327
-5
lines changed

notebooks/python/Files/How_to_merge_multiple_CSV_files_with_Python.ipynb

Lines changed: 327 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,245 @@
1313
"* Bonus: Merge multiple files with Windows/Linux"
1414
]
1515
},
16+
{
17+
"cell_type": "code",
18+
"execution_count": 2,
19+
"metadata": {},
20+
"outputs": [
21+
{
22+
"data": {
23+
"text/plain": [
24+
"['../../csv/data_202001.csv',\n",
25+
" '../../csv/data_202002.csv',\n",
26+
" '../../csv/data_201902.csv',\n",
27+
" '../../csv/data_201901.csv']"
28+
]
29+
},
30+
"metadata": {},
31+
"output_type": "display_data"
32+
},
33+
{
34+
"data": {
35+
"text/html": [
36+
"<div>\n",
37+
"<style scoped>\n",
38+
" .dataframe tbody tr th:only-of-type {\n",
39+
" vertical-align: middle;\n",
40+
" }\n",
41+
"\n",
42+
" .dataframe tbody tr th {\n",
43+
" vertical-align: top;\n",
44+
" }\n",
45+
"\n",
46+
" .dataframe thead th {\n",
47+
" text-align: right;\n",
48+
" }\n",
49+
"</style>\n",
50+
"<table border=\"1\" class=\"dataframe\">\n",
51+
" <thead>\n",
52+
" <tr style=\"text-align: right;\">\n",
53+
" <th></th>\n",
54+
" <th>col1</th>\n",
55+
" <th>col2</th>\n",
56+
" <th>col3</th>\n",
57+
" <th>col4</th>\n",
58+
" </tr>\n",
59+
" </thead>\n",
60+
" <tbody>\n",
61+
" <tr>\n",
62+
" <th>0</th>\n",
63+
" <td>E</td>\n",
64+
" <td>F</td>\n",
65+
" <td>5</td>\n",
66+
" <td>e5</td>\n",
67+
" </tr>\n",
68+
" <tr>\n",
69+
" <th>1</th>\n",
70+
" <td>EE</td>\n",
71+
" <td>FF</td>\n",
72+
" <td>6</td>\n",
73+
" <td>ee6</td>\n",
74+
" </tr>\n",
75+
" </tbody>\n",
76+
"</table>\n",
77+
"</div>"
78+
],
79+
"text/plain": [
80+
" col1 col2 col3 col4\n",
81+
"0 E F 5 e5\n",
82+
"1 EE FF 6 ee6"
83+
]
84+
},
85+
"metadata": {},
86+
"output_type": "display_data"
87+
},
88+
{
89+
"data": {
90+
"text/html": [
91+
"<div>\n",
92+
"<style scoped>\n",
93+
" .dataframe tbody tr th:only-of-type {\n",
94+
" vertical-align: middle;\n",
95+
" }\n",
96+
"\n",
97+
" .dataframe tbody tr th {\n",
98+
" vertical-align: top;\n",
99+
" }\n",
100+
"\n",
101+
" .dataframe thead th {\n",
102+
" text-align: right;\n",
103+
" }\n",
104+
"</style>\n",
105+
"<table border=\"1\" class=\"dataframe\">\n",
106+
" <thead>\n",
107+
" <tr style=\"text-align: right;\">\n",
108+
" <th></th>\n",
109+
" <th>col1</th>\n",
110+
" <th>col2</th>\n",
111+
" <th>col3</th>\n",
112+
" <th>col5</th>\n",
113+
" </tr>\n",
114+
" </thead>\n",
115+
" <tbody>\n",
116+
" <tr>\n",
117+
" <th>0</th>\n",
118+
" <td>H</td>\n",
119+
" <td>J</td>\n",
120+
" <td>7</td>\n",
121+
" <td>77</td>\n",
122+
" </tr>\n",
123+
" <tr>\n",
124+
" <th>1</th>\n",
125+
" <td>HH</td>\n",
126+
" <td>JJ</td>\n",
127+
" <td>8</td>\n",
128+
" <td>88</td>\n",
129+
" </tr>\n",
130+
" </tbody>\n",
131+
"</table>\n",
132+
"</div>"
133+
],
134+
"text/plain": [
135+
" col1 col2 col3 col5\n",
136+
"0 H J 7 77\n",
137+
"1 HH JJ 8 88"
138+
]
139+
},
140+
"metadata": {},
141+
"output_type": "display_data"
142+
},
143+
{
144+
"data": {
145+
"text/html": [
146+
"<div>\n",
147+
"<style scoped>\n",
148+
" .dataframe tbody tr th:only-of-type {\n",
149+
" vertical-align: middle;\n",
150+
" }\n",
151+
"\n",
152+
" .dataframe tbody tr th {\n",
153+
" vertical-align: top;\n",
154+
" }\n",
155+
"\n",
156+
" .dataframe thead th {\n",
157+
" text-align: right;\n",
158+
" }\n",
159+
"</style>\n",
160+
"<table border=\"1\" class=\"dataframe\">\n",
161+
" <thead>\n",
162+
" <tr style=\"text-align: right;\">\n",
163+
" <th></th>\n",
164+
" <th>col1</th>\n",
165+
" <th>col2</th>\n",
166+
" <th>col3</th>\n",
167+
" </tr>\n",
168+
" </thead>\n",
169+
" <tbody>\n",
170+
" <tr>\n",
171+
" <th>0</th>\n",
172+
" <td>C</td>\n",
173+
" <td>D</td>\n",
174+
" <td>3</td>\n",
175+
" </tr>\n",
176+
" <tr>\n",
177+
" <th>1</th>\n",
178+
" <td>CC</td>\n",
179+
" <td>DD</td>\n",
180+
" <td>4</td>\n",
181+
" </tr>\n",
182+
" </tbody>\n",
183+
"</table>\n",
184+
"</div>"
185+
],
186+
"text/plain": [
187+
" col1 col2 col3\n",
188+
"0 C D 3\n",
189+
"1 CC DD 4"
190+
]
191+
},
192+
"metadata": {},
193+
"output_type": "display_data"
194+
},
195+
{
196+
"data": {
197+
"text/html": [
198+
"<div>\n",
199+
"<style scoped>\n",
200+
" .dataframe tbody tr th:only-of-type {\n",
201+
" vertical-align: middle;\n",
202+
" }\n",
203+
"\n",
204+
" .dataframe tbody tr th {\n",
205+
" vertical-align: top;\n",
206+
" }\n",
207+
"\n",
208+
" .dataframe thead th {\n",
209+
" text-align: right;\n",
210+
" }\n",
211+
"</style>\n",
212+
"<table border=\"1\" class=\"dataframe\">\n",
213+
" <thead>\n",
214+
" <tr style=\"text-align: right;\">\n",
215+
" <th></th>\n",
216+
" <th>col1</th>\n",
217+
" <th>col2</th>\n",
218+
" <th>col3</th>\n",
219+
" </tr>\n",
220+
" </thead>\n",
221+
" <tbody>\n",
222+
" <tr>\n",
223+
" <th>0</th>\n",
224+
" <td>A</td>\n",
225+
" <td>B</td>\n",
226+
" <td>1</td>\n",
227+
" </tr>\n",
228+
" <tr>\n",
229+
" <th>1</th>\n",
230+
" <td>AA</td>\n",
231+
" <td>BB</td>\n",
232+
" <td>2</td>\n",
233+
" </tr>\n",
234+
" </tbody>\n",
235+
"</table>\n",
236+
"</div>"
237+
],
238+
"text/plain": [
239+
" col1 col2 col3\n",
240+
"0 A B 1\n",
241+
"1 AA BB 2"
242+
]
243+
},
244+
"metadata": {},
245+
"output_type": "display_data"
246+
}
247+
],
248+
"source": [
249+
"all_files = glob.glob(os.path.join(path, \"data_*.csv\"))\n",
250+
"display(all_files)\n",
251+
"for f in all_files:\n",
252+
" display(pd.read_csv(f, sep=','))"
253+
]
254+
},
16255
{
17256
"cell_type": "markdown",
18257
"metadata": {},
@@ -22,14 +261,15 @@
22261
},
23262
{
24263
"cell_type": "code",
25-
"execution_count": 1,
264+
"execution_count": 3,
26265
"metadata": {},
27266
"outputs": [],
28267
"source": [
29268
"import os, glob\n",
30269
"import pandas as pd\n",
31270
"\n",
32271
"path = \"../../csv/\"\n",
272+
"#path = \"/home/user/data\"\n",
33273
"\n",
34274
"all_files = glob.glob(os.path.join(path, \"data_2019*.csv\"))\n",
35275
"\n",
@@ -38,6 +278,88 @@
38278
"df_merged.to_csv( \"merged.csv\")"
39279
]
40280
},
281+
{
282+
"cell_type": "code",
283+
"execution_count": 4,
284+
"metadata": {},
285+
"outputs": [
286+
{
287+
"data": {
288+
"text/html": [
289+
"<div>\n",
290+
"<style scoped>\n",
291+
" .dataframe tbody tr th:only-of-type {\n",
292+
" vertical-align: middle;\n",
293+
" }\n",
294+
"\n",
295+
" .dataframe tbody tr th {\n",
296+
" vertical-align: top;\n",
297+
" }\n",
298+
"\n",
299+
" .dataframe thead th {\n",
300+
" text-align: right;\n",
301+
" }\n",
302+
"</style>\n",
303+
"<table border=\"1\" class=\"dataframe\">\n",
304+
" <thead>\n",
305+
" <tr style=\"text-align: right;\">\n",
306+
" <th></th>\n",
307+
" <th>Unnamed: 0</th>\n",
308+
" <th>col1</th>\n",
309+
" <th>col2</th>\n",
310+
" <th>col3</th>\n",
311+
" </tr>\n",
312+
" </thead>\n",
313+
" <tbody>\n",
314+
" <tr>\n",
315+
" <th>0</th>\n",
316+
" <td>0</td>\n",
317+
" <td>C</td>\n",
318+
" <td>D</td>\n",
319+
" <td>3</td>\n",
320+
" </tr>\n",
321+
" <tr>\n",
322+
" <th>1</th>\n",
323+
" <td>1</td>\n",
324+
" <td>CC</td>\n",
325+
" <td>DD</td>\n",
326+
" <td>4</td>\n",
327+
" </tr>\n",
328+
" <tr>\n",
329+
" <th>2</th>\n",
330+
" <td>2</td>\n",
331+
" <td>A</td>\n",
332+
" <td>B</td>\n",
333+
" <td>1</td>\n",
334+
" </tr>\n",
335+
" <tr>\n",
336+
" <th>3</th>\n",
337+
" <td>3</td>\n",
338+
" <td>AA</td>\n",
339+
" <td>BB</td>\n",
340+
" <td>2</td>\n",
341+
" </tr>\n",
342+
" </tbody>\n",
343+
"</table>\n",
344+
"</div>"
345+
],
346+
"text/plain": [
347+
" Unnamed: 0 col1 col2 col3\n",
348+
"0 0 C D 3\n",
349+
"1 1 CC DD 4\n",
350+
"2 2 A B 1\n",
351+
"3 3 AA BB 2"
352+
]
353+
},
354+
"execution_count": 4,
355+
"metadata": {},
356+
"output_type": "execute_result"
357+
}
358+
],
359+
"source": [
360+
"pd.read_csv('merged.csv')"
361+
]
362+
},
41363
{
42364
"cell_type": "markdown",
43365
"metadata": {},
@@ -47,7 +369,7 @@
47369
},
48370
{
49371
"cell_type": "code",
50-
"execution_count": 2,
372+
"execution_count": 5,
51373
"metadata": {},
52374
"outputs": [
53375
{
@@ -118,7 +440,7 @@
118440
"3 AA BB 2 data_201901.csv"
119441
]
120442
},
121-
"execution_count": 2,
443+
"execution_count": 5,
122444
"metadata": {},
123445
"output_type": "execute_result"
124446
}
@@ -150,7 +472,7 @@
150472
},
151473
{
152474
"cell_type": "code",
153-
"execution_count": 7,
475+
"execution_count": 6,
154476
"metadata": {},
155477
"outputs": [
156478
{
@@ -271,7 +593,7 @@
271593
"7 AA BB 2 NaN NaN data_201901.csv"
272594
]
273595
},
274-
"execution_count": 7,
596+
"execution_count": 6,
275597
"metadata": {},
276598
"output_type": "execute_result"
277599
}

0 commit comments

Comments
 (0)