File tree Expand file tree Collapse file tree 1 file changed +15
-1
lines changed Expand file tree Collapse file tree 1 file changed +15
-1
lines changed Original file line number Diff line number Diff line change @@ -206,12 +206,26 @@ orderbydesc = ORDER departments BY department_id DESC;
206
206
207
207
-- Remove the duplicate tuples of a Pig relation
208
208
-- select distinct order_status from orders;
209
+ orders = LOAD 'pig_demo.orders' USING org.apache.hive.hcatalog.pig.HCatLoader();
210
+ orderstatus = FOREACH orders GENERATE order_status;
211
+ grouped = GROUP orderstatus BY order_status;
212
+ orderstatusdistinct = FOREACH grouped {
213
+ odistinct = DISTINCT orderstatus.order_status;
214
+ GENERATE FLATTEN(odistinct);
215
+ };
216
+ DUMP orderstatusdistinct;
217
+
209
218
orders = LOAD 'pig_demo.orders' USING org.apache.hive.hcatalog.pig.HCatLoader();
210
219
orderstatus = FOREACH orders GENERATE order_status;
211
220
orderstatusdistinct = DISTINCT orderstatus;
212
- DUMP orderstatus ;
221
+ DUMP orderstatusdistinct ;
213
222
214
223
-- Specify the number of reduce tasks for a Pig MapReduce job
224
+ orders = LOAD 'pig_demo.orders' USING org.apache.hive.hcatalog.pig.HCatLoader();
225
+ ordersgrouped = GROUP orders BY order_status PARALLEL 2;
226
+ DESCRIBE ordersgrouped; -- print the schema of the grouped relation; the ';' is required to end the statement
227
+ orderscount = FOREACH ordersgrouped GENERATE group, COUNT(orders) AS cnt;
228
+ DUMP orderscount;
215
229
216
230
-- Join two datasets using Pig
217
231
-- select o.order_date, sum(oi.order_item_subtotal) from orders o join order_items oi
You can’t perform that action at this time.
0 commit comments