Skip to content

Commit 760f478

Browse files
Added setting number of reduce tasks
1 parent de92b71 commit 760f478

File tree

1 file changed

+15
-1
lines changed

1 file changed

+15
-1
lines changed

hadoop/edw/hdp/pig/pig_demo.txt

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -206,12 +206,26 @@ orderbydesc = ORDER departments BY department_id DESC;
206206

207207
-- Remove the duplicate tuples of a Pig relation
208208
-- select distinct order_status from orders;
209+
orders = LOAD 'pig_demo.orders' USING org.apache.hive.hcatalog.pig.HCatLoader();
210+
orderstatus = FOREACH orders GENERATE order_status;
211+
grouped = GROUP orderstatus BY order_status;
212+
orderstatusdistinct = FOREACH grouped {
213+
odistinct = DISTINCT orderstatus.order_status;
214+
GENERATE FLATTEN(odistinct);
215+
};
216+
DUMP orderstatusdistinct;
217+
209218
orders = LOAD 'pig_demo.orders' USING org.apache.hive.hcatalog.pig.HCatLoader();
210219
orderstatus = FOREACH orders GENERATE order_status;
211220
orderstatusdistinct = DISTINCT orderstatus;
212-
DUMP orderstatus;
221+
DUMP orderstatusdistinct;
213222

214223
-- Specify the number of reduce tasks for a Pig MapReduce job
224+
orders = LOAD 'pig_demo.orders' USING org.apache.hive.hcatalog.pig.HCatLoader();
225+
ordersgrouped = GROUP orders BY order_status PARALLEL 2;
226+
DESCRIBE ordersgrouped; -- print the schema of the grouped relation (the 'group' key plus the 'orders' bag used by COUNT below)
227+
orderscount = FOREACH ordersgrouped GENERATE group, COUNT(orders) AS cnt;
228+
DUMP orderscount;
215229

216230
-- Join two datasets using Pig
217231
-- select o.order_date, sum(oi.order_item_subtotal) from orders o join order_items oi

0 commit comments

Comments
 (0)