1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84
| #!/bin/bash
deleteDate=`date -d "1 day ago" +%Y%m%d`
for ((i=1; i<=11; i++))
do
dorisDate=`date -d "$i day ago" +%Y-%m-%d`
hiveDate=`date -d "$i day ago" +%Y%m%d` /opt/cloudera/parcels/CDH/bin/sqoop-import \ --connect "jdbc:mysql://10.0.15.131:9030/dwd_homedo_ubt" \ --username root \ --password "root" \ --target-dir /opt/user/HomedoDB/warehouse/ods_cdp/mid_event_vd/d=$hiveDate \ --query "select distinct_id, xwho, xwhen, xwhat, os, utm_campaign_id, utm_campaign, utm_medium, dict_utm_medium, utm_source, dict_utm_source, utm_content, utm_term, channel, referrer, referrer_domain, traffic_source_type, dict_traffic_source_type, search_engine, dict_search_engine, search_keyword, social_share_from, scene, dict_scene, scene_type, dict_scene_type, platform, dict_platform, session_id, url, social_media, dict_social_media, url_domain, track_xwhen, fingerprint, campaign_shortlink, offset, xwhat_id, need_update, network, os_version, is_first_day, is_from_background, model, screen_width, is_first_time, brand, ip, screen_height, language, app_version, lib, is_time_calibrated, is_login, lib_version, platform_extra, time_zone, manufacturer, country, province, city, title, duration, element_type, element_path, start_source, element_content, element_id, carrier_name, element_position, parent_url, web_crawler, user_agent, browser_version, device_type, dict_device_type, browser, startup_time, original_id, url_path, element_class_name, element_target_url, btn_name, type, content_type, nav_name, \`desc\`, is_success, method, user_id, source_position_name, source_module, rank, name, module_name, link_page_url, ad_name, ad_id, location, element_name, third_category, news_id, news_name, event_name, second_category, first_category, price, item_id, item_name, item_count, keyword_type, min_buy_unit, review_num, list_price, event_id, reason, brand_id, shop_name, shop_id, brand_name, video_name, video_id, is_filter, item_code, item_specific_module, shop_num, recommend_item_num, page_num, filter_detail, filter_type, crash_data, string_price, second_category_id, stock_status_pro, first_category_id, stock_status, shelf_status, item_u8_code, string_price_pro, third_category_id, min_buy_num, string_stp_price2, string_stp_price1, string_stp_price3, string_stp_num3, string_stp_num2, string_stp_num1, string_stp_price4, string_stp_num4, string_stp_price5, string_stp_num5, pic_num, integral_use_amount, shipping_cost, orgcode, credit_period_type, is_wait_deliver, insert_time, actual_pay_amount, coupon_freight_use_amount, is_negotiation, package_use_amount, discount_use_amount, channel_type, order_id, category_code, payment_time, is_status, payment_method, per_pay_amount, is_first_order, status, smb_pay_time, order_amount, confirm_time, action, viewport_position, viewport_width, viewport_height, event_duration, deal_amount, live_name, live_id, debug, news_channel, news_subject, radio_num, plan_name, plan_id, click_content_rank, activity_tag, string_list_price, is_delete, device_id, oaid, imei, android_id, element_clickable, click_x, click_y, page_height, element_y, element_x, page_width, push_title, activityid, activityname, activitytype, virtualurl, prop_0, prop_1, prop_2, prop_3, prop_4, dict_platform_extra, page_name, sms_channel, message_id, ea_cohort_code, ad_space_type, ad_space_name, grade1, ad_space_id, dese, item_type, package_num, package_id, pagestaytime, is_ad_sku, sku_rank, package_name, url_domain2, search_scene, share_source_id, static_type from dwd_homedo_ubt.dwd_log_event where date_d = '$dorisDate' and \$CONDITIONS" \ --delete-target-dir \ --fields-terminated-by '\001' \ --hive-delims-replacement '' \ --null-string '\\N' \ --null-non-string '\\N' \ --m 1 hive -e "set hive.exec.dynamic.partition.mode=nonstrict; set hive.execution.engine=mr; msck repair table ods_cdp.mid_event_vd; insert overwrite table ods_cdp.event_vd partition(d) select distinct_id, xwho, xwhen, xwhat, os, utm_campaign_id, utm_campaign, utm_medium, dict_utm_medium, utm_source, dict_utm_source, utm_content, utm_term, channel, referrer, referrer_domain, traffic_source_type, dict_traffic_source_type, search_engine, dict_search_engine, search_keyword, social_share_from, scene, dict_scene, scene_type, dict_scene_type, platform, dict_platform, session_id, url, social_media, dict_social_media, url_domain, track_xwhen, fingerprint, campaign_shortlink, offset, xwhat_id, need_update, network, os_version, is_first_day, is_from_background, model, screen_width, is_first_time, brand, ip, screen_height, language, app_version, lib, is_time_calibrated, is_login, lib_version, platform_extra, time_zone, manufacturer, country, province, city, title, duration, element_type, element_path, start_source, element_content, element_id, carrier_name, element_position, parent_url, web_crawler, user_agent, browser_version, device_type, dict_device_type, browser, startup_time, original_id, url_path, element_class_name, element_target_url, btn_name, type, content_type, nav_name, desc, is_success, method, user_id, source_position_name, source_module, rank, name, module_name, link_page_url, ad_name, ad_id, location, element_name, third_category, news_id, news_name, event_name, second_category, first_category, price, item_id, item_name, item_count, keyword_type, min_buy_unit, review_num, list_price, event_id, reason, brand_id, shop_name, shop_id, brand_name, video_name, video_id, is_filter, item_code, item_specific_module, shop_num, recommend_item_num, page_num, filter_detail, filter_type, crash_data, string_price, second_category_id, stock_status_pro, first_category_id, stock_status, shelf_status, item_u8_code, string_price_pro, third_category_id, min_buy_num, string_stp_price2, string_stp_price1, string_stp_price3, string_stp_num3, string_stp_num2, string_stp_num1, string_stp_price4, string_stp_num4, string_stp_price5, string_stp_num5, pic_num, integral_use_amount, shipping_cost, orgcode, credit_period_type, is_wait_deliver, insert_time, actual_pay_amount, coupon_freight_use_amount, is_negotiation, package_use_amount, discount_use_amount, channel_type, order_id, category_code, payment_time, is_status, payment_method, per_pay_amount, is_first_order, status, smb_pay_time, order_amount, confirm_time, action, viewport_position, viewport_width, viewport_height, event_duration, deal_amount, live_name, live_id, debug, news_channel, news_subject, radio_num, plan_name, plan_id, click_content_rank, activity_tag, string_list_price, is_delete, device_id, oaid, imei, android_id, element_clickable, click_x, click_y, page_height, element_y, element_x, page_width, push_title, activityid, activityname, activitytype, virtualurl, prop_0, prop_1, prop_2, prop_3, prop_4, dict_platform_extra, page_name, sms_channel, message_id, ea_cohort_code, ad_space_type, ad_space_name, grade1, ad_space_id, dese, item_type, package_num, package_id, pagestaytime, is_ad_sku, sku_rank, package_name, url_domain2, search_scene, share_source_id, static_type, d from ods_cdp.mid_event_vd where d = '$hiveDate';"
done
hive -e "alter table ods_cdp.mid_event_vd drop partition (d < '$deleteDate');"
|