{"id":209,"date":"2020-10-22T06:17:29","date_gmt":"2020-10-21T21:17:29","guid":{"rendered":"https:\/\/www.ritzcolor.net\/?p=209"},"modified":"2020-12-10T20:09:23","modified_gmt":"2020-12-10T11:09:23","slug":"%e3%81%a9%e7%b4%a0%e4%ba%ba%e3%81%aekaggle5-3rd-try","status":"publish","type":"post","link":"https:\/\/www.ritzcolor.net\/?p=209","title":{"rendered":"\u3069\u7d20\u4eba\u306ekaggle5 -3rd try-"},"content":{"rendered":"\n<p>\u65e9\u304f\u30823\u56de\u76ee\u306e\u63d0\u51fa\u3068\u306a\u308a\u307e\u3057\u305f\u3002\u307e\u305a\u306f\u524d\u56de\u307e\u3067\u306e\u30b3\u30fc\u30c9\u3092\u4e00\u62ec\u3067\u8a18\u8ff0\u3057\u307e\u3059\u3002train_df\u3001test_df\u306b\u30c7\u30fc\u30bf\u683c\u7d0d\u3059\u308b\u3068\u3053\u308d\u306f\u5272\u611b\u3057\u3066\u307e\u3059\u3002\u5730\u5473\u3067\u3059\u304c\u3001kaggle\u306eNotebook\u3067quick save\u3060\u3051\u3060\u3068output\u304c\u4fdd\u5b58\u3055\u308c\u306a\u3044\u3089\u3057\u3044\u306e\u3067\u3001\u5168\u90e8\u30b3\u30fc\u30c9\u66f8\u304d\u7d42\u308f\u3063\u305f\u3089full\u3067save\u3068\u308a\u307e\u3057\u3087\u3046\u3002<\/p>\n\n\n\n<div class=\"hcb_wrap\"><pre class=\"prism line-numbers lang-python\" data-lang=\"Python\"><code>#\u5404\u7a2e\u30c6\u30fc\u30d6\u30eb\u3092\u7d50\u5408\n item_2 = pd.merge(item, categories, how=&quot;inner&quot; ,on=&quot;item_category_id&quot;)\n train_2 = pd.merge(train_df, item_2, how=&quot;inner&quot; ,on=&quot;item_id&quot;)\n test_2 = pd.merge(test_df, item_2, how=&quot;inner&quot; ,on=&quot;item_id&quot;)\n train_3 = pd.merge(train_2, shops, how=&quot;inner&quot; ,on=&quot;shop_id&quot;)\n test_3 = pd.merge(test_2, shops, how=&quot;inner&quot; ,on=&quot;shop_id&quot;)\n#date\u3092\u5e74\u3001\u6708\u3001\u65e5\u3067\u5206\u89e3\u3002\n train_3[&quot;year&quot;] = train_3[&quot;date&quot;].str[-4:]\n train_3[&quot;month&quot;] = train_3[&quot;date&quot;].str[-7:-5]\n train_3[&quot;day&quot;] = train_3[&quot;date&quot;].str[0:2]\n#item, shop, 
category\u306erank\u3092\u4f5c\u6210\n train_3[[&quot;month&quot;, &quot;item_cnt_day&quot;]].groupby([&quot;month&quot;], as_index=False).sum().sort_values(by=&quot;item_cnt_day&quot;, ascending=True)\n train_3[[&quot;year&quot;, &quot;item_cnt_day&quot;]].groupby([&quot;year&quot;], as_index=False).sum().sort_values(by=&quot;item_cnt_day&quot;, ascending=True)\n#\u305d\u308c\u305e\u308cpivot\u3067\u30c6\u30fc\u30d6\u30eb\u3092\u4f5c\u308a\u3001\u964d\u9806\u306b\u30bd\u30fc\u30c8\u3002\u305d\u3053\u306bindex\u3092\u4ed8\u4e0e\u3057\u3001\u58f2\u308a\u4e0a\u3052\u30e9\u30f3\u30ad\u30f3\u30b0\u3092\u4f5c\u6210\u3059\u308b\n item_rank = train_3[[&quot;item_name&quot;, &quot;item_cnt_day&quot;]].groupby([&quot;item_name&quot;], as_index=True).sum().sort_values(by=&quot;item_cnt_day&quot;, ascending=False)\n#ID\u3068\u3057\u30661\u304b\u3089\u632f\u308a\u76f4\u3057\u3002sort\u306f\u4e0a\u306e\u30d4\u30dc\u30c3\u30c8\u751f\u6210\u6642\u306b\u5b9f\u65bd\u305a\u307f\n item_rank[&#39;item_rank&#39;] = pd.RangeIndex(start=1, stop=len(item_rank.index) + 1, step=1)\n shop_rank = train_3[[&quot;shop_name&quot;, &quot;item_cnt_day&quot;]].groupby([&quot;shop_name&quot;], as_index=True).sum().sort_values(by=&quot;item_cnt_day&quot;, ascending=False)\n category_rank = train_3[[&quot;item_category_name&quot;, &quot;item_cnt_day&quot;]].groupby([&quot;item_category_name&quot;], as_index=True).sum().sort_values(by=&quot;item_cnt_day&quot;, ascending=False)\n shop_rank[&#39;shop_rank&#39;] = pd.RangeIndex(start=1, stop=len(shop_rank.index) + 1, step=1)\n category_rank[&#39;category_rank&#39;] = pd.RangeIndex(start=1, stop=len(category_rank.index) + 1, step=1)\n\n#train, test\u306bitem, shop, category_rank\u3092\u7d50\u5408\n#how\u3092left\u306b\u3057\u306a\u3044\u3068test\u306e\u884c\u6570\u304c\u5909\u5316\u3057\u3066\u3042\u3068\u3067\u56f0\u308b\n train_4 = pd.merge(train_3, item_rank[&quot;item_rank&quot;], how=&quot;left&quot; ,on=&quot;item_name&quot;)\n test_4 = 
pd.merge(test_3, item_rank[&quot;item_rank&quot;], how=&quot;left&quot; ,on=&quot;item_name&quot;)\n train_5 = pd.merge(train_4, shop_rank[&quot;shop_rank&quot;], how=&quot;left&quot; ,on=&quot;shop_name&quot;)\n test_5 = pd.merge(test_4, shop_rank[&quot;shop_rank&quot;], how=&quot;left&quot; ,on=&quot;shop_name&quot;)\n train_6 = pd.merge(train_5, category_rank[&quot;category_rank&quot;], how=&quot;left&quot; ,on=&quot;item_category_name&quot;)\n test_6 = pd.merge(test_5, category_rank[&quot;category_rank&quot;], how=&quot;left&quot; ,on=&quot;item_category_name&quot;)\n\n#test\u306b\u4e88\u6e2c\u3057\u305f\u3044\u5e74\u6708\u65e5\u3092\u5165\u529b\n test_6[&quot;year&quot;] = 2016\n test_6[&quot;month&quot;] = 11\n test_6[&quot;day&quot;] = 1\n\n#item_rank\u306b\u7a7a\u767d\u304c\u3042\u308b\u305f\u3081\u3001\u6700\u4e0b\u4f4d\u3067\u88dc\u586b\u3002\u9806\u4f4d\u306ftest.info()\u3067\u78ba\u8a8d\n test_6[&quot;item_rank&quot;] = test_6[&quot;item_rank&quot;].fillna(21808)<\/code><\/pre><\/div>\n\n\n\n<pre id=\"block-d070a1ff-775c-46da-8ac3-fec3838a43f3\" class=\"wp-block-preformatted\">\u4eca\u56de\u306e\u76ee\u7389\u3068\u3057\u3066\u3001item_rank\u3092\u533a\u5206\u5316\u3057\u3088\u3046\u3068\u3044\u3046\u3053\u3068\u3067\u3001\u304a\u304a\u3088\u305d\u306e\u76ee\u5b89\u3092train_6.describe()\u3067\u78ba\u8a8d\u3002<\/pre>\n\n\n\n<div class=\"wp-container-1 wp-block-group\"><div class=\"wp-block-group__inner-container\">\n<figure class=\"wp-block-table\"><table><thead><tr><th><br><\/th><th class=\"has-text-align-right\" data-align=\"right\"><strong>date_block_num<\/strong><\/th><th><strong>shop_id<\/strong><\/th><th><strong>item_id<\/strong><\/th><th><strong>item_price<\/strong><\/th><th><strong>item_cnt_day<\/strong><\/th><th><strong>item_category_id<\/strong><\/th><th><strong>item_rank<\/strong><\/th><th><strong>shop_rank<\/strong><\/th><\/tr><\/thead><tbody><tr><th>count<\/th><td class=\"has-text-align-right\" 
data-align=\"right\">2.935849e+06<\/td><td>2.935849e+06<\/td><td>2.935849e+06<\/td><td>2.935849e+06<\/td><td>2.935849e+06<\/td><td>2.935849e+06<\/td><td>2.935849e+06<\/td><td>2.935849e+06<\/td><td>2.935849e+06<\/td><\/tr><tr><th>mean<\/th><td class=\"has-text-align-right\" data-align=\"right\">1.456991e+01<\/td><td>3.300173e+01<\/td><td>1.019723e+04<\/td><td>8.908532e+02<\/td><td>1.242641e+00<\/td><td>4.000138e+01<\/td><td>2.649291e+03<\/td><td>1.734426e+01<\/td><td>9.804866e+00<\/td><\/tr><tr><th>std<\/th><td class=\"has-text-align-right\" data-align=\"right\">9.422988e+00<\/td><td>1.622697e+01<\/td><td>6.324297e+03<\/td><td>1.729800e+03<\/td><td>2.618834e+00<\/td><td>1.710076e+01<\/td><td>3.292436e+03<\/td><td>1.409859e+01<\/td><td>1.204331e+01<\/td><\/tr><tr><th>min<\/th><td class=\"has-text-align-right\" data-align=\"right\">0.000000e+00<\/td><td>0.000000e+00<\/td><td>0.000000e+00<\/td><td>-1.000000e+00<\/td><td>-2.200000e+01<\/td><td>0.000000e+00<\/td><td>1.000000e+00<\/td><td>1.000000e+00<\/td><td>1.000000e+00<\/td><\/tr><tr><th>25%<\/th><td class=\"has-text-align-right\" data-align=\"right\">7.000000e+00<\/td><td>2.200000e+01<\/td><td>4.476000e+03<\/td><td>2.490000e+02<\/td><td>1.000000e+00<\/td><td>2.800000e+01<\/td><td>3.260000e+02<\/td><td>5.000000e+00<\/td><td>2.000000e+00<\/td><\/tr><tr><th>50%<\/th><td class=\"has-text-align-right\" data-align=\"right\">1.400000e+01<\/td><td>3.100000e+01<\/td><td>9.343000e+03<\/td><td>3.990000e+02<\/td><td>1.000000e+00<\/td><td>4.000000e+01<\/td><td>1.345000e+03<\/td><td>1.400000e+01<\/td><td>5.000000e+00<\/td><\/tr><tr><th>75%<\/th><td class=\"has-text-align-right\" data-align=\"right\">2.300000e+01<\/td><td>4.700000e+01<\/td><td>1.568400e+04<\/td><td>9.990000e+02<\/td><td>1.000000e+00<\/td><td>5.500000e+01<\/td><td>3.726000e+03<\/td><td>2.800000e+01<\/td><td>1.300000e+01<\/td><\/tr><tr><th>max<\/th><td class=\"has-text-align-right\" 
data-align=\"right\">3.300000e+01<\/td><td>5.900000e+01<\/td><td>2.216900e+04<\/td><td>3.079800e+05<\/td><td>2.169000e+03<\/td><td>8.300000e+01<\/td><td>2.180700e+04<\/td><td>6.000000e+01<\/td><td>8.400000e+01<\/td><\/tr><\/tbody><\/table><\/figure>\n\n\n\n<p>\u4eca\u56de\u306f25%, 50%, 75%\u3067\u533a\u5206\u3057\u3066\u307f\u307e\u3057\u305f\u3002<\/p>\n\n\n\n<div class=\"hcb_wrap\"><pre class=\"prism line-numbers lang-plane\"><code>#describe\u304b\u3089item_rank\u3092\u533a\u5206\u5206\u3051\n\ncombine = [train_6, test_6]\n\nfor dataset in combine:    \n    dataset.loc[ dataset[&#39;item_rank&#39;] &lt;= 326, &#39;item_rank&#39;] = 0\n    dataset.loc[(dataset[&#39;item_rank&#39;] &gt; 326) & (dataset[&#39;item_rank&#39;] &lt;= 1345), &#39;item_rank&#39;] = 1\n    dataset.loc[(dataset[&#39;item_rank&#39;] &gt; 1345) & (dataset[&#39;item_rank&#39;] &lt;= 3726), &#39;item_rank&#39;] = 2\n    dataset.loc[ dataset[&#39;item_rank&#39;] &gt; 3726, &#39;item_rank&#39;] = 3\n    \n# \u5148\u982d\u304b\u30895\u884c\u3092\u8868\u793a\ntrain_6.head()<\/code><\/pre><\/div>\n\n\n\n<p>\u3042\u3068\u306f\u524d\u56de\u540c\u69d8\u306b\u6c7a\u5b9a\u6728\u3067\u4e88\u60f3\u3057\u3066\u3001\u63d0\u51fa\u3057\u3066\u307f\u307e\u3057\u305f\u3002<\/p>\n<\/div><\/div>\n\n\n\n<p>\u6c17\u306b\u306a\u308b\u7d50\u679c\u306f\u3001\u521d\u56de\u304c1.53867\u30012\u56de\u76ee\u306f1.47967\u3001\u305d\u3057\u3066\u4eca\u56de\u306f1.41241\u3067\u3001\u3053\u308c\u307e\u305f\u5fae\u5999\u306b\u6539\u5584\u3002\u4f34\u3063\u3066\u9806\u4f4d\u306f\u521d\u56de7,935\u4f4d\u3001\u524d\u56de7,782\u4f4d\u3068\u6765\u3066\u3001\u4eca\u56de\u306f7,645\u4f4d\u3002\u307e\u3060\u307e\u3060\u3084\u3063\u3066\u307f\u305f\u3044\u3053\u3068\u304c\u3042\u308b\u306e\u3067\u3001\u3044\u308d\u3044\u308d\u8a66\u305d\u3046\u3002<\/p>\n\n\n\n<p>\u6b21\u56de\u306e\u6539\u5584\u30dd\u30a4\u30f3\u30c8\u306f\u3001item_rank\u540c\u69d8\u306bcategory_rank\u3068shop_rank\u306e\u533a\u5206\u5316\u3001\u3042\u3068\u306fitem_price
\u306e\u7279\u5fb4\u91cf\u53cd\u6620\u3001\u3055\u3089\u306b\u6c7a\u5b9a\u6728\u4ee5\u5916\u306e\u624b\u6cd5\u306e\u63a1\u7528\uff0b\u30cf\u30a4\u30d1\u30fc\u30d1\u30e9\u30e1\u30fc\u30bf\u306e\u4f7f\u7528\u306a\u3069\u306a\u3069\u3002<\/p>\n\n\n\n<p>\u3068\u3044\u3046\u3053\u3068\u3067\u307e\u305f\u3082\u3084\u3059\u3053\u30fc\u3057\u3060\u3051\u6539\u5584\u3057\u305f3\u56de\u76ee\u306e\u63d0\u51fa\u3067\u3057\u305f\u3002\u76ee\u6a19\u306f\u4eca\u9031\u4e2d\u306b\u4f55\u3068\u304b\u534a\u5206\u304f\u3089\u3044\u306e\u9806\u4f4d(4500\u4f4d\u304f\u3089\u3044)\uff01\uff01<\/p>\n","protected":false},"excerpt":{"rendered":"<p>\u65e9\u304f\u30823\u56de\u76ee\u306e\u63d0\u51fa\u3068\u306a\u308a\u307e\u3057\u305f\u3002\u307e\u305a\u306f\u524d\u56de\u307e\u3067\u306e\u30b3\u30fc\u30c9\u3092\u4e00\u62ec\u3067\u8a18\u8ff0\u3057\u307e\u3059\u3002train_df\u3001test_df\u306b\u30c7\u30fc\u30bf\u683c\u7d0d\u3059\u308b\u3068\u3053\u308d\u306f\u5272\u611b\u3057\u3066\u307e\u3059\u3002\u5730\u5473\u3067\u3059\u304c\u3001kaggle\u306eNotebook\u3067quick save\u3060\u3051\u3060\u3068ou 
[&hellip;]<\/p>\n","protected":false},"author":1,"featured_media":176,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"spay_email":""},"categories":[3],"tags":[6,19,20],"aioseo_notices":[],"jetpack_featured_media_url":"https:\/\/i0.wp.com\/www.ritzcolor.net\/wp-content\/uploads\/2020\/10\/character_program_smart-1.png?fit=400%2C400&ssl=1","_links":{"self":[{"href":"https:\/\/www.ritzcolor.net\/index.php?rest_route=\/wp\/v2\/posts\/209"}],"collection":[{"href":"https:\/\/www.ritzcolor.net\/index.php?rest_route=\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/www.ritzcolor.net\/index.php?rest_route=\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/www.ritzcolor.net\/index.php?rest_route=\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/www.ritzcolor.net\/index.php?rest_route=%2Fwp%2Fv2%2Fcomments&post=209"}],"version-history":[{"count":1,"href":"https:\/\/www.ritzcolor.net\/index.php?rest_route=\/wp\/v2\/posts\/209\/revisions"}],"predecessor-version":[{"id":210,"href":"https:\/\/www.ritzcolor.net\/index.php?rest_route=\/wp\/v2\/posts\/209\/revisions\/210"}],"wp:featuredmedia":[{"embeddable":true,"href":"https:\/\/www.ritzcolor.net\/index.php?rest_route=\/wp\/v2\/media\/176"}],"wp:attachment":[{"href":"https:\/\/www.ritzcolor.net\/index.php?rest_route=%2Fwp%2Fv2%2Fmedia&parent=209"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/www.ritzcolor.net\/index.php?rest_route=%2Fwp%2Fv2%2Fcategories&post=209"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/www.ritzcolor.net\/index.php?rest_route=%2Fwp%2Fv2%2Ftags&post=209"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}