From 164f300762297bb6069a25cfc1cbb819e1bff9ec Mon Sep 17 00:00:00 2001
From: Konstantin Lopuhin
Date: Fri, 19 Feb 2016 18:22:56 +0300
Subject: [PATCH 1/2] See #1778 - change default S3 ACL to "private" and allow
 customization via settings

---
 docs/topics/feed-exports.rst        | 3 +++
 docs/topics/settings.rst            | 9 +++++++++
 scrapy/pipelines/files.py           | 4 +++-
 scrapy/settings/default_settings.py | 2 ++
 4 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/docs/topics/feed-exports.rst b/docs/topics/feed-exports.rst
index e5037129c..1f17ccd51 100644
--- a/docs/topics/feed-exports.rst
+++ b/docs/topics/feed-exports.rst
@@ -185,6 +185,9 @@ passed through the following settings:
 * :setting:`AWS_ACCESS_KEY_ID`
 * :setting:`AWS_SECRET_ACCESS_KEY`
 
+Default access policy for uploaded files is ``private``, it can be changed
+(for example, to ``public-read``) via :setting:`S3_STORE_ACL`.
+
 .. _topics-feed-storage-stdout:
 
 Standard output
diff --git a/docs/topics/settings.rst b/docs/topics/settings.rst
index f8f35b5e3..b7d6f2d9a 100644
--- a/docs/topics/settings.rst
+++ b/docs/topics/settings.rst
@@ -926,6 +926,15 @@ If enabled, Scrapy will respect robots.txt policies. For more information see
     this option is enabled by default in settings.py file generated
     by ``scrapy startproject`` command.
 
+.. setting:: S3_STORE_ACL
+
+S3_STORE_ACL
+------------
+
+Default: ``'private'``
+
+S3-specific access control policy (ACL) for uploaded files.
+
 .. setting:: SCHEDULER
 
 SCHEDULER
diff --git a/scrapy/pipelines/files.py b/scrapy/pipelines/files.py
index 45ceddcbb..b48f43094 100644
--- a/scrapy/pipelines/files.py
+++ b/scrapy/pipelines/files.py
@@ -82,7 +82,8 @@ class S3FilesStore(object):
 
     AWS_ACCESS_KEY_ID = None
     AWS_SECRET_ACCESS_KEY = None
-    POLICY = 'public-read'
+    POLICY = 'private'  # Overriden from settings.S3_STORE_ACL in
+                        # FilesPipeline.from_settings.
     HEADERS = {
         'Cache-Control': 'max-age=172800',
     }
@@ -232,6 +233,7 @@ class FilesPipeline(MediaPipeline):
         s3store = cls.STORE_SCHEMES['s3']
         s3store.AWS_ACCESS_KEY_ID = settings['AWS_ACCESS_KEY_ID']
         s3store.AWS_SECRET_ACCESS_KEY = settings['AWS_SECRET_ACCESS_KEY']
+        s3store.POLICY = settings['S3_STORE_ACL']
 
         cls.FILES_URLS_FIELD = settings.get('FILES_URLS_FIELD', cls.DEFAULT_FILES_URLS_FIELD)
         cls.FILES_RESULT_FIELD = settings.get('FILES_RESULT_FIELD', cls.DEFAULT_FILES_RESULT_FIELD)
diff --git a/scrapy/settings/default_settings.py b/scrapy/settings/default_settings.py
index 44e74dc61..e31dc6aef 100644
--- a/scrapy/settings/default_settings.py
+++ b/scrapy/settings/default_settings.py
@@ -231,6 +231,8 @@ SCHEDULER = 'scrapy.core.scheduler.Scheduler'
 SCHEDULER_DISK_QUEUE = 'scrapy.squeues.PickleLifoDiskQueue'
 SCHEDULER_MEMORY_QUEUE = 'scrapy.squeues.LifoMemoryQueue'
 
+S3_STORE_ACL = 'private'
+
 SPIDER_LOADER_CLASS = 'scrapy.spiderloader.SpiderLoader'
 
 SPIDER_MIDDLEWARES = {}

From 6137dd96d9b52e1a4e4e28dd64924f877a63329f Mon Sep 17 00:00:00 2001
From: Konstantin Lopuhin
Date: Wed, 24 Feb 2016 10:16:10 +0300
Subject: [PATCH 2/2] Fix documentation for S3_STORE_ACL (now
 settings.FILES_STORE_S3_ACL) settings: it has nothing to do with feed
 exporters.
---
 docs/topics/feed-exports.rst        |  3 ---
 docs/topics/settings.rst            | 18 +++++++++---------
 scrapy/pipelines/files.py           |  4 ++--
 scrapy/settings/default_settings.py |  4 ++--
 4 files changed, 13 insertions(+), 16 deletions(-)

diff --git a/docs/topics/feed-exports.rst b/docs/topics/feed-exports.rst
index 1f17ccd51..e5037129c 100644
--- a/docs/topics/feed-exports.rst
+++ b/docs/topics/feed-exports.rst
@@ -185,9 +185,6 @@ passed through the following settings:
 * :setting:`AWS_ACCESS_KEY_ID`
 * :setting:`AWS_SECRET_ACCESS_KEY`
 
-Default access policy for uploaded files is ``private``, it can be changed
-(for example, to ``public-read``) via :setting:`S3_STORE_ACL`.
-
 .. _topics-feed-storage-stdout:
 
 Standard output
diff --git a/docs/topics/settings.rst b/docs/topics/settings.rst
index b7d6f2d9a..0b1d99dba 100644
--- a/docs/topics/settings.rst
+++ b/docs/topics/settings.rst
@@ -607,6 +607,15 @@
 For more information See the :ref:`extensions user guide <topics-extensions>` and the :ref:`list of
 available extensions <topics-extensions-ref>`.
 
+.. setting:: FILES_STORE_S3_ACL
+
+FILES_STORE_S3_ACL
+------------------
+
+Default: ``'private'``
+
+S3-specific access control policy (ACL) for S3 files store.
+
 .. setting:: ITEM_PIPELINES
 
 ITEM_PIPELINES
@@ -926,15 +935,6 @@ If enabled, Scrapy will respect robots.txt policies. For more information see
     this option is enabled by default in settings.py file generated
     by ``scrapy startproject`` command.
 
-.. setting:: S3_STORE_ACL
-
-S3_STORE_ACL
-------------
-
-Default: ``'private'``
-
-S3-specific access control policy (ACL) for uploaded files.
-
 .. setting:: SCHEDULER
 
 SCHEDULER
diff --git a/scrapy/pipelines/files.py b/scrapy/pipelines/files.py
index b48f43094..1fd2571e2 100644
--- a/scrapy/pipelines/files.py
+++ b/scrapy/pipelines/files.py
@@ -82,7 +82,7 @@ class S3FilesStore(object):
 
     AWS_ACCESS_KEY_ID = None
     AWS_SECRET_ACCESS_KEY = None
-    POLICY = 'private'  # Overriden from settings.S3_STORE_ACL in
+    POLICY = 'private'  # Overriden from settings.FILES_STORE_S3_ACL in
                         # FilesPipeline.from_settings.
     HEADERS = {
         'Cache-Control': 'max-age=172800',
@@ -233,7 +233,7 @@ class FilesPipeline(MediaPipeline):
         s3store = cls.STORE_SCHEMES['s3']
         s3store.AWS_ACCESS_KEY_ID = settings['AWS_ACCESS_KEY_ID']
         s3store.AWS_SECRET_ACCESS_KEY = settings['AWS_SECRET_ACCESS_KEY']
-        s3store.POLICY = settings['S3_STORE_ACL']
+        s3store.POLICY = settings['FILES_STORE_S3_ACL']
 
         cls.FILES_URLS_FIELD = settings.get('FILES_URLS_FIELD', cls.DEFAULT_FILES_URLS_FIELD)
         cls.FILES_RESULT_FIELD = settings.get('FILES_RESULT_FIELD', cls.DEFAULT_FILES_RESULT_FIELD)
diff --git a/scrapy/settings/default_settings.py b/scrapy/settings/default_settings.py
index e31dc6aef..6a989d048 100644
--- a/scrapy/settings/default_settings.py
+++ b/scrapy/settings/default_settings.py
@@ -156,6 +156,8 @@ FEED_EXPORTERS_BASE = {
     'pickle': 'scrapy.exporters.PickleItemExporter',
 }
 
+FILES_STORE_S3_ACL = 'private'
+
 HTTPCACHE_ENABLED = False
 HTTPCACHE_DIR = 'httpcache'
 HTTPCACHE_IGNORE_MISSING = False
@@ -231,8 +233,6 @@ SCHEDULER = 'scrapy.core.scheduler.Scheduler'
 SCHEDULER_DISK_QUEUE = 'scrapy.squeues.PickleLifoDiskQueue'
 SCHEDULER_MEMORY_QUEUE = 'scrapy.squeues.LifoMemoryQueue'
 
-S3_STORE_ACL = 'private'
-
 SPIDER_LOADER_CLASS = 'scrapy.spiderloader.SpiderLoader'
 
 SPIDER_MIDDLEWARES = {}
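
For reviewers, a minimal usage sketch of the setting this series introduces, once both patches
are applied. The pipeline priority, bucket name and placeholder credentials below are
illustrative only and are not part of the patch:

    # settings.py of a Scrapy project using FilesPipeline with an S3 store
    ITEM_PIPELINES = {
        'scrapy.pipelines.files.FilesPipeline': 1,
    }

    FILES_STORE = 's3://some-bucket/files/'  # downloaded files are uploaded here
    FILES_STORE_S3_ACL = 'public-read'       # override the new 'private' default

    AWS_ACCESS_KEY_ID = '...'
    AWS_SECRET_ACCESS_KEY = '...'

Leaving FILES_STORE_S3_ACL unset keeps the 'private' default, which is the behaviour
change requested in #1778.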