Mirror of https://github.com/Sneed-Group/Poodletooth-iLand, synced 2025-01-09 17:53:50 +00:00
Resolve merge conflicts
Commit 3820b89b3e: 717 changed files with 399275 additions and 70 deletions.
@@ -287,7 +287,7 @@ from toontown.uberdog.DistributedPartyManager/AI/UD import DistributedPartyManag
 from toontown.rpc.AwardManager/UD import AwardManager/UD
 from toontown.uberdog.DistributedCpuInfoMgr/AI/UD import DistributedCpuInfoMgr/AI/UD
 from toontown.uberdog.DistributedInGameNewsMgr/AI/UD import DistributedInGameNewsMgr/AI/UD
-from toontown.coderedemption.TTCodeRedemptionMgr/AI/UD import TTCodeRedemptionMgr/AI/UD
+from toontown.coderedemption.TTCodeRedemptionMgr/AI import TTCodeRedemptionMgr/AI
 from toontown.distributed.NonRepeatableRandomSourceAI import NonRepeatableRandomSourceAI
 from toontown.distributed.NonRepeatableRandomSourceUD import NonRepeatableRandomSourceUD
 from toontown.ai.DistributedPhaseEventMgr/AI import DistributedPhaseEventMgr/AI
@@ -1264,7 +1264,7 @@ dclass DistributedPartyActivity : DistributedObject {
   exitRequestDenied(uint8);
   setToonsPlaying(uint32 []) broadcast ram;
   setState(string, int16) broadcast ram;
-  showJellybeanReward(uint32, uint8, string);
+  showJellybeanReward(uint32, uint32, string);
 };

 dclass DistributedPartyTeamActivity : DistributedPartyActivity {
@@ -3310,10 +3310,7 @@ dclass NonRepeatableRandomSourceClient {
 };

 dclass TTCodeRedemptionMgr : DistributedObject, NonRepeatableRandomSourceClient {
-  giveAwardToToonResult(uint32, uint32);
   redeemCode(uint32, char [0-256]) airecv clsend;
-  redeemCodeAiToUd(uint32, DoId, uint32, string, uint32);
-  redeemCodeResultUdToAi(uint32, uint32, uint32, uint32, uint32) airecv;
   redeemCodeResult(uint32, uint32, uint32);
 };

@@ -32,7 +32,7 @@ punchline-place-building-chance 100.0
 want-fishing #f
 want-housing #f
 want-pets #f
-want-parties #f
+want-parties #t

 # Optional:
 want-talkative-tyler #f
@@ -21,7 +21,7 @@ dc-file astron/dclass/otp.dc

 # Core features:
 want-pets #f
-want-parties #f
+want-parties #t
 want-cogdominiums #f
 want-achievements #f

@@ -10,7 +10,7 @@ shard-mid-pop 150
 # Core features:
 want-housing #t
 want-pets #f
-want-parties #f
+want-parties #t
 want-cogdominiums #f
 want-achievements #f
 boarding-group-merges #t
@@ -2,7 +2,7 @@
 from pandac.PandaModules import *


-hashVal = 3181838135L
+hashVal = 1907452799


 from toontown.coghq import DistributedCashbotBossSafe, DistributedCashbotBossCrane, DistributedBattleFactory, DistributedCashbotBossTreasure, DistributedCogHQDoor, DistributedSellbotHQDoor, DistributedFactoryElevatorExt, DistributedMintElevatorExt, DistributedLawOfficeElevatorExt, DistributedLawOfficeElevatorInt, LobbyManager, DistributedMegaCorp, DistributedFactory, DistributedLawOffice, DistributedLawOfficeFloor, DistributedLift, DistributedDoorEntity, DistributedSwitch, DistributedButton, DistributedTrigger, DistributedCrushableEntity, DistributedCrusherEntity, DistributedStomper, DistributedStomperPair, DistributedLaserField, DistributedGolfGreenGame, DistributedSecurityCamera, DistributedMover, DistributedElevatorMarker, DistributedBarrelBase, DistributedGagBarrel, DistributedBeanBarrel, DistributedHealBarrel, DistributedGrid, ActiveCell, DirectionalCell, CrusherCell, DistributedCrate, DistributedSinkingPlatform, BattleBlocker, DistributedMint, DistributedMintRoom, DistributedMintBattle, DistributedStage, DistributedStageRoom, DistributedStageBattle, DistributedLawbotBossGavel, DistributedLawbotCannon, DistributedLawbotChair, DistributedCogKart, DistributedCountryClub, DistributedCountryClubRoom, DistributedMoleField, DistributedCountryClubBattle, DistributedMaze, DistributedFoodBelt, DistributedBanquetTable, DistributedGolfSpot
panda/python/Lib/site-packages/easy-install.pth (new file, 4 lines)
@@ -0,0 +1,4 @@
import sys; sys.__plen = len(sys.path)
./setuptools-12.0.5-py2.7.egg
./pip-6.0.8-py2.7.egg
import sys; new=sys.path[sys.__plen:]; del sys.path[sys.__plen:]; p=getattr(sys,'__egginsert',0); sys.path[p:p]=new; sys.__egginsert = p+len(new)
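For context, site.py treats lines in a .pth file that begin with "import" as executable and every other line as a sys.path entry; a minimal sketch of the effect of the four lines above, unrolled:

    import sys

    sys.__plen = len(sys.path)                         # first executable line
    sys.path.append('./setuptools-12.0.5-py2.7.egg')   # plain entry lines
    sys.path.append('./pip-6.0.8-py2.7.egg')
    # last executable line: splice the new entries in at the __egginsert position
    new = sys.path[sys.__plen:]
    del sys.path[sys.__plen:]
    p = getattr(sys, '__egginsert', 0)
    sys.path[p:p] = new
    sys.__egginsert = p + len(new)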
@@ -0,0 +1,45 @@
Metadata-Version: 1.1
Name: pip
Version: 6.0.8
Summary: The PyPA recommended tool for installing Python packages.
Home-page: https://pip.pypa.io/
Author: The pip developers
Author-email: python-virtualenv@groups.google.com
License: MIT
Description: pip
        ===

        The `PyPA recommended
        <https://python-packaging-user-guide.readthedocs.org/en/latest/current.html>`_
        tool for installing Python packages.

        * `Installation <https://pip.pypa.io/en/latest/installing.html>`_
        * `Documentation <https://pip.pypa.io/>`_
        * `Changelog <https://pip.pypa.io/en/latest/news.html>`_
        * `Github Page <https://github.com/pypa/pip>`_
        * `Issue Tracking <https://github.com/pypa/pip/issues>`_
        * `Mailing list <http://groups.google.com/group/python-virtualenv>`_
        * User IRC: #pypa on Freenode.
        * Dev IRC: #pypa-dev on Freenode.


        .. image:: https://pypip.in/v/pip/badge.png
           :target: https://pypi.python.org/pypi/pip

        .. image:: https://secure.travis-ci.org/pypa/pip.png?branch=develop
           :target: http://travis-ci.org/pypa/pip

Keywords: easy_install distutils setuptools egg virtualenv
Platform: UNKNOWN
Classifier: Development Status :: 5 - Production/Stable
Classifier: Intended Audience :: Developers
Classifier: License :: OSI Approved :: MIT License
Classifier: Topic :: Software Development :: Build Tools
Classifier: Programming Language :: Python :: 2
Classifier: Programming Language :: Python :: 2.6
Classifier: Programming Language :: Python :: 2.7
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.2
Classifier: Programming Language :: Python :: 3.3
Classifier: Programming Language :: Python :: 3.4
Classifier: Programming Language :: Python :: Implementation :: PyPy
@@ -0,0 +1,277 @@
AUTHORS.txt
CHANGES.txt
LICENSE.txt
MANIFEST.in
README.rst
setup.cfg
setup.py
docs/Makefile
docs/__init__.py
docs/conf.py
docs/configuration.rst
docs/cookbook.rst
docs/development.rst
docs/distribute_setuptools.rst
docs/index.rst
docs/installing.rst
docs/logic.rst
docs/make.bat
docs/news.rst
docs/pipext.py
docs/quickstart.rst
docs/usage.rst
docs/user_guide.rst
docs/reference/index.rst
docs/reference/pip.rst
docs/reference/pip_freeze.rst
docs/reference/pip_install.rst
docs/reference/pip_list.rst
docs/reference/pip_search.rst
docs/reference/pip_show.rst
docs/reference/pip_uninstall.rst
docs/reference/pip_wheel.rst
pip/__init__.py
pip/__main__.py
pip/basecommand.py
pip/baseparser.py
pip/cmdoptions.py
pip/download.py
pip/exceptions.py
pip/index.py
pip/locations.py
pip/pep425tags.py
pip/status_codes.py
pip/wheel.py
pip.egg-info/PKG-INFO
pip.egg-info/SOURCES.txt
pip.egg-info/dependency_links.txt
pip.egg-info/entry_points.txt
pip.egg-info/not-zip-safe
pip.egg-info/pbr.json
pip.egg-info/requires.txt
pip.egg-info/top_level.txt
pip/_vendor/README.rst
pip/_vendor/__init__.py
pip/_vendor/ipaddress.py
pip/_vendor/re-vendor.py
pip/_vendor/retrying.py
pip/_vendor/six.py
pip/_vendor/vendor.txt
pip/_vendor/_markerlib/__init__.py
pip/_vendor/_markerlib/markers.py
pip/_vendor/cachecontrol/__init__.py
pip/_vendor/cachecontrol/adapter.py
pip/_vendor/cachecontrol/cache.py
pip/_vendor/cachecontrol/compat.py
pip/_vendor/cachecontrol/controller.py
pip/_vendor/cachecontrol/filewrapper.py
pip/_vendor/cachecontrol/heuristics.py
pip/_vendor/cachecontrol/serialize.py
pip/_vendor/cachecontrol/wrapper.py
pip/_vendor/cachecontrol/caches/__init__.py
pip/_vendor/cachecontrol/caches/file_cache.py
pip/_vendor/cachecontrol/caches/redis_cache.py
pip/_vendor/certifi/__init__.py
pip/_vendor/certifi/__main__.py
pip/_vendor/certifi/cacert.pem
pip/_vendor/certifi/core.py
pip/_vendor/colorama/__init__.py
pip/_vendor/colorama/ansi.py
pip/_vendor/colorama/ansitowin32.py
pip/_vendor/colorama/initialise.py
pip/_vendor/colorama/win32.py
pip/_vendor/colorama/winterm.py
pip/_vendor/distlib/__init__.py
pip/_vendor/distlib/compat.py
pip/_vendor/distlib/database.py
pip/_vendor/distlib/index.py
pip/_vendor/distlib/locators.py
pip/_vendor/distlib/manifest.py
pip/_vendor/distlib/markers.py
pip/_vendor/distlib/metadata.py
pip/_vendor/distlib/resources.py
pip/_vendor/distlib/scripts.py
pip/_vendor/distlib/t32.exe
pip/_vendor/distlib/t64.exe
pip/_vendor/distlib/util.py
pip/_vendor/distlib/version.py
pip/_vendor/distlib/w32.exe
pip/_vendor/distlib/w64.exe
pip/_vendor/distlib/wheel.py
pip/_vendor/distlib/_backport/__init__.py
pip/_vendor/distlib/_backport/misc.py
pip/_vendor/distlib/_backport/shutil.py
pip/_vendor/distlib/_backport/sysconfig.cfg
pip/_vendor/distlib/_backport/sysconfig.py
pip/_vendor/distlib/_backport/tarfile.py
pip/_vendor/html5lib/__init__.py
pip/_vendor/html5lib/constants.py
pip/_vendor/html5lib/html5parser.py
pip/_vendor/html5lib/ihatexml.py
pip/_vendor/html5lib/inputstream.py
pip/_vendor/html5lib/sanitizer.py
pip/_vendor/html5lib/tokenizer.py
pip/_vendor/html5lib/utils.py
pip/_vendor/html5lib/filters/__init__.py
pip/_vendor/html5lib/filters/_base.py
pip/_vendor/html5lib/filters/alphabeticalattributes.py
pip/_vendor/html5lib/filters/inject_meta_charset.py
pip/_vendor/html5lib/filters/lint.py
pip/_vendor/html5lib/filters/optionaltags.py
pip/_vendor/html5lib/filters/sanitizer.py
pip/_vendor/html5lib/filters/whitespace.py
pip/_vendor/html5lib/serializer/__init__.py
pip/_vendor/html5lib/serializer/htmlserializer.py
pip/_vendor/html5lib/treeadapters/__init__.py
pip/_vendor/html5lib/treeadapters/sax.py
pip/_vendor/html5lib/treebuilders/__init__.py
pip/_vendor/html5lib/treebuilders/_base.py
pip/_vendor/html5lib/treebuilders/dom.py
pip/_vendor/html5lib/treebuilders/etree.py
pip/_vendor/html5lib/treebuilders/etree_lxml.py
pip/_vendor/html5lib/treewalkers/__init__.py
pip/_vendor/html5lib/treewalkers/_base.py
pip/_vendor/html5lib/treewalkers/dom.py
pip/_vendor/html5lib/treewalkers/etree.py
pip/_vendor/html5lib/treewalkers/genshistream.py
pip/_vendor/html5lib/treewalkers/lxmletree.py
pip/_vendor/html5lib/treewalkers/pulldom.py
pip/_vendor/html5lib/trie/__init__.py
pip/_vendor/html5lib/trie/_base.py
pip/_vendor/html5lib/trie/datrie.py
pip/_vendor/html5lib/trie/py.py
pip/_vendor/lockfile/__init__.py
pip/_vendor/lockfile/linklockfile.py
pip/_vendor/lockfile/mkdirlockfile.py
pip/_vendor/lockfile/pidlockfile.py
pip/_vendor/lockfile/sqlitelockfile.py
pip/_vendor/lockfile/symlinklockfile.py
pip/_vendor/packaging/__about__.py
pip/_vendor/packaging/__init__.py
pip/_vendor/packaging/_compat.py
pip/_vendor/packaging/_structures.py
pip/_vendor/packaging/specifiers.py
pip/_vendor/packaging/version.py
pip/_vendor/pkg_resources/__init__.py
pip/_vendor/progress/__init__.py
pip/_vendor/progress/bar.py
pip/_vendor/progress/counter.py
pip/_vendor/progress/helpers.py
pip/_vendor/progress/spinner.py
pip/_vendor/requests/__init__.py
pip/_vendor/requests/adapters.py
pip/_vendor/requests/api.py
pip/_vendor/requests/auth.py
pip/_vendor/requests/cacert.pem
pip/_vendor/requests/certs.py
pip/_vendor/requests/compat.py
pip/_vendor/requests/cookies.py
pip/_vendor/requests/exceptions.py
pip/_vendor/requests/hooks.py
pip/_vendor/requests/models.py
pip/_vendor/requests/sessions.py
pip/_vendor/requests/status_codes.py
pip/_vendor/requests/structures.py
pip/_vendor/requests/utils.py
pip/_vendor/requests/packages/__init__.py
pip/_vendor/requests/packages/chardet/__init__.py
pip/_vendor/requests/packages/chardet/big5freq.py
pip/_vendor/requests/packages/chardet/big5prober.py
pip/_vendor/requests/packages/chardet/chardetect.py
pip/_vendor/requests/packages/chardet/chardistribution.py
pip/_vendor/requests/packages/chardet/charsetgroupprober.py
pip/_vendor/requests/packages/chardet/charsetprober.py
pip/_vendor/requests/packages/chardet/codingstatemachine.py
pip/_vendor/requests/packages/chardet/compat.py
pip/_vendor/requests/packages/chardet/constants.py
pip/_vendor/requests/packages/chardet/cp949prober.py
pip/_vendor/requests/packages/chardet/escprober.py
pip/_vendor/requests/packages/chardet/escsm.py
pip/_vendor/requests/packages/chardet/eucjpprober.py
pip/_vendor/requests/packages/chardet/euckrfreq.py
pip/_vendor/requests/packages/chardet/euckrprober.py
pip/_vendor/requests/packages/chardet/euctwfreq.py
pip/_vendor/requests/packages/chardet/euctwprober.py
pip/_vendor/requests/packages/chardet/gb2312freq.py
pip/_vendor/requests/packages/chardet/gb2312prober.py
pip/_vendor/requests/packages/chardet/hebrewprober.py
pip/_vendor/requests/packages/chardet/jisfreq.py
pip/_vendor/requests/packages/chardet/jpcntx.py
pip/_vendor/requests/packages/chardet/langbulgarianmodel.py
pip/_vendor/requests/packages/chardet/langcyrillicmodel.py
pip/_vendor/requests/packages/chardet/langgreekmodel.py
pip/_vendor/requests/packages/chardet/langhebrewmodel.py
pip/_vendor/requests/packages/chardet/langhungarianmodel.py
pip/_vendor/requests/packages/chardet/langthaimodel.py
pip/_vendor/requests/packages/chardet/latin1prober.py
pip/_vendor/requests/packages/chardet/mbcharsetprober.py
pip/_vendor/requests/packages/chardet/mbcsgroupprober.py
pip/_vendor/requests/packages/chardet/mbcssm.py
pip/_vendor/requests/packages/chardet/sbcharsetprober.py
pip/_vendor/requests/packages/chardet/sbcsgroupprober.py
pip/_vendor/requests/packages/chardet/sjisprober.py
pip/_vendor/requests/packages/chardet/universaldetector.py
pip/_vendor/requests/packages/chardet/utf8prober.py
pip/_vendor/requests/packages/urllib3/__init__.py
pip/_vendor/requests/packages/urllib3/_collections.py
pip/_vendor/requests/packages/urllib3/connection.py
pip/_vendor/requests/packages/urllib3/connectionpool.py
pip/_vendor/requests/packages/urllib3/exceptions.py
pip/_vendor/requests/packages/urllib3/fields.py
pip/_vendor/requests/packages/urllib3/filepost.py
pip/_vendor/requests/packages/urllib3/poolmanager.py
pip/_vendor/requests/packages/urllib3/request.py
pip/_vendor/requests/packages/urllib3/response.py
pip/_vendor/requests/packages/urllib3/contrib/__init__.py
pip/_vendor/requests/packages/urllib3/contrib/ntlmpool.py
pip/_vendor/requests/packages/urllib3/contrib/pyopenssl.py
pip/_vendor/requests/packages/urllib3/packages/__init__.py
pip/_vendor/requests/packages/urllib3/packages/ordered_dict.py
pip/_vendor/requests/packages/urllib3/packages/six.py
pip/_vendor/requests/packages/urllib3/packages/ssl_match_hostname/__init__.py
pip/_vendor/requests/packages/urllib3/packages/ssl_match_hostname/_implementation.py
pip/_vendor/requests/packages/urllib3/util/__init__.py
pip/_vendor/requests/packages/urllib3/util/connection.py
pip/_vendor/requests/packages/urllib3/util/request.py
pip/_vendor/requests/packages/urllib3/util/response.py
pip/_vendor/requests/packages/urllib3/util/retry.py
pip/_vendor/requests/packages/urllib3/util/ssl_.py
pip/_vendor/requests/packages/urllib3/util/timeout.py
pip/_vendor/requests/packages/urllib3/util/url.py
pip/commands/__init__.py
pip/commands/completion.py
pip/commands/freeze.py
pip/commands/help.py
pip/commands/install.py
pip/commands/list.py
pip/commands/search.py
pip/commands/show.py
pip/commands/uninstall.py
pip/commands/unzip.py
pip/commands/wheel.py
pip/commands/zip.py
pip/compat/__init__.py
pip/compat/dictconfig.py
pip/models/__init__.py
pip/models/index.py
pip/operations/__init__.py
pip/operations/freeze.py
pip/req/__init__.py
pip/req/req_file.py
pip/req/req_install.py
pip/req/req_requirement.py
pip/req/req_set.py
pip/req/req_uninstall.py
pip/utils/__init__.py
pip/utils/appdirs.py
pip/utils/build.py
pip/utils/deprecation.py
pip/utils/filesystem.py
pip/utils/logging.py
pip/utils/outdated.py
pip/utils/ui.py
pip/vcs/__init__.py
pip/vcs/bazaar.py
pip/vcs/git.py
pip/vcs/mercurial.py
pip/vcs/subversion.py
@@ -0,0 +1 @@
(one blank line)
@@ -0,0 +1,5 @@
[console_scripts]
pip = pip:main
pip2 = pip:main
pip2.7 = pip:main

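Each [console_scripts] entry above binds a command name to the main callable of the pip module; at install time setuptools generates a wrapper script roughly equivalent to this sketch:

    import sys
    from pip import main  # "pip = pip:main" means module "pip", attribute "main"

    if __name__ == '__main__':
        sys.exit(main())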
@@ -0,0 +1 @@
(one blank line)
@@ -0,0 +1,6 @@

[testing]
pytest
virtualenv>=1.10
scripttest>=1.3
mock
@@ -0,0 +1 @@
pip
@@ -0,0 +1,313 @@
#!/usr/bin/env python
from __future__ import absolute_import

import logging
import os
import optparse
import warnings

import sys
import re

from pip.exceptions import InstallationError, CommandError, PipError
from pip.utils import get_installed_distributions, get_prog
from pip.utils import deprecation
from pip.vcs import git, mercurial, subversion, bazaar  # noqa
from pip.baseparser import ConfigOptionParser, UpdatingDefaultsHelpFormatter
from pip.commands import get_summaries, get_similar_commands
from pip.commands import commands_dict
from pip._vendor.requests.packages.urllib3.exceptions import (
    InsecureRequestWarning,
)


# assignment for flake8 to be happy

# This fixes a peculiarity when importing via __import__ - as we are
# initialising the pip module, "from pip import cmdoptions" is recursive
# and appears not to work properly in that situation.
import pip.cmdoptions
cmdoptions = pip.cmdoptions

# The version as used in the setup.py and the docs conf.py
__version__ = "6.0.8"


logger = logging.getLogger(__name__)

# Hide the InsecureRequestWarning from urllib3
warnings.filterwarnings("ignore", category=InsecureRequestWarning)


def autocomplete():
    """Command and option completion for the main option parser (and options)
    and its subcommands (and options).

    Enable by sourcing one of the completion shell scripts (bash or zsh).
    """
    # Don't complete if user hasn't sourced bash_completion file.
    if 'PIP_AUTO_COMPLETE' not in os.environ:
        return
    cwords = os.environ['COMP_WORDS'].split()[1:]
    cword = int(os.environ['COMP_CWORD'])
    try:
        current = cwords[cword - 1]
    except IndexError:
        current = ''

    subcommands = [cmd for cmd, summary in get_summaries()]
    options = []
    # subcommand
    try:
        subcommand_name = [w for w in cwords if w in subcommands][0]
    except IndexError:
        subcommand_name = None

    parser = create_main_parser()
    # subcommand options
    if subcommand_name:
        # special case: 'help' subcommand has no options
        if subcommand_name == 'help':
            sys.exit(1)
        # special case: list locally installed dists for uninstall command
        if subcommand_name == 'uninstall' and not current.startswith('-'):
            installed = []
            lc = current.lower()
            for dist in get_installed_distributions(local_only=True):
                if dist.key.startswith(lc) and dist.key not in cwords[1:]:
                    installed.append(dist.key)
            # if there are no dists installed, fall back to option completion
            if installed:
                for dist in installed:
                    print(dist)
                sys.exit(1)

        subcommand = commands_dict[subcommand_name]()
        options += [(opt.get_opt_string(), opt.nargs)
                    for opt in subcommand.parser.option_list_all
                    if opt.help != optparse.SUPPRESS_HELP]

        # filter out previously specified options from available options
        prev_opts = [x.split('=')[0] for x in cwords[1:cword - 1]]
        options = [(x, v) for (x, v) in options if x not in prev_opts]
        # filter options by current input
        options = [(k, v) for k, v in options if k.startswith(current)]
        for option in options:
            opt_label = option[0]
            # append '=' to options which require args
            if option[1]:
                opt_label += '='
            print(opt_label)
    else:
        # show main parser options only when necessary
        if current.startswith('-') or current.startswith('--'):
            opts = [i.option_list for i in parser.option_groups]
            opts.append(parser.option_list)
            opts = (o for it in opts for o in it)

            subcommands += [i.get_opt_string() for i in opts
                            if i.help != optparse.SUPPRESS_HELP]

        print(' '.join([x for x in subcommands if x.startswith(current)]))
    sys.exit(1)


def create_main_parser():
    parser_kw = {
        'usage': '\n%prog <command> [options]',
        'add_help_option': False,
        'formatter': UpdatingDefaultsHelpFormatter(),
        'name': 'global',
        'prog': get_prog(),
    }

    parser = ConfigOptionParser(**parser_kw)
    parser.disable_interspersed_args()

    pip_pkg_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    parser.version = 'pip %s from %s (python %s)' % (
        __version__, pip_pkg_dir, sys.version[:3])

    # add the general options
    gen_opts = cmdoptions.make_option_group(cmdoptions.general_group, parser)
    parser.add_option_group(gen_opts)

    parser.main = True  # so the help formatter knows

    # create command listing for description
    command_summaries = get_summaries()
    description = [''] + ['%-27s %s' % (i, j) for i, j in command_summaries]
    parser.description = '\n'.join(description)

    return parser


def parseopts(args):
    parser = create_main_parser()

    # Note: parser calls disable_interspersed_args(), so the result of this
    # call is to split the initial args into the general options before the
    # subcommand and everything else.
    # For example:
    #  args: ['--timeout=5', 'install', '--user', 'INITools']
    #  general_options: ['--timeout==5']
    #  args_else: ['install', '--user', 'INITools']
    general_options, args_else = parser.parse_args(args)

    # --version
    if general_options.version:
        sys.stdout.write(parser.version)
        sys.stdout.write(os.linesep)
        sys.exit()

    # pip || pip help -> print_help()
    if not args_else or (args_else[0] == 'help' and len(args_else) == 1):
        parser.print_help()
        sys.exit()

    # the subcommand name
    cmd_name = args_else[0]

    if cmd_name not in commands_dict:
        guess = get_similar_commands(cmd_name)

        msg = ['unknown command "%s"' % cmd_name]
        if guess:
            msg.append('maybe you meant "%s"' % guess)

        raise CommandError(' - '.join(msg))

    # all the args without the subcommand
    cmd_args = args[:]
    cmd_args.remove(cmd_name)

    return cmd_name, cmd_args


def check_isolated(args):
    isolated = False

    if "--isolated" in args:
        isolated = True

    return isolated


def main(args=None):
    if args is None:
        args = sys.argv[1:]

    # Enable our Deprecation Warnings
    for deprecation_warning in deprecation.DEPRECATIONS:
        warnings.simplefilter("default", deprecation_warning)

    # Configure our deprecation warnings to be sent through loggers
    deprecation.install_warning_logger()

    autocomplete()

    try:
        cmd_name, cmd_args = parseopts(args)
    except PipError as exc:
        sys.stderr.write("ERROR: %s" % exc)
        sys.stderr.write(os.linesep)
        sys.exit(1)

    command = commands_dict[cmd_name](isolated=check_isolated(cmd_args))
    return command.main(cmd_args)


# ###########################################################
# # Writing freeze files

class FrozenRequirement(object):

    def __init__(self, name, req, editable, comments=()):
        self.name = name
        self.req = req
        self.editable = editable
        self.comments = comments

    _rev_re = re.compile(r'-r(\d+)$')
    _date_re = re.compile(r'-(20\d\d\d\d\d\d)$')

    @classmethod
    def from_dist(cls, dist, dependency_links, find_tags=False):
        location = os.path.normcase(os.path.abspath(dist.location))
        comments = []
        from pip.vcs import vcs, get_src_requirement
        if vcs.get_backend_name(location):
            editable = True
            try:
                req = get_src_requirement(dist, location, find_tags)
            except InstallationError as exc:
                logger.warning(
                    "Error when trying to get requirement for VCS system %s, "
                    "falling back to uneditable format", exc
                )
                req = None
            if req is None:
                logger.warning(
                    'Could not determine repository location of %s', location
                )
                comments.append(
                    '## !! Could not determine repository location'
                )
                req = dist.as_requirement()
                editable = False
        else:
            editable = False
            req = dist.as_requirement()
            specs = req.specs
            assert len(specs) == 1 and specs[0][0] in ["==", "==="]
            version = specs[0][1]
            ver_match = cls._rev_re.search(version)
            date_match = cls._date_re.search(version)
            if ver_match or date_match:
                svn_backend = vcs.get_backend('svn')
                if svn_backend:
                    svn_location = svn_backend().get_location(
                        dist,
                        dependency_links,
                    )
                if not svn_location:
                    logger.warning(
                        'Warning: cannot find svn location for %s', req)
                    comments.append(
                        '## FIXME: could not find svn URL in dependency_links '
                        'for this package:'
                    )
                else:
                    comments.append(
                        '# Installing as editable to satisfy requirement %s:' %
                        req
                    )
                    if ver_match:
                        rev = ver_match.group(1)
                    else:
                        rev = '{%s}' % date_match.group(1)
                    editable = True
                    req = '%s@%s#egg=%s' % (
                        svn_location,
                        rev,
                        cls.egg_name(dist)
                    )
        return cls(dist.project_name, req, editable, comments)

    @staticmethod
    def egg_name(dist):
        name = dist.egg_name()
        match = re.search(r'-py\d\.\d$', name)
        if match:
            name = name[:match.start()]
        return name

    def __str__(self):
        req = self.req
        if self.editable:
            req = '-e %s' % req
        return '\n'.join(list(self.comments) + [str(req)]) + '\n'


if __name__ == '__main__':
    sys.exit(main())
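The completion protocol that autocomplete() above implements can be exercised by hand; a sketch, assuming a pip 6.0.8 on PATH (the environment variables are exactly the ones the function reads):

    import os
    import subprocess

    env = dict(os.environ)
    env['PIP_AUTO_COMPLETE'] = '1'  # gate checked at the top of autocomplete()
    env['COMP_WORDS'] = 'pip ins'   # the partial command line from the shell
    env['COMP_CWORD'] = '1'         # index of the word being completed
    # autocomplete() runs before normal argument parsing, prints matching
    # subcommands ("install") and exits with status 1:
    subprocess.call(['pip'], env=env)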
@@ -0,0 +1,19 @@
from __future__ import absolute_import

import sys

# If we are running from a wheel, add the wheel to sys.path
# This allows the usage python pip-*.whl/pip install pip-*.whl
if __package__ == '':
    import os
    # __file__ is pip-*.whl/pip/__main__.py
    # first dirname call strips off '/__main__.py', second strips off '/pip'
    # Resulting path is the name of the wheel itself
    # Add that to sys.path so we can import pip
    path = os.path.dirname(os.path.dirname(__file__))
    sys.path.insert(0, path)

import pip

if __name__ == '__main__':
    sys.exit(pip.main())
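The comments in __main__.py describe running pip straight out of its wheel; an illustration (the wheel file name is an example, not something this commit ships):

    # A wheel is a zip archive, and CPython can execute a package inside one:
    #
    #     python pip-6.0.8-py2.py3-none-any.whl/pip install SomePackage
    #
    # In that mode __package__ == '', so the branch above grafts the wheel
    # itself onto sys.path before "import pip" runs.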
@@ -0,0 +1,93 @@
"""
pip._vendor is for vendoring dependencies of pip to prevent needing pip to
depend on something external.

Files inside of pip._vendor should be considered immutable and should only be
updated to versions from upstream.
"""
from __future__ import absolute_import

import glob
import os.path
import sys


# By default, look in this directory for a bunch of .whl files which we will
# add to the beginning of sys.path before attempting to import anything. This
# is done to support downstream re-distributors like Debian and Fedora who
# wish to create their own Wheels for our dependencies to aid in debundling.
WHEEL_DIR = os.path.abspath(os.path.dirname(__file__))

# Actually look inside of WHEEL_DIR to find .whl files and add them to the
# front of our sys.path.
sys.path = glob.glob(os.path.join(WHEEL_DIR, "*.whl")) + sys.path


class VendorAlias(object):

    def __init__(self):
        self._vendor_name = __name__
        self._vendor_pkg = self._vendor_name + "."

    def find_module(self, fullname, path=None):
        if fullname.startswith(self._vendor_pkg):
            return self

    def load_module(self, name):
        # Ensure that this only works for the vendored name
        if not name.startswith(self._vendor_pkg):
            raise ImportError(
                "Cannot import %s, must be a subpackage of '%s'." % (
                    name, self._vendor_name,
                )
            )

        # Check to see if we already have this item in sys.modules, if we do
        # then simply return that.
        if name in sys.modules:
            return sys.modules[name]

        # Check to see if we can import the vendor name
        try:
            # We do this dance here because we want to try and import this
            # module without hitting a recursion error because of a bunch of
            # VendorAlias instances on sys.meta_path
            real_meta_path = sys.meta_path[:]
            try:
                sys.meta_path = [
                    m for m in sys.meta_path
                    if not isinstance(m, VendorAlias)
                ]
                __import__(name)
                module = sys.modules[name]
            finally:
                # Re-add any additions to sys.meta_path that were made
                # during the import we just did, otherwise things like
                # pip._vendor.six.moves will fail.
                for m in sys.meta_path:
                    if m not in real_meta_path:
                        real_meta_path.append(m)

                # Restore sys.meta_path with any new items.
                sys.meta_path = real_meta_path
        except ImportError:
            # We can't import the vendor name, so we'll try to import the
            # "real" name.
            real_name = name[len(self._vendor_pkg):]
            try:
                __import__(real_name)
                module = sys.modules[real_name]
            except ImportError:
                raise ImportError("No module named '%s'" % (name,))

        # If we've gotten here we've found the module we're looking for, either
        # as part of our vendored package, or as the real name, so we'll add
        # it to sys.modules as the vendored name so that we don't have to do
        # the lookup again.
        sys.modules[name] = module

        # Finally, return the loaded module
        return module


sys.meta_path.append(VendorAlias())
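VendorAlias is a meta-path import hook (PEP 302 style); a sketch of the fallback it implements, assuming a debundled build where the vendored copy of six is absent:

    import sys
    from pip._vendor import six  # resolved by VendorAlias.load_module

    # When "pip._vendor.six" cannot be imported directly, load_module retries
    # the plain name "six" and registers it under the vendored name, so both
    # sys.modules entries point at the same module object:
    assert sys.modules['pip._vendor.six'] is six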
@@ -0,0 +1,16 @@
try:
    import ast
    from pip._vendor._markerlib.markers import default_environment, compile, interpret
except ImportError:
    if 'ast' in globals():
        raise
    def default_environment():
        return {}
    def compile(marker):
        def marker_fn(environment=None, override=None):
            # 'empty markers are True' heuristic won't install extra deps.
            return not marker.strip()
        marker_fn.__doc__ = marker
        return marker_fn
    def interpret(marker, environment=None, override=None):
        return compile(marker)()
@@ -0,0 +1,119 @@
# -*- coding: utf-8 -*-
"""Interpret PEP 345 environment markers.

EXPR [in|==|!=|not in] EXPR [or|and] ...

where EXPR belongs to any of those:

    python_version = '%s.%s' % (sys.version_info[0], sys.version_info[1])
    python_full_version = sys.version.split()[0]
    os.name = os.name
    sys.platform = sys.platform
    platform.version = platform.version()
    platform.machine = platform.machine()
    platform.python_implementation = platform.python_implementation()
    a free string, like '2.6', or 'win32'
"""

__all__ = ['default_environment', 'compile', 'interpret']

import ast
import os
import platform
import sys
import weakref

_builtin_compile = compile

try:
    from platform import python_implementation
except ImportError:
    if os.name == "java":
        # Jython 2.5 has ast module, but not platform.python_implementation() function.
        def python_implementation():
            return "Jython"
    else:
        raise


# restricted set of variables
_VARS = {'sys.platform': sys.platform,
         'python_version': '%s.%s' % sys.version_info[:2],
         # FIXME parsing sys.platform is not reliable, but there is no other
         # way to get e.g. 2.7.2+, and the PEP is defined with sys.version
         'python_full_version': sys.version.split(' ', 1)[0],
         'os.name': os.name,
         'platform.version': platform.version(),
         'platform.machine': platform.machine(),
         'platform.python_implementation': python_implementation(),
         'extra': None  # wheel extension
         }

for var in list(_VARS.keys()):
    if '.' in var:
        _VARS[var.replace('.', '_')] = _VARS[var]


def default_environment():
    """Return copy of default PEP 345 globals dictionary."""
    return dict(_VARS)


class ASTWhitelist(ast.NodeTransformer):
    def __init__(self, statement):
        self.statement = statement  # for error messages

    ALLOWED = (ast.Compare, ast.BoolOp, ast.Attribute, ast.Name, ast.Load, ast.Str)
    # Bool operations
    ALLOWED += (ast.And, ast.Or)
    # Comparison operations
    ALLOWED += (ast.Eq, ast.Gt, ast.GtE, ast.In, ast.Is, ast.IsNot, ast.Lt, ast.LtE, ast.NotEq, ast.NotIn)

    def visit(self, node):
        """Ensure statement only contains allowed nodes."""
        if not isinstance(node, self.ALLOWED):
            raise SyntaxError('Not allowed in environment markers.\n%s\n%s' %
                              (self.statement,
                               (' ' * node.col_offset) + '^'))
        return ast.NodeTransformer.visit(self, node)

    def visit_Attribute(self, node):
        """Flatten one level of attribute access."""
        new_node = ast.Name("%s.%s" % (node.value.id, node.attr), node.ctx)
        return ast.copy_location(new_node, node)


def parse_marker(marker):
    tree = ast.parse(marker, mode='eval')
    new_tree = ASTWhitelist(marker).generic_visit(tree)
    return new_tree


def compile_marker(parsed_marker):
    return _builtin_compile(parsed_marker, '<environment marker>', 'eval',
                            dont_inherit=True)


_cache = weakref.WeakValueDictionary()


def compile(marker):
    """Return compiled marker as a function accepting an environment dict."""
    try:
        return _cache[marker]
    except KeyError:
        pass
    if not marker.strip():
        def marker_fn(environment=None, override=None):
            """"""
            return True
    else:
        compiled_marker = compile_marker(parse_marker(marker))
        def marker_fn(environment=None, override=None):
            """override updates environment"""
            if override is None:
                override = {}
            if environment is None:
                environment = default_environment()
            environment.update(override)
            return eval(compiled_marker, environment)
    marker_fn.__doc__ = marker
    _cache[marker] = marker_fn
    return _cache[marker]


def interpret(marker, environment=None):
    return compile(marker)(environment)
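A usage sketch for the marker API above (vendored import path assumed; truth values depend on the running interpreter):

    from pip._vendor._markerlib import markers

    markers.interpret("python_version >= '2.6'")  # string compare against _VARS
    fn = markers.compile("extra == 'testing'")    # compiled once, cached weakly
    fn(override={'extra': 'testing'})             # -> True
    fn()                                          # -> False: 'extra' defaults to None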
@@ -0,0 +1,11 @@
"""CacheControl import Interface.

Make it easy to import from cachecontrol without long namespaces.
"""
__author__ = 'Eric Larson'
__email__ = 'eric@ionrock.org'
__version__ = '0.11.1'

from .wrapper import CacheControl
from .adapter import CacheControlAdapter
from .controller import CacheController
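A hedged wiring example for these exports; CacheControl itself lives in .wrapper (a file not shown in this view), and per the adapter code later in this diff it decorates a requests session and tags responses with from_cache:

    from pip._vendor import requests
    from pip._vendor.cachecontrol import CacheControl

    sess = CacheControl(requests.Session())              # mounts a CacheControlAdapter
    resp = sess.get('https://pypi.python.org/simple/')   # example URL
    again = sess.get('https://pypi.python.org/simple/')
    print(again.from_cache)                              # True if the cached copy was fresh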
@@ -0,0 +1,110 @@
import functools

from pip._vendor.requests.adapters import HTTPAdapter

from .controller import CacheController
from .cache import DictCache
from .filewrapper import CallbackFileWrapper


class CacheControlAdapter(HTTPAdapter):
    invalidating_methods = set(['PUT', 'DELETE'])

    def __init__(self, cache=None,
                 cache_etags=True,
                 controller_class=None,
                 serializer=None,
                 heuristic=None,
                 *args, **kw):
        super(CacheControlAdapter, self).__init__(*args, **kw)
        self.cache = cache or DictCache()
        self.heuristic = heuristic

        controller_factory = controller_class or CacheController
        self.controller = controller_factory(
            self.cache,
            cache_etags=cache_etags,
            serializer=serializer,
        )

    def send(self, request, **kw):
        """
        Send a request. Use the request information to see if it
        exists in the cache and cache the response if we need to and can.
        """
        if request.method == 'GET':
            cached_response = self.controller.cached_request(request)
            if cached_response:
                return self.build_response(request, cached_response, from_cache=True)

            # check for etags and add headers if appropriate
            request.headers.update(self.controller.conditional_headers(request))

        resp = super(CacheControlAdapter, self).send(request, **kw)

        return resp

    def build_response(self, request, response, from_cache=False):
        """
        Build a response by making a request or using the cache.

        This will end up calling send and returning a potentially
        cached response
        """
        if not from_cache and request.method == 'GET':

            # apply any expiration heuristics
            if response.status == 304:
                # We must have sent an ETag request. This could mean
                # that we've been expired already or that we simply
                # have an etag. In either case, we want to try and
                # update the cache if that is the case.
                cached_response = self.controller.update_cached_response(
                    request, response
                )

                if cached_response is not response:
                    from_cache = True

                # We are done with the server response, read a
                # possible response body (compliant servers will
                # not return one, but we cannot be 100% sure) and
                # release the connection back to the pool.
                response.read(decode_content=False)
                response.release_conn()

                response = cached_response
            else:
                # Check for any heuristics that might update headers
                # before trying to cache.
                if self.heuristic:
                    response = self.heuristic.apply(response)

                # Wrap the response file with a wrapper that will cache the
                # response when the stream has been consumed.
                response._fp = CallbackFileWrapper(
                    response._fp,
                    functools.partial(
                        self.controller.cache_response,
                        request,
                        response,
                    )
                )

        resp = super(CacheControlAdapter, self).build_response(
            request, response
        )

        # See if we should invalidate the cache.
        if request.method in self.invalidating_methods and resp.ok:
            cache_url = self.controller.cache_url(request.url)
            self.cache.delete(cache_url)

        # Give the request a from_cache attr to let people use it
        resp.from_cache = from_cache

        return resp

    def close(self):
        self.cache.close()
        super(CacheControlAdapter, self).close()
@@ -0,0 +1,39 @@
"""
The cache object API for implementing caches. The default is a thread
safe in-memory dictionary.
"""
from threading import Lock


class BaseCache(object):

    def get(self, key):
        raise NotImplemented()

    def set(self, key, value):
        raise NotImplemented()

    def delete(self, key):
        raise NotImplemented()

    def close(self):
        pass


class DictCache(BaseCache):

    def __init__(self, init_dict=None):
        self.lock = Lock()
        self.data = init_dict or {}

    def get(self, key):
        return self.data.get(key, None)

    def set(self, key, value):
        with self.lock:
            self.data.update({key: value})

    def delete(self, key):
        with self.lock:
            if key in self.data:
                self.data.pop(key)
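A quick check of the DictCache contract defined above (note that BaseCache's raise NotImplemented() is kept verbatim from upstream, though NotImplementedError is the stock Python idiom):

    from pip._vendor.cachecontrol.cache import DictCache

    c = DictCache()
    c.set('cache_url', b'serialized response')  # update guarded by the Lock
    assert c.get('cache_url') == b'serialized response'
    c.delete('cache_url')
    assert c.get('cache_url') is None           # missing keys come back as None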
@@ -0,0 +1,18 @@
from textwrap import dedent

try:
    from .file_cache import FileCache
except ImportError:
    notice = dedent('''
    NOTE: In order to use the FileCache you must have
    lockfile installed. You can install it via pip:
      pip install lockfile
    ''')
    print(notice)


try:
    import redis
    from .redis_cache import RedisCache
except ImportError:
    pass
@@ -0,0 +1,91 @@
import hashlib
import os

from pip._vendor.lockfile import FileLock

from ..cache import BaseCache


def _secure_open_write(filename, fmode):
    # We only want to write to this file, so open it in write only mode
    flags = os.O_WRONLY

    # os.O_CREAT | os.O_EXCL will fail if the file already exists, so we only
    # will open *new* files.
    # We specify this because we want to ensure that the mode we pass is the
    # mode of the file.
    flags |= os.O_CREAT | os.O_EXCL

    # Do not follow symlinks to prevent someone from making a symlink that
    # we follow and insecurely open a cache file.
    if hasattr(os, "O_NOFOLLOW"):
        flags |= os.O_NOFOLLOW

    # On Windows we'll mark this file as binary
    if hasattr(os, "O_BINARY"):
        flags |= os.O_BINARY

    # Before we open our file, we want to delete any existing file that is
    # there
    try:
        os.remove(filename)
    except (IOError, OSError):
        # The file must not exist already, so we can just skip ahead to opening
        pass

    # Open our file, the use of os.O_CREAT | os.O_EXCL will ensure that if a
    # race condition happens between the os.remove and this line, that an
    # error will be raised. Because we utilize a lockfile this should only
    # happen if someone is attempting to attack us.
    fd = os.open(filename, flags, fmode)
    try:
        return os.fdopen(fd, "wb")
    except:
        # An error occurred wrapping our FD in a file object
        os.close(fd)
        raise


class FileCache(BaseCache):
    def __init__(self, directory, forever=False, filemode=0o0600,
                 dirmode=0o0700):
        self.directory = directory
        self.forever = forever
        self.filemode = filemode
        self.dirmode = dirmode

    @staticmethod
    def encode(x):
        return hashlib.sha224(x.encode()).hexdigest()

    def _fn(self, name):
        hashed = self.encode(name)
        parts = list(hashed[:5]) + [hashed]
        return os.path.join(self.directory, *parts)

    def get(self, key):
        name = self._fn(key)
        if not os.path.exists(name):
            return None

        with open(name, 'rb') as fh:
            return fh.read()

    def set(self, key, value):
        name = self._fn(key)

        # Make sure the directory exists
        try:
            os.makedirs(os.path.dirname(name), self.dirmode)
        except (IOError, OSError):
            pass

        with FileLock(name) as lock:
            # Write our actual file
            with _secure_open_write(lock.path, self.filemode) as fh:
                fh.write(value)

    def delete(self, key):
        name = self._fn(key)
        if not self.forever:
            os.remove(name)
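The _fn() scheme above shards cache entries under five single-character directories drawn from a SHA-224 of the key; a sketch (the cache directory is an example):

    from pip._vendor.cachecontrol.caches.file_cache import FileCache

    fc = FileCache('/tmp/http-cache')          # example path; needs lockfile
    digest = fc.encode('http://example.com/')  # full sha224 hex digest
    print(fc._fn('http://example.com/'))
    # -> /tmp/http-cache joined with digest[0], digest[1], ..., digest[4],
    #    then the full digest as the file name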
@@ -0,0 +1,41 @@
from __future__ import division

from datetime import datetime


def total_seconds(td):
    """Python 2.6 compatibility"""
    if hasattr(td, 'total_seconds'):
        return td.total_seconds()

    ms = td.microseconds
    secs = (td.seconds + td.days * 24 * 3600)
    return (ms + secs * 10**6) / 10**6


class RedisCache(object):

    def __init__(self, conn):
        self.conn = conn

    def get(self, key):
        return self.conn.get(key)

    def set(self, key, value, expires=None):
        if not expires:
            self.conn.set(key, value)
        else:
            expires = expires - datetime.now()
            self.conn.setex(key, total_seconds(expires), value)

    def delete(self, key):
        self.conn.delete(key)

    def clear(self):
        """Helper for clearing all the keys in a database. Use with
        caution!"""
        for key in self.conn.keys():
            self.conn.delete(key)

    def close(self):
        self.conn.disconnect()
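Wiring the RedisCache above to a live connection is a matter of handing it a client; a sketch, assuming a local Redis server and the redis-py package (neither ships with this commit):

    from datetime import datetime, timedelta

    import redis
    from pip._vendor.cachecontrol.caches.redis_cache import RedisCache

    cache = RedisCache(redis.Redis(host='localhost', port=6379))
    cache.set('key', b'value')  # plain SET, no expiry
    cache.set('tmp', b'value',
              expires=datetime.now() + timedelta(minutes=5))  # SETEX via total_seconds()
    cache.get('tmp')            # b'value' until the five minutes lapse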
@@ -0,0 +1,16 @@
try:
    from urllib.parse import urljoin
except ImportError:
    from urlparse import urljoin


try:
    import cPickle as pickle
except ImportError:
    import pickle


# Handle the case where requests has been patched to not have urllib3
# bundled as part of its source.
from pip._vendor.requests.packages.urllib3.response import HTTPResponse
from pip._vendor.requests.packages.urllib3.util import is_fp_closed
@@ -0,0 +1,269 @@
"""
The httplib2 algorithms ported for use with requests.
"""
import re
import calendar
import time
from email.utils import parsedate_tz

from pip._vendor.requests.structures import CaseInsensitiveDict

from .cache import DictCache
from .serialize import Serializer


URI = re.compile(r"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?")


def parse_uri(uri):
    """Parses a URI using the regex given in Appendix B of RFC 3986.

        (scheme, authority, path, query, fragment) = parse_uri(uri)
    """
    groups = URI.match(uri).groups()
    return (groups[1], groups[3], groups[4], groups[6], groups[8])


class CacheController(object):
    """An interface to see if a request should be cached or not.
    """
    def __init__(self, cache=None, cache_etags=True, serializer=None):
        self.cache = cache or DictCache()
        self.cache_etags = cache_etags
        self.serializer = serializer or Serializer()

    def _urlnorm(self, uri):
        """Normalize the URL to create a safe key for the cache"""
        (scheme, authority, path, query, fragment) = parse_uri(uri)
        if not scheme or not authority:
            raise Exception("Only absolute URIs are allowed. uri = %s" % uri)

        scheme = scheme.lower()
        authority = authority.lower()

        if not path:
            path = "/"

        # Could do syntax based normalization of the URI before
        # computing the digest. See Section 6.2.2 of Std 66.
        request_uri = query and "?".join([path, query]) or path
        defrag_uri = scheme + "://" + authority + request_uri

        return defrag_uri

    def cache_url(self, uri):
        return self._urlnorm(uri)

    def parse_cache_control(self, headers):
        """
        Parse the cache control headers returning a dictionary with values
        for the different directives.
        """
        retval = {}

        cc_header = 'cache-control'
        if 'Cache-Control' in headers:
            cc_header = 'Cache-Control'

        if cc_header in headers:
            parts = headers[cc_header].split(',')
            parts_with_args = [
                tuple([x.strip().lower() for x in part.split("=", 1)])
                for part in parts if -1 != part.find("=")
            ]
            parts_wo_args = [
                (name.strip().lower(), 1)
                for name in parts if -1 == name.find("=")
            ]
            retval = dict(parts_with_args + parts_wo_args)
        return retval

    def cached_request(self, request):
        """
        Return a cached response if it exists in the cache, otherwise
        return False.
        """
        cache_url = self.cache_url(request.url)
        cc = self.parse_cache_control(request.headers)

        # non-caching states
        no_cache = True if 'no-cache' in cc else False
        # note: parse_cache_control() stores directive values as strings,
        # so compare against '0' rather than the integer 0.
        if 'max-age' in cc and cc['max-age'] == '0':
            no_cache = True

        # Bail out if no-cache was set
        if no_cache:
            return False

        # It is in the cache, so lets see if it is going to be
        # fresh enough
        resp = self.serializer.loads(request, self.cache.get(cache_url))

        # Check to see if we have a cached object
        if not resp:
            return False

        headers = CaseInsensitiveDict(resp.headers)

        now = time.time()
        date = calendar.timegm(
            parsedate_tz(headers['date'])
        )
        current_age = max(0, now - date)

        # TODO: There is an assumption that the result will be a
        #       urllib3 response object. This may not be best since we
        #       could probably avoid instantiating or constructing the
        #       response until we know we need it.
        resp_cc = self.parse_cache_control(headers)

        # determine freshness
        freshness_lifetime = 0

        # Check the max-age pragma in the cache control header
        if 'max-age' in resp_cc and resp_cc['max-age'].isdigit():
            freshness_lifetime = int(resp_cc['max-age'])

        # If there isn't a max-age, check for an expires header
        elif 'expires' in headers:
            expires = parsedate_tz(headers['expires'])
            if expires is not None:
                expire_time = calendar.timegm(expires) - date
                freshness_lifetime = max(0, expire_time)

        # determine if we are setting freshness limit in the req
        if 'max-age' in cc:
            try:
                freshness_lifetime = int(cc['max-age'])
            except ValueError:
                freshness_lifetime = 0

        if 'min-fresh' in cc:
            try:
                min_fresh = int(cc['min-fresh'])
            except ValueError:
                min_fresh = 0
            # adjust our current age by our min fresh
            current_age += min_fresh

        # see how fresh we actually are
        fresh = (freshness_lifetime > current_age)

        if fresh:
            return resp

        # we're not fresh. If we don't have an Etag, clear it out
        if 'etag' not in headers:
            self.cache.delete(cache_url)

        # return the original handler
        return False

    def conditional_headers(self, request):
        cache_url = self.cache_url(request.url)
        resp = self.serializer.loads(request, self.cache.get(cache_url))
        new_headers = {}

        if resp:
            headers = CaseInsensitiveDict(resp.headers)

            if 'etag' in headers:
                new_headers['If-None-Match'] = headers['ETag']

            if 'last-modified' in headers:
                new_headers['If-Modified-Since'] = headers['Last-Modified']

        return new_headers

    def cache_response(self, request, response, body=None):
        """
        Algorithm for caching requests.

        This assumes a requests Response object.
        """
        # From httplib2: Don't cache 206's since we aren't going to
        #                handle byte range requests
        if response.status not in [200, 203]:
            return

        response_headers = CaseInsensitiveDict(response.headers)

        cc_req = self.parse_cache_control(request.headers)
        cc = self.parse_cache_control(response_headers)

        cache_url = self.cache_url(request.url)

        # Delete it from the cache if we happen to have it stored there
        no_store = cc.get('no-store') or cc_req.get('no-store')
        if no_store and self.cache.get(cache_url):
            self.cache.delete(cache_url)

        # If we've been given an etag, then keep the response
        if self.cache_etags and 'etag' in response_headers:
            self.cache.set(
                cache_url,
                self.serializer.dumps(request, response, body=body),
            )

        # Add to the cache if the response headers demand it. If there
        # is no date header then we can't do anything about expiring
        # the cache.
        elif 'date' in response_headers:
            # cache when there is a max-age > 0
            if cc and cc.get('max-age'):
                if int(cc['max-age']) > 0:
                    self.cache.set(
                        cache_url,
                        self.serializer.dumps(request, response, body=body),
                    )

            # If the request can expire, it means we should cache it
            # in the meantime.
            elif 'expires' in response_headers:
                if response_headers['expires']:
                    self.cache.set(
                        cache_url,
                        self.serializer.dumps(request, response, body=body),
                    )

    def update_cached_response(self, request, response):
        """On a 304 we will get a new set of headers that we want to
        update our cached value with, assuming we have one.

        This should only ever be called when we've sent an ETag and
        gotten a 304 as the response.
        """
        cache_url = self.cache_url(request.url)

        cached_response = self.serializer.loads(request, self.cache.get(cache_url))

        if not cached_response:
            # we didn't have a cached response
            return response

        # Let's update our headers with the headers from the new request:
        # http://tools.ietf.org/html/draft-ietf-httpbis-p4-conditional-26#section-4.1
        #
        # The server isn't supposed to send headers that would make
        # the cached body invalid. But... just in case, we'll be sure
        # to strip out ones we know that might be problematic due to
        # typical assumptions.
        excluded_headers = [
            "content-length",
        ]

        cached_response.headers.update(
            dict((k, v) for k, v in response.headers.items()
                 if k.lower() not in excluded_headers)
        )

        # we want a 200 b/c we have content via the cache
        cached_response.status = 200

        # update our cache
        self.cache.set(
            cache_url,
            self.serializer.dumps(request, cached_response),
        )

        return cached_response
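A standalone illustration of the directive parsing above (the header value is made up):

    controller = CacheController()
    cc = controller.parse_cache_control(
        {'Cache-Control': 'max-age=3600, no-store, public'}
    )
    # Directives with an argument keep the value as a string;
    # bare directives map to 1.
    assert cc == {'max-age': '3600', 'no-store': 1, 'public': 1}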
@@ -0,0 +1,63 @@
from io import BytesIO


class CallbackFileWrapper(object):
    """
    Small wrapper around a fp object which will tee everything read into a
    buffer, and when that file is closed it will execute a callback with the
    contents of that buffer.

    All attributes are proxied to the underlying file object.

    This class uses members with a double underscore (__) leading prefix so as
    not to accidentally shadow an attribute.
    """

    def __init__(self, fp, callback):
        self.__buf = BytesIO()
        self.__fp = fp
        self.__callback = callback

    def __getattr__(self, name):
        # The vagaries of garbage collection mean that self.__fp is
        # not always set. Using __getattribute__ with the mangled
        # private name[0] looks up the attribute value and raises an
        # AttributeError when it doesn't exist. This stops things from
        # infinitely recursing calls to getattr in the case where
        # self.__fp hasn't been set.
        #
        # [0] https://docs.python.org/2/reference/expressions.html#atom-identifiers
        fp = self.__getattribute__('_CallbackFileWrapper__fp')
        return getattr(fp, name)

    def __is_fp_closed(self):
        try:
            return self.__fp.fp is None
        except AttributeError:
            pass

        try:
            return self.__fp.closed
        except AttributeError:
            pass

        # We just don't cache it then.
        # TODO: Add some logging here...
        return False

    def read(self, amt=None):
        data = self.__fp.read(amt)
        self.__buf.write(data)

        if self.__is_fp_closed():
            if self.__callback:
                self.__callback(self.__buf.getvalue())

            # We assign this to None here, because otherwise we can get into
            # really tricky problems where the CPython interpreter dead locks
            # because the callback is holding a reference to something which
            # has a __del__ method. Setting this to None breaks the cycle
            # and allows the garbage collector to do its thing normally.
            self.__callback = None

        return data
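A sketch of the teeing behaviour. FakeResponse below is a stand-in for the only part of urllib3's raw response the wrapper relies on: fp becomes None once the body is fully consumed.

    from io import BytesIO

    class FakeResponse(object):
        def __init__(self, data):
            self.fp = BytesIO(data)

        def read(self, amt=None):
            data = self.fp.read(amt)
            if not data:
                self.fp = None  # signal EOF the way urllib3 does
            return data

    captured = []
    wrapper = CallbackFileWrapper(FakeResponse(b'hello world'), captured.append)
    while wrapper.read(4):
        pass
    # The callback fired exactly once, with the full teed buffer.
    assert captured == [b'hello world']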
@@ -0,0 +1,79 @@
import calendar

from email.utils import formatdate, parsedate

from datetime import datetime, timedelta


def expire_after(delta, date=None):
    date = date or datetime.now()
    return date + delta


def datetime_to_header(dt):
    return formatdate(calendar.timegm(dt.timetuple()))


class BaseHeuristic(object):

    def warning(self, response):
        """
        Return a valid 1xx warning header value describing the cache adjustments.

        The response is provided to allow warnings like 113
        http://tools.ietf.org/html/rfc7234#section-5.5.4 where we need
        to explicitly say the response is over 24 hours old.
        """
        return '110 - "Response is Stale"'

    def update_headers(self, response):
        """Update the response headers with any new headers.

        NOTE: This SHOULD always include some Warning header to
              signify that the response was cached by the client, not
              by way of the provided headers.
        """
        return {}

    def apply(self, response):
        warning_header = {'warning': self.warning(response)}
        response.headers.update(self.update_headers(response))
        response.headers.update(warning_header)
        return response


class OneDayCache(BaseHeuristic):
    """
    Cache the response by providing an expires 1 day in the
    future.
    """
    def update_headers(self, response):
        headers = {}

        if 'expires' not in response.headers:
            date = parsedate(response.headers['date'])
            expires = expire_after(timedelta(days=1),
                                   date=datetime(*date[:6]))
            headers['expires'] = datetime_to_header(expires)
            headers['cache-control'] = 'public'
        return headers


class ExpiresAfter(BaseHeuristic):
    """
    Cache **all** requests for a defined time period.
    """

    def __init__(self, **kw):
        self.delta = timedelta(**kw)

    def update_headers(self, response):
        expires = expire_after(self.delta)
        return {
            'expires': datetime_to_header(expires),
            'cache-control': 'public',
        }

    def warning(self, response):
        tmpl = '110 - Automatically cached for %s. Response might be stale'
        return tmpl % self.delta
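Writing a new heuristic is just a matter of overriding update_headers (and optionally warning); an illustrative subclass in the same style:

    class OneHourCache(BaseHeuristic):
        # Stamp every response as publicly cacheable for one hour.
        def update_headers(self, response):
            return {
                'expires': datetime_to_header(expire_after(timedelta(hours=1))),
                'cache-control': 'public',
            }

        def warning(self, response):
            return '110 - "Automatically cached for 1 hour"'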
@@ -0,0 +1,172 @@
import base64
import io
import json
import zlib

from pip._vendor.requests.structures import CaseInsensitiveDict

from .compat import HTTPResponse, pickle


def _b64_encode_bytes(b):
    return base64.b64encode(b).decode("ascii")


def _b64_encode_str(s):
    return _b64_encode_bytes(s.encode("utf8"))


def _b64_decode_bytes(b):
    return base64.b64decode(b.encode("ascii"))


def _b64_decode_str(s):
    return _b64_decode_bytes(s).decode("utf8")


class Serializer(object):

    def dumps(self, request, response, body=None):
        response_headers = CaseInsensitiveDict(response.headers)

        if body is None:
            body = response.read(decode_content=False)

            # NOTE: 99% sure this is dead code. I'm only leaving it
            #       here b/c I don't have a test yet to prove
            #       it. Basically, before using
            #       `cachecontrol.filewrapper.CallbackFileWrapper`,
            #       this made an effort to reset the file handle. The
            #       `CallbackFileWrapper` short circuits this code by
            #       setting the body as the content is consumed, the
            #       result being a `body` argument is *always* passed
            #       into cache_response, and in turn,
            #       `Serializer.dump`.
            response._fp = io.BytesIO(body)

        data = {
            "response": {
                "body": _b64_encode_bytes(body),
                "headers": dict(
                    (_b64_encode_str(k), _b64_encode_str(v))
                    for k, v in response.headers.items()
                ),
                "status": response.status,
                "version": response.version,
                "reason": _b64_encode_str(response.reason),
                "strict": response.strict,
                "decode_content": response.decode_content,
            },
        }

        # Construct our vary headers
        data["vary"] = {}
        if "vary" in response_headers:
            varied_headers = response_headers['vary'].split(',')
            for header in varied_headers:
                header = header.strip()
                data["vary"][header] = request.headers.get(header, None)

        # Encode our Vary headers to ensure they can be serialized as JSON
        data["vary"] = dict(
            (_b64_encode_str(k), _b64_encode_str(v) if v is not None else v)
            for k, v in data["vary"].items()
        )

        return b",".join([
            b"cc=2",
            zlib.compress(
                json.dumps(
                    data, separators=(",", ":"), sort_keys=True,
                ).encode("utf8"),
            ),
        ])

    def loads(self, request, data):
        # Short circuit if we've been given an empty set of data
        if not data:
            return

        # Determine what version of the serializer the data was serialized
        # with
        try:
            ver, data = data.split(b",", 1)
        except ValueError:
            ver = b"cc=0"

        # Make sure that our "ver" is actually a version and isn't a false
        # positive from a , being in the data stream.
        if ver[:3] != b"cc=":
            data = ver + data
            ver = b"cc=0"

        # Get the version number out of the cc=N
        ver = ver.split(b"=", 1)[-1].decode("ascii")

        # Dispatch to the actual load method for the given version
        try:
            return getattr(self, "_loads_v{0}".format(ver))(request, data)
        except AttributeError:
            # This is a version we don't have a loads function for, so we'll
            # just treat it as a miss and return None
            return

    def prepare_response(self, request, cached):
        """Verify our vary headers match and construct a real urllib3
        HTTPResponse object.
        """
        # Special case the '*' Vary value as it means we cannot actually
        # determine if the cached response is suitable for this request.
        if "*" in cached.get("vary", {}):
            return

        # Ensure that the Vary headers for the cached response match our
        # request
        for header, value in cached.get("vary", {}).items():
            if request.headers.get(header, None) != value:
                return

        body = io.BytesIO(cached["response"].pop("body"))
        return HTTPResponse(
            body=body,
            preload_content=False,
            **cached["response"]
        )

    def _loads_v0(self, request, data):
        # The original legacy cache data. This doesn't contain enough
        # information to construct everything we need, so we'll treat this as
        # a miss.
        return

    def _loads_v1(self, request, data):
        try:
            cached = pickle.loads(data)
        except ValueError:
            return

        return self.prepare_response(request, cached)

    def _loads_v2(self, request, data):
        try:
            cached = json.loads(zlib.decompress(data).decode("utf8"))
        except ValueError:
            return

        # We need to decode the items that we've base64 encoded
        cached["response"]["body"] = _b64_decode_bytes(
            cached["response"]["body"]
        )
        cached["response"]["headers"] = dict(
            (_b64_decode_str(k), _b64_decode_str(v))
            for k, v in cached["response"]["headers"].items()
        )
        cached["response"]["reason"] = _b64_decode_str(
            cached["response"]["reason"],
        )
        cached["vary"] = dict(
            (_b64_decode_str(k), _b64_decode_str(v) if v is not None else v)
            for k, v in cached["vary"].items()
        )

        return self.prepare_response(request, cached)
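The "cc=2" on-disk format produced above is just a version tag, a comma, and zlib-compressed JSON; a standalone sketch of the framing:

    import json, zlib

    payload = {"response": {"status": 200}}
    blob = b"cc=2," + zlib.compress(json.dumps(payload).encode("utf8"))

    # loads() splits on the first comma to recover the version tag;
    # later commas inside the compressed stream are harmless.
    ver, data = blob.split(b",", 1)
    assert ver == b"cc=2"
    assert json.loads(zlib.decompress(data).decode("utf8")) == payload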
@@ -0,0 +1,21 @@
from .adapter import CacheControlAdapter
from .cache import DictCache


def CacheControl(sess,
                 cache=None,
                 cache_etags=True,
                 serializer=None,
                 heuristic=None):

    cache = cache or DictCache()
    adapter = CacheControlAdapter(
        cache,
        cache_etags=cache_etags,
        serializer=serializer,
        heuristic=heuristic,
    )
    sess.mount('http://', adapter)
    sess.mount('https://', adapter)

    return sess
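Typical usage wraps an existing requests session; a sketch (the URL is illustrative, and the import assumes requests is available in this context):

    import requests

    sess = CacheControl(requests.Session())
    # Both requests go through the caching adapter; the second one can
    # be answered from the cache if the first response was fresh enough.
    sess.get('http://example.com/')
    sess.get('http://example.com/')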
@@ -0,0 +1 @@
from .core import where
@@ -0,0 +1,2 @@
from pip._vendor.certifi import where
print(where())
File diff suppressed because it is too large
@@ -0,0 +1,19 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
certifi.py
~~~~~~~~~~

This module returns the installation location of cacert.pem.
"""

import os


def where():
    f = os.path.split(__file__)[0]

    return os.path.join(f, 'cacert.pem')


if __name__ == '__main__':
    print(where())
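where() simply resolves cacert.pem next to the module, so the result can be handed to any TLS-verifying API; a sketch (the requests call is illustrative):

    from pip._vendor.certifi import where

    print(where())  # .../certifi/cacert.pem
    # e.g. requests.get('https://example.com/', verify=where())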
@@ -0,0 +1,7 @@
# Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file.
from .initialise import init, deinit, reinit
from .ansi import Fore, Back, Style, Cursor
from .ansitowin32 import AnsiToWin32

__version__ = '0.3.3'
@@ -0,0 +1,99 @@
# Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file.
'''
This module generates ANSI character codes for printing colors to terminals.
See: http://en.wikipedia.org/wiki/ANSI_escape_code
'''

CSI = '\033['
OSC = '\033]'
BEL = '\007'


def code_to_chars(code):
    return CSI + str(code) + 'm'


class AnsiCodes(object):
    def __init__(self, codes):
        for name in dir(codes):
            if not name.startswith('_'):
                value = getattr(codes, name)
                setattr(self, name, code_to_chars(value))


class AnsiCursor(object):
    def UP(self, n=1):
        return CSI + str(n) + "A"
    def DOWN(self, n=1):
        return CSI + str(n) + "B"
    def FORWARD(self, n=1):
        return CSI + str(n) + "C"
    def BACK(self, n=1):
        return CSI + str(n) + "D"
    def POS(self, x=1, y=1):
        return CSI + str(y) + ";" + str(x) + "H"


def set_title(title):
    return OSC + "2;" + title + BEL

def clear_screen(mode=2):
    return CSI + str(mode) + "J"

def clear_line(mode=2):
    return CSI + str(mode) + "K"


class AnsiFore:
    BLACK = 30
    RED = 31
    GREEN = 32
    YELLOW = 33
    BLUE = 34
    MAGENTA = 35
    CYAN = 36
    WHITE = 37
    RESET = 39

    # These are fairly well supported, but not part of the standard.
    LIGHTBLACK_EX = 90
    LIGHTRED_EX = 91
    LIGHTGREEN_EX = 92
    LIGHTYELLOW_EX = 93
    LIGHTBLUE_EX = 94
    LIGHTMAGENTA_EX = 95
    LIGHTCYAN_EX = 96
    LIGHTWHITE_EX = 97


class AnsiBack:
    BLACK = 40
    RED = 41
    GREEN = 42
    YELLOW = 43
    BLUE = 44
    MAGENTA = 45
    CYAN = 46
    WHITE = 47
    RESET = 49

    # These are fairly well supported, but not part of the standard.
    LIGHTBLACK_EX = 100
    LIGHTRED_EX = 101
    LIGHTGREEN_EX = 102
    LIGHTYELLOW_EX = 103
    LIGHTBLUE_EX = 104
    LIGHTMAGENTA_EX = 105
    LIGHTCYAN_EX = 106
    LIGHTWHITE_EX = 107


class AnsiStyle:
    BRIGHT = 1
    DIM = 2
    NORMAL = 22
    RESET_ALL = 0

Fore = AnsiCodes( AnsiFore )
Back = AnsiCodes( AnsiBack )
Style = AnsiCodes( AnsiStyle )
Cursor = AnsiCursor()
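AnsiCodes converts each numeric attribute into its escape string once, at construction time; a quick sketch of what that yields:

    # Fore.RED is the pre-built escape string for code 31.
    assert Fore.RED == '\033[31m' == code_to_chars(AnsiFore.RED)
    print(Fore.RED + 'error' + Fore.RESET)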
@@ -0,0 +1,228 @@
# Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file.
import re
import sys
import os

from .ansi import AnsiFore, AnsiBack, AnsiStyle, Style
from .winterm import WinTerm, WinColor, WinStyle
from .win32 import windll


winterm = None
if windll is not None:
    winterm = WinTerm()


def is_a_tty(stream):
    return hasattr(stream, 'isatty') and stream.isatty()


class StreamWrapper(object):
    '''
    Wraps a stream (such as stdout), acting as a transparent proxy for all
    attribute access apart from method 'write()', which is delegated to our
    Converter instance.
    '''
    def __init__(self, wrapped, converter):
        # double-underscore everything to prevent clashes with names of
        # attributes on the wrapped stream object.
        self.__wrapped = wrapped
        self.__convertor = converter

    def __getattr__(self, name):
        return getattr(self.__wrapped, name)

    def write(self, text):
        self.__convertor.write(text)


class AnsiToWin32(object):
    '''
    Implements a 'write()' method which, on Windows, will strip ANSI character
    sequences from the text, and if outputting to a tty, will convert them into
    win32 function calls.
    '''
    ANSI_CSI_RE = re.compile('\033\[((?:\d|;)*)([a-zA-Z])')  # Control Sequence Introducer
    ANSI_OSC_RE = re.compile('\033\]((?:.|;)*?)(\x07)')      # Operating System Command

    def __init__(self, wrapped, convert=None, strip=None, autoreset=False):
        # The wrapped stream (normally sys.stdout or sys.stderr)
        self.wrapped = wrapped

        # should we reset colors to defaults after every .write()
        self.autoreset = autoreset

        # create the proxy wrapping our output stream
        self.stream = StreamWrapper(wrapped, self)

        on_windows = os.name == 'nt'
        on_emulated_windows = on_windows and 'TERM' in os.environ

        # should we strip ANSI sequences from our output?
        if strip is None:
            strip = on_windows and not on_emulated_windows
        self.strip = strip

        # should we convert ANSI sequences into win32 calls?
        if convert is None:
            convert = on_windows and not wrapped.closed and not on_emulated_windows and is_a_tty(wrapped)
        self.convert = convert

        # dict of ansi codes to win32 functions and parameters
        self.win32_calls = self.get_win32_calls()

        # are we wrapping stderr?
        self.on_stderr = self.wrapped is sys.stderr

    def should_wrap(self):
        '''
        True if this class is actually needed. If false, then the output
        stream will not be affected, nor will win32 calls be issued, so
        wrapping stdout is not actually required. This will generally be
        False on non-Windows platforms, unless optional functionality like
        autoreset has been requested using kwargs to init()
        '''
        return self.convert or self.strip or self.autoreset

    def get_win32_calls(self):
        if self.convert and winterm:
            return {
                AnsiStyle.RESET_ALL: (winterm.reset_all, ),
                AnsiStyle.BRIGHT: (winterm.style, WinStyle.BRIGHT),
                AnsiStyle.DIM: (winterm.style, WinStyle.NORMAL),
                AnsiStyle.NORMAL: (winterm.style, WinStyle.NORMAL),
                AnsiFore.BLACK: (winterm.fore, WinColor.BLACK),
                AnsiFore.RED: (winterm.fore, WinColor.RED),
                AnsiFore.GREEN: (winterm.fore, WinColor.GREEN),
                AnsiFore.YELLOW: (winterm.fore, WinColor.YELLOW),
                AnsiFore.BLUE: (winterm.fore, WinColor.BLUE),
                AnsiFore.MAGENTA: (winterm.fore, WinColor.MAGENTA),
                AnsiFore.CYAN: (winterm.fore, WinColor.CYAN),
                AnsiFore.WHITE: (winterm.fore, WinColor.GREY),
                AnsiFore.RESET: (winterm.fore, ),
                AnsiFore.LIGHTBLACK_EX: (winterm.fore, WinColor.BLACK, True),
                AnsiFore.LIGHTRED_EX: (winterm.fore, WinColor.RED, True),
                AnsiFore.LIGHTGREEN_EX: (winterm.fore, WinColor.GREEN, True),
                AnsiFore.LIGHTYELLOW_EX: (winterm.fore, WinColor.YELLOW, True),
                AnsiFore.LIGHTBLUE_EX: (winterm.fore, WinColor.BLUE, True),
                AnsiFore.LIGHTMAGENTA_EX: (winterm.fore, WinColor.MAGENTA, True),
                AnsiFore.LIGHTCYAN_EX: (winterm.fore, WinColor.CYAN, True),
                AnsiFore.LIGHTWHITE_EX: (winterm.fore, WinColor.GREY, True),
                AnsiBack.BLACK: (winterm.back, WinColor.BLACK),
                AnsiBack.RED: (winterm.back, WinColor.RED),
                AnsiBack.GREEN: (winterm.back, WinColor.GREEN),
                AnsiBack.YELLOW: (winterm.back, WinColor.YELLOW),
                AnsiBack.BLUE: (winterm.back, WinColor.BLUE),
                AnsiBack.MAGENTA: (winterm.back, WinColor.MAGENTA),
                AnsiBack.CYAN: (winterm.back, WinColor.CYAN),
                AnsiBack.WHITE: (winterm.back, WinColor.GREY),
                AnsiBack.RESET: (winterm.back, ),
                AnsiBack.LIGHTBLACK_EX: (winterm.back, WinColor.BLACK, True),
                AnsiBack.LIGHTRED_EX: (winterm.back, WinColor.RED, True),
                AnsiBack.LIGHTGREEN_EX: (winterm.back, WinColor.GREEN, True),
                AnsiBack.LIGHTYELLOW_EX: (winterm.back, WinColor.YELLOW, True),
                AnsiBack.LIGHTBLUE_EX: (winterm.back, WinColor.BLUE, True),
                AnsiBack.LIGHTMAGENTA_EX: (winterm.back, WinColor.MAGENTA, True),
                AnsiBack.LIGHTCYAN_EX: (winterm.back, WinColor.CYAN, True),
                AnsiBack.LIGHTWHITE_EX: (winterm.back, WinColor.GREY, True),
            }
        return dict()

    def write(self, text):
        if self.strip or self.convert:
            self.write_and_convert(text)
        else:
            self.wrapped.write(text)
            self.wrapped.flush()
        if self.autoreset:
            self.reset_all()


    def reset_all(self):
        if self.convert:
            self.call_win32('m', (0,))
        elif not self.wrapped.closed and is_a_tty(self.wrapped):
            self.wrapped.write(Style.RESET_ALL)


    def write_and_convert(self, text):
        '''
        Write the given text to our wrapped stream, stripping any ANSI
        sequences from the text, and optionally converting them into win32
        calls.
        '''
        cursor = 0
        text = self.convert_osc(text)
        for match in self.ANSI_CSI_RE.finditer(text):
            start, end = match.span()
            self.write_plain_text(text, cursor, start)
            self.convert_ansi(*match.groups())
            cursor = end
        self.write_plain_text(text, cursor, len(text))


    def write_plain_text(self, text, start, end):
        if start < end:
            self.wrapped.write(text[start:end])
            self.wrapped.flush()


    def convert_ansi(self, paramstring, command):
        if self.convert:
            params = self.extract_params(command, paramstring)
            self.call_win32(command, params)


    def extract_params(self, command, paramstring):
        if command in 'Hf':
            params = tuple(int(p) if len(p) != 0 else 1 for p in paramstring.split(';'))
            while len(params) < 2:
                # defaults:
                params = params + (1,)
        else:
            params = tuple(int(p) for p in paramstring.split(';') if len(p) != 0)
            if len(params) == 0:
                # defaults:
                if command in 'JKm':
                    params = (0,)
                elif command in 'ABCD':
                    params = (1,)

        return params


    def call_win32(self, command, params):
        if command == 'm':
            for param in params:
                if param in self.win32_calls:
                    func_args = self.win32_calls[param]
                    func = func_args[0]
                    args = func_args[1:]
                    kwargs = dict(on_stderr=self.on_stderr)
                    func(*args, **kwargs)
        elif command in 'J':
            winterm.erase_screen(params[0], on_stderr=self.on_stderr)
        elif command in 'K':
            winterm.erase_line(params[0], on_stderr=self.on_stderr)
        elif command in 'Hf':     # cursor position - absolute
            winterm.set_cursor_position(params, on_stderr=self.on_stderr)
        elif command in 'ABCD':   # cursor position - relative
            n = params[0]
            # A - up, B - down, C - forward, D - back
            x, y = {'A': (0, -n), 'B': (0, n), 'C': (n, 0), 'D': (-n, 0)}[command]
            winterm.cursor_adjust(x, y, on_stderr=self.on_stderr)


    def convert_osc(self, text):
        for match in self.ANSI_OSC_RE.finditer(text):
            start, end = match.span()
            text = text[:start] + text[end:]
            paramstring, command = match.groups()
            if command in '\x07':       # \x07 = BEL
                params = paramstring.split(";")
                # 0 - change title and icon (we will only change title)
                # 1 - change icon (we don't support this)
                # 2 - change title
                if params[0] in '02':
                    winterm.set_title(params[1])
        return text
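The CSI regex splits every escape sequence into a parameter string and a one-letter command, and extract_params fills in per-command defaults; a standalone sketch (wrapping an in-memory stream so nothing touches a real console):

    import io

    conv = AnsiToWin32(io.StringIO())
    assert conv.extract_params('m', '31;1') == (31, 1)
    assert conv.extract_params('H', '') == (1, 1)  # cursor-home defaults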
@@ -0,0 +1,66 @@
# Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file.
import atexit
import sys

from .ansitowin32 import AnsiToWin32


orig_stdout = sys.stdout
orig_stderr = sys.stderr

wrapped_stdout = sys.stdout
wrapped_stderr = sys.stderr

atexit_done = False


def reset_all():
    AnsiToWin32(orig_stdout).reset_all()


def init(autoreset=False, convert=None, strip=None, wrap=True):

    if not wrap and any([autoreset, convert, strip]):
        raise ValueError('wrap=False conflicts with any other arg=True')

    global wrapped_stdout, wrapped_stderr
    if sys.stdout is None:
        wrapped_stdout = None
    else:
        sys.stdout = wrapped_stdout = \
            wrap_stream(orig_stdout, convert, strip, autoreset, wrap)
    if sys.stderr is None:
        wrapped_stderr = None
    else:
        sys.stderr = wrapped_stderr = \
            wrap_stream(orig_stderr, convert, strip, autoreset, wrap)

    global atexit_done
    if not atexit_done:
        atexit.register(reset_all)
        atexit_done = True


def deinit():
    if orig_stdout is not None:
        sys.stdout = orig_stdout
    if orig_stderr is not None:
        sys.stderr = orig_stderr


def reinit():
    if wrapped_stdout is not None:
        sys.stdout = wrapped_stdout
    if wrapped_stderr is not None:
        sys.stderr = wrapped_stderr


def wrap_stream(stream, convert, strip, autoreset, wrap):
    if wrap:
        wrapper = AnsiToWin32(stream,
            convert=convert, strip=strip, autoreset=autoreset)
        if wrapper.should_wrap():
            stream = wrapper.stream
    return stream
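init() swaps sys.stdout/sys.stderr for wrapped proxies and deinit() restores the originals; a sketch (the import path assumes the vendored location used elsewhere in this diff):

    from pip._vendor.colorama import init, deinit, Fore

    init(autoreset=True)      # colors reset after every write
    print(Fore.GREEN + 'ok')  # no explicit RESET needed
    deinit()                  # back to the original streams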
@@ -0,0 +1,146 @@
# Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file.

# from winbase.h
STDOUT = -11
STDERR = -12

try:
    import ctypes
    from ctypes import LibraryLoader
    windll = LibraryLoader(ctypes.WinDLL)
    from ctypes import wintypes
except (AttributeError, ImportError):
    windll = None
    SetConsoleTextAttribute = lambda *_: None
else:
    from ctypes import byref, Structure, c_char, POINTER

    COORD = wintypes._COORD

    class CONSOLE_SCREEN_BUFFER_INFO(Structure):
        """struct in wincon.h."""
        _fields_ = [
            ("dwSize", COORD),
            ("dwCursorPosition", COORD),
            ("wAttributes", wintypes.WORD),
            ("srWindow", wintypes.SMALL_RECT),
            ("dwMaximumWindowSize", COORD),
        ]
        def __str__(self):
            return '(%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d)' % (
                self.dwSize.Y, self.dwSize.X
                , self.dwCursorPosition.Y, self.dwCursorPosition.X
                , self.wAttributes
                , self.srWindow.Top, self.srWindow.Left, self.srWindow.Bottom, self.srWindow.Right
                , self.dwMaximumWindowSize.Y, self.dwMaximumWindowSize.X
            )

    _GetStdHandle = windll.kernel32.GetStdHandle
    _GetStdHandle.argtypes = [
        wintypes.DWORD,
    ]
    _GetStdHandle.restype = wintypes.HANDLE

    _GetConsoleScreenBufferInfo = windll.kernel32.GetConsoleScreenBufferInfo
    _GetConsoleScreenBufferInfo.argtypes = [
        wintypes.HANDLE,
        POINTER(CONSOLE_SCREEN_BUFFER_INFO),
    ]
    _GetConsoleScreenBufferInfo.restype = wintypes.BOOL

    _SetConsoleTextAttribute = windll.kernel32.SetConsoleTextAttribute
    _SetConsoleTextAttribute.argtypes = [
        wintypes.HANDLE,
        wintypes.WORD,
    ]
    _SetConsoleTextAttribute.restype = wintypes.BOOL

    _SetConsoleCursorPosition = windll.kernel32.SetConsoleCursorPosition
    _SetConsoleCursorPosition.argtypes = [
        wintypes.HANDLE,
        COORD,
    ]
    _SetConsoleCursorPosition.restype = wintypes.BOOL

    _FillConsoleOutputCharacterA = windll.kernel32.FillConsoleOutputCharacterA
    _FillConsoleOutputCharacterA.argtypes = [
        wintypes.HANDLE,
        c_char,
        wintypes.DWORD,
        COORD,
        POINTER(wintypes.DWORD),
    ]
    _FillConsoleOutputCharacterA.restype = wintypes.BOOL

    _FillConsoleOutputAttribute = windll.kernel32.FillConsoleOutputAttribute
    _FillConsoleOutputAttribute.argtypes = [
        wintypes.HANDLE,
        wintypes.WORD,
        wintypes.DWORD,
        COORD,
        POINTER(wintypes.DWORD),
    ]
    _FillConsoleOutputAttribute.restype = wintypes.BOOL

    _SetConsoleTitleW = windll.kernel32.SetConsoleTitleA
    _SetConsoleTitleW.argtypes = [
        wintypes.LPCSTR
    ]
    _SetConsoleTitleW.restype = wintypes.BOOL

    handles = {
        STDOUT: _GetStdHandle(STDOUT),
        STDERR: _GetStdHandle(STDERR),
    }

    def GetConsoleScreenBufferInfo(stream_id=STDOUT):
        handle = handles[stream_id]
        csbi = CONSOLE_SCREEN_BUFFER_INFO()
        success = _GetConsoleScreenBufferInfo(
            handle, byref(csbi))
        return csbi

    def SetConsoleTextAttribute(stream_id, attrs):
        handle = handles[stream_id]
        return _SetConsoleTextAttribute(handle, attrs)

    def SetConsoleCursorPosition(stream_id, position, adjust=True):
        position = COORD(*position)
        # If the position is out of range, do nothing.
        if position.Y <= 0 or position.X <= 0:
            return
        # Adjust for Windows' SetConsoleCursorPosition:
        #    1. being 0-based, while ANSI is 1-based.
        #    2. expecting (x,y), while ANSI uses (y,x).
        adjusted_position = COORD(position.Y - 1, position.X - 1)
        if adjust:
            # Adjust for viewport's scroll position
            sr = GetConsoleScreenBufferInfo(STDOUT).srWindow
            adjusted_position.Y += sr.Top
            adjusted_position.X += sr.Left
        # Resume normal processing
        handle = handles[stream_id]
        return _SetConsoleCursorPosition(handle, adjusted_position)

    def FillConsoleOutputCharacter(stream_id, char, length, start):
        handle = handles[stream_id]
        char = c_char(char.encode())
        length = wintypes.DWORD(length)
        num_written = wintypes.DWORD(0)
        # Note that this is hard-coded for ANSI (vs wide) bytes.
        success = _FillConsoleOutputCharacterA(
            handle, char, length, start, byref(num_written))
        return num_written.value

    def FillConsoleOutputAttribute(stream_id, attr, length, start):
        ''' FillConsoleOutputAttribute( hConsole, csbi.wAttributes, dwConSize, coordScreen, &cCharsWritten )'''
        handle = handles[stream_id]
        attribute = wintypes.WORD(attr)
        length = wintypes.DWORD(length)
        num_written = wintypes.DWORD(0)
        # Note that this is hard-coded for ANSI (vs wide) bytes.
        return _FillConsoleOutputAttribute(
            handle, attribute, length, start, byref(num_written))

    def SetConsoleTitle(title):
        return _SetConsoleTitleW(title)
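Since windll is None off Windows (and the wrapper functions above are only defined in the else branch), callers guard on it before touching the console API; a sketch (the import path is assumed):

    from pip._vendor.colorama import win32

    if win32.windll is not None:
        info = win32.GetConsoleScreenBufferInfo(win32.STDOUT)
        print(info.dwSize.X, info.dwSize.Y)  # buffer width/height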
@@ -0,0 +1,151 @@
# Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file.
from . import win32


# from wincon.h
class WinColor(object):
    BLACK = 0
    BLUE = 1
    GREEN = 2
    CYAN = 3
    RED = 4
    MAGENTA = 5
    YELLOW = 6
    GREY = 7

# from wincon.h
class WinStyle(object):
    NORMAL = 0x00 # dim text, dim background
    BRIGHT = 0x08 # bright text, dim background
    BRIGHT_BACKGROUND = 0x80 # dim text, bright background

class WinTerm(object):

    def __init__(self):
        self._default = win32.GetConsoleScreenBufferInfo(win32.STDOUT).wAttributes
        self.set_attrs(self._default)
        self._default_fore = self._fore
        self._default_back = self._back
        self._default_style = self._style

    def get_attrs(self):
        return self._fore + self._back * 16 + self._style

    def set_attrs(self, value):
        self._fore = value & 7
        self._back = (value >> 4) & 7
        self._style = value & (WinStyle.BRIGHT | WinStyle.BRIGHT_BACKGROUND)

    def reset_all(self, on_stderr=None):
        self.set_attrs(self._default)
        self.set_console(attrs=self._default)

    def fore(self, fore=None, light=False, on_stderr=False):
        if fore is None:
            fore = self._default_fore
        self._fore = fore
        if light:
            self._style |= WinStyle.BRIGHT
        self.set_console(on_stderr=on_stderr)

    def back(self, back=None, light=False, on_stderr=False):
        if back is None:
            back = self._default_back
        self._back = back
        if light:
            self._style |= WinStyle.BRIGHT_BACKGROUND
        self.set_console(on_stderr=on_stderr)

    def style(self, style=None, on_stderr=False):
        if style is None:
            style = self._default_style
        self._style = style
        self.set_console(on_stderr=on_stderr)

    def set_console(self, attrs=None, on_stderr=False):
        if attrs is None:
            attrs = self.get_attrs()
        handle = win32.STDOUT
        if on_stderr:
            handle = win32.STDERR
        win32.SetConsoleTextAttribute(handle, attrs)

    def get_position(self, handle):
        position = win32.GetConsoleScreenBufferInfo(handle).dwCursorPosition
        # Because Windows coordinates are 0-based,
        # and win32.SetConsoleCursorPosition expects 1-based.
        position.X += 1
        position.Y += 1
        return position

    def set_cursor_position(self, position=None, on_stderr=False):
        if position is None:
            # I'm not currently tracking the position, so there is no default.
            # position = self.get_position()
            return
        handle = win32.STDOUT
        if on_stderr:
            handle = win32.STDERR
        win32.SetConsoleCursorPosition(handle, position)

    def cursor_adjust(self, x, y, on_stderr=False):
        handle = win32.STDOUT
        if on_stderr:
            handle = win32.STDERR
        position = self.get_position(handle)
        adjusted_position = (position.Y + y, position.X + x)
        win32.SetConsoleCursorPosition(handle, adjusted_position, adjust=False)

    def erase_screen(self, mode=0, on_stderr=False):
        # 0 should clear from the cursor to the end of the screen.
        # 1 should clear from the cursor to the beginning of the screen.
        # 2 should clear the entire screen, and move cursor to (1,1)
        handle = win32.STDOUT
        if on_stderr:
            handle = win32.STDERR
        csbi = win32.GetConsoleScreenBufferInfo(handle)
        # get the number of character cells in the current buffer
        cells_in_screen = csbi.dwSize.X * csbi.dwSize.Y
        # get number of character cells before current cursor position
        cells_before_cursor = csbi.dwSize.X * csbi.dwCursorPosition.Y + csbi.dwCursorPosition.X
        if mode == 0:
            from_coord = csbi.dwCursorPosition
            cells_to_erase = cells_in_screen - cells_before_cursor
        if mode == 1:
            from_coord = win32.COORD(0, 0)
            cells_to_erase = cells_before_cursor
        elif mode == 2:
            from_coord = win32.COORD(0, 0)
            cells_to_erase = cells_in_screen
        # fill the entire screen with blanks
        win32.FillConsoleOutputCharacter(handle, ' ', cells_to_erase, from_coord)
        # now set the buffer's attributes accordingly
        win32.FillConsoleOutputAttribute(handle, self.get_attrs(), cells_to_erase, from_coord)
        if mode == 2:
            # put the cursor where needed
            win32.SetConsoleCursorPosition(handle, (1, 1))

    def erase_line(self, mode=0, on_stderr=False):
        # 0 should clear from the cursor to the end of the line.
        # 1 should clear from the cursor to the beginning of the line.
        # 2 should clear the entire line.
        handle = win32.STDOUT
        if on_stderr:
            handle = win32.STDERR
        csbi = win32.GetConsoleScreenBufferInfo(handle)
        if mode == 0:
            from_coord = csbi.dwCursorPosition
            cells_to_erase = csbi.dwSize.X - csbi.dwCursorPosition.X
        if mode == 1:
            from_coord = win32.COORD(0, csbi.dwCursorPosition.Y)
            cells_to_erase = csbi.dwCursorPosition.X
        elif mode == 2:
            from_coord = win32.COORD(0, csbi.dwCursorPosition.Y)
            cells_to_erase = csbi.dwSize.X
        # fill the entire screen with blanks
        win32.FillConsoleOutputCharacter(handle, ' ', cells_to_erase, from_coord)
        # now set the buffer's attributes accordingly
        win32.FillConsoleOutputAttribute(handle, self.get_attrs(), cells_to_erase, from_coord)

    def set_title(self, title):
        win32.SetConsoleTitle(title)
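The console attribute word packs the foreground color in the low three bits, the background color in bits 4-6, and the brightness flags at 0x08/0x80; a standalone sketch of the get_attrs()/set_attrs() round trip:

    fore, back, style = WinColor.RED, WinColor.BLUE, WinStyle.BRIGHT
    attrs = fore + back * 16 + style  # 4 + 1*16 + 8 == 28
    assert attrs & 7 == fore
    assert (attrs >> 4) & 7 == back
    assert attrs & (WinStyle.BRIGHT | WinStyle.BRIGHT_BACKGROUND) == style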
@@ -0,0 +1,23 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2012-2014 Vinay Sajip.
# Licensed to the Python Software Foundation under a contributor agreement.
# See LICENSE.txt and CONTRIBUTORS.txt.
#
import logging

__version__ = '0.2.0'

class DistlibException(Exception):
    pass

try:
    from logging import NullHandler
except ImportError: # pragma: no cover
    class NullHandler(logging.Handler):
        def handle(self, record): pass
        def emit(self, record): pass
        def createLock(self): self.lock = None

logger = logging.getLogger(__name__)
logger.addHandler(NullHandler())
@@ -0,0 +1,6 @@
"""Modules copied from Python 3 standard libraries, for internal use only.

Individual classes and functions are found in d2._backport.misc. Intended
usage is to always import things missing from 3.1 from that module: the
built-in/stdlib objects will be used if found.
"""
@@ -0,0 +1,41 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2012 The Python Software Foundation.
# See LICENSE.txt and CONTRIBUTORS.txt.
#
"""Backports for individual classes and functions."""

import os
import sys

__all__ = ['cache_from_source', 'callable', 'fsencode']


try:
    from imp import cache_from_source
except ImportError:
    def cache_from_source(py_file, debug=__debug__):
        ext = debug and 'c' or 'o'
        return py_file + ext


try:
    callable = callable
except NameError:
    from collections import Callable

    def callable(obj):
        return isinstance(obj, Callable)


try:
    fsencode = os.fsencode
except AttributeError:
    def fsencode(filename):
        if isinstance(filename, bytes):
            return filename
        elif isinstance(filename, str):
            return filename.encode(sys.getfilesystemencoding())
        else:
            raise TypeError("expect bytes or str, not %s" %
                            type(filename).__name__)
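Each backport uses the same try/except shape: prefer the stdlib object, fall back to a local shim with the same contract; a sketch of the fsencode contract:

    assert fsencode(b'already/encoded') == b'already/encoded'
    assert isinstance(fsencode('some/path'), bytes)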
@ -0,0 +1,761 @@
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
#
|
||||||
|
# Copyright (C) 2012 The Python Software Foundation.
|
||||||
|
# See LICENSE.txt and CONTRIBUTORS.txt.
|
||||||
|
#
|
||||||
|
"""Utility functions for copying and archiving files and directory trees.
|
||||||
|
|
||||||
|
XXX The functions here don't copy the resource fork or other metadata on Mac.
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import stat
|
||||||
|
from os.path import abspath
|
||||||
|
import fnmatch
|
||||||
|
import collections
|
||||||
|
import errno
|
||||||
|
from . import tarfile
|
||||||
|
|
||||||
|
try:
|
||||||
|
import bz2
|
||||||
|
_BZ2_SUPPORTED = True
|
||||||
|
except ImportError:
|
||||||
|
_BZ2_SUPPORTED = False
|
||||||
|
|
||||||
|
try:
|
||||||
|
from pwd import getpwnam
|
||||||
|
except ImportError:
|
||||||
|
getpwnam = None
|
||||||
|
|
||||||
|
try:
|
||||||
|
from grp import getgrnam
|
||||||
|
except ImportError:
|
||||||
|
getgrnam = None
|
||||||
|
|
||||||
|
__all__ = ["copyfileobj", "copyfile", "copymode", "copystat", "copy", "copy2",
|
||||||
|
"copytree", "move", "rmtree", "Error", "SpecialFileError",
|
||||||
|
"ExecError", "make_archive", "get_archive_formats",
|
||||||
|
"register_archive_format", "unregister_archive_format",
|
||||||
|
"get_unpack_formats", "register_unpack_format",
|
||||||
|
"unregister_unpack_format", "unpack_archive", "ignore_patterns"]
|
||||||
|
|
||||||
|
class Error(EnvironmentError):
|
||||||
|
pass
|
||||||
|
|
||||||
|
class SpecialFileError(EnvironmentError):
|
||||||
|
"""Raised when trying to do a kind of operation (e.g. copying) which is
|
||||||
|
not supported on a special file (e.g. a named pipe)"""
|
||||||
|
|
||||||
|
class ExecError(EnvironmentError):
|
||||||
|
"""Raised when a command could not be executed"""
|
||||||
|
|
||||||
|
class ReadError(EnvironmentError):
|
||||||
|
"""Raised when an archive cannot be read"""
|
||||||
|
|
||||||
|
class RegistryError(Exception):
|
||||||
|
"""Raised when a registery operation with the archiving
|
||||||
|
and unpacking registeries fails"""
|
||||||
|
|
||||||
|
|
||||||
|
try:
|
||||||
|
WindowsError
|
||||||
|
except NameError:
|
||||||
|
WindowsError = None
|
||||||
|
|
||||||
|
def copyfileobj(fsrc, fdst, length=16*1024):
|
||||||
|
"""copy data from file-like object fsrc to file-like object fdst"""
|
||||||
|
while 1:
|
||||||
|
buf = fsrc.read(length)
|
||||||
|
if not buf:
|
||||||
|
break
|
||||||
|
fdst.write(buf)
|
||||||
|
|
||||||
|
def _samefile(src, dst):
|
||||||
|
# Macintosh, Unix.
|
||||||
|
if hasattr(os.path, 'samefile'):
|
||||||
|
try:
|
||||||
|
return os.path.samefile(src, dst)
|
||||||
|
except OSError:
|
||||||
|
return False
|
||||||
|
|
||||||
|
# All other platforms: check for same pathname.
|
||||||
|
return (os.path.normcase(os.path.abspath(src)) ==
|
||||||
|
os.path.normcase(os.path.abspath(dst)))
|
||||||
|
|
||||||
|
def copyfile(src, dst):
|
||||||
|
"""Copy data from src to dst"""
|
||||||
|
if _samefile(src, dst):
|
||||||
|
raise Error("`%s` and `%s` are the same file" % (src, dst))
|
||||||
|
|
||||||
|
for fn in [src, dst]:
|
||||||
|
try:
|
||||||
|
st = os.stat(fn)
|
||||||
|
except OSError:
|
||||||
|
# File most likely does not exist
|
||||||
|
pass
|
||||||
|
else:
|
||||||
|
# XXX What about other special files? (sockets, devices...)
|
||||||
|
if stat.S_ISFIFO(st.st_mode):
|
||||||
|
raise SpecialFileError("`%s` is a named pipe" % fn)
|
||||||
|
|
||||||
|
with open(src, 'rb') as fsrc:
|
||||||
|
with open(dst, 'wb') as fdst:
|
||||||
|
copyfileobj(fsrc, fdst)
|
||||||
|
|
||||||
|
def copymode(src, dst):
|
||||||
|
"""Copy mode bits from src to dst"""
|
||||||
|
if hasattr(os, 'chmod'):
|
||||||
|
st = os.stat(src)
|
||||||
|
mode = stat.S_IMODE(st.st_mode)
|
||||||
|
os.chmod(dst, mode)
|
||||||
|
|
||||||
|
def copystat(src, dst):
|
||||||
|
"""Copy all stat info (mode bits, atime, mtime, flags) from src to dst"""
|
||||||
|
st = os.stat(src)
|
||||||
|
mode = stat.S_IMODE(st.st_mode)
|
||||||
|
if hasattr(os, 'utime'):
|
||||||
|
os.utime(dst, (st.st_atime, st.st_mtime))
|
||||||
|
if hasattr(os, 'chmod'):
|
||||||
|
os.chmod(dst, mode)
|
||||||
|
if hasattr(os, 'chflags') and hasattr(st, 'st_flags'):
|
||||||
|
try:
|
||||||
|
os.chflags(dst, st.st_flags)
|
||||||
|
except OSError as why:
|
||||||
|
if (not hasattr(errno, 'EOPNOTSUPP') or
|
||||||
|
why.errno != errno.EOPNOTSUPP):
|
||||||
|
raise
|
||||||
|
|
||||||
|
def copy(src, dst):
|
||||||
|
"""Copy data and mode bits ("cp src dst").
|
||||||
|
|
||||||
|
The destination may be a directory.
|
||||||
|
|
||||||
|
"""
|
||||||
|
if os.path.isdir(dst):
|
||||||
|
dst = os.path.join(dst, os.path.basename(src))
|
||||||
|
copyfile(src, dst)
|
||||||
|
copymode(src, dst)
|
||||||
|
|
||||||
|
def copy2(src, dst):
|
||||||
|
"""Copy data and all stat info ("cp -p src dst").
|
||||||
|
|
||||||
|
The destination may be a directory.
|
||||||
|
|
||||||
|
"""
|
||||||
|
if os.path.isdir(dst):
|
||||||
|
dst = os.path.join(dst, os.path.basename(src))
|
||||||
|
copyfile(src, dst)
|
||||||
|
copystat(src, dst)
|
||||||
|
|
||||||
|
def ignore_patterns(*patterns):
    """Function that can be used as copytree() ignore parameter.

    Patterns is a sequence of glob-style patterns
    that are used to exclude files"""
    def _ignore_patterns(path, names):
        ignored_names = []
        for pattern in patterns:
            ignored_names.extend(fnmatch.filter(names, pattern))
        return set(ignored_names)
    return _ignore_patterns

def copytree(src, dst, symlinks=False, ignore=None, copy_function=copy2,
             ignore_dangling_symlinks=False):
    """Recursively copy a directory tree.

    The destination directory must not already exist.
    If exception(s) occur, an Error is raised with a list of reasons.

    If the optional symlinks flag is true, symbolic links in the
    source tree result in symbolic links in the destination tree; if
    it is false, the contents of the files pointed to by symbolic
    links are copied. If the file pointed to by the symlink doesn't
    exist, an exception will be added in the list of errors raised in
    an Error exception at the end of the copy process.

    You can set the optional ignore_dangling_symlinks flag to true if you
    want to silence this exception. Notice that this has no effect on
    platforms that don't support os.symlink.

    The optional ignore argument is a callable. If given, it
    is called with the `src` parameter, which is the directory
    being visited by copytree(), and `names` which is the list of
    `src` contents, as returned by os.listdir():

        callable(src, names) -> ignored_names

    Since copytree() is called recursively, the callable will be
    called once for each directory that is copied. It returns a
    list of names relative to the `src` directory that should
    not be copied.

    The optional copy_function argument is a callable that will be used
    to copy each file. It will be called with the source path and the
    destination path as arguments. By default, copy2() is used, but any
    function that supports the same signature (like copy()) can be used.

    """
    names = os.listdir(src)
    if ignore is not None:
        ignored_names = ignore(src, names)
    else:
        ignored_names = set()

    os.makedirs(dst)
    errors = []
    for name in names:
        if name in ignored_names:
            continue
        srcname = os.path.join(src, name)
        dstname = os.path.join(dst, name)
        try:
            if os.path.islink(srcname):
                linkto = os.readlink(srcname)
                if symlinks:
                    os.symlink(linkto, dstname)
                else:
                    # ignore dangling symlink if the flag is on
                    if not os.path.exists(linkto) and ignore_dangling_symlinks:
                        continue
                    # otherwise let the copy occur; copy2 will raise an error
                    copy_function(srcname, dstname)
            elif os.path.isdir(srcname):
                copytree(srcname, dstname, symlinks, ignore, copy_function)
            else:
                # Will raise a SpecialFileError for unsupported file types
                copy_function(srcname, dstname)
        # catch the Error from the recursive copytree so that we can
        # continue with other files
        except Error as err:
            errors.extend(err.args[0])
        except EnvironmentError as why:
            errors.append((srcname, dstname, str(why)))
    try:
        copystat(src, dst)
    except OSError as why:
        if WindowsError is not None and isinstance(why, WindowsError):
            # Copying file access times may fail on Windows
            pass
        else:
            errors.append((src, dst, str(why)))
    if errors:
        raise Error(errors)

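# --- Illustrative usage (not part of the original module) -----------------
# A minimal sketch of copytree() combined with an ignore_patterns() filter;
# the directory names are hypothetical.
def _example_copytree():
    copytree('project', '/tmp/project-copy',
             ignore=ignore_patterns('*.pyc', 'tmp*'))
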
def rmtree(path, ignore_errors=False, onerror=None):
    """Recursively delete a directory tree.

    If ignore_errors is set, errors are ignored; otherwise, if onerror
    is set, it is called to handle the error with arguments (func,
    path, exc_info) where func is os.listdir, os.remove, or os.rmdir;
    path is the argument to that function that caused it to fail; and
    exc_info is a tuple returned by sys.exc_info(). If ignore_errors
    is false and onerror is None, an exception is raised.

    """
    if ignore_errors:
        def onerror(*args):
            pass
    elif onerror is None:
        def onerror(*args):
            raise
    try:
        if os.path.islink(path):
            # symlinks to directories are forbidden, see bug #1669
            raise OSError("Cannot call rmtree on a symbolic link")
    except OSError:
        onerror(os.path.islink, path, sys.exc_info())
        # can't continue even if onerror hook returns
        return
    names = []
    try:
        names = os.listdir(path)
    except os.error:
        onerror(os.listdir, path, sys.exc_info())
    for name in names:
        fullname = os.path.join(path, name)
        try:
            mode = os.lstat(fullname).st_mode
        except os.error:
            mode = 0
        if stat.S_ISDIR(mode):
            rmtree(fullname, ignore_errors, onerror)
        else:
            try:
                os.remove(fullname)
            except os.error:
                onerror(os.remove, fullname, sys.exc_info())
    try:
        os.rmdir(path)
    except os.error:
        onerror(os.rmdir, path, sys.exc_info())

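# --- Illustrative usage (not part of the original module) -----------------
# A minimal sketch of an onerror hook that logs failures instead of
# aborting the deletion; the tree name is hypothetical.
def _example_rmtree():
    def log_error(func, path, exc_info):
        print('%s failed on %s: %s' % (func.__name__, path, exc_info[1]))
    rmtree('/tmp/build-tree', onerror=log_error)
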
def _basename(path):
    # A basename() variant which first strips the trailing slash, if present.
    # Thus we always get the last component of the path, even for directories.
    return os.path.basename(path.rstrip(os.path.sep))

def move(src, dst):
    """Recursively move a file or directory to another location. This is
    similar to the Unix "mv" command.

    If the destination is a directory or a symlink to a directory, the source
    is moved inside the directory. The destination path must not already
    exist.

    If the destination already exists but is not a directory, it may be
    overwritten depending on os.rename() semantics.

    If the destination is on our current filesystem, then rename() is used.
    Otherwise, src is copied to the destination and then removed.
    A lot more could be done here... A look at a mv.c shows a lot of
    the issues this implementation glosses over.

    """
    real_dst = dst
    if os.path.isdir(dst):
        if _samefile(src, dst):
            # We might be on a case insensitive filesystem,
            # perform the rename anyway.
            os.rename(src, dst)
            return

        real_dst = os.path.join(dst, _basename(src))
        if os.path.exists(real_dst):
            raise Error("Destination path '%s' already exists" % real_dst)
    try:
        os.rename(src, real_dst)
    except OSError:
        if os.path.isdir(src):
            if _destinsrc(src, dst):
                raise Error("Cannot move a directory '%s' into itself '%s'." % (src, dst))
            copytree(src, real_dst, symlinks=True)
            rmtree(src)
        else:
            copy2(src, real_dst)
            os.unlink(src)

def _destinsrc(src, dst):
    src = abspath(src)
    dst = abspath(dst)
    if not src.endswith(os.path.sep):
        src += os.path.sep
    if not dst.endswith(os.path.sep):
        dst += os.path.sep
    return dst.startswith(src)

def _get_gid(name):
    """Returns a gid, given a group name."""
    if getgrnam is None or name is None:
        return None
    try:
        result = getgrnam(name)
    except KeyError:
        result = None
    if result is not None:
        return result[2]
    return None

def _get_uid(name):
    """Returns a uid, given a user name."""
    if getpwnam is None or name is None:
        return None
    try:
        result = getpwnam(name)
    except KeyError:
        result = None
    if result is not None:
        return result[2]
    return None

def _make_tarball(base_name, base_dir, compress="gzip", verbose=0, dry_run=0,
                  owner=None, group=None, logger=None):
    """Create a (possibly compressed) tar file from all the files under
    'base_dir'.

    'compress' must be "gzip" (the default), "bzip2", or None.

    'owner' and 'group' can be used to define an owner and a group for the
    archive that is being built. If not provided, the current owner and group
    will be used.

    The output tar file will be named 'base_name' + ".tar", possibly plus
    the appropriate compression extension (".gz", or ".bz2").

    Returns the output filename.
    """
    tar_compression = {'gzip': 'gz', None: ''}
    compress_ext = {'gzip': '.gz'}

    if _BZ2_SUPPORTED:
        tar_compression['bzip2'] = 'bz2'
        compress_ext['bzip2'] = '.bz2'

    # flags for compression program, each element of list will be an argument
    if compress is not None and compress not in compress_ext:
        raise ValueError("bad value for 'compress', or compression format not "
                         "supported : {0}".format(compress))

    archive_name = base_name + '.tar' + compress_ext.get(compress, '')
    archive_dir = os.path.dirname(archive_name)

    if not os.path.exists(archive_dir):
        if logger is not None:
            logger.info("creating %s", archive_dir)
        if not dry_run:
            os.makedirs(archive_dir)

    # creating the tarball
    if logger is not None:
        logger.info('Creating tar archive')

    uid = _get_uid(owner)
    gid = _get_gid(group)

    def _set_uid_gid(tarinfo):
        if gid is not None:
            tarinfo.gid = gid
            tarinfo.gname = group
        if uid is not None:
            tarinfo.uid = uid
            tarinfo.uname = owner
        return tarinfo

    if not dry_run:
        tar = tarfile.open(archive_name, 'w|%s' % tar_compression[compress])
        try:
            tar.add(base_dir, filter=_set_uid_gid)
        finally:
            tar.close()

    return archive_name

def _call_external_zip(base_dir, zip_filename, verbose=False, dry_run=False):
    # XXX see if we want to keep an external call here
    if verbose:
        zipoptions = "-r"
    else:
        zipoptions = "-rq"
    from distutils.errors import DistutilsExecError
    from distutils.spawn import spawn
    try:
        spawn(["zip", zipoptions, zip_filename, base_dir], dry_run=dry_run)
    except DistutilsExecError:
        # XXX really should distinguish between "couldn't find
        # external 'zip' command" and "zip failed".
        raise ExecError("unable to create zip file '%s': "
                        "could neither import the 'zipfile' module nor "
                        "find a standalone zip utility" % zip_filename)

def _make_zipfile(base_name, base_dir, verbose=0, dry_run=0, logger=None):
    """Create a zip file from all the files under 'base_dir'.

    The output zip file will be named 'base_name' + ".zip". Uses either the
    "zipfile" Python module (if available) or the InfoZIP "zip" utility
    (if installed and found on the default search path). If neither tool is
    available, raises ExecError. Returns the name of the output zip
    file.
    """
    zip_filename = base_name + ".zip"
    archive_dir = os.path.dirname(base_name)

    if not os.path.exists(archive_dir):
        if logger is not None:
            logger.info("creating %s", archive_dir)
        if not dry_run:
            os.makedirs(archive_dir)

    # If zipfile module is not available, try spawning an external 'zip'
    # command.
    try:
        import zipfile
    except ImportError:
        zipfile = None

    if zipfile is None:
        _call_external_zip(base_dir, zip_filename, verbose, dry_run)
    else:
        if logger is not None:
            logger.info("creating '%s' and adding '%s' to it",
                        zip_filename, base_dir)

        if not dry_run:
            zip = zipfile.ZipFile(zip_filename, "w",
                                  compression=zipfile.ZIP_DEFLATED)

            for dirpath, dirnames, filenames in os.walk(base_dir):
                for name in filenames:
                    path = os.path.normpath(os.path.join(dirpath, name))
                    if os.path.isfile(path):
                        zip.write(path, path)
                        if logger is not None:
                            logger.info("adding '%s'", path)
            zip.close()

    return zip_filename

_ARCHIVE_FORMATS = {
    'gztar': (_make_tarball, [('compress', 'gzip')], "gzip'ed tar-file"),
    'bztar': (_make_tarball, [('compress', 'bzip2')], "bzip2'ed tar-file"),
    'tar': (_make_tarball, [('compress', None)], "uncompressed tar file"),
    'zip': (_make_zipfile, [], "ZIP file"),
    }

if _BZ2_SUPPORTED:
    _ARCHIVE_FORMATS['bztar'] = (_make_tarball, [('compress', 'bzip2')],
                                 "bzip2'ed tar-file")

def get_archive_formats():
    """Returns a list of supported formats for archiving and unarchiving.

    Each element of the returned sequence is a tuple (name, description)
    """
    formats = [(name, registry[2]) for name, registry in
               _ARCHIVE_FORMATS.items()]
    formats.sort()
    return formats

def register_archive_format(name, function, extra_args=None, description=''):
    """Registers an archive format.

    name is the name of the format. function is the callable that will be
    used to create archives. If provided, extra_args is a sequence of
    (name, value) tuples that will be passed as arguments to the callable.
    description can be provided to describe the format, and will be returned
    by the get_archive_formats() function.
    """
    if extra_args is None:
        extra_args = []
    if not isinstance(function, collections.Callable):
        raise TypeError('The %s object is not callable' % function)
    if not isinstance(extra_args, (tuple, list)):
        raise TypeError('extra_args needs to be a sequence')
    for element in extra_args:
        if not isinstance(element, (tuple, list)) or len(element) != 2:
            raise TypeError('extra_args elements are : (arg_name, value)')

    _ARCHIVE_FORMATS[name] = (function, extra_args, description)

def unregister_archive_format(name):
    del _ARCHIVE_FORMATS[name]

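# --- Illustrative usage (not part of the original module) -----------------
# A minimal sketch of register_archive_format() above: the callable must
# accept (base_name, base_dir, **kwargs) and return the file name it wrote.
# The "txtlist" format is hypothetical.
def _example_register_format():
    def _make_listing(base_name, base_dir, **kwargs):
        out = base_name + '.txt'
        with open(out, 'w') as f:
            for dirpath, dirnames, filenames in os.walk(base_dir):
                for fn in filenames:
                    f.write(os.path.join(dirpath, fn) + '\n')
        return out
    register_archive_format('txtlist', _make_listing,
                            description='plain-text file listing')
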
def make_archive(base_name, format, root_dir=None, base_dir=None, verbose=0,
                 dry_run=0, owner=None, group=None, logger=None):
    """Create an archive file (eg. zip or tar).

    'base_name' is the name of the file to create, minus any format-specific
    extension; 'format' is the archive format: one of "zip", "tar", "bztar"
    or "gztar".

    'root_dir' is a directory that will be the root directory of the
    archive; ie. we typically chdir into 'root_dir' before creating the
    archive. 'base_dir' is the directory where we start archiving from;
    ie. 'base_dir' will be the common prefix of all files and
    directories in the archive. 'root_dir' and 'base_dir' both default
    to the current directory. Returns the name of the archive file.

    'owner' and 'group' are used when creating a tar archive. By default,
    uses the current owner and group.
    """
    save_cwd = os.getcwd()
    if root_dir is not None:
        if logger is not None:
            logger.debug("changing into '%s'", root_dir)
        base_name = os.path.abspath(base_name)
        if not dry_run:
            os.chdir(root_dir)

    if base_dir is None:
        base_dir = os.curdir

    kwargs = {'dry_run': dry_run, 'logger': logger}

    try:
        format_info = _ARCHIVE_FORMATS[format]
    except KeyError:
        raise ValueError("unknown archive format '%s'" % format)

    func = format_info[0]
    for arg, val in format_info[1]:
        kwargs[arg] = val

    if format != 'zip':
        kwargs['owner'] = owner
        kwargs['group'] = group

    try:
        filename = func(base_name, base_dir, **kwargs)
    finally:
        if root_dir is not None:
            if logger is not None:
                logger.debug("changing back to '%s'", save_cwd)
            os.chdir(save_cwd)

    return filename

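# --- Illustrative usage (not part of the original module) -----------------
# A minimal sketch of make_archive(): pack everything under root_dir into
# /tmp/site-backup.tar.gz. The paths are hypothetical.
def _example_make_archive():
    return make_archive('/tmp/site-backup', 'gztar', root_dir='/var/www')
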
def get_unpack_formats():
    """Returns a list of supported formats for unpacking.

    Each element of the returned sequence is a tuple
    (name, extensions, description)
    """
    formats = [(name, info[0], info[3]) for name, info in
               _UNPACK_FORMATS.items()]
    formats.sort()
    return formats

def _check_unpack_options(extensions, function, extra_args):
    """Checks what gets registered as an unpacker."""
    # first make sure no other unpacker is registered for this extension
    existing_extensions = {}
    for name, info in _UNPACK_FORMATS.items():
        for ext in info[0]:
            existing_extensions[ext] = name

    for extension in extensions:
        if extension in existing_extensions:
            msg = '%s is already registered for "%s"'
            raise RegistryError(msg % (extension,
                                       existing_extensions[extension]))

    if not isinstance(function, collections.Callable):
        raise TypeError('The registered function must be a callable')

def register_unpack_format(name, extensions, function, extra_args=None,
                           description=''):
    """Registers an unpack format.

    `name` is the name of the format. `extensions` is a list of extensions
    corresponding to the format.

    `function` is the callable that will be
    used to unpack archives. The callable will receive archives to unpack.
    If it's unable to handle an archive, it needs to raise a ReadError
    exception.

    If provided, `extra_args` is a sequence of
    (name, value) tuples that will be passed as arguments to the callable.
    description can be provided to describe the format, and will be returned
    by the get_unpack_formats() function.
    """
    if extra_args is None:
        extra_args = []
    _check_unpack_options(extensions, function, extra_args)
    _UNPACK_FORMATS[name] = extensions, function, extra_args, description

def unregister_unpack_format(name):
    """Removes the unpack format from the registry."""
    del _UNPACK_FORMATS[name]

def _ensure_directory(path):
    """Ensure that the parent directory of `path` exists"""
    dirname = os.path.dirname(path)
    if not os.path.isdir(dirname):
        os.makedirs(dirname)

def _unpack_zipfile(filename, extract_dir):
    """Unpack zip `filename` to `extract_dir`
    """
    try:
        import zipfile
    except ImportError:
        raise ReadError('zlib not supported, cannot unpack this archive.')

    if not zipfile.is_zipfile(filename):
        raise ReadError("%s is not a zip file" % filename)

    zip = zipfile.ZipFile(filename)
    try:
        for info in zip.infolist():
            name = info.filename

            # don't extract absolute paths or ones with .. in them
            if name.startswith('/') or '..' in name:
                continue

            target = os.path.join(extract_dir, *name.split('/'))
            if not target:
                continue

            _ensure_directory(target)
            if not name.endswith('/'):
                # file
                data = zip.read(info.filename)
                f = open(target, 'wb')
                try:
                    f.write(data)
                finally:
                    f.close()
                    del data
    finally:
        zip.close()

def _unpack_tarfile(filename, extract_dir):
    """Unpack tar/tar.gz/tar.bz2 `filename` to `extract_dir`
    """
    try:
        tarobj = tarfile.open(filename)
    except tarfile.TarError:
        raise ReadError(
            "%s is not a compressed or uncompressed tar file" % filename)
    try:
        tarobj.extractall(extract_dir)
    finally:
        tarobj.close()

_UNPACK_FORMATS = {
    'gztar': (['.tar.gz', '.tgz'], _unpack_tarfile, [], "gzip'ed tar-file"),
    'tar': (['.tar'], _unpack_tarfile, [], "uncompressed tar file"),
    'zip': (['.zip'], _unpack_zipfile, [], "ZIP file")
    }

if _BZ2_SUPPORTED:
    _UNPACK_FORMATS['bztar'] = (['.bz2'], _unpack_tarfile, [],
                                "bzip2'ed tar-file")

def _find_unpack_format(filename):
    for name, info in _UNPACK_FORMATS.items():
        for extension in info[0]:
            if filename.endswith(extension):
                return name
    return None

def unpack_archive(filename, extract_dir=None, format=None):
    """Unpack an archive.

    `filename` is the name of the archive.

    `extract_dir` is the name of the target directory, where the archive
    is unpacked. If not provided, the current working directory is used.

    `format` is the archive format: one of "zip", "tar", or "gztar". Or any
    other registered format. If not provided, unpack_archive will use the
    filename extension and see if an unpacker was registered for that
    extension.

    In case none is found, a ValueError is raised.
    """
    if extract_dir is None:
        extract_dir = os.getcwd()

    if format is not None:
        try:
            format_info = _UNPACK_FORMATS[format]
        except KeyError:
            raise ValueError("Unknown unpack format '{0}'".format(format))

        func = format_info[1]
        func(filename, extract_dir, **dict(format_info[2]))
    else:
        # we need to look at the registered unpackers supported extensions
        format = _find_unpack_format(filename)
        if format is None:
            raise ReadError("Unknown archive format '{0}'".format(filename))

        func = _UNPACK_FORMATS[format][1]
        kwargs = dict(_UNPACK_FORMATS[format][2])
        func(filename, extract_dir, **kwargs)

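# --- Illustrative usage (not part of the original module) -----------------
# A minimal sketch of unpack_archive(): the format is inferred from the
# file extension when not given explicitly. Paths are hypothetical.
def _example_unpack_archive():
    unpack_archive('/tmp/site-backup.tar.gz', extract_dir='/tmp/restore')
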
@@ -0,0 +1,84 @@
[posix_prefix]
# Configuration directories. Some of these come straight out of the
# configure script. They are for implementing the other variables, not to
# be used directly in [resource_locations].
confdir = /etc
datadir = /usr/share
libdir = /usr/lib
statedir = /var
# User resource directory
local = ~/.local/{distribution.name}

stdlib = {base}/lib/python{py_version_short}
platstdlib = {platbase}/lib/python{py_version_short}
purelib = {base}/lib/python{py_version_short}/site-packages
platlib = {platbase}/lib/python{py_version_short}/site-packages
include = {base}/include/python{py_version_short}{abiflags}
platinclude = {platbase}/include/python{py_version_short}{abiflags}
scripts = {base}/bin
data = {base}

[posix_home]
stdlib = {base}/lib/python
platstdlib = {base}/lib/python
purelib = {base}/lib/python
platlib = {base}/lib/python
include = {base}/include/python
platinclude = {base}/include/python
scripts = {base}/bin
data = {base}

[nt]
stdlib = {base}/Lib
platstdlib = {base}/Lib
purelib = {base}/Lib/site-packages
platlib = {base}/Lib/site-packages
include = {base}/Include
platinclude = {base}/Include
scripts = {base}/Scripts
data = {base}

[os2]
stdlib = {base}/Lib
platstdlib = {base}/Lib
purelib = {base}/Lib/site-packages
platlib = {base}/Lib/site-packages
include = {base}/Include
platinclude = {base}/Include
scripts = {base}/Scripts
data = {base}

[os2_home]
stdlib = {userbase}/lib/python{py_version_short}
platstdlib = {userbase}/lib/python{py_version_short}
purelib = {userbase}/lib/python{py_version_short}/site-packages
platlib = {userbase}/lib/python{py_version_short}/site-packages
include = {userbase}/include/python{py_version_short}
scripts = {userbase}/bin
data = {userbase}

[nt_user]
stdlib = {userbase}/Python{py_version_nodot}
platstdlib = {userbase}/Python{py_version_nodot}
purelib = {userbase}/Python{py_version_nodot}/site-packages
platlib = {userbase}/Python{py_version_nodot}/site-packages
include = {userbase}/Python{py_version_nodot}/Include
scripts = {userbase}/Scripts
data = {userbase}

[posix_user]
stdlib = {userbase}/lib/python{py_version_short}
platstdlib = {userbase}/lib/python{py_version_short}
purelib = {userbase}/lib/python{py_version_short}/site-packages
platlib = {userbase}/lib/python{py_version_short}/site-packages
include = {userbase}/include/python{py_version_short}
scripts = {userbase}/bin
data = {userbase}

[osx_framework_user]
stdlib = {userbase}/lib/python
platstdlib = {userbase}/lib/python
purelib = {userbase}/lib/python/site-packages
platlib = {userbase}/lib/python/site-packages
include = {userbase}/include
scripts = {userbase}/bin
data = {userbase}

@@ -0,0 +1,788 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2012 The Python Software Foundation.
# See LICENSE.txt and CONTRIBUTORS.txt.
#
"""Access to Python's configuration information."""

import codecs
import os
import re
import sys
from os.path import pardir, realpath
try:
    import configparser
except ImportError:
    import ConfigParser as configparser


__all__ = [
    'get_config_h_filename',
    'get_config_var',
    'get_config_vars',
    'get_makefile_filename',
    'get_path',
    'get_path_names',
    'get_paths',
    'get_platform',
    'get_python_version',
    'get_scheme_names',
    'parse_config_h',
]


def _safe_realpath(path):
    try:
        return realpath(path)
    except OSError:
        return path


if sys.executable:
    _PROJECT_BASE = os.path.dirname(_safe_realpath(sys.executable))
else:
    # sys.executable can be empty if argv[0] has been changed and Python is
    # unable to retrieve the real program name
    _PROJECT_BASE = _safe_realpath(os.getcwd())

if os.name == "nt" and "pcbuild" in _PROJECT_BASE[-8:].lower():
    _PROJECT_BASE = _safe_realpath(os.path.join(_PROJECT_BASE, pardir))
# PC/VS7.1
if os.name == "nt" and "\\pc\\v" in _PROJECT_BASE[-10:].lower():
    _PROJECT_BASE = _safe_realpath(os.path.join(_PROJECT_BASE, pardir, pardir))
# PC/AMD64
if os.name == "nt" and "\\pcbuild\\amd64" in _PROJECT_BASE[-14:].lower():
    _PROJECT_BASE = _safe_realpath(os.path.join(_PROJECT_BASE, pardir, pardir))


def is_python_build():
    for fn in ("Setup.dist", "Setup.local"):
        if os.path.isfile(os.path.join(_PROJECT_BASE, "Modules", fn)):
            return True
    return False

_PYTHON_BUILD = is_python_build()

_cfg_read = False

def _ensure_cfg_read():
    global _cfg_read
    if not _cfg_read:
        from ..resources import finder
        backport_package = __name__.rsplit('.', 1)[0]
        _finder = finder(backport_package)
        _cfgfile = _finder.find('sysconfig.cfg')
        assert _cfgfile, 'sysconfig.cfg exists'
        with _cfgfile.as_stream() as s:
            _SCHEMES.readfp(s)
        if _PYTHON_BUILD:
            for scheme in ('posix_prefix', 'posix_home'):
                _SCHEMES.set(scheme, 'include', '{srcdir}/Include')
                _SCHEMES.set(scheme, 'platinclude', '{projectbase}/.')

        _cfg_read = True


_SCHEMES = configparser.RawConfigParser()
_VAR_REPL = re.compile(r'\{([^{]*?)\}')

def _expand_globals(config):
    _ensure_cfg_read()
    if config.has_section('globals'):
        globals = config.items('globals')
    else:
        globals = tuple()

    sections = config.sections()
    for section in sections:
        if section == 'globals':
            continue
        for option, value in globals:
            if config.has_option(section, option):
                continue
            config.set(section, option, value)
    config.remove_section('globals')

    # now expanding local variables defined in the cfg file
    #
    for section in config.sections():
        variables = dict(config.items(section))

        def _replacer(matchobj):
            name = matchobj.group(1)
            if name in variables:
                return variables[name]
            return matchobj.group(0)

        for option, value in config.items(section):
            config.set(section, option, _VAR_REPL.sub(_replacer, value))

#_expand_globals(_SCHEMES)

# FIXME don't rely on sys.version here, its format is an implementation detail
# of CPython, use sys.version_info or sys.hexversion
_PY_VERSION = sys.version.split()[0]
_PY_VERSION_SHORT = sys.version[:3]
_PY_VERSION_SHORT_NO_DOT = _PY_VERSION[0] + _PY_VERSION[2]
_PREFIX = os.path.normpath(sys.prefix)
_EXEC_PREFIX = os.path.normpath(sys.exec_prefix)
_CONFIG_VARS = None
_USER_BASE = None


def _subst_vars(path, local_vars):
    """In the string `path`, replace tokens like {some.thing} with the
    corresponding value from the map `local_vars`.

    If there is no corresponding value, leave the token unchanged.
    """
    def _replacer(matchobj):
        name = matchobj.group(1)
        if name in local_vars:
            return local_vars[name]
        elif name in os.environ:
            return os.environ[name]
        return matchobj.group(0)
    return _VAR_REPL.sub(_replacer, path)


def _extend_dict(target_dict, other_dict):
    target_keys = target_dict.keys()
    for key, value in other_dict.items():
        if key in target_keys:
            continue
        target_dict[key] = value


def _expand_vars(scheme, vars):
    res = {}
    if vars is None:
        vars = {}
    _extend_dict(vars, get_config_vars())

    for key, value in _SCHEMES.items(scheme):
        if os.name in ('posix', 'nt'):
            value = os.path.expanduser(value)
        res[key] = os.path.normpath(_subst_vars(value, vars))
    return res


def format_value(value, vars):
    def _replacer(matchobj):
        name = matchobj.group(1)
        if name in vars:
            return vars[name]
        return matchobj.group(0)
    return _VAR_REPL.sub(_replacer, value)


def _get_default_scheme():
    if os.name == 'posix':
        # the default scheme for posix is posix_prefix
        return 'posix_prefix'
    return os.name


def _getuserbase():
    env_base = os.environ.get("PYTHONUSERBASE", None)

    def joinuser(*args):
        return os.path.expanduser(os.path.join(*args))

    # what about 'os2emx', 'riscos' ?
    if os.name == "nt":
        base = os.environ.get("APPDATA") or "~"
        if env_base:
            return env_base
        else:
            return joinuser(base, "Python")

    if sys.platform == "darwin":
        framework = get_config_var("PYTHONFRAMEWORK")
        if framework:
            if env_base:
                return env_base
            else:
                return joinuser("~", "Library", framework, "%d.%d" %
                                sys.version_info[:2])

    if env_base:
        return env_base
    else:
        return joinuser("~", ".local")


def _parse_makefile(filename, vars=None):
    """Parse a Makefile-style file.

    A dictionary containing name/value pairs is returned. If an
    optional dictionary is passed in as the second argument, it is
    used instead of a new dictionary.
    """
    # Regexes needed for parsing Makefile (and similar syntaxes,
    # like old-style Setup files).
    _variable_rx = re.compile(r"([a-zA-Z][a-zA-Z0-9_]+)\s*=\s*(.*)")
    _findvar1_rx = re.compile(r"\$\(([A-Za-z][A-Za-z0-9_]*)\)")
    _findvar2_rx = re.compile(r"\${([A-Za-z][A-Za-z0-9_]*)}")

    if vars is None:
        vars = {}
    done = {}
    notdone = {}

    with codecs.open(filename, encoding='utf-8', errors="surrogateescape") as f:
        lines = f.readlines()

    for line in lines:
        if line.startswith('#') or line.strip() == '':
            continue
        m = _variable_rx.match(line)
        if m:
            n, v = m.group(1, 2)
            v = v.strip()
            # `$$' is a literal `$' in make
            tmpv = v.replace('$$', '')

            if "$" in tmpv:
                notdone[n] = v
            else:
                try:
                    v = int(v)
                except ValueError:
                    # insert literal `$'
                    done[n] = v.replace('$$', '$')
                else:
                    done[n] = v

    # do variable interpolation here
    variables = list(notdone.keys())

    # Variables with a 'PY_' prefix in the makefile. These need to
    # be made available without that prefix through sysconfig.
    # Special care is needed to ensure that variable expansion works, even
    # if the expansion uses the name without a prefix.
    renamed_variables = ('CFLAGS', 'LDFLAGS', 'CPPFLAGS')

    while len(variables) > 0:
        for name in tuple(variables):
            value = notdone[name]
            m = _findvar1_rx.search(value) or _findvar2_rx.search(value)
            if m is not None:
                n = m.group(1)
                found = True
                if n in done:
                    item = str(done[n])
                elif n in notdone:
                    # get it on a subsequent round
                    found = False
                elif n in os.environ:
                    # do it like make: fall back to environment
                    item = os.environ[n]

                elif n in renamed_variables:
                    if (name.startswith('PY_') and
                        name[3:] in renamed_variables):
                        item = ""

                    elif 'PY_' + n in notdone:
                        found = False

                    else:
                        item = str(done['PY_' + n])

                else:
                    done[n] = item = ""

                if found:
                    after = value[m.end():]
                    value = value[:m.start()] + item + after
                    if "$" in after:
                        notdone[name] = value
                    else:
                        try:
                            value = int(value)
                        except ValueError:
                            done[name] = value.strip()
                        else:
                            done[name] = value
                        variables.remove(name)

                        if (name.startswith('PY_') and
                            name[3:] in renamed_variables):

                            name = name[3:]
                            if name not in done:
                                done[name] = value

            else:
                # bogus variable reference (e.g. "prefix=$/opt/python");
                # just drop it since we can't deal
                done[name] = value
                variables.remove(name)

    # strip spurious spaces
    for k, v in done.items():
        if isinstance(v, str):
            done[k] = v.strip()

    # save the results in the global dictionary
    vars.update(done)
    return vars

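# --- Illustrative usage (not part of the original module) -----------------
# A minimal sketch of _parse_makefile(): given a file containing
#     prefix = /usr
#     bindir = $(prefix)/bin
# the returned dict maps 'bindir' to '/usr/bin', with $(VAR) and ${VAR}
# references resolved. The file name is hypothetical.
def _example_parse_makefile():
    return _parse_makefile('/tmp/example.mk')
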
def get_makefile_filename():
    """Return the path of the Makefile."""
    if _PYTHON_BUILD:
        return os.path.join(_PROJECT_BASE, "Makefile")
    if hasattr(sys, 'abiflags'):
        config_dir_name = 'config-%s%s' % (_PY_VERSION_SHORT, sys.abiflags)
    else:
        config_dir_name = 'config'
    return os.path.join(get_path('stdlib'), config_dir_name, 'Makefile')


def _init_posix(vars):
    """Initialize the module as appropriate for POSIX systems."""
    # load the installed Makefile:
    makefile = get_makefile_filename()
    try:
        _parse_makefile(makefile, vars)
    except IOError as e:
        msg = "invalid Python installation: unable to open %s" % makefile
        if hasattr(e, "strerror"):
            msg = msg + " (%s)" % e.strerror
        raise IOError(msg)
    # load the installed pyconfig.h:
    config_h = get_config_h_filename()
    try:
        with open(config_h) as f:
            parse_config_h(f, vars)
    except IOError as e:
        msg = "invalid Python installation: unable to open %s" % config_h
        if hasattr(e, "strerror"):
            msg = msg + " (%s)" % e.strerror
        raise IOError(msg)
    # On AIX, there are wrong paths to the linker scripts in the Makefile
    # -- these paths are relative to the Python source, but when installed
    # the scripts are in another directory.
    if _PYTHON_BUILD:
        vars['LDSHARED'] = vars['BLDSHARED']


def _init_non_posix(vars):
    """Initialize the module as appropriate for NT"""
    # set basic install directories
    vars['LIBDEST'] = get_path('stdlib')
    vars['BINLIBDEST'] = get_path('platstdlib')
    vars['INCLUDEPY'] = get_path('include')
    vars['SO'] = '.pyd'
    vars['EXE'] = '.exe'
    vars['VERSION'] = _PY_VERSION_SHORT_NO_DOT
    vars['BINDIR'] = os.path.dirname(_safe_realpath(sys.executable))

#
# public APIs
#


def parse_config_h(fp, vars=None):
    """Parse a config.h-style file.

    A dictionary containing name/value pairs is returned. If an
    optional dictionary is passed in as the second argument, it is
    used instead of a new dictionary.
    """
    if vars is None:
        vars = {}
    define_rx = re.compile("#define ([A-Z][A-Za-z0-9_]+) (.*)\n")
    undef_rx = re.compile("/[*] #undef ([A-Z][A-Za-z0-9_]+) [*]/\n")

    while True:
        line = fp.readline()
        if not line:
            break
        m = define_rx.match(line)
        if m:
            n, v = m.group(1, 2)
            try:
                v = int(v)
            except ValueError:
                pass
            vars[n] = v
        else:
            m = undef_rx.match(line)
            if m:
                vars[m.group(1)] = 0
    return vars

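# --- Illustrative usage (not part of the original module) -----------------
# A minimal sketch of parse_config_h(): "#define HAVE_FOO 1" lines become
# integer entries, "/* #undef HAVE_BAR */" lines become 0.
def _example_parse_config_h():
    with open(get_config_h_filename()) as f:
        return parse_config_h(f)
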
def get_config_h_filename():
    """Return the path of pyconfig.h."""
    if _PYTHON_BUILD:
        if os.name == "nt":
            inc_dir = os.path.join(_PROJECT_BASE, "PC")
        else:
            inc_dir = _PROJECT_BASE
    else:
        inc_dir = get_path('platinclude')
    return os.path.join(inc_dir, 'pyconfig.h')


def get_scheme_names():
    """Return a tuple containing the scheme names."""
    return tuple(sorted(_SCHEMES.sections()))


def get_path_names():
    """Return a tuple containing the path names."""
    # xxx see if we want a static list
    return _SCHEMES.options('posix_prefix')


def get_paths(scheme=_get_default_scheme(), vars=None, expand=True):
    """Return a mapping containing an install scheme.

    ``scheme`` is the install scheme name. If not provided, it will
    return the default scheme for the current platform.
    """
    _ensure_cfg_read()
    if expand:
        return _expand_vars(scheme, vars)
    else:
        return dict(_SCHEMES.items(scheme))


def get_path(name, scheme=_get_default_scheme(), vars=None, expand=True):
    """Return a path corresponding to the scheme.

    ``scheme`` is the install scheme name.
    """
    return get_paths(scheme, vars, expand)[name]

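# --- Illustrative usage (not part of the original module) -----------------
# A minimal sketch of the paths API: get_paths() returns the whole scheme
# mapping, get_path() a single entry from it.
def _example_get_paths():
    paths = get_paths()                      # default scheme for this OS
    return paths['purelib'], get_path('scripts', 'posix_user')
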
def get_config_vars(*args):
    """With no arguments, return a dictionary of all configuration
    variables relevant for the current platform.

    On Unix, this means every variable defined in Python's installed Makefile;
    On Windows and Mac OS it's a much smaller set.

    With arguments, return a list of values that result from looking up
    each argument in the configuration variable dictionary.
    """
    global _CONFIG_VARS
    if _CONFIG_VARS is None:
        _CONFIG_VARS = {}
        # Normalized versions of prefix and exec_prefix are handy to have;
        # in fact, these are the standard versions used most places in the
        # distutils2 module.
        _CONFIG_VARS['prefix'] = _PREFIX
        _CONFIG_VARS['exec_prefix'] = _EXEC_PREFIX
        _CONFIG_VARS['py_version'] = _PY_VERSION
        _CONFIG_VARS['py_version_short'] = _PY_VERSION_SHORT
        _CONFIG_VARS['py_version_nodot'] = _PY_VERSION[0] + _PY_VERSION[2]
        _CONFIG_VARS['base'] = _PREFIX
        _CONFIG_VARS['platbase'] = _EXEC_PREFIX
        _CONFIG_VARS['projectbase'] = _PROJECT_BASE
        try:
            _CONFIG_VARS['abiflags'] = sys.abiflags
        except AttributeError:
            # sys.abiflags may not be defined on all platforms.
            _CONFIG_VARS['abiflags'] = ''

        if os.name in ('nt', 'os2'):
            _init_non_posix(_CONFIG_VARS)
        if os.name == 'posix':
            _init_posix(_CONFIG_VARS)
        # Setting 'userbase' is done below the call to the
        # init function to enable using 'get_config_var' in
        # the init-function.
        if sys.version >= '2.6':
            _CONFIG_VARS['userbase'] = _getuserbase()

        if 'srcdir' not in _CONFIG_VARS:
            _CONFIG_VARS['srcdir'] = _PROJECT_BASE
        else:
            _CONFIG_VARS['srcdir'] = _safe_realpath(_CONFIG_VARS['srcdir'])

        # Convert srcdir into an absolute path if it appears necessary.
        # Normally it is relative to the build directory. However, during
        # testing, for example, we might be running a non-installed python
        # from a different directory.
        if _PYTHON_BUILD and os.name == "posix":
            base = _PROJECT_BASE
            try:
                cwd = os.getcwd()
            except OSError:
                cwd = None
            if (not os.path.isabs(_CONFIG_VARS['srcdir']) and
                base != cwd):
                # srcdir is relative and we are not in the same directory
                # as the executable. Assume executable is in the build
                # directory and make srcdir absolute.
                srcdir = os.path.join(base, _CONFIG_VARS['srcdir'])
                _CONFIG_VARS['srcdir'] = os.path.normpath(srcdir)

        if sys.platform == 'darwin':
            kernel_version = os.uname()[2]  # Kernel version (8.4.3)
            major_version = int(kernel_version.split('.')[0])

            if major_version < 8:
                # On Mac OS X before 10.4, check if -arch and -isysroot
                # are in CFLAGS or LDFLAGS and remove them if they are.
                # This is needed when building extensions on a 10.3 system
                # using a universal build of python.
                for key in ('LDFLAGS', 'BASECFLAGS',
                            # a number of derived variables. These need to be
                            # patched up as well.
                            'CFLAGS', 'PY_CFLAGS', 'BLDSHARED'):
                    flags = _CONFIG_VARS[key]
                    flags = re.sub(r'-arch\s+\w+\s', ' ', flags)
                    flags = re.sub('-isysroot [^ \t]*', ' ', flags)
                    _CONFIG_VARS[key] = flags
            else:
                # Allow the user to override the architecture flags using
                # an environment variable.
                # NOTE: This name was introduced by Apple in OSX 10.5 and
                # is used by several scripting languages distributed with
                # that OS release.
                if 'ARCHFLAGS' in os.environ:
                    arch = os.environ['ARCHFLAGS']
                    for key in ('LDFLAGS', 'BASECFLAGS',
                                # a number of derived variables. These need to be
                                # patched up as well.
                                'CFLAGS', 'PY_CFLAGS', 'BLDSHARED'):

                        flags = _CONFIG_VARS[key]
                        flags = re.sub(r'-arch\s+\w+\s', ' ', flags)
                        flags = flags + ' ' + arch
                        _CONFIG_VARS[key] = flags

                # If we're on OSX 10.5 or later and the user tries to
                # compile an extension using an SDK that is not present
                # on the current machine it is better to not use an SDK
                # than to fail.
                #
                # The major usecase for this is users using a Python.org
                # binary installer on OSX 10.6: that installer uses
                # the 10.4u SDK, but that SDK is not installed by default
                # when you install Xcode.
                #
                CFLAGS = _CONFIG_VARS.get('CFLAGS', '')
                m = re.search(r'-isysroot\s+(\S+)', CFLAGS)
                if m is not None:
                    sdk = m.group(1)
                    if not os.path.exists(sdk):
                        for key in ('LDFLAGS', 'BASECFLAGS',
                                    # a number of derived variables. These need to be
                                    # patched up as well.
                                    'CFLAGS', 'PY_CFLAGS', 'BLDSHARED'):

                            flags = _CONFIG_VARS[key]
                            flags = re.sub(r'-isysroot\s+\S+(\s|$)', ' ', flags)
                            _CONFIG_VARS[key] = flags

    if args:
        vals = []
        for name in args:
            vals.append(_CONFIG_VARS.get(name))
        return vals
    else:
        return _CONFIG_VARS


def get_config_var(name):
    """Return the value of a single variable using the dictionary returned by
    'get_config_vars()'.

    Equivalent to get_config_vars().get(name)
    """
    return get_config_vars().get(name)

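# --- Illustrative usage (not part of the original module) -----------------
# A minimal sketch of the config-variable API; on POSIX the keys come from
# the parsed Makefile and pyconfig.h.
def _example_config_vars():
    return get_config_var('CC'), get_config_vars('prefix', 'exec_prefix')
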
def get_platform():
    """Return a string that identifies the current platform.

    This is used mainly to distinguish platform-specific build directories and
    platform-specific built distributions. Typically includes the OS name
    and version and the architecture (as supplied by 'os.uname()'),
    although the exact information included depends on the OS; eg. for IRIX
    the architecture isn't particularly important (IRIX only runs on SGI
    hardware), but for Linux the kernel version isn't particularly
    important.

    Examples of returned values:
       linux-i586
       linux-alpha (?)
       solaris-2.6-sun4u
       irix-5.3
       irix64-6.2

    Windows will return one of:
       win-amd64 (64bit Windows on AMD64 (aka x86_64, Intel64, EM64T, etc))
       win-ia64 (64bit Windows on Itanium)
       win32 (all others - specifically, sys.platform is returned)

    For other non-POSIX platforms, currently just returns 'sys.platform'.
    """
    if os.name == 'nt':
        # sniff sys.version for architecture.
        prefix = " bit ("
        i = sys.version.find(prefix)
        if i == -1:
            return sys.platform
        j = sys.version.find(")", i)
        look = sys.version[i+len(prefix):j].lower()
        if look == 'amd64':
            return 'win-amd64'
        if look == 'itanium':
            return 'win-ia64'
        return sys.platform

    if os.name != "posix" or not hasattr(os, 'uname'):
        # XXX what about the architecture? NT is Intel or Alpha,
        # Mac OS is M68k or PPC, etc.
        return sys.platform

    # Try to distinguish various flavours of Unix
    osname, host, release, version, machine = os.uname()

    # Convert the OS name to lowercase, remove '/' characters
    # (to accommodate BSD/OS), and translate spaces (for "Power Macintosh")
    osname = osname.lower().replace('/', '')
    machine = machine.replace(' ', '_')
    machine = machine.replace('/', '-')

    if osname[:5] == "linux":
        # At least on Linux/Intel, 'machine' is the processor --
        # i386, etc.
        # XXX what about Alpha, SPARC, etc?
        return "%s-%s" % (osname, machine)
    elif osname[:5] == "sunos":
        if release[0] >= "5":           # SunOS 5 == Solaris 2
            osname = "solaris"
            release = "%d.%s" % (int(release[0]) - 3, release[2:])
        # fall through to standard osname-release-machine representation
    elif osname[:4] == "irix":          # could be "irix64"!
        return "%s-%s" % (osname, release)
    elif osname[:3] == "aix":
        return "%s-%s.%s" % (osname, version, release)
    elif osname[:6] == "cygwin":
        osname = "cygwin"
        rel_re = re.compile(r'[\d.]+')
        m = rel_re.match(release)
        if m:
            release = m.group()
    elif osname[:6] == "darwin":
        #
        # For our purposes, we'll assume that the system version from
        # distutils' perspective is what MACOSX_DEPLOYMENT_TARGET is set
        # to. This makes the compatibility story a bit more sane because the
        # machine is going to compile and link as if it were
        # MACOSX_DEPLOYMENT_TARGET.
        cfgvars = get_config_vars()
        macver = cfgvars.get('MACOSX_DEPLOYMENT_TARGET')

        if True:
            # Always calculate the release of the running machine,
            # needed to determine if we can build fat binaries or not.

            macrelease = macver
            # Get the system version. Reading this plist is a documented
            # way to get the system version (see the documentation for
            # the Gestalt Manager)
            try:
                f = open('/System/Library/CoreServices/SystemVersion.plist')
            except IOError:
                # We're on a plain darwin box, fall back to the default
                # behaviour.
                pass
            else:
                try:
                    m = re.search(r'<key>ProductUserVisibleVersion</key>\s*'
                                  r'<string>(.*?)</string>', f.read())
                finally:
                    f.close()
                if m is not None:
                    macrelease = '.'.join(m.group(1).split('.')[:2])
                # else: fall back to the default behaviour

        if not macver:
            macver = macrelease

        if macver:
            release = macver
            osname = "macosx"

            if ((macrelease + '.') >= '10.4.' and
                '-arch' in get_config_vars().get('CFLAGS', '').strip()):
                # The universal build will build fat binaries, but not on
                # systems before 10.4
                #
                # Try to detect 4-way universal builds, those have machine-type
                # 'universal' instead of 'fat'.

                machine = 'fat'
                cflags = get_config_vars().get('CFLAGS')

                archs = re.findall(r'-arch\s+(\S+)', cflags)
                archs = tuple(sorted(set(archs)))

                if len(archs) == 1:
                    machine = archs[0]
                elif archs == ('i386', 'ppc'):
                    machine = 'fat'
                elif archs == ('i386', 'x86_64'):
                    machine = 'intel'
                elif archs == ('i386', 'ppc', 'x86_64'):
                    machine = 'fat3'
                elif archs == ('ppc64', 'x86_64'):
                    machine = 'fat64'
                elif archs == ('i386', 'ppc', 'ppc64', 'x86_64'):
                    machine = 'universal'
                else:
                    raise ValueError(
                        "Don't know machine value for archs=%r" % (archs,))

            elif machine == 'i386':
                # On OSX the machine type returned by uname is always the
                # 32-bit variant, even if the executable architecture is
                # the 64-bit variant
                if sys.maxsize >= 2**32:
                    machine = 'x86_64'

            elif machine in ('PowerPC', 'Power_Macintosh'):
                # Pick a sane name for the PPC architecture.
                # See 'i386' case
                if sys.maxsize >= 2**32:
                    machine = 'ppc64'
                else:
                    machine = 'ppc'

    return "%s-%s-%s" % (osname, release, machine)


def get_python_version():
    return _PY_VERSION_SHORT


def _print_dict(title, data):
    for index, (key, value) in enumerate(sorted(data.items())):
        if index == 0:
            print('%s: ' % (title))
        print('\t%s = "%s"' % (key, value))


def _main():
    """Display all information sysconfig contains."""
    print('Platform: "%s"' % get_platform())
    print('Python version: "%s"' % get_python_version())
    print('Current installation scheme: "%s"' % _get_default_scheme())
    print()
    _print_dict('Paths', get_paths())
    print()
    _print_dict('Variables', get_config_vars())


if __name__ == '__main__':
    _main()
File diff suppressed because it is too large
File diff suppressed because it is too large
File diff suppressed because it is too large
@@ -0,0 +1,513 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2013 Vinay Sajip.
# Licensed to the Python Software Foundation under a contributor agreement.
# See LICENSE.txt and CONTRIBUTORS.txt.
#
import hashlib
import logging
import os
import shutil
import subprocess
import tempfile
try:
    from threading import Thread
except ImportError:
    from dummy_threading import Thread

from . import DistlibException
from .compat import (HTTPBasicAuthHandler, Request, HTTPPasswordMgr,
                     urlparse, build_opener, string_types)
from .util import cached_property, zip_dir, ServerProxy

logger = logging.getLogger(__name__)

DEFAULT_INDEX = 'https://pypi.python.org/pypi'
DEFAULT_REALM = 'pypi'

class PackageIndex(object):
    """
    This class represents a package index compatible with PyPI, the Python
    Package Index.
    """

    boundary = b'----------ThIs_Is_tHe_distlib_index_bouNdaRY_$'

    def __init__(self, url=None):
        """
        Initialise an instance.

        :param url: The URL of the index. If not specified, the URL for PyPI is
                    used.
        """
        self.url = url or DEFAULT_INDEX
        self.read_configuration()
        scheme, netloc, path, params, query, frag = urlparse(self.url)
        if params or query or frag or scheme not in ('http', 'https'):
            raise DistlibException('invalid repository: %s' % self.url)
        self.password_handler = None
        self.ssl_verifier = None
        self.gpg = None
        self.gpg_home = None
        self.rpc_proxy = None
        with open(os.devnull, 'w') as sink:
            for s in ('gpg2', 'gpg'):
                try:
                    rc = subprocess.check_call([s, '--version'], stdout=sink,
                                               stderr=sink)
                    if rc == 0:
                        self.gpg = s
                        break
                except OSError:
                    pass

    def _get_pypirc_command(self):
        """
        Get the distutils command for interacting with PyPI configurations.
        :return: the command.
        """
        from distutils.core import Distribution
        from distutils.config import PyPIRCCommand
        d = Distribution()
        return PyPIRCCommand(d)

    def read_configuration(self):
        """
        Read the PyPI access configuration as supported by distutils, getting
        PyPI to do the actual work. This populates ``username``, ``password``,
        ``realm`` and ``url`` attributes from the configuration.
        """
        # get distutils to do the work
        c = self._get_pypirc_command()
        c.repository = self.url
        cfg = c._read_pypirc()
        self.username = cfg.get('username')
        self.password = cfg.get('password')
        self.realm = cfg.get('realm', 'pypi')
        self.url = cfg.get('repository', self.url)

    def save_configuration(self):
        """
        Save the PyPI access configuration. You must have set ``username`` and
        ``password`` attributes before calling this method.

        Again, distutils is used to do the actual work.
        """
        self.check_credentials()
        # get distutils to do the work
        c = self._get_pypirc_command()
        c._store_pypirc(self.username, self.password)

    def check_credentials(self):
        """
        Check that ``username`` and ``password`` have been set, and raise an
        exception if not.
        """
        if self.username is None or self.password is None:
            raise DistlibException('username and password must be set')
        pm = HTTPPasswordMgr()
        _, netloc, _, _, _, _ = urlparse(self.url)
        pm.add_password(self.realm, netloc, self.username, self.password)
        self.password_handler = HTTPBasicAuthHandler(pm)

    def register(self, metadata):
        """
        Register a distribution on PyPI, using the provided metadata.

        :param metadata: A :class:`Metadata` instance defining at least a name
                         and version number for the distribution to be
                         registered.
        :return: The HTTP response received from PyPI upon submission of the
                 request.
        """
        self.check_credentials()
        metadata.validate()
        d = metadata.todict()
        d[':action'] = 'verify'
        request = self.encode_request(d.items(), [])
        response = self.send_request(request)
        d[':action'] = 'submit'
        request = self.encode_request(d.items(), [])
        return self.send_request(request)

    def _reader(self, name, stream, outbuf):
        """
        Thread runner for reading lines of output from a subprocess into a
        buffer.

        :param name: The logical name of the stream (used for logging only).
        :param stream: The stream to read from. This will typically be a pipe
                       connected to the output stream of a subprocess.
        :param outbuf: The list to append the read lines to.
        """
        while True:
            s = stream.readline()
            if not s:
                break
            s = s.decode('utf-8').rstrip()
            outbuf.append(s)
            logger.debug('%s: %s' % (name, s))
        stream.close()

    def get_sign_command(self, filename, signer, sign_password,
                         keystore=None):
        """
        Return a suitable command for signing a file.

        :param filename: The pathname to the file to be signed.
        :param signer: The identifier of the signer of the file.
        :param sign_password: The passphrase for the signer's
                              private key used for signing.
        :param keystore: The path to a directory which contains the keys
                         used in verification. If not specified, the
                         instance's ``gpg_home`` attribute is used instead.
        :return: The signing command as a list suitable to be
                 passed to :class:`subprocess.Popen`.
        """
        cmd = [self.gpg, '--status-fd', '2', '--no-tty']
        if keystore is None:
            keystore = self.gpg_home
        if keystore:
            cmd.extend(['--homedir', keystore])
        if sign_password is not None:
            cmd.extend(['--batch', '--passphrase-fd', '0'])
        td = tempfile.mkdtemp()
        sf = os.path.join(td, os.path.basename(filename) + '.asc')
        cmd.extend(['--detach-sign', '--armor', '--local-user',
                    signer, '--output', sf, filename])
        logger.debug('invoking: %s', ' '.join(cmd))
        return cmd, sf

    def run_command(self, cmd, input_data=None):
        """
        Run a command in a child process, passing it any input data specified.

        :param cmd: The command to run.
        :param input_data: If specified, this must be a byte string containing
                           data to be sent to the child process.
        :return: A tuple consisting of the subprocess' exit code, a list of
                 lines read from the subprocess' ``stdout``, and a list of
                 lines read from the subprocess' ``stderr``.
        """
        kwargs = {
            'stdout': subprocess.PIPE,
            'stderr': subprocess.PIPE,
        }
        if input_data is not None:
            kwargs['stdin'] = subprocess.PIPE
        stdout = []
        stderr = []
        p = subprocess.Popen(cmd, **kwargs)
        # We don't use communicate() here because we may need to
        # get clever with interacting with the command
        t1 = Thread(target=self._reader, args=('stdout', p.stdout, stdout))
        t1.start()
        t2 = Thread(target=self._reader, args=('stderr', p.stderr, stderr))
        t2.start()
        if input_data is not None:
            p.stdin.write(input_data)
            p.stdin.close()

        p.wait()
        t1.join()
        t2.join()
        return p.returncode, stdout, stderr

    def sign_file(self, filename, signer, sign_password, keystore=None):
        """
        Sign a file.

        :param filename: The pathname to the file to be signed.
        :param signer: The identifier of the signer of the file.
        :param sign_password: The passphrase for the signer's
                              private key used for signing.
        :param keystore: The path to a directory which contains the keys
                         used in signing. If not specified, the instance's
                         ``gpg_home`` attribute is used instead.
        :return: The absolute pathname of the file where the signature is
                 stored.
        """
        cmd, sig_file = self.get_sign_command(filename, signer, sign_password,
                                              keystore)
        rc, stdout, stderr = self.run_command(cmd,
                                              sign_password.encode('utf-8'))
        if rc != 0:
            raise DistlibException('sign command failed with error '
                                   'code %s' % rc)
        return sig_file

    def upload_file(self, metadata, filename, signer=None, sign_password=None,
                    filetype='sdist', pyversion='source', keystore=None):
        """
        Upload a release file to the index.

        :param metadata: A :class:`Metadata` instance defining at least a name
                         and version number for the file to be uploaded.
        :param filename: The pathname of the file to be uploaded.
        :param signer: The identifier of the signer of the file.
        :param sign_password: The passphrase for the signer's
                              private key used for signing.
        :param filetype: The type of the file being uploaded. This is the
                         distutils command which produced that file, e.g.
                         ``sdist`` or ``bdist_wheel``.
        :param pyversion: The version of Python which the release relates
                          to. For code compatible with any Python, this would
                          be ``source``, otherwise it would be e.g. ``3.2``.
        :param keystore: The path to a directory which contains the keys
                         used in signing. If not specified, the instance's
                         ``gpg_home`` attribute is used instead.
        :return: The HTTP response received from PyPI upon submission of the
                 request.
        """
        self.check_credentials()
        if not os.path.exists(filename):
            raise DistlibException('not found: %s' % filename)
        metadata.validate()
        d = metadata.todict()
        sig_file = None
        if signer:
            if not self.gpg:
                logger.warning('no signing program available - not signed')
            else:
                sig_file = self.sign_file(filename, signer, sign_password,
                                          keystore)
        with open(filename, 'rb') as f:
            file_data = f.read()
        md5_digest = hashlib.md5(file_data).hexdigest()
        sha256_digest = hashlib.sha256(file_data).hexdigest()
        d.update({
            ':action': 'file_upload',
            'protcol_version': '1',  # [sic] - misspelled field name used by the PyPI upload API
            'filetype': filetype,
            'pyversion': pyversion,
            'md5_digest': md5_digest,
            'sha256_digest': sha256_digest,
        })
        files = [('content', os.path.basename(filename), file_data)]
        if sig_file:
            with open(sig_file, 'rb') as f:
                sig_data = f.read()
            files.append(('gpg_signature', os.path.basename(sig_file),
                          sig_data))
            shutil.rmtree(os.path.dirname(sig_file))
        request = self.encode_request(d.items(), files)
        return self.send_request(request)

    def upload_documentation(self, metadata, doc_dir):
        """
        Upload documentation to the index.

        :param metadata: A :class:`Metadata` instance defining at least a name
                         and version number for the documentation to be
                         uploaded.
        :param doc_dir: The pathname of the directory which contains the
                        documentation. This should be the directory that
                        contains the ``index.html`` for the documentation.
        :return: The HTTP response received from PyPI upon submission of the
                 request.
        """
        self.check_credentials()
        if not os.path.isdir(doc_dir):
            raise DistlibException('not a directory: %r' % doc_dir)
        fn = os.path.join(doc_dir, 'index.html')
        if not os.path.exists(fn):
            raise DistlibException('not found: %r' % fn)
        metadata.validate()
        name, version = metadata.name, metadata.version
        zip_data = zip_dir(doc_dir).getvalue()
        fields = [(':action', 'doc_upload'),
                  ('name', name), ('version', version)]
        files = [('content', name, zip_data)]
        request = self.encode_request(fields, files)
        return self.send_request(request)

    def get_verify_command(self, signature_filename, data_filename,
                           keystore=None):
        """
        Return a suitable command for verifying a file.

        :param signature_filename: The pathname to the file containing the
                                   signature.
        :param data_filename: The pathname to the file containing the
                              signed data.
        :param keystore: The path to a directory which contains the keys
                         used in verification. If not specified, the
                         instance's ``gpg_home`` attribute is used instead.
        :return: The verifying command as a list suitable to be
                 passed to :class:`subprocess.Popen`.
        """
        cmd = [self.gpg, '--status-fd', '2', '--no-tty']
        if keystore is None:
            keystore = self.gpg_home
        if keystore:
            cmd.extend(['--homedir', keystore])
        cmd.extend(['--verify', signature_filename, data_filename])
        logger.debug('invoking: %s', ' '.join(cmd))
        return cmd

    def verify_signature(self, signature_filename, data_filename,
                         keystore=None):
        """
        Verify a signature for a file.

        :param signature_filename: The pathname to the file containing the
                                   signature.
        :param data_filename: The pathname to the file containing the
                              signed data.
        :param keystore: The path to a directory which contains the keys
                         used in verification. If not specified, the
                         instance's ``gpg_home`` attribute is used instead.
        :return: True if the signature was verified, else False.
        """
        if not self.gpg:
            raise DistlibException('verification unavailable because gpg '
                                   'unavailable')
        cmd = self.get_verify_command(signature_filename, data_filename,
                                      keystore)
        rc, stdout, stderr = self.run_command(cmd)
        if rc not in (0, 1):
            raise DistlibException('verify command failed with error '
                                   'code %s' % rc)
        return rc == 0

    def download_file(self, url, destfile, digest=None, reporthook=None):
        """
        This is a convenience method for downloading a file from a URL.
        Normally, this will be a file from the index, though currently
        no check is made for this (i.e. a file can be downloaded from
        anywhere).

        The method is just like the :func:`urlretrieve` function in the
        standard library, except that it allows digest computation to be
        done during download and checking that the downloaded data
        matches any expected value.

        :param url: The URL of the file to be downloaded (assumed to be
                    available via an HTTP GET request).
        :param destfile: The pathname where the downloaded file is to be
                         saved.
        :param digest: If specified, this must be a (hasher, value)
                       tuple, where hasher is the algorithm used (e.g.
                       ``'md5'``) and ``value`` is the expected value.
        :param reporthook: The same as for :func:`urlretrieve` in the
                           standard library.
        """
        if digest is None:
            digester = None
            logger.debug('No digest specified')
        else:
            if isinstance(digest, (list, tuple)):
                hasher, digest = digest
            else:
                hasher = 'md5'
            digester = getattr(hashlib, hasher)()
            logger.debug('Digest specified: %s' % digest)
        # The following code is equivalent to urlretrieve.
        # We need to do it this way so that we can compute the
        # digest of the file as we go.
        with open(destfile, 'wb') as dfp:
            # addinfourl is not a context manager on 2.x
            # so we have to use try/finally
            sfp = self.send_request(Request(url))
            try:
                headers = sfp.info()
                blocksize = 8192
                size = -1
                read = 0
                blocknum = 0
                if "content-length" in headers:
                    size = int(headers["Content-Length"])
                if reporthook:
                    reporthook(blocknum, blocksize, size)
                while True:
                    block = sfp.read(blocksize)
                    if not block:
                        break
                    read += len(block)
                    dfp.write(block)
                    if digester:
                        digester.update(block)
                    blocknum += 1
                    if reporthook:
                        reporthook(blocknum, blocksize, size)
            finally:
                sfp.close()

        # check that we got the whole file, if we can
        if size >= 0 and read < size:
            raise DistlibException(
                'retrieval incomplete: got only %d out of %d bytes'
                % (read, size))
        # if we have a digest, it must match.
        if digester:
            actual = digester.hexdigest()
            if digest != actual:
                raise DistlibException('%s digest mismatch for %s: expected '
                                       '%s, got %s' % (hasher, destfile,
                                                       digest, actual))
            logger.debug('Digest verified: %s', digest)

    def send_request(self, req):
        """
        Send a standard library :class:`Request` to PyPI and return its
        response.

        :param req: The request to send.
        :return: The HTTP response from PyPI (a standard library HTTPResponse).
        """
        handlers = []
        if self.password_handler:
            handlers.append(self.password_handler)
        if self.ssl_verifier:
            handlers.append(self.ssl_verifier)
        opener = build_opener(*handlers)
        return opener.open(req)

    def encode_request(self, fields, files):
        """
        Encode fields and files for posting to an HTTP server.

        :param fields: The fields to send as a list of (fieldname, value)
                       tuples.
        :param files: The files to send as a list of (fieldname, filename,
                      file_bytes) tuples.
        """
        # Adapted from packaging, which in turn was adapted from
        # http://code.activestate.com/recipes/146306

        parts = []
        boundary = self.boundary
        for k, values in fields:
            if not isinstance(values, (list, tuple)):
                values = [values]

            for v in values:
                parts.extend((
                    b'--' + boundary,
                    ('Content-Disposition: form-data; name="%s"' %
                     k).encode('utf-8'),
                    b'',
                    v.encode('utf-8')))
        for key, filename, value in files:
            parts.extend((
                b'--' + boundary,
                ('Content-Disposition: form-data; name="%s"; filename="%s"' %
                 (key, filename)).encode('utf-8'),
                b'',
                value))

        parts.extend((b'--' + boundary + b'--', b''))

        body = b'\r\n'.join(parts)
        ct = b'multipart/form-data; boundary=' + boundary
        headers = {
            'Content-type': ct,
            'Content-length': str(len(body))
        }
        return Request(self.url, body, headers)

    def search(self, terms, operator=None):
        if isinstance(terms, string_types):
            terms = {'name': terms}
        if self.rpc_proxy is None:
            self.rpc_proxy = ServerProxy(self.url, timeout=3.0)
        return self.rpc_proxy.search(terms, operator or 'and')
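A minimal usage sketch for the PackageIndex class above; the credentials and search term below are placeholders, not values from this repository:

    from distlib.index import PackageIndex

    index = PackageIndex()                 # defaults to DEFAULT_INDEX
    index.username = 'example-user'        # hypothetical credentials
    index.password = 'example-pass'
    index.check_credentials()              # installs the basic-auth handler
    results = index.search('distlib')      # XML-RPC search, 'and' by default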
File diff suppressed because it is too large
@@ -0,0 +1,367 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2012-2013 Python Software Foundation.
# See LICENSE.txt and CONTRIBUTORS.txt.
#
"""
Class representing the list of files in a distribution.

Equivalent to distutils.filelist, but fixes some problems.
"""
import fnmatch
import logging
import os
import re

from . import DistlibException
from .compat import fsdecode
from .util import convert_path


__all__ = ['Manifest']

logger = logging.getLogger(__name__)

# a \ followed by some spaces + EOL
_COLLAPSE_PATTERN = re.compile(r'\\\w*\n', re.M)
_COMMENTED_LINE = re.compile('#.*?(?=\n)|\n(?=$)', re.M | re.S)


class Manifest(object):
    """A list of files built by exploring the filesystem and filtered by
    applying various patterns to what we find there.
    """

    def __init__(self, base=None):
        """
        Initialise an instance.

        :param base: The base directory to explore under.
        """
        self.base = os.path.abspath(os.path.normpath(base or os.getcwd()))
        self.prefix = self.base + os.sep
        self.allfiles = None
        self.files = set()

    #
    # Public API
    #

    def findall(self):
        """Find all files under the base and set ``allfiles`` to the absolute
        pathnames of files found.
        """
        from stat import S_ISREG, S_ISDIR, S_ISLNK

        self.allfiles = allfiles = []
        root = self.base
        stack = [root]
        pop = stack.pop
        push = stack.append

        while stack:
            root = pop()
            names = os.listdir(root)

            for name in names:
                fullname = os.path.join(root, name)

                # Avoid excess stat calls -- just one will do, thank you!
                stat = os.stat(fullname)
                mode = stat.st_mode
                if S_ISREG(mode):
                    allfiles.append(fsdecode(fullname))
                elif S_ISDIR(mode) and not S_ISLNK(mode):
                    push(fullname)

    def add(self, item):
        """
        Add a file to the manifest.

        :param item: The pathname to add. This can be relative to the base.
        """
        if not item.startswith(self.prefix):
            item = os.path.join(self.base, item)
        self.files.add(os.path.normpath(item))

    def add_many(self, items):
        """
        Add a list of files to the manifest.

        :param items: The pathnames to add. These can be relative to the base.
        """
        for item in items:
            self.add(item)

    def sorted(self, wantdirs=False):
        """
        Return sorted files in directory order
        """

        def add_dir(dirs, d):
            dirs.add(d)
            logger.debug('add_dir added %s', d)
            if d != self.base:
                parent, _ = os.path.split(d)
                assert parent not in ('', '/')
                add_dir(dirs, parent)

        result = set(self.files)    # make a copy!
        if wantdirs:
            dirs = set()
            for f in result:
                add_dir(dirs, os.path.dirname(f))
            result |= dirs
        return [os.path.join(*path_tuple) for path_tuple in
                sorted(os.path.split(path) for path in result)]

    def clear(self):
        """Clear all collected files."""
        self.files = set()
        self.allfiles = []

    def process_directive(self, directive):
        """
        Process a directive which either adds some files from ``allfiles`` to
        ``files``, or removes some files from ``files``.

        :param directive: The directive to process. This should be in a format
                          compatible with distutils ``MANIFEST.in`` files:

                          http://docs.python.org/distutils/sourcedist.html#commands
        """
        # Parse the line: split it up, make sure the right number of words
        # is there, and return the relevant words. 'action' is always
        # defined: it's the first word of the line. Which of the other
        # three are defined depends on the action; it'll be either
        # patterns, (dir and patterns), or (dirpattern).
        action, patterns, thedir, dirpattern = self._parse_directive(directive)

        # OK, now we know that the action is valid and we have the
        # right number of words on the line for that action -- so we
        # can proceed with minimal error-checking.
        if action == 'include':
            for pattern in patterns:
                if not self._include_pattern(pattern, anchor=True):
                    logger.warning('no files found matching %r', pattern)

        elif action == 'exclude':
            for pattern in patterns:
                found = self._exclude_pattern(pattern, anchor=True)
                #if not found:
                #    logger.warning('no previously-included files '
                #                   'found matching %r', pattern)

        elif action == 'global-include':
            for pattern in patterns:
                if not self._include_pattern(pattern, anchor=False):
                    logger.warning('no files found matching %r '
                                   'anywhere in distribution', pattern)

        elif action == 'global-exclude':
            for pattern in patterns:
                found = self._exclude_pattern(pattern, anchor=False)
                #if not found:
                #    logger.warning('no previously-included files '
                #                   'matching %r found anywhere in '
                #                   'distribution', pattern)

        elif action == 'recursive-include':
            for pattern in patterns:
                if not self._include_pattern(pattern, prefix=thedir):
                    logger.warning('no files found matching %r '
                                   'under directory %r', pattern, thedir)

        elif action == 'recursive-exclude':
            for pattern in patterns:
                found = self._exclude_pattern(pattern, prefix=thedir)
                #if not found:
                #    logger.warning('no previously-included files '
                #                   'matching %r found under directory %r',
                #                   pattern, thedir)

        elif action == 'graft':
            if not self._include_pattern(None, prefix=dirpattern):
                logger.warning('no directories found matching %r',
                               dirpattern)

        elif action == 'prune':
            if not self._exclude_pattern(None, prefix=dirpattern):
                logger.warning('no previously-included directories found '
                               'matching %r', dirpattern)
        else:   # pragma: no cover
            # This should never happen, as it should be caught in
            # _parse_template_line
            raise DistlibException(
                'invalid action %r' % action)

    #
    # Private API
    #

    def _parse_directive(self, directive):
        """
        Validate a directive.
        :param directive: The directive to validate.
        :return: A tuple of action, patterns, thedir, dir_patterns
        """
        words = directive.split()
        if len(words) == 1 and words[0] not in ('include', 'exclude',
                                                'global-include',
                                                'global-exclude',
                                                'recursive-include',
                                                'recursive-exclude',
                                                'graft', 'prune'):
            # no action given, let's use the default 'include'
            words.insert(0, 'include')

        action = words[0]
        patterns = thedir = dir_pattern = None

        if action in ('include', 'exclude',
                      'global-include', 'global-exclude'):
            if len(words) < 2:
                raise DistlibException(
                    '%r expects <pattern1> <pattern2> ...' % action)

            patterns = [convert_path(word) for word in words[1:]]

        elif action in ('recursive-include', 'recursive-exclude'):
            if len(words) < 3:
                raise DistlibException(
                    '%r expects <dir> <pattern1> <pattern2> ...' % action)

            thedir = convert_path(words[1])
            patterns = [convert_path(word) for word in words[2:]]

        elif action in ('graft', 'prune'):
            if len(words) != 2:
                raise DistlibException(
                    '%r expects a single <dir_pattern>' % action)

            dir_pattern = convert_path(words[1])

        else:
            raise DistlibException('unknown action %r' % action)

        return action, patterns, thedir, dir_pattern

    def _include_pattern(self, pattern, anchor=True, prefix=None,
                         is_regex=False):
        """Select strings (presumably filenames) from 'self.files' that
        match 'pattern', a Unix-style wildcard (glob) pattern.

        Patterns are not quite the same as implemented by the 'fnmatch'
        module: '*' and '?' match non-special characters, where "special"
        is platform-dependent: slash on Unix; colon, slash, and backslash on
        DOS/Windows; and colon on Mac OS.

        If 'anchor' is true (the default), then the pattern match is more
        stringent: "*.py" will match "foo.py" but not "foo/bar.py". If
        'anchor' is false, both of these will match.

        If 'prefix' is supplied, then only filenames starting with 'prefix'
        (itself a pattern) and ending with 'pattern', with anything in between
        them, will match. 'anchor' is ignored in this case.

        If 'is_regex' is true, 'anchor' and 'prefix' are ignored, and
        'pattern' is assumed to be either a string containing a regex or a
        regex object -- no translation is done, the regex is just compiled
        and used as-is.

        Selected strings will be added to self.files.

        Return True if files are found.
        """
        # XXX docstring lying about what the special chars are?
        found = False
        pattern_re = self._translate_pattern(pattern, anchor, prefix, is_regex)

        # delayed loading of allfiles list
        if self.allfiles is None:
            self.findall()

        for name in self.allfiles:
            if pattern_re.search(name):
                self.files.add(name)
                found = True
        return found

    def _exclude_pattern(self, pattern, anchor=True, prefix=None,
                         is_regex=False):
        """Remove strings (presumably filenames) from 'files' that match
        'pattern'.

        Other parameters are the same as for 'include_pattern()', above.
        The list 'self.files' is modified in place. Return True if files are
        found.

        This API is public to allow e.g. exclusion of SCM subdirs, e.g. when
        packaging source distributions
        """
        found = False
        pattern_re = self._translate_pattern(pattern, anchor, prefix, is_regex)
        for f in list(self.files):
            if pattern_re.search(f):
                self.files.remove(f)
                found = True
        return found

    def _translate_pattern(self, pattern, anchor=True, prefix=None,
                           is_regex=False):
        """Translate a shell-like wildcard pattern to a compiled regular
        expression.

        Return the compiled regex. If 'is_regex' true,
        then 'pattern' is directly compiled to a regex (if it's a string)
        or just returned as-is (assumes it's a regex object).
        """
        if is_regex:
            if isinstance(pattern, str):
                return re.compile(pattern)
            else:
                return pattern

        if pattern:
            pattern_re = self._glob_to_re(pattern)
        else:
            pattern_re = ''

        base = re.escape(os.path.join(self.base, ''))
        if prefix is not None:
            # ditch end of pattern character
            empty_pattern = self._glob_to_re('')
            prefix_re = self._glob_to_re(prefix)[:-len(empty_pattern)]
            sep = os.sep
            if os.sep == '\\':
                sep = r'\\'
            pattern_re = '^' + base + sep.join((prefix_re,
                                                '.*' + pattern_re))
        else:   # no prefix -- respect anchor flag
            if anchor:
                pattern_re = '^' + base + pattern_re

        return re.compile(pattern_re)

    def _glob_to_re(self, pattern):
        """Translate a shell-like glob pattern to a regular expression.

        Return a string containing the regex. Differs from
        'fnmatch.translate()' in that '*' does not match "special characters"
        (which are platform-specific).
        """
        pattern_re = fnmatch.translate(pattern)

        # '?' and '*' in the glob pattern become '.' and '.*' in the RE, which
        # IMHO is wrong -- '?' and '*' aren't supposed to match slash in Unix,
        # and by extension they shouldn't match such "special characters" under
        # any OS. So change all non-escaped dots in the RE to match any
        # character except the special characters (currently: just os.sep).
        sep = os.sep
        if os.sep == '\\':
            # we're using a regex to manipulate a regex, so we need
            # to escape the backslash twice
            sep = r'\\\\'
        escaped = r'\1[^%s]' % sep
        pattern_re = re.sub(r'((?<!\\)(\\\\)*)\.', escaped, pattern_re)
        return pattern_re
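A short sketch of driving the Manifest class above with MANIFEST.in-style directives; the base directory and patterns are hypothetical:

    from distlib.manifest import Manifest

    m = Manifest('/path/to/project')           # hypothetical project root
    m.findall()                                # walk the tree once (also done lazily)
    m.process_directive('include *.py')        # anchored include
    m.process_directive('recursive-include docs *.rst')
    m.process_directive('prune build')         # drop a whole subtree
    print(m.sorted())                          # deterministic, directory-ordered list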
@@ -0,0 +1,190 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2012-2013 Vinay Sajip.
# Licensed to the Python Software Foundation under a contributor agreement.
# See LICENSE.txt and CONTRIBUTORS.txt.
#
"""Parser for the environment markers micro-language defined in PEP 345."""

import ast
import os
import sys
import platform

from .compat import python_implementation, string_types
from .util import in_venv

__all__ = ['interpret']


class Evaluator(object):
    """
    A limited evaluator for Python expressions.
    """

    operators = {
        'eq': lambda x, y: x == y,
        'gt': lambda x, y: x > y,
        'gte': lambda x, y: x >= y,
        'in': lambda x, y: x in y,
        'lt': lambda x, y: x < y,
        'lte': lambda x, y: x <= y,
        'not': lambda x: not x,
        'noteq': lambda x, y: x != y,
        'notin': lambda x, y: x not in y,
    }

    allowed_values = {
        'sys_platform': sys.platform,
        'python_version': '%s.%s' % sys.version_info[:2],
        # parsing sys.version is not reliable, but there is no other
        # way to get e.g. 2.7.2+, and the PEP is defined with sys.version
        'python_full_version': sys.version.split(' ', 1)[0],
        'os_name': os.name,
        'platform_in_venv': str(in_venv()),
        'platform_release': platform.release(),
        'platform_version': platform.version(),
        'platform_machine': platform.machine(),
        'platform_python_implementation': python_implementation(),
    }

    def __init__(self, context=None):
        """
        Initialise an instance.

        :param context: If specified, names are looked up in this mapping.
        """
        self.context = context or {}
        self.source = None

    def get_fragment(self, offset):
        """
        Get the part of the source which is causing a problem.
        """
        fragment_len = 10
        s = '%r' % (self.source[offset:offset + fragment_len])
        if offset + fragment_len < len(self.source):
            s += '...'
        return s

    def get_handler(self, node_type):
        """
        Get a handler for the specified AST node type.
        """
        return getattr(self, 'do_%s' % node_type, None)

    def evaluate(self, node, filename=None):
        """
        Evaluate a source string or node, using ``filename`` when
        displaying errors.
        """
        if isinstance(node, string_types):
            self.source = node
            kwargs = {'mode': 'eval'}
            if filename:
                kwargs['filename'] = filename
            try:
                node = ast.parse(node, **kwargs)
            except SyntaxError as e:
                s = self.get_fragment(e.offset)
                raise SyntaxError('syntax error %s' % s)
        node_type = node.__class__.__name__.lower()
        handler = self.get_handler(node_type)
        if handler is None:
            if self.source is None:
                s = '(source not available)'
            else:
                s = self.get_fragment(node.col_offset)
            raise SyntaxError("don't know how to evaluate %r %s" % (
                node_type, s))
        return handler(node)

    def get_attr_key(self, node):
        assert isinstance(node, ast.Attribute), 'attribute node expected'
        return '%s.%s' % (node.value.id, node.attr)

    def do_attribute(self, node):
        if not isinstance(node.value, ast.Name):
            valid = False
        else:
            key = self.get_attr_key(node)
            valid = key in self.context or key in self.allowed_values
        if not valid:
            raise SyntaxError('invalid expression: %s' % key)
        if key in self.context:
            result = self.context[key]
        else:
            result = self.allowed_values[key]
        return result

    def do_boolop(self, node):
        result = self.evaluate(node.values[0])
        is_or = node.op.__class__ is ast.Or
        is_and = node.op.__class__ is ast.And
        assert is_or or is_and
        if (is_and and result) or (is_or and not result):
            for n in node.values[1:]:
                result = self.evaluate(n)
                if (is_or and result) or (is_and and not result):
                    break
        return result

    def do_compare(self, node):
        def sanity_check(lhsnode, rhsnode):
            valid = True
            if isinstance(lhsnode, ast.Str) and isinstance(rhsnode, ast.Str):
                valid = False
            #elif (isinstance(lhsnode, ast.Attribute)
            #      and isinstance(rhsnode, ast.Attribute)):
            #    klhs = self.get_attr_key(lhsnode)
            #    krhs = self.get_attr_key(rhsnode)
            #    valid = klhs != krhs
            if not valid:
                s = self.get_fragment(node.col_offset)
                raise SyntaxError('Invalid comparison: %s' % s)

        lhsnode = node.left
        lhs = self.evaluate(lhsnode)
        result = True
        for op, rhsnode in zip(node.ops, node.comparators):
            sanity_check(lhsnode, rhsnode)
            op = op.__class__.__name__.lower()
            if op not in self.operators:
                raise SyntaxError('unsupported operation: %r' % op)
            rhs = self.evaluate(rhsnode)
            result = self.operators[op](lhs, rhs)
            if not result:
                break
            lhs = rhs
            lhsnode = rhsnode
        return result

    def do_expression(self, node):
        return self.evaluate(node.body)

    def do_name(self, node):
        valid = False
        if node.id in self.context:
            valid = True
            result = self.context[node.id]
        elif node.id in self.allowed_values:
            valid = True
            result = self.allowed_values[node.id]
        if not valid:
            raise SyntaxError('invalid expression: %s' % node.id)
        return result

    def do_str(self, node):
        return node.s


def interpret(marker, execution_context=None):
    """
    Interpret a marker and return a result depending on environment.

    :param marker: The marker to interpret.
    :type marker: str
    :param execution_context: The context used for name lookup.
    :type execution_context: mapping
    """
    return Evaluator(execution_context).evaluate(marker.strip())
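A quick sketch of the interpret() entry point above; note that with the underscore names defined in allowed_values, markers are written as e.g. python_version rather than dotted forms:

    from distlib.markers import interpret

    print(interpret("python_version >= '2.6'"))                      # True on any recent CPython
    print(interpret("os_name == 'posix' and python_version >= '2.6'"))
    # names can also come from a caller-supplied context mapping:
    print(interpret("foo == 'bar'", {'foo': 'bar'}))                 # True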
File diff suppressed because it is too large
@@ -0,0 +1,323 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2013 Vinay Sajip.
# Licensed to the Python Software Foundation under a contributor agreement.
# See LICENSE.txt and CONTRIBUTORS.txt.
#
from __future__ import unicode_literals

import bisect
import io
import logging
import os
import pkgutil
import shutil
import sys
import types
import zipimport

from . import DistlibException
from .util import cached_property, get_cache_base, path_to_cache_dir, Cache

logger = logging.getLogger(__name__)


cache = None    # created when needed


class ResourceCache(Cache):
    def __init__(self, base=None):
        if base is None:
            # Use native string to avoid issues on 2.x: see Python #20140.
            base = os.path.join(get_cache_base(), str('resource-cache'))
        super(ResourceCache, self).__init__(base)

    def is_stale(self, resource, path):
        """
        Is the cache stale for the given resource?

        :param resource: The :class:`Resource` being cached.
        :param path: The path of the resource in the cache.
        :return: True if the cache is stale.
        """
        # Cache invalidation is a hard problem :-)
        return True

    def get(self, resource):
        """
        Get a resource into the cache.

        :param resource: A :class:`Resource` instance.
        :return: The pathname of the resource in the cache.
        """
        prefix, path = resource.finder.get_cache_info(resource)
        if prefix is None:
            result = path
        else:
            result = os.path.join(self.base, self.prefix_to_dir(prefix), path)
            dirname = os.path.dirname(result)
            if not os.path.isdir(dirname):
                os.makedirs(dirname)
            if not os.path.exists(result):
                stale = True
            else:
                stale = self.is_stale(resource, path)
            if stale:
                # write the bytes of the resource to the cache location
                with open(result, 'wb') as f:
                    f.write(resource.bytes)
        return result


class ResourceBase(object):
    def __init__(self, finder, name):
        self.finder = finder
        self.name = name


class Resource(ResourceBase):
    """
    A class representing an in-package resource, such as a data file. This is
    not normally instantiated by user code, but rather by a
    :class:`ResourceFinder` which manages the resource.
    """
    is_container = False        # Backwards compatibility

    def as_stream(self):
        """
        Get the resource as a stream.

        This is not a property to make it obvious that it returns a new stream
        each time.
        """
        return self.finder.get_stream(self)

    @cached_property
    def file_path(self):
        global cache
        if cache is None:
            cache = ResourceCache()
        return cache.get(self)

    @cached_property
    def bytes(self):
        return self.finder.get_bytes(self)

    @cached_property
    def size(self):
        return self.finder.get_size(self)


class ResourceContainer(ResourceBase):
    is_container = True         # Backwards compatibility

    @cached_property
    def resources(self):
        return self.finder.get_resources(self)


class ResourceFinder(object):
    """
    Resource finder for file system resources.
    """
    def __init__(self, module):
        self.module = module
        self.loader = getattr(module, '__loader__', None)
        self.base = os.path.dirname(getattr(module, '__file__', ''))

    def _adjust_path(self, path):
        return os.path.realpath(path)

    def _make_path(self, resource_name):
        # Issue #50: need to preserve type of path on Python 2.x
        # like os.path._get_sep
        if isinstance(resource_name, bytes):    # should only happen on 2.x
            sep = b'/'
        else:
            sep = '/'
        parts = resource_name.split(sep)
        parts.insert(0, self.base)
        result = os.path.join(*parts)
        return self._adjust_path(result)

    def _find(self, path):
        return os.path.exists(path)

    def get_cache_info(self, resource):
        return None, resource.path

    def find(self, resource_name):
        path = self._make_path(resource_name)
        if not self._find(path):
            result = None
        else:
            if self._is_directory(path):
                result = ResourceContainer(self, resource_name)
            else:
                result = Resource(self, resource_name)
            result.path = path
        return result

    def get_stream(self, resource):
        return open(resource.path, 'rb')

    def get_bytes(self, resource):
        with open(resource.path, 'rb') as f:
            return f.read()

    def get_size(self, resource):
        return os.path.getsize(resource.path)

    def get_resources(self, resource):
        def allowed(f):
            return f != '__pycache__' and not f.endswith(('.pyc', '.pyo'))
        return set([f for f in os.listdir(resource.path) if allowed(f)])

    def is_container(self, resource):
        return self._is_directory(resource.path)

    _is_directory = staticmethod(os.path.isdir)


class ZipResourceFinder(ResourceFinder):
    """
    Resource finder for resources in .zip files.
    """
    def __init__(self, module):
        super(ZipResourceFinder, self).__init__(module)
        archive = self.loader.archive
        self.prefix_len = 1 + len(archive)
        # PyPy doesn't have a _files attr on zipimporter, and you can't set one
        if hasattr(self.loader, '_files'):
            self._files = self.loader._files
        else:
            self._files = zipimport._zip_directory_cache[archive]
        self.index = sorted(self._files)

    def _adjust_path(self, path):
        return path

    def _find(self, path):
        path = path[self.prefix_len:]
        if path in self._files:
            result = True
        else:
            if path and path[-1] != os.sep:
                path = path + os.sep
            i = bisect.bisect(self.index, path)
            try:
                result = self.index[i].startswith(path)
            except IndexError:
                result = False
        if not result:
            logger.debug('_find failed: %r %r', path, self.loader.prefix)
        else:
            logger.debug('_find worked: %r %r', path, self.loader.prefix)
        return result

    def get_cache_info(self, resource):
        prefix = self.loader.archive
        path = resource.path[1 + len(prefix):]
        return prefix, path

    def get_bytes(self, resource):
        return self.loader.get_data(resource.path)

    def get_stream(self, resource):
        return io.BytesIO(self.get_bytes(resource))

    def get_size(self, resource):
        path = resource.path[self.prefix_len:]
        return self._files[path][3]

    def get_resources(self, resource):
        path = resource.path[self.prefix_len:]
        if path and path[-1] != os.sep:
            path += os.sep
        plen = len(path)
        result = set()
        i = bisect.bisect(self.index, path)
        while i < len(self.index):
            if not self.index[i].startswith(path):
                break
            s = self.index[i][plen:]
            result.add(s.split(os.sep, 1)[0])   # only immediate children
            i += 1
        return result

    def _is_directory(self, path):
        path = path[self.prefix_len:]
        if path and path[-1] != os.sep:
            path += os.sep
        i = bisect.bisect(self.index, path)
        try:
            result = self.index[i].startswith(path)
        except IndexError:
            result = False
        return result

_finder_registry = {
    type(None): ResourceFinder,
    zipimport.zipimporter: ZipResourceFinder
}

try:
    import _frozen_importlib
    _finder_registry[_frozen_importlib.SourceFileLoader] = ResourceFinder
    _finder_registry[_frozen_importlib.FileFinder] = ResourceFinder
except (ImportError, AttributeError):
    pass


def register_finder(loader, finder_maker):
    _finder_registry[type(loader)] = finder_maker

_finder_cache = {}


def finder(package):
    """
    Return a resource finder for a package.
    :param package: The name of the package.
    :return: A :class:`ResourceFinder` instance for the package.
    """
    if package in _finder_cache:
        result = _finder_cache[package]
    else:
        if package not in sys.modules:
            __import__(package)
        module = sys.modules[package]
        path = getattr(module, '__path__', None)
        if path is None:
            raise DistlibException('You cannot get a finder for a module, '
                                   'only for a package')
        loader = getattr(module, '__loader__', None)
        finder_maker = _finder_registry.get(type(loader))
        if finder_maker is None:
            raise DistlibException('Unable to locate finder for %r' % package)
        result = finder_maker(module)
        _finder_cache[package] = result
    return result


_dummy_module = types.ModuleType(str('__dummy__'))


def finder_for_path(path):
    """
    Return a resource finder for a path, which should represent a container.

    :param path: The path.
    :return: A :class:`ResourceFinder` instance for the path.
    """
    result = None
    # calls any path hooks, gets importer into cache
    pkgutil.get_importer(path)
    loader = sys.path_importer_cache.get(path)
    finder = _finder_registry.get(type(loader))
    if finder:
        module = _dummy_module
        module.__file__ = os.path.join(path, '')
        module.__loader__ = loader
        result = finder(module)
    return result
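A brief sketch of the finder API above; the package and resource names below are hypothetical:

    from distlib.resources import finder

    f = finder('mypackage')              # must name a package, not a module
    r = f.find('data/defaults.cfg')      # Resource, ResourceContainer or None
    if r is not None and not r.is_container:
        print(r.size)                    # delegated to the finder
        with r.as_stream() as stream:    # a new stream on each call
            print(stream.read(16))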
@@ -0,0 +1,335 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2013-2014 Vinay Sajip.
# Licensed to the Python Software Foundation under a contributor agreement.
# See LICENSE.txt and CONTRIBUTORS.txt.
#
from io import BytesIO
import logging
import os
import re
import struct
import sys

from .compat import sysconfig, detect_encoding, ZipFile
from .resources import finder
from .util import (FileOperator, get_export_entry, convert_path,
                   get_executable, in_venv)

logger = logging.getLogger(__name__)

_DEFAULT_MANIFEST = '''
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<assembly xmlns="urn:schemas-microsoft-com:asm.v1" manifestVersion="1.0">
 <assemblyIdentity version="1.0.0.0"
 processorArchitecture="X86"
 name="%s"
 type="win32"/>

 <!-- Identify the application security requirements. -->
 <trustInfo xmlns="urn:schemas-microsoft-com:asm.v3">
 <security>
 <requestedPrivileges>
 <requestedExecutionLevel level="asInvoker" uiAccess="false"/>
 </requestedPrivileges>
 </security>
 </trustInfo>
</assembly>'''.strip()

# check if Python is called on the first line with this expression
FIRST_LINE_RE = re.compile(b'^#!.*pythonw?[0-9.]*([ \t].*)?$')
SCRIPT_TEMPLATE = '''# -*- coding: utf-8 -*-
if __name__ == '__main__':
    import sys, re

    def _resolve(module, func):
        __import__(module)
        mod = sys.modules[module]
        parts = func.split('.')
        result = getattr(mod, parts.pop(0))
        for p in parts:
            result = getattr(result, p)
        return result

    try:
        sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])

        func = _resolve('%(module)s', '%(func)s')
        rc = func()  # None interpreted as 0
    except Exception as e:  # only supporting Python >= 2.6
        sys.stderr.write('%%s\\n' %% e)
        rc = 1
    sys.exit(rc)
'''


class ScriptMaker(object):
    """
    A class to copy or create scripts from source scripts or callable
    specifications.
    """
    script_template = SCRIPT_TEMPLATE

    executable = None  # for shebangs

    def __init__(self, source_dir, target_dir, add_launchers=True,
                 dry_run=False, fileop=None):
        self.source_dir = source_dir
        self.target_dir = target_dir
        self.add_launchers = add_launchers
        self.force = False
        self.clobber = False
        # It only makes sense to set mode bits on POSIX.
        self.set_mode = (os.name == 'posix')
        self.variants = set(('', 'X.Y'))
        self._fileop = fileop or FileOperator(dry_run)

    def _get_alternate_executable(self, executable, options):
        if options.get('gui', False) and os.name == 'nt':
            dn, fn = os.path.split(executable)
            fn = fn.replace('python', 'pythonw')
            executable = os.path.join(dn, fn)
        return executable

    def _get_shebang(self, encoding, post_interp=b'', options=None):
        enquote = True
        if self.executable:
            executable = self.executable
            enquote = False     # assume this will be taken care of
        elif not sysconfig.is_python_build():
            executable = get_executable()
        elif in_venv():
            executable = os.path.join(sysconfig.get_path('scripts'),
                                      'python%s' % sysconfig.get_config_var('EXE'))
        else:
            executable = os.path.join(
                sysconfig.get_config_var('BINDIR'),
                'python%s%s' % (sysconfig.get_config_var('VERSION'),
                                sysconfig.get_config_var('EXE')))
        if options:
            executable = self._get_alternate_executable(executable, options)

        # If the user didn't specify an executable, it may be necessary to
        # cater for executable paths with spaces (not uncommon on Windows)
        if enquote and ' ' in executable:
            executable = '"%s"' % executable
        # Issue #51: don't use fsencode, since we later try to
        # check that the shebang is decodable using utf-8.
        executable = executable.encode('utf-8')
        # in case of IronPython, play safe and enable frames support
        if (sys.platform == 'cli' and b'-X:Frames' not in post_interp
                and b'-X:FullFrames' not in post_interp):
            post_interp += b' -X:Frames'
        shebang = b'#!' + executable + post_interp + b'\n'
        # Python parser starts to read a script using UTF-8 until
        # it gets a #coding:xxx cookie. The shebang has to be the
        # first line of a file, the #coding:xxx cookie cannot be
        # written before. So the shebang has to be decodable from
        # UTF-8.
        try:
            shebang.decode('utf-8')
        except UnicodeDecodeError:
            raise ValueError(
                'The shebang (%r) is not decodable from utf-8' % shebang)
        # If the script is encoded to a custom encoding (use a
        # #coding:xxx cookie), the shebang has to be decodable from
        # the script encoding too.
        if encoding != 'utf-8':
            try:
                shebang.decode(encoding)
            except UnicodeDecodeError:
                raise ValueError(
                    'The shebang (%r) is not decodable '
                    'from the script encoding (%r)' % (shebang, encoding))
        return shebang

    def _get_script_text(self, entry):
        return self.script_template % dict(module=entry.prefix,
                                           func=entry.suffix)

    manifest = _DEFAULT_MANIFEST

    def get_manifest(self, exename):
        base = os.path.basename(exename)
        return self.manifest % base

    def _write_script(self, names, shebang, script_bytes, filenames, ext):
        use_launcher = self.add_launchers and os.name == 'nt'
        linesep = os.linesep.encode('utf-8')
        if not use_launcher:
            script_bytes = shebang + linesep + script_bytes
        else:
            if ext == 'py':
                launcher = self._get_launcher('t')
            else:
                launcher = self._get_launcher('w')
            stream = BytesIO()
            with ZipFile(stream, 'w') as zf:
                zf.writestr('__main__.py', script_bytes)
            zip_data = stream.getvalue()
            script_bytes = launcher + shebang + linesep + zip_data
        for name in names:
            outname = os.path.join(self.target_dir, name)
            if use_launcher:
                n, e = os.path.splitext(outname)
                if e.startswith('.py'):
                    outname = n
                outname = '%s.exe' % outname
                try:
                    self._fileop.write_binary_file(outname, script_bytes)
                except Exception:
                    # Failed writing an executable - it might be in use.
                    logger.warning('Failed to write executable - trying to '
                                   'use .deleteme logic')
                    dfname = '%s.deleteme' % outname
                    if os.path.exists(dfname):
                        os.remove(dfname)       # Not allowed to fail here
                    os.rename(outname, dfname)  # nor here
                    self._fileop.write_binary_file(outname, script_bytes)
                    logger.debug('Able to replace executable using '
                                 '.deleteme logic')
                    try:
                        os.remove(dfname)
                    except Exception:
                        pass    # still in use - ignore error
            else:
                if os.name == 'nt' and not outname.endswith('.' + ext):
                    outname = '%s.%s' % (outname, ext)
                if os.path.exists(outname) and not self.clobber:
                    logger.warning('Skipping existing file %s', outname)
                    continue
                self._fileop.write_binary_file(outname, script_bytes)
                if self.set_mode:
                    self._fileop.set_executable_mode([outname])
            filenames.append(outname)

    def _make_script(self, entry, filenames, options=None):
        post_interp = b''
        if options:
            args = options.get('interpreter_args', [])
            if args:
                args = ' %s' % ' '.join(args)
                post_interp = args.encode('utf-8')
        shebang = self._get_shebang('utf-8', post_interp, options=options)
        script = self._get_script_text(entry).encode('utf-8')
        name = entry.name
        scriptnames = set()
        if '' in self.variants:
            scriptnames.add(name)
        if 'X' in self.variants:
            scriptnames.add('%s%s' % (name, sys.version[0]))
        if 'X.Y' in self.variants:
            scriptnames.add('%s-%s' % (name, sys.version[:3]))
        if options and options.get('gui', False):
            ext = 'pyw'
        else:
            ext = 'py'
        self._write_script(scriptnames, shebang, script, filenames, ext)

    def _copy_script(self, script, filenames):
        adjust = False
        script = os.path.join(self.source_dir, convert_path(script))
        outname = os.path.join(self.target_dir, os.path.basename(script))
        if not self.force and not self._fileop.newer(script, outname):
            logger.debug('not copying %s (up-to-date)', script)
            return

        # Always open the file, but ignore failures in dry-run mode --
        # that way, we'll get accurate feedback if we can read the
        # script.
        try:
            f = open(script, 'rb')
        except IOError:
            if not self.dry_run:
                raise
            f = None
        else:
            encoding, lines = detect_encoding(f.readline)
            f.seek(0)
            first_line = f.readline()
            if not first_line:
                logger.warning('%s: %s is an empty file (skipping)',
                               self.get_command_name(), script)
                return

            match = FIRST_LINE_RE.match(first_line.replace(b'\r\n', b'\n'))
            if match:
                adjust = True
                post_interp = match.group(1) or b''

        if not adjust:
            if f:
                f.close()
            self._fileop.copy_file(script, outname)
            if self.set_mode:
                self._fileop.set_executable_mode([outname])
            filenames.append(outname)
        else:
            logger.info('copying and adjusting %s -> %s', script,
                        self.target_dir)
            if not self._fileop.dry_run:
                shebang = self._get_shebang(encoding, post_interp)
                if b'pythonw' in first_line:
                    ext = 'pyw'
                else:
                    ext = 'py'
                n = os.path.basename(outname)
                self._write_script([n], shebang, f.read(), filenames, ext)
            if f:
                f.close()

    @property
    def dry_run(self):
        return self._fileop.dry_run

    @dry_run.setter
    def dry_run(self, value):
        self._fileop.dry_run = value

    if os.name == 'nt':
        # Executable launcher support.
        # Launchers are from https://bitbucket.org/vinay.sajip/simple_launcher/

        def _get_launcher(self, kind):
            if struct.calcsize('P') == 8:   # 64-bit
                bits = '64'
            else:
                bits = '32'
            name = '%s%s.exe' % (kind, bits)
            # Issue 31: don't hardcode an absolute package name, but
            # determine it relative to the current package
            distlib_package = __name__.rsplit('.', 1)[0]
            result = finder(distlib_package).find(name).bytes
            return result

    # Public API follows

    def make(self, specification, options=None):
        """
        Make a script.

        :param specification: The specification, which is either a valid export
                              entry specification (to make a script from a
                              callable) or a filename (to make a script by
                              copying from a source location).
        :param options: A dictionary of options controlling script generation.
        :return: A list of all absolute pathnames written to.
        """
        filenames = []
        entry = get_export_entry(specification)
        if entry is None:
            self._copy_script(specification, filenames)
        else:
            self._make_script(entry, filenames, options=options)
        return filenames

    def make_multiple(self, specifications, options=None):
        """
        Take a list of specifications and make scripts from them.

        :param specifications: A list of specifications.
        :return: A list of all absolute pathnames written to.
        """
        filenames = []
        for specification in specifications:
            filenames.extend(self.make(specification, options))
        return filenames
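As the `make` docstring says, a specification is either an export entry ('name = module:callable', parsed by `get_export_entry`) or a plain filename to copy with shebang adjustment. A minimal sketch of driving ScriptMaker directly; the directories and the export entry are illustrative values, not from this commit:

    from distlib.scripts import ScriptMaker

    maker = ScriptMaker(source_dir='scripts', target_dir='bin')
    maker.clobber = True                        # overwrite existing outputs
    written = maker.make('mytool = mypkg.cli:main')
    print(written)  # e.g. ['bin/mytool', 'bin/mytool-2.7'] from the default variants

With the default variants set('', 'X.Y'), both an unversioned and a version-suffixed script are emitted, which is why `make` returns a list of pathnames.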
Binary file not shown.
Binary file not shown.
File diff suppressed because it is too large
@@ -0,0 +1,742 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2012-2014 The Python Software Foundation.
# See LICENSE.txt and CONTRIBUTORS.txt.
#
"""
Implementation of a flexible versioning scheme providing support for PEP-386,
distribute-compatible and semantic versioning.
"""

import logging
import re

from .compat import string_types

__all__ = ['NormalizedVersion', 'NormalizedMatcher',
           'LegacyVersion', 'LegacyMatcher',
           'SemanticVersion', 'SemanticMatcher',
           'UnsupportedVersionError', 'get_scheme']

logger = logging.getLogger(__name__)


class UnsupportedVersionError(ValueError):
    """This is an unsupported version."""
    pass


class Version(object):
    def __init__(self, s):
        self._string = s = s.strip()
        self._parts = parts = self.parse(s)
        assert isinstance(parts, tuple)
        assert len(parts) > 0

    def parse(self, s):
        raise NotImplementedError('please implement in a subclass')

    def _check_compatible(self, other):
        if type(self) != type(other):
            raise TypeError('cannot compare %r and %r' % (self, other))

    def __eq__(self, other):
        self._check_compatible(other)
        return self._parts == other._parts

    def __ne__(self, other):
        return not self.__eq__(other)

    def __lt__(self, other):
        self._check_compatible(other)
        return self._parts < other._parts

    def __gt__(self, other):
        return not (self.__lt__(other) or self.__eq__(other))

    def __le__(self, other):
        return self.__lt__(other) or self.__eq__(other)

    def __ge__(self, other):
        return self.__gt__(other) or self.__eq__(other)

    # See http://docs.python.org/reference/datamodel#object.__hash__
    def __hash__(self):
        return hash(self._parts)

    def __repr__(self):
        return "%s('%s')" % (self.__class__.__name__, self._string)

    def __str__(self):
        return self._string

    @property
    def is_prerelease(self):
        raise NotImplementedError('Please implement in subclasses.')


class Matcher(object):
    version_class = None

    dist_re = re.compile(r"^(\w[\s\w'.-]*)(\((.*)\))?")
    comp_re = re.compile(r'^(<=|>=|<|>|!=|={2,3}|~=)?\s*([^\s,]+)$')
    num_re = re.compile(r'^\d+(\.\d+)*$')

    # value is either a callable or the name of a method
    _operators = {
        '<': lambda v, c, p: v < c,
        '>': lambda v, c, p: v > c,
        '<=': lambda v, c, p: v == c or v < c,
        '>=': lambda v, c, p: v == c or v > c,
        '==': lambda v, c, p: v == c,
        '===': lambda v, c, p: v == c,
        # by default, compatible => >=.
        '~=': lambda v, c, p: v == c or v > c,
        '!=': lambda v, c, p: v != c,
    }

    def __init__(self, s):
        if self.version_class is None:
            raise ValueError('Please specify a version class')
        self._string = s = s.strip()
        m = self.dist_re.match(s)
        if not m:
            raise ValueError('Not valid: %r' % s)
        groups = m.groups('')
        self.name = groups[0].strip()
        self.key = self.name.lower()    # for case-insensitive comparisons
        clist = []
        if groups[2]:
            constraints = [c.strip() for c in groups[2].split(',')]
            for c in constraints:
                m = self.comp_re.match(c)
                if not m:
                    raise ValueError('Invalid %r in %r' % (c, s))
                groups = m.groups()
                op = groups[0] or '~='
                s = groups[1]
                if s.endswith('.*'):
                    if op not in ('==', '!='):
                        raise ValueError('\'.*\' not allowed for '
                                         '%r constraints' % op)
                    # Could be a partial version (e.g. for '2.*') which
                    # won't parse as a version, so keep it as a string
                    vn, prefix = s[:-2], True
                    if not self.num_re.match(vn):
                        # Just to check that vn is a valid version
                        self.version_class(vn)
                else:
                    # Should parse as a version, so we can create an
                    # instance for the comparison
                    vn, prefix = self.version_class(s), False
                clist.append((op, vn, prefix))
        self._parts = tuple(clist)

    def match(self, version):
        """
        Check if the provided version matches the constraints.

        :param version: The version to match against this instance.
        :type version: String or :class:`Version` instance.
        """
        if isinstance(version, string_types):
            version = self.version_class(version)
        for operator, constraint, prefix in self._parts:
            f = self._operators.get(operator)
            if isinstance(f, string_types):
                f = getattr(self, f)
            if not f:
                msg = ('%r not implemented '
                       'for %s' % (operator, self.__class__.__name__))
                raise NotImplementedError(msg)
            if not f(version, constraint, prefix):
                return False
        return True

    @property
    def exact_version(self):
        result = None
        if len(self._parts) == 1 and self._parts[0][0] in ('==', '==='):
            result = self._parts[0][1]
        return result

    def _check_compatible(self, other):
        if type(self) != type(other) or self.name != other.name:
            raise TypeError('cannot compare %s and %s' % (self, other))

    def __eq__(self, other):
        self._check_compatible(other)
        return self.key == other.key and self._parts == other._parts

    def __ne__(self, other):
        return not self.__eq__(other)

    # See http://docs.python.org/reference/datamodel#object.__hash__
    def __hash__(self):
        return hash(self.key) + hash(self._parts)

    def __repr__(self):
        return "%s(%r)" % (self.__class__.__name__, self._string)

    def __str__(self):
        return self._string


PEP440_VERSION_RE = re.compile(r'^v?(\d+!)?(\d+(\.\d+)*)((a|b|c|rc)(\d+))?'
                               r'(\.(post)(\d+))?(\.(dev)(\d+))?'
                               r'(\+([a-zA-Z\d]+(\.[a-zA-Z\d]+)?))?$')


def _pep_440_key(s):
    s = s.strip()
    m = PEP440_VERSION_RE.match(s)
    if not m:
        raise UnsupportedVersionError('Not a valid version: %s' % s)
    groups = m.groups()
    nums = tuple(int(v) for v in groups[1].split('.'))
    while len(nums) > 1 and nums[-1] == 0:
        nums = nums[:-1]

    if not groups[0]:
        epoch = 0
    else:
        epoch = int(groups[0][:-1])  # strip the trailing '!' of the epoch
    pre = groups[4:6]
    post = groups[7:9]
    dev = groups[10:12]
    local = groups[13]
    if pre == (None, None):
        pre = ()
    else:
        pre = pre[0], int(pre[1])
    if post == (None, None):
        post = ()
    else:
        post = post[0], int(post[1])
    if dev == (None, None):
        dev = ()
    else:
        dev = dev[0], int(dev[1])
    if local is None:
        local = ()
    else:
        parts = []
        for part in local.split('.'):
            # to ensure that numeric compares as > lexicographic, avoid
            # comparing them directly, but encode a tuple which ensures
            # correct sorting
            if part.isdigit():
                part = (1, int(part))
            else:
                part = (0, part)
            parts.append(part)
        local = tuple(parts)
    if not pre:
        # either before pre-release, or final release and after
        if not post and dev:
            # before pre-release
            pre = ('a', -1)     # to sort before a0
        else:
            pre = ('z',)        # to sort after all pre-releases
    # now look at the state of post and dev.
    if not post:
        post = ('_',)   # sort before 'a'
    if not dev:
        dev = ('final',)

    #print('%s -> %s' % (s, m.groups()))
    return epoch, nums, pre, post, dev, local


_normalized_key = _pep_440_key


class NormalizedVersion(Version):
    """A rational version.

    Good:
        1.2         # equivalent to "1.2.0"
        1.2.0
        1.2a1
        1.2.3a2
        1.2.3b1
        1.2.3c1
        1.2.3.4
        TODO: fill this out

    Bad:
        1           # minimum two numbers
        1.2a        # release level must have a release serial
        1.2.3b
    """
    def parse(self, s):
        result = _normalized_key(s)
        # _normalized_key loses trailing zeroes in the release
        # clause, since that's needed to ensure that X.Y == X.Y.0 == X.Y.0.0
        # However, PEP 440 prefix matching needs it: for example,
        # (~= 1.4.5.0) matches differently to (~= 1.4.5.0.0).
        m = PEP440_VERSION_RE.match(s)      # must succeed
        groups = m.groups()
        self._release_clause = tuple(int(v) for v in groups[1].split('.'))
        return result

    PREREL_TAGS = set(['a', 'b', 'c', 'rc', 'dev'])

    @property
    def is_prerelease(self):
        return any(t[0] in self.PREREL_TAGS for t in self._parts if t)


def _match_prefix(x, y):
    x = str(x)
    y = str(y)
    if x == y:
        return True
    if not x.startswith(y):
        return False
    n = len(y)
    return x[n] == '.'


class NormalizedMatcher(Matcher):
    version_class = NormalizedVersion

    # value is either a callable or the name of a method
    _operators = {
        '~=': '_match_compatible',
        '<': '_match_lt',
        '>': '_match_gt',
        '<=': '_match_le',
        '>=': '_match_ge',
        '==': '_match_eq',
        '===': '_match_arbitrary',
        '!=': '_match_ne',
    }

    def _adjust_local(self, version, constraint, prefix):
        if prefix:
            strip_local = '+' not in constraint and version._parts[-1]
        else:
            # both constraint and version are
            # NormalizedVersion instances.
            # If constraint does not have a local component,
            # ensure the version doesn't, either.
            strip_local = not constraint._parts[-1] and version._parts[-1]
        if strip_local:
            s = version._string.split('+', 1)[0]
            version = self.version_class(s)
        return version, constraint

    def _match_lt(self, version, constraint, prefix):
        version, constraint = self._adjust_local(version, constraint, prefix)
        if version >= constraint:
            return False
        release_clause = constraint._release_clause
        pfx = '.'.join([str(i) for i in release_clause])
        return not _match_prefix(version, pfx)

    def _match_gt(self, version, constraint, prefix):
        version, constraint = self._adjust_local(version, constraint, prefix)
        if version <= constraint:
            return False
        release_clause = constraint._release_clause
        pfx = '.'.join([str(i) for i in release_clause])
        return not _match_prefix(version, pfx)

    def _match_le(self, version, constraint, prefix):
        version, constraint = self._adjust_local(version, constraint, prefix)
        return version <= constraint

    def _match_ge(self, version, constraint, prefix):
        version, constraint = self._adjust_local(version, constraint, prefix)
        return version >= constraint

    def _match_eq(self, version, constraint, prefix):
        version, constraint = self._adjust_local(version, constraint, prefix)
        if not prefix:
            result = (version == constraint)
        else:
            result = _match_prefix(version, constraint)
        return result

    def _match_arbitrary(self, version, constraint, prefix):
        return str(version) == str(constraint)

    def _match_ne(self, version, constraint, prefix):
        version, constraint = self._adjust_local(version, constraint, prefix)
        if not prefix:
            result = (version != constraint)
        else:
            result = not _match_prefix(version, constraint)
        return result

    def _match_compatible(self, version, constraint, prefix):
        version, constraint = self._adjust_local(version, constraint, prefix)
        if version == constraint:
            return True
        if version < constraint:
            return False
        # if not prefix:
        #     return True
        release_clause = constraint._release_clause
        if len(release_clause) > 1:
            release_clause = release_clause[:-1]
        pfx = '.'.join([str(i) for i in release_clause])
        return _match_prefix(version, pfx)


_REPLACEMENTS = (
    (re.compile('[.+-]$'), ''),                     # remove trailing puncts
    (re.compile(r'^[.](\d)'), r'0.\1'),             # .N -> 0.N at start
    (re.compile('^[.-]'), ''),                      # remove leading puncts
    (re.compile(r'^\((.*)\)$'), r'\1'),             # remove parentheses
    (re.compile(r'^v(ersion)?\s*(\d+)'), r'\2'),    # remove leading v(ersion)
    (re.compile(r'^r(ev)?\s*(\d+)'), r'\2'),        # remove leading r(ev)
    (re.compile('[.]{2,}'), '.'),                   # multiple runs of '.'
    (re.compile(r'\b(alfa|apha)\b'), 'alpha'),      # misspelt alpha
    (re.compile(r'\b(pre-alpha|prealpha)\b'),
     'pre.alpha'),                                  # standardise
    (re.compile(r'\(beta\)$'), 'beta'),             # remove parentheses
)

_SUFFIX_REPLACEMENTS = (
    (re.compile('^[:~._+-]+'), ''),     # remove leading puncts
    (re.compile('[,*")([\]]'), ''),     # remove unwanted chars
    (re.compile('[~:+_ -]'), '.'),      # replace illegal chars
    (re.compile('[.]{2,}'), '.'),       # multiple runs of '.'
    (re.compile(r'\.$'), ''),           # trailing '.'
)

_NUMERIC_PREFIX = re.compile(r'(\d+(\.\d+)*)')


def _suggest_semantic_version(s):
    """
    Try to suggest a semantic form for a version for which
    _suggest_normalized_version couldn't come up with anything.
    """
    result = s.strip().lower()
    for pat, repl in _REPLACEMENTS:
        result = pat.sub(repl, result)
    if not result:
        result = '0.0.0'

    # Now look for numeric prefix, and separate it out from
    # the rest.
    #import pdb; pdb.set_trace()
    m = _NUMERIC_PREFIX.match(result)
    if not m:
        prefix = '0.0.0'
        suffix = result
    else:
        prefix = m.groups()[0].split('.')
        prefix = [int(i) for i in prefix]
        while len(prefix) < 3:
            prefix.append(0)
        if len(prefix) == 3:
            suffix = result[m.end():]
        else:
            suffix = '.'.join([str(i) for i in prefix[3:]]) + result[m.end():]
            prefix = prefix[:3]
        prefix = '.'.join([str(i) for i in prefix])
        suffix = suffix.strip()
    if suffix:
        #import pdb; pdb.set_trace()
        # massage the suffix.
        for pat, repl in _SUFFIX_REPLACEMENTS:
            suffix = pat.sub(repl, suffix)

    if not suffix:
        result = prefix
    else:
        sep = '-' if 'dev' in suffix else '+'
        result = prefix + sep + suffix
    if not is_semver(result):
        result = None
    return result


def _suggest_normalized_version(s):
    """Suggest a normalized version close to the given version string.

    If you have a version string that isn't rational (i.e. NormalizedVersion
    doesn't like it) then you might be able to get an equivalent (or close)
    rational version from this function.

    This does a number of simple normalizations to the given string, based
    on observation of versions currently in use on PyPI. Given a dump of
    those version during PyCon 2009, 4287 of them:
    - 2312 (53.93%) match NormalizedVersion without change
      with the automatic suggestion
    - 3474 (81.04%) match when using this suggestion method

    @param s {str} An irrational version string.
    @returns A rational version string, or None, if couldn't determine one.
    """
    try:
        _normalized_key(s)
        return s   # already rational
    except UnsupportedVersionError:
        pass

    rs = s.lower()

    # part of this could use maketrans
    for orig, repl in (('-alpha', 'a'), ('-beta', 'b'), ('alpha', 'a'),
                       ('beta', 'b'), ('rc', 'c'), ('-final', ''),
                       ('-pre', 'c'),
                       ('-release', ''), ('.release', ''), ('-stable', ''),
                       ('+', '.'), ('_', '.'), (' ', ''), ('.final', ''),
                       ('final', '')):
        rs = rs.replace(orig, repl)

    # if something ends with dev or pre, we add a 0
    rs = re.sub(r"pre$", r"pre0", rs)
    rs = re.sub(r"dev$", r"dev0", rs)

    # if we have something like "b-2" or "a.2" at the end of the
    # version, that is probably beta, alpha, etc
    # let's remove the dash or dot
    rs = re.sub(r"([abc]|rc)[\-\.](\d+)$", r"\1\2", rs)

    # 1.0-dev-r371 -> 1.0.dev371
    # 0.1-dev-r79 -> 0.1.dev79
    rs = re.sub(r"[\-\.](dev)[\-\.]?r?(\d+)$", r".\1\2", rs)

    # Clean: 2.0.a.3, 2.0.b1, 0.9.0~c1
    rs = re.sub(r"[.~]?([abc])\.?", r"\1", rs)

    # Clean: v0.3, v1.0
    if rs.startswith('v'):
        rs = rs[1:]

    # Clean leading '0's on numbers.
    #TODO: unintended side-effect on, e.g., "2003.05.09"
    # PyPI stats: 77 (~2%) better
    rs = re.sub(r"\b0+(\d+)(?!\d)", r"\1", rs)

    # Clean a/b/c with no version. E.g. "1.0a" -> "1.0a0". Setuptools infers
    # zero.
    # PyPI stats: 245 (7.56%) better
    rs = re.sub(r"(\d+[abc])$", r"\g<1>0", rs)

    # the 'dev-rNNN' tag is a dev tag
    rs = re.sub(r"\.?(dev-r|dev\.r)\.?(\d+)$", r".dev\2", rs)

    # clean the - when used as a pre delimiter
    rs = re.sub(r"-(a|b|c)(\d+)$", r"\1\2", rs)

    # a terminal "dev" or "devel" can be changed into ".dev0"
    rs = re.sub(r"[\.\-](dev|devel)$", r".dev0", rs)

    # a terminal "dev" can be changed into ".dev0"
    rs = re.sub(r"(?![\.\-])dev$", r".dev0", rs)

    # a terminal "final" or "stable" can be removed
    rs = re.sub(r"(final|stable)$", "", rs)

    # The 'r' and the '-' tags are post release tags
    #   0.4a1.r10       ->  0.4a1.post10
    #   0.9.33-17222    ->  0.9.33.post17222
    #   0.9.33-r17222   ->  0.9.33.post17222
    rs = re.sub(r"\.?(r|-|-r)\.?(\d+)$", r".post\2", rs)

    # Clean 'r' instead of 'dev' usage:
    #   0.9.33+r17222   ->  0.9.33.dev17222
    #   1.0dev123       ->  1.0.dev123
    #   1.0.git123      ->  1.0.dev123
    #   1.0.bzr123      ->  1.0.dev123
    #   0.1a0dev.123    ->  0.1a0.dev123
    # PyPI stats:  ~150 (~4%) better
    rs = re.sub(r"\.?(dev|git|bzr)\.?(\d+)$", r".dev\2", rs)

    # Clean '.pre' (normalized from '-pre' above) instead of 'c' usage:
    #   0.2.pre1        ->  0.2c1
    #   0.2-c1          ->  0.2c1
    #   1.0preview123   ->  1.0c123
    # PyPI stats: ~21 (0.62%) better
    rs = re.sub(r"\.?(pre|preview|-c)(\d+)$", r"c\g<2>", rs)

    # Tcl/Tk uses "px" for their post release markers
    rs = re.sub(r"p(\d+)$", r".post\1", rs)

    try:
        _normalized_key(rs)
    except UnsupportedVersionError:
        rs = None
    return rs

#
#   Legacy version processing (distribute-compatible)
#

_VERSION_PART = re.compile(r'([a-z]+|\d+|[\.-])', re.I)
_VERSION_REPLACE = {
    'pre': 'c',
    'preview': 'c',
    '-': 'final-',
    'rc': 'c',
    'dev': '@',
    '': None,
    '.': None,
}


def _legacy_key(s):
    def get_parts(s):
        result = []
        for p in _VERSION_PART.split(s.lower()):
            p = _VERSION_REPLACE.get(p, p)
            if p:
                if '0' <= p[:1] <= '9':
                    p = p.zfill(8)
                else:
                    p = '*' + p
                result.append(p)
        result.append('*final')
        return result

    result = []
    for p in get_parts(s):
        if p.startswith('*'):
            if p < '*final':
                while result and result[-1] == '*final-':
                    result.pop()
            while result and result[-1] == '00000000':
                result.pop()
        result.append(p)
    return tuple(result)


class LegacyVersion(Version):
    def parse(self, s):
        return _legacy_key(s)

    @property
    def is_prerelease(self):
        result = False
        for x in self._parts:
            if (isinstance(x, string_types) and x.startswith('*') and
                x < '*final'):
                result = True
                break
        return result


class LegacyMatcher(Matcher):
    version_class = LegacyVersion

    _operators = dict(Matcher._operators)
    _operators['~='] = '_match_compatible'

    numeric_re = re.compile(r'^(\d+(\.\d+)*)')

    def _match_compatible(self, version, constraint, prefix):
        if version < constraint:
            return False
        m = self.numeric_re.match(str(constraint))
        if not m:
            logger.warning('Cannot compute compatible match for version %s '
                           'and constraint %s', version, constraint)
            return True
        s = m.groups()[0]
        if '.' in s:
            s = s.rsplit('.', 1)[0]
        return _match_prefix(version, s)

#
# Semantic versioning
#

_SEMVER_RE = re.compile(r'^(\d+)\.(\d+)\.(\d+)'
                        r'(-[a-z0-9]+(\.[a-z0-9-]+)*)?'
                        r'(\+[a-z0-9]+(\.[a-z0-9-]+)*)?$', re.I)


def is_semver(s):
    return _SEMVER_RE.match(s)


def _semantic_key(s):
    def make_tuple(s, absent):
        if s is None:
            result = (absent,)
        else:
            parts = s[1:].split('.')
            # We can't compare ints and strings on Python 3, so fudge it
            # by zero-filling numeric values to simulate a numeric comparison
            result = tuple([p.zfill(8) if p.isdigit() else p for p in parts])
        return result

    m = is_semver(s)
    if not m:
        raise UnsupportedVersionError(s)
    groups = m.groups()
    major, minor, patch = [int(i) for i in groups[:3]]
    # choose the '|' and '*' so that versions sort correctly
    pre, build = make_tuple(groups[3], '|'), make_tuple(groups[5], '*')
    return (major, minor, patch), pre, build


class SemanticVersion(Version):
    def parse(self, s):
        return _semantic_key(s)

    @property
    def is_prerelease(self):
        return self._parts[1][0] != '|'


class SemanticMatcher(Matcher):
    version_class = SemanticVersion


class VersionScheme(object):
    def __init__(self, key, matcher, suggester=None):
        self.key = key
        self.matcher = matcher
        self.suggester = suggester

    def is_valid_version(self, s):
        try:
            self.matcher.version_class(s)
            result = True
        except UnsupportedVersionError:
            result = False
        return result

    def is_valid_matcher(self, s):
        try:
            self.matcher(s)
            result = True
        except UnsupportedVersionError:
            result = False
        return result

    def is_valid_constraint_list(self, s):
        """
        Used for processing some metadata fields
        """
        return self.is_valid_matcher('dummy_name (%s)' % s)

    def suggest(self, s):
        if self.suggester is None:
            result = None
        else:
            result = self.suggester(s)
        return result

_SCHEMES = {
    'normalized': VersionScheme(_normalized_key, NormalizedMatcher,
                                _suggest_normalized_version),
    'legacy': VersionScheme(_legacy_key, LegacyMatcher, lambda self, s: s),
    'semantic': VersionScheme(_semantic_key, SemanticMatcher,
                              _suggest_semantic_version),
}

_SCHEMES['default'] = _SCHEMES['normalized']


def get_scheme(name):
    if name not in _SCHEMES:
        raise ValueError('unknown scheme name: %r' % name)
    return _SCHEMES[name]
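Tying the version classes together: `get_scheme` returns a `VersionScheme` whose matcher parses requirement strings of the form 'name (constraints)', exactly as `Matcher.__init__` above expects. A short sketch using the normalized (PEP 440) scheme, assuming the library is importable as distlib; the project name is illustrative:

    from distlib.version import get_scheme

    scheme = get_scheme('normalized')
    m = scheme.matcher('requests (>= 1.0, < 2.0)')
    print(m.match('1.4'))   # True: satisfies both constraints
    print(m.match('2.0'))   # False: excluded by '< 2.0'
    print(scheme.is_valid_version('1.2.3a1'))   # True under the PEP 440 rules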
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,976 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2013-2014 Vinay Sajip.
# Licensed to the Python Software Foundation under a contributor agreement.
# See LICENSE.txt and CONTRIBUTORS.txt.
#
from __future__ import unicode_literals

import base64
import codecs
import datetime
import distutils.util
from email import message_from_file
import hashlib
import imp
import json
import logging
import os
import posixpath
import re
import shutil
import sys
import tempfile
import zipfile

from . import __version__, DistlibException
from .compat import sysconfig, ZipFile, fsdecode, text_type, filter
from .database import InstalledDistribution
from .metadata import Metadata, METADATA_FILENAME
from .util import (FileOperator, convert_path, CSVReader, CSVWriter, Cache,
                   cached_property, get_cache_base, read_exports, tempdir)
from .version import NormalizedVersion, UnsupportedVersionError

logger = logging.getLogger(__name__)

cache = None    # created when needed

if hasattr(sys, 'pypy_version_info'):
    IMP_PREFIX = 'pp'
elif sys.platform.startswith('java'):
    IMP_PREFIX = 'jy'
elif sys.platform == 'cli':
    IMP_PREFIX = 'ip'
else:
    IMP_PREFIX = 'cp'

VER_SUFFIX = sysconfig.get_config_var('py_version_nodot')
if not VER_SUFFIX:   # pragma: no cover
    VER_SUFFIX = '%s%s' % sys.version_info[:2]
PYVER = 'py' + VER_SUFFIX
IMPVER = IMP_PREFIX + VER_SUFFIX

ARCH = distutils.util.get_platform().replace('-', '_').replace('.', '_')

ABI = sysconfig.get_config_var('SOABI')
if ABI and ABI.startswith('cpython-'):
    ABI = ABI.replace('cpython-', 'cp')
else:
    def _derive_abi():
        parts = ['cp', VER_SUFFIX]
        if sysconfig.get_config_var('Py_DEBUG'):
            parts.append('d')
        if sysconfig.get_config_var('WITH_PYMALLOC'):
            parts.append('m')
        if sysconfig.get_config_var('Py_UNICODE_SIZE') == 4:
            parts.append('u')
        return ''.join(parts)
    ABI = _derive_abi()
    del _derive_abi

FILENAME_RE = re.compile(r'''
(?P<nm>[^-]+)
-(?P<vn>\d+[^-]*)
(-(?P<bn>\d+[^-]*))?
-(?P<py>\w+\d+(\.\w+\d+)*)
-(?P<bi>\w+)
-(?P<ar>\w+)
\.whl$
''', re.IGNORECASE | re.VERBOSE)

NAME_VERSION_RE = re.compile(r'''
(?P<nm>[^-]+)
-(?P<vn>\d+[^-]*)
(-(?P<bn>\d+[^-]*))?$
''', re.IGNORECASE | re.VERBOSE)

SHEBANG_RE = re.compile(br'\s*#![^\r\n]*')
SHEBANG_DETAIL_RE = re.compile(br'^(\s*#!("[^"]+"|\S+))\s+(.*)$')
SHEBANG_PYTHON = b'#!python'
SHEBANG_PYTHONW = b'#!pythonw'

if os.sep == '/':
    to_posix = lambda o: o
else:
    to_posix = lambda o: o.replace(os.sep, '/')


class Mounter(object):
    def __init__(self):
        self.impure_wheels = {}
        self.libs = {}

    def add(self, pathname, extensions):
        self.impure_wheels[pathname] = extensions
        self.libs.update(extensions)

    def remove(self, pathname):
        extensions = self.impure_wheels.pop(pathname)
        for k, v in extensions:
            if k in self.libs:
                del self.libs[k]

    def find_module(self, fullname, path=None):
        if fullname in self.libs:
            result = self
        else:
            result = None
        return result

    def load_module(self, fullname):
        if fullname in sys.modules:
            result = sys.modules[fullname]
        else:
            if fullname not in self.libs:
                raise ImportError('unable to find extension for %s' % fullname)
            result = imp.load_dynamic(fullname, self.libs[fullname])
            result.__loader__ = self
            parts = fullname.rsplit('.', 1)
            if len(parts) > 1:
                result.__package__ = parts[0]
        return result

_hook = Mounter()


class Wheel(object):
    """
    Class to build and install from Wheel files (PEP 427).
    """

    wheel_version = (1, 1)
    hash_kind = 'sha256'

    def __init__(self, filename=None, sign=False, verify=False):
        """
        Initialise an instance using a (valid) filename.
        """
        self.sign = sign
        self.should_verify = verify
        self.buildver = ''
        self.pyver = [PYVER]
        self.abi = ['none']
        self.arch = ['any']
        self.dirname = os.getcwd()
        if filename is None:
            self.name = 'dummy'
            self.version = '0.1'
            self._filename = self.filename
        else:
            m = NAME_VERSION_RE.match(filename)
            if m:
                info = m.groupdict('')
                self.name = info['nm']
                # Reinstate the local version separator
                self.version = info['vn'].replace('_', '-')
                self.buildver = info['bn']
                self._filename = self.filename
            else:
                dirname, filename = os.path.split(filename)
                m = FILENAME_RE.match(filename)
                if not m:
                    raise DistlibException('Invalid name or '
                                           'filename: %r' % filename)
                if dirname:
                    self.dirname = os.path.abspath(dirname)
                self._filename = filename
                info = m.groupdict('')
                self.name = info['nm']
                self.version = info['vn']
                self.buildver = info['bn']
                self.pyver = info['py'].split('.')
                self.abi = info['bi'].split('.')
                self.arch = info['ar'].split('.')

    @property
    def filename(self):
        """
        Build and return a filename from the various components.
        """
        if self.buildver:
            buildver = '-' + self.buildver
        else:
            buildver = ''
        pyver = '.'.join(self.pyver)
        abi = '.'.join(self.abi)
        arch = '.'.join(self.arch)
        # replace - with _ as a local version separator
        version = self.version.replace('-', '_')
        return '%s-%s%s-%s-%s-%s.whl' % (self.name, version, buildver,
                                         pyver, abi, arch)

    @property
    def exists(self):
        path = os.path.join(self.dirname, self.filename)
        return os.path.isfile(path)

    @property
    def tags(self):
        for pyver in self.pyver:
            for abi in self.abi:
                for arch in self.arch:
                    yield pyver, abi, arch

    @cached_property
    def metadata(self):
        pathname = os.path.join(self.dirname, self.filename)
        name_ver = '%s-%s' % (self.name, self.version)
        info_dir = '%s.dist-info' % name_ver
        wrapper = codecs.getreader('utf-8')
        with ZipFile(pathname, 'r') as zf:
            wheel_metadata = self.get_wheel_metadata(zf)
            wv = wheel_metadata['Wheel-Version'].split('.', 1)
            file_version = tuple([int(i) for i in wv])
            if file_version < (1, 1):
                fn = 'METADATA'
            else:
                fn = METADATA_FILENAME
            try:
                metadata_filename = posixpath.join(info_dir, fn)
                with zf.open(metadata_filename) as bf:
                    wf = wrapper(bf)
                    result = Metadata(fileobj=wf)
            except KeyError:
                raise ValueError('Invalid wheel, because %s is '
                                 'missing' % fn)
        return result

    def get_wheel_metadata(self, zf):
        name_ver = '%s-%s' % (self.name, self.version)
        info_dir = '%s.dist-info' % name_ver
        metadata_filename = posixpath.join(info_dir, 'WHEEL')
        with zf.open(metadata_filename) as bf:
            wf = codecs.getreader('utf-8')(bf)
            message = message_from_file(wf)
        return dict(message)

    @cached_property
    def info(self):
        pathname = os.path.join(self.dirname, self.filename)
        with ZipFile(pathname, 'r') as zf:
            result = self.get_wheel_metadata(zf)
        return result

    def process_shebang(self, data):
        m = SHEBANG_RE.match(data)
        if m:
            end = m.end()
            shebang, data_after_shebang = data[:end], data[end:]
            # Preserve any arguments after the interpreter
            if b'pythonw' in shebang.lower():
                shebang_python = SHEBANG_PYTHONW
            else:
                shebang_python = SHEBANG_PYTHON
            m = SHEBANG_DETAIL_RE.match(shebang)
            if m:
                args = b' ' + m.groups()[-1]
            else:
                args = b''
            shebang = shebang_python + args
            data = shebang + data_after_shebang
        else:
            cr = data.find(b'\r')
            lf = data.find(b'\n')
            if cr < 0 or cr > lf:
                term = b'\n'
            else:
                if data[cr:cr + 2] == b'\r\n':
                    term = b'\r\n'
                else:
                    term = b'\r'
            data = SHEBANG_PYTHON + term + data
        return data

    def get_hash(self, data, hash_kind=None):
        if hash_kind is None:
            hash_kind = self.hash_kind
        try:
            hasher = getattr(hashlib, hash_kind)
        except AttributeError:
            raise DistlibException('Unsupported hash algorithm: %r' % hash_kind)
        result = hasher(data).digest()
        result = base64.urlsafe_b64encode(result).rstrip(b'=').decode('ascii')
        return hash_kind, result

    def write_record(self, records, record_path, base):
        with CSVWriter(record_path) as writer:
            for row in records:
                writer.writerow(row)
            p = to_posix(os.path.relpath(record_path, base))
            writer.writerow((p, '', ''))

    def write_records(self, info, libdir, archive_paths):
        records = []
        distinfo, info_dir = info
        hasher = getattr(hashlib, self.hash_kind)
        for ap, p in archive_paths:
            with open(p, 'rb') as f:
                data = f.read()
            digest = '%s=%s' % self.get_hash(data)
            size = os.path.getsize(p)
            records.append((ap, digest, size))

        p = os.path.join(distinfo, 'RECORD')
        self.write_record(records, p, libdir)
        ap = to_posix(os.path.join(info_dir, 'RECORD'))
        archive_paths.append((ap, p))

    def build_zip(self, pathname, archive_paths):
        with ZipFile(pathname, 'w', zipfile.ZIP_DEFLATED) as zf:
            for ap, p in archive_paths:
                logger.debug('Wrote %s to %s in wheel', p, ap)
                zf.write(p, ap)

    def build(self, paths, tags=None, wheel_version=None):
        """
        Build a wheel from files in specified paths, and use any specified tags
        when determining the name of the wheel.
        """
        if tags is None:
            tags = {}

        libkey = list(filter(lambda o: o in paths, ('purelib', 'platlib')))[0]
        if libkey == 'platlib':
            is_pure = 'false'
            default_pyver = [IMPVER]
            default_abi = [ABI]
            default_arch = [ARCH]
        else:
            is_pure = 'true'
            default_pyver = [PYVER]
            default_abi = ['none']
            default_arch = ['any']

        self.pyver = tags.get('pyver', default_pyver)
        self.abi = tags.get('abi', default_abi)
        self.arch = tags.get('arch', default_arch)

        libdir = paths[libkey]

        name_ver = '%s-%s' % (self.name, self.version)
        data_dir = '%s.data' % name_ver
        info_dir = '%s.dist-info' % name_ver

        archive_paths = []

        # First, stuff which is not in site-packages
        for key in ('data', 'headers', 'scripts'):
            if key not in paths:
                continue
            path = paths[key]
            if os.path.isdir(path):
                for root, dirs, files in os.walk(path):
                    for fn in files:
                        p = fsdecode(os.path.join(root, fn))
                        rp = os.path.relpath(p, path)
                        ap = to_posix(os.path.join(data_dir, key, rp))
                        archive_paths.append((ap, p))
                        if key == 'scripts' and not p.endswith('.exe'):
                            with open(p, 'rb') as f:
                                data = f.read()
                            data = self.process_shebang(data)
                            with open(p, 'wb') as f:
                                f.write(data)

        # Now, stuff which is in site-packages, other than the
        # distinfo stuff.
        path = libdir
        distinfo = None
        for root, dirs, files in os.walk(path):
            if root == path:
                # At the top level only, save distinfo for later
                # and skip it for now
                for i, dn in enumerate(dirs):
                    dn = fsdecode(dn)
                    if dn.endswith('.dist-info'):
                        distinfo = os.path.join(root, dn)
                        del dirs[i]
                        break
                assert distinfo, '.dist-info directory expected, not found'

            for fn in files:
                # comment out next suite to leave .pyc files in
                if fsdecode(fn).endswith(('.pyc', '.pyo')):
                    continue
                p = os.path.join(root, fn)
                rp = to_posix(os.path.relpath(p, path))
                archive_paths.append((rp, p))

        # Now distinfo. Assumed to be flat, i.e. os.listdir is enough.
        files = os.listdir(distinfo)
        for fn in files:
            if fn not in ('RECORD', 'INSTALLER', 'SHARED', 'WHEEL'):
                p = fsdecode(os.path.join(distinfo, fn))
                ap = to_posix(os.path.join(info_dir, fn))
                archive_paths.append((ap, p))

        wheel_metadata = [
            'Wheel-Version: %d.%d' % (wheel_version or self.wheel_version),
            'Generator: distlib %s' % __version__,
            'Root-Is-Purelib: %s' % is_pure,
        ]
        for pyver, abi, arch in self.tags:
            wheel_metadata.append('Tag: %s-%s-%s' % (pyver, abi, arch))
        p = os.path.join(distinfo, 'WHEEL')
        with open(p, 'w') as f:
            f.write('\n'.join(wheel_metadata))
        ap = to_posix(os.path.join(info_dir, 'WHEEL'))
        archive_paths.append((ap, p))

        # Now, at last, RECORD.
        # Paths in here are archive paths - nothing else makes sense.
        self.write_records((distinfo, info_dir), libdir, archive_paths)
        # Now, ready to build the zip file
        pathname = os.path.join(self.dirname, self.filename)
        self.build_zip(pathname, archive_paths)
        return pathname

    def install(self, paths, maker, **kwargs):
        """
        Install a wheel to the specified paths. If kwarg ``warner`` is
        specified, it should be a callable, which will be called with two
        tuples indicating the wheel version of this software and the wheel
        version in the file, if there is a discrepancy in the versions.
        This can be used to issue any warnings or raise any exceptions.
        If kwarg ``lib_only`` is True, only the purelib/platlib files are
        installed, and the headers, scripts, data and dist-info metadata are
        not written.

        The return value is a :class:`InstalledDistribution` instance unless
        ``options.lib_only`` is True, in which case the return value is ``None``.
        """

        dry_run = maker.dry_run
        warner = kwargs.get('warner')
        lib_only = kwargs.get('lib_only', False)

        pathname = os.path.join(self.dirname, self.filename)
        name_ver = '%s-%s' % (self.name, self.version)
        data_dir = '%s.data' % name_ver
        info_dir = '%s.dist-info' % name_ver

        metadata_name = posixpath.join(info_dir, METADATA_FILENAME)
        wheel_metadata_name = posixpath.join(info_dir, 'WHEEL')
        record_name = posixpath.join(info_dir, 'RECORD')

        wrapper = codecs.getreader('utf-8')

        with ZipFile(pathname, 'r') as zf:
            with zf.open(wheel_metadata_name) as bwf:
                wf = wrapper(bwf)
                message = message_from_file(wf)
            wv = message['Wheel-Version'].split('.', 1)
            file_version = tuple([int(i) for i in wv])
            if (file_version != self.wheel_version) and warner:
                warner(self.wheel_version, file_version)

            if message['Root-Is-Purelib'] == 'true':
                libdir = paths['purelib']
            else:
                libdir = paths['platlib']

            records = {}
            with zf.open(record_name) as bf:
                with CSVReader(stream=bf) as reader:
                    for row in reader:
                        p = row[0]
                        records[p] = row

            data_pfx = posixpath.join(data_dir, '')
            info_pfx = posixpath.join(info_dir, '')
            script_pfx = posixpath.join(data_dir, 'scripts', '')

            # make a new instance rather than a copy of maker's,
            # as we mutate it
            fileop = FileOperator(dry_run=dry_run)
            fileop.record = True    # so we can rollback if needed

            bc = not sys.dont_write_bytecode    # Double negatives. Lovely!

            outfiles = []   # for RECORD writing

            # for script copying/shebang processing
            workdir = tempfile.mkdtemp()
            # set target dir later
            # we default add_launchers to False, as the
            # Python Launcher should be used instead
            maker.source_dir = workdir
            maker.target_dir = None
            try:
                for zinfo in zf.infolist():
                    arcname = zinfo.filename
                    if isinstance(arcname, text_type):
                        u_arcname = arcname
                    else:
                        u_arcname = arcname.decode('utf-8')
                    # The signature file won't be in RECORD,
                    # and we currently don't do anything with it
                    if u_arcname.endswith('/RECORD.jws'):
                        continue
                    row = records[u_arcname]
                    if row[2] and str(zinfo.file_size) != row[2]:
                        raise DistlibException('size mismatch for '
                                               '%s' % u_arcname)
                    if row[1]:
                        kind, value = row[1].split('=', 1)
                        with zf.open(arcname) as bf:
                            data = bf.read()
                        _, digest = self.get_hash(data, kind)
                        if digest != value:
                            raise DistlibException('digest mismatch for '
                                                   '%s' % arcname)

                    if lib_only and u_arcname.startswith((info_pfx, data_pfx)):
                        logger.debug('lib_only: skipping %s', u_arcname)
                        continue
                    is_script = (u_arcname.startswith(script_pfx)
                                 and not u_arcname.endswith('.exe'))

                    if u_arcname.startswith(data_pfx):
                        _, where, rp = u_arcname.split('/', 2)
                        outfile = os.path.join(paths[where], convert_path(rp))
                    else:
                        # meant for site-packages.
                        if u_arcname in (wheel_metadata_name, record_name):
                            continue
                        outfile = os.path.join(libdir, convert_path(u_arcname))
                    if not is_script:
                        with zf.open(arcname) as bf:
                            fileop.copy_stream(bf, outfile)
                        outfiles.append(outfile)
                        # Double check the digest of the written file
                        if not dry_run and row[1]:
                            with open(outfile, 'rb') as bf:
                                data = bf.read()
                                _, newdigest = self.get_hash(data, kind)
                                if newdigest != digest:
                                    raise DistlibException('digest mismatch '
                                                           'on write for '
                                                           '%s' % outfile)
                        if bc and outfile.endswith('.py'):
                            try:
                                pyc = fileop.byte_compile(outfile)
                                outfiles.append(pyc)
                            except Exception:
                                # Don't give up if byte-compilation fails,
                                # but log it and perhaps warn the user
                                logger.warning('Byte-compilation failed',
                                               exc_info=True)
                    else:
                        fn = os.path.basename(convert_path(arcname))
                        workname = os.path.join(workdir, fn)
                        with zf.open(arcname) as bf:
                            fileop.copy_stream(bf, workname)

                        dn, fn = os.path.split(outfile)
                        maker.target_dir = dn
                        filenames = maker.make(fn)
                        fileop.set_executable_mode(filenames)
                        outfiles.extend(filenames)

                if lib_only:
                    logger.debug('lib_only: returning None')
                    dist = None
                else:
                    # Generate scripts

                    # Try to get pydist.json so we can see if there are
                    # any commands to generate. If this fails (e.g. because
                    # of a legacy wheel), log a warning but don't give up.
                    commands = None
                    file_version = self.info['Wheel-Version']
                    if file_version == '1.0':
                        # Use legacy info
                        ep = posixpath.join(info_dir, 'entry_points.txt')
                        try:
                            with zf.open(ep) as bwf:
                                epdata = read_exports(bwf)
                            commands = {}
                            for key in ('console', 'gui'):
                                k = '%s_scripts' % key
                                if k in epdata:
                                    commands['wrap_%s' % key] = d = {}
                                    for v in epdata[k].values():
                                        s = '%s:%s' % (v.prefix, v.suffix)
                                        if v.flags:
                                            s += ' %s' % v.flags
                                        d[v.name] = s
                        except Exception:
                            logger.warning('Unable to read legacy script '
                                           'metadata, so cannot generate '
                                           'scripts')
                    else:
                        try:
                            with zf.open(metadata_name) as bwf:
                                wf = wrapper(bwf)
                                commands = json.load(wf).get('extensions')
                                if commands:
|
||||||
|
commands = commands.get('python.commands')
|
||||||
|
except Exception:
|
||||||
|
logger.warning('Unable to read JSON metadata, so '
|
||||||
|
'cannot generate scripts')
|
||||||
|
if commands:
|
||||||
|
console_scripts = commands.get('wrap_console', {})
|
||||||
|
gui_scripts = commands.get('wrap_gui', {})
|
||||||
|
if console_scripts or gui_scripts:
|
||||||
|
script_dir = paths.get('scripts', '')
|
||||||
|
if not os.path.isdir(script_dir):
|
||||||
|
raise ValueError('Valid script path not '
|
||||||
|
'specified')
|
||||||
|
maker.target_dir = script_dir
|
||||||
|
for k, v in console_scripts.items():
|
||||||
|
script = '%s = %s' % (k, v)
|
||||||
|
filenames = maker.make(script)
|
||||||
|
fileop.set_executable_mode(filenames)
|
||||||
|
|
||||||
|
if gui_scripts:
|
||||||
|
options = {'gui': True }
|
||||||
|
for k, v in gui_scripts.items():
|
||||||
|
script = '%s = %s' % (k, v)
|
||||||
|
filenames = maker.make(script, options)
|
||||||
|
fileop.set_executable_mode(filenames)
|
||||||
|
|
||||||
|
p = os.path.join(libdir, info_dir)
|
||||||
|
dist = InstalledDistribution(p)
|
||||||
|
|
||||||
|
# Write SHARED
|
||||||
|
paths = dict(paths) # don't change passed in dict
|
||||||
|
del paths['purelib']
|
||||||
|
del paths['platlib']
|
||||||
|
paths['lib'] = libdir
|
||||||
|
p = dist.write_shared_locations(paths, dry_run)
|
||||||
|
if p:
|
||||||
|
outfiles.append(p)
|
||||||
|
|
||||||
|
# Write RECORD
|
||||||
|
dist.write_installed_files(outfiles, paths['prefix'],
|
||||||
|
dry_run)
|
||||||
|
return dist
|
||||||
|
except Exception: # pragma: no cover
|
||||||
|
logger.exception('installation failed.')
|
||||||
|
fileop.rollback()
|
||||||
|
raise
|
||||||
|
finally:
|
||||||
|
shutil.rmtree(workdir)
|
||||||
|
|
||||||
|
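# A minimal sketch of driving the install() method above. The wheel filename
# and the target layout here are hypothetical; install() fills in the maker's
# source/target dirs itself.
from distlib.scripts import ScriptMaker
from distlib.wheel import Wheel

layout = {
    'prefix': '/tmp/target',
    'purelib': '/tmp/target/lib/site-packages',
    'platlib': '/tmp/target/lib/site-packages',
    'scripts': '/tmp/target/bin',
    'headers': '/tmp/target/include',
    'data': '/tmp/target/data',
}

def version_warner(software_version, file_version):
    # invoked only when the wheel's Wheel-Version differs from ours
    print('wheel version skew: %r vs %r' % (software_version, file_version))

wheel = Wheel('example-0.1.0-py2.py3-none-any.whl')  # hypothetical file
dist = wheel.install(layout, ScriptMaker(None, None), warner=version_warner)
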
    def _get_dylib_cache(self):
        global cache
        if cache is None:
            # Use native string to avoid issues on 2.x: see Python #20140.
            base = os.path.join(get_cache_base(), str('dylib-cache'),
                                sys.version[:3])
            cache = Cache(base)
        return cache

    def _get_extensions(self):
        pathname = os.path.join(self.dirname, self.filename)
        name_ver = '%s-%s' % (self.name, self.version)
        info_dir = '%s.dist-info' % name_ver
        arcname = posixpath.join(info_dir, 'EXTENSIONS')
        wrapper = codecs.getreader('utf-8')
        result = []
        with ZipFile(pathname, 'r') as zf:
            try:
                with zf.open(arcname) as bf:
                    wf = wrapper(bf)
                    extensions = json.load(wf)
                    cache = self._get_dylib_cache()
                    prefix = cache.prefix_to_dir(pathname)
                    cache_base = os.path.join(cache.base, prefix)
                    if not os.path.isdir(cache_base):
                        os.makedirs(cache_base)
                    for name, relpath in extensions.items():
                        dest = os.path.join(cache_base, convert_path(relpath))
                        if not os.path.exists(dest):
                            extract = True
                        else:
                            file_time = os.stat(dest).st_mtime
                            file_time = datetime.datetime.fromtimestamp(file_time)
                            info = zf.getinfo(relpath)
                            wheel_time = datetime.datetime(*info.date_time)
                            extract = wheel_time > file_time
                        if extract:
                            zf.extract(relpath, cache_base)
                        result.append((name, dest))
            except KeyError:
                pass
        return result

    def is_compatible(self):
        """
        Determine if a wheel is compatible with the running system.
        """
        return is_compatible(self)

    def is_mountable(self):
        """
        Determine if a wheel is asserted as mountable by its metadata.
        """
        return True  # for now - metadata details TBD

    def mount(self, append=False):
        pathname = os.path.abspath(os.path.join(self.dirname, self.filename))
        if not self.is_compatible():
            msg = 'Wheel %s not compatible with this Python.' % pathname
            raise DistlibException(msg)
        if not self.is_mountable():
            msg = 'Wheel %s is marked as not mountable.' % pathname
            raise DistlibException(msg)
        if pathname in sys.path:
            logger.debug('%s already in path', pathname)
        else:
            if append:
                sys.path.append(pathname)
            else:
                sys.path.insert(0, pathname)
            extensions = self._get_extensions()
            if extensions:
                if _hook not in sys.meta_path:
                    sys.meta_path.append(_hook)
                _hook.add(pathname, extensions)

    def unmount(self):
        pathname = os.path.abspath(os.path.join(self.dirname, self.filename))
        if pathname not in sys.path:
            logger.debug('%s not in path', pathname)
        else:
            sys.path.remove(pathname)
            if pathname in _hook.impure_wheels:
                _hook.remove(pathname)
            if not _hook.impure_wheels:
                if _hook in sys.meta_path:
                    sys.meta_path.remove(_hook)

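# A minimal sketch of mount()/unmount() above; the wheel filename and the
# imported module name are hypothetical.
from distlib.wheel import Wheel

w = Wheel('example-0.1.0-py2.py3-none-any.whl')
if w.is_compatible() and w.is_mountable():
    w.mount()        # wheel goes to the front of sys.path (append=False)
    import example   # hypothetical package shipped inside the wheel
    w.unmount()      # removes it, and the meta_path hook if now unused
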
    def verify(self):
        pathname = os.path.join(self.dirname, self.filename)
        name_ver = '%s-%s' % (self.name, self.version)
        data_dir = '%s.data' % name_ver
        info_dir = '%s.dist-info' % name_ver

        metadata_name = posixpath.join(info_dir, METADATA_FILENAME)
        wheel_metadata_name = posixpath.join(info_dir, 'WHEEL')
        record_name = posixpath.join(info_dir, 'RECORD')

        wrapper = codecs.getreader('utf-8')

        with ZipFile(pathname, 'r') as zf:
            with zf.open(wheel_metadata_name) as bwf:
                wf = wrapper(bwf)
                message = message_from_file(wf)
            wv = message['Wheel-Version'].split('.', 1)
            file_version = tuple([int(i) for i in wv])
            # TODO version verification

            records = {}
            with zf.open(record_name) as bf:
                with CSVReader(stream=bf) as reader:
                    for row in reader:
                        p = row[0]
                        records[p] = row

            for zinfo in zf.infolist():
                arcname = zinfo.filename
                if isinstance(arcname, text_type):
                    u_arcname = arcname
                else:
                    u_arcname = arcname.decode('utf-8')
                if '..' in u_arcname:
                    raise DistlibException('invalid entry in '
                                           'wheel: %r' % u_arcname)

                # The signature file won't be in RECORD,
                # and we don't currently do anything with it
                if u_arcname.endswith('/RECORD.jws'):
                    continue
                row = records[u_arcname]
                if row[2] and str(zinfo.file_size) != row[2]:
                    raise DistlibException('size mismatch for '
                                           '%s' % u_arcname)
                if row[1]:
                    kind, value = row[1].split('=', 1)
                    with zf.open(arcname) as bf:
                        data = bf.read()
                    _, digest = self.get_hash(data, kind)
                    if digest != value:
                        raise DistlibException('digest mismatch for '
                                               '%s' % arcname)

    def update(self, modifier, dest_dir=None, **kwargs):
        """
        Update the contents of a wheel in a generic way. The modifier should
        be a callable which expects a dictionary argument: its keys are
        archive-entry paths, and its values are absolute filesystem paths
        where the contents of the corresponding archive entries can be found.
        The modifier is free to change the contents of the files pointed to,
        add new entries and remove entries, before returning. This method will
        extract the entire contents of the wheel to a temporary location, call
        the modifier, and then use the passed (and possibly updated)
        dictionary to write a new wheel. If ``dest_dir`` is specified, the new
        wheel is written there -- otherwise, the original wheel is overwritten.

        The modifier should return True if it updated the wheel, else False.
        This method returns the same value the modifier returns.
        """

        def get_version(path_map, info_dir):
            version = path = None
            key = '%s/%s' % (info_dir, METADATA_FILENAME)
            if key not in path_map:
                key = '%s/PKG-INFO' % info_dir
            if key in path_map:
                path = path_map[key]
                version = Metadata(path=path).version
            return version, path

        def update_version(version, path):
            updated = None
            try:
                v = NormalizedVersion(version)
                i = version.find('-')
                if i < 0:
                    updated = '%s+1' % version
                else:
                    parts = [int(s) for s in version[i + 1:].split('.')]
                    parts[-1] += 1
                    updated = '%s+%s' % (version[:i],
                                         '.'.join(str(i) for i in parts))
            except UnsupportedVersionError:
                logger.debug('Cannot update non-compliant (PEP-440) '
                             'version %r', version)
            if updated:
                md = Metadata(path=path)
                md.version = updated
                legacy = not path.endswith(METADATA_FILENAME)
                md.write(path=path, legacy=legacy)
                logger.debug('Version updated from %r to %r', version,
                             updated)

        pathname = os.path.join(self.dirname, self.filename)
        name_ver = '%s-%s' % (self.name, self.version)
        info_dir = '%s.dist-info' % name_ver
        record_name = posixpath.join(info_dir, 'RECORD')
        with tempdir() as workdir:
            with ZipFile(pathname, 'r') as zf:
                path_map = {}
                for zinfo in zf.infolist():
                    arcname = zinfo.filename
                    if isinstance(arcname, text_type):
                        u_arcname = arcname
                    else:
                        u_arcname = arcname.decode('utf-8')
                    if u_arcname == record_name:
                        continue
                    if '..' in u_arcname:
                        raise DistlibException('invalid entry in '
                                               'wheel: %r' % u_arcname)
                    zf.extract(zinfo, workdir)
                    path = os.path.join(workdir, convert_path(u_arcname))
                    path_map[u_arcname] = path

            # Remember the version.
            original_version, _ = get_version(path_map, info_dir)
            # Files extracted. Call the modifier.
            modified = modifier(path_map, **kwargs)
            if modified:
                # Something changed - need to build a new wheel.
                current_version, path = get_version(path_map, info_dir)
                if current_version and (current_version == original_version):
                    # Add or update local version to signify changes.
                    update_version(current_version, path)
                # Decide where the new wheel goes.
                if dest_dir is None:
                    fd, newpath = tempfile.mkstemp(suffix='.whl',
                                                   prefix='wheel-update-',
                                                   dir=workdir)
                    os.close(fd)
                else:
                    if not os.path.isdir(dest_dir):
                        raise DistlibException('Not a directory: %r' % dest_dir)
                    newpath = os.path.join(dest_dir, self.filename)
                archive_paths = list(path_map.items())
                distinfo = os.path.join(workdir, info_dir)
                info = distinfo, info_dir
                self.write_records(info, workdir, archive_paths)
                self.build_zip(newpath, archive_paths)
                if dest_dir is None:
                    shutil.copyfile(newpath, pathname)
        return modified

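# A minimal sketch of an update() modifier; the entry selection is
# hypothetical. path_map maps archive entry names to extracted files, and
# returning True makes update() rebuild (and here overwrite) the wheel.
from distlib.wheel import Wheel

def append_note(path_map):
    changed = False
    for arcname, path in path_map.items():
        if arcname.endswith('.txt'):      # hypothetical choice of entries
            with open(path, 'a') as f:
                f.write('# amended\n')
            changed = True
    return changed

modified = Wheel('example-0.1.0-py2.py3-none-any.whl').update(append_note)
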
def compatible_tags():
    """
    Return (pyver, abi, arch) tuples compatible with this Python.
    """
    versions = [VER_SUFFIX]
    major = VER_SUFFIX[0]
    for minor in range(sys.version_info[1] - 1, -1, -1):
        versions.append(''.join([major, str(minor)]))

    abis = []
    for suffix, _, _ in imp.get_suffixes():
        if suffix.startswith('.abi'):
            abis.append(suffix.split('.', 2)[1])
    abis.sort()
    if ABI != 'none':
        abis.insert(0, ABI)
    abis.append('none')
    result = []

    arches = [ARCH]
    if sys.platform == 'darwin':
        m = re.match(r'(\w+)_(\d+)_(\d+)_(\w+)$', ARCH)
        if m:
            name, major, minor, arch = m.groups()
            minor = int(minor)
            matches = [arch]
            if arch in ('i386', 'ppc'):
                matches.append('fat')
            if arch in ('i386', 'ppc', 'x86_64'):
                matches.append('fat3')
            if arch in ('ppc64', 'x86_64'):
                matches.append('fat64')
            if arch in ('i386', 'x86_64'):
                matches.append('intel')
            if arch in ('i386', 'x86_64', 'intel', 'ppc', 'ppc64'):
                matches.append('universal')
            while minor >= 0:
                for match in matches:
                    s = '%s_%s_%s_%s' % (name, major, minor, match)
                    if s != ARCH:  # already there
                        arches.append(s)
                minor -= 1

    # Most specific - our Python version, ABI and arch
    for abi in abis:
        for arch in arches:
            result.append((''.join((IMP_PREFIX, versions[0])), abi, arch))

    # where no ABI / arch dependency, but IMP_PREFIX dependency
    for i, version in enumerate(versions):
        result.append((''.join((IMP_PREFIX, version)), 'none', 'any'))
        if i == 0:
            result.append((''.join((IMP_PREFIX, version[0])), 'none', 'any'))

    # no IMP_PREFIX, ABI or arch dependency
    for i, version in enumerate(versions):
        result.append((''.join(('py', version)), 'none', 'any'))
        if i == 0:
            result.append((''.join(('py', version[0])), 'none', 'any'))
    return set(result)


COMPATIBLE_TAGS = compatible_tags()

del compatible_tags


def is_compatible(wheel, tags=None):
    if not isinstance(wheel, Wheel):
        wheel = Wheel(wheel)  # assume it's a filename
    result = False
    if tags is None:
        tags = COMPATIBLE_TAGS
    for ver, abi, arch in tags:
        if ver in wheel.pyver and abi in wheel.abi and arch in wheel.arch:
            result = True
            break
    return result
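
# A minimal sketch of the helpers above; the filename is hypothetical.
from distlib.wheel import COMPATIBLE_TAGS, is_compatible

print(len(COMPATIBLE_TAGS))  # (pyver, abi, arch) triples for this Python
print(is_compatible('example-0.1.0-py2.py3-none-any.whl'))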
@ -0,0 +1,23 @@
"""
HTML parsing library based on the WHATWG "HTML5"
specification. The parser is designed to be compatible with existing
HTML found in the wild and implements well-defined error recovery that
is largely compatible with modern desktop web browsers.

Example usage:

import html5lib
f = open("my_document.html")
tree = html5lib.parse(f)
"""

from __future__ import absolute_import, division, unicode_literals

from .html5parser import HTMLParser, parse, parseFragment
from .treebuilders import getTreeBuilder
from .treewalkers import getTreeWalker
from .serializer import serialize

__all__ = ["HTMLParser", "parse", "parseFragment", "getTreeBuilder",
           "getTreeWalker", "serialize"]
__version__ = "0.999"
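
# A minimal sketch tying the exports together (assuming the default "etree"
# tree type): parse a fragmentary document, then re-serialize it.
import html5lib

document = html5lib.parse("<p>Hello<br>world!")
print(html5lib.serialize(document))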
File diff suppressed because it is too large
@ -0,0 +1,12 @@
from __future__ import absolute_import, division, unicode_literals


class Filter(object):
    def __init__(self, source):
        self.source = source

    def __iter__(self):
        return iter(self.source)

    def __getattr__(self, name):
        return getattr(self.source, name)
@ -0,0 +1,20 @@
from __future__ import absolute_import, division, unicode_literals

from . import _base

try:
    from collections import OrderedDict
except ImportError:
    from ordereddict import OrderedDict


class Filter(_base.Filter):
    def __iter__(self):
        for token in _base.Filter.__iter__(self):
            if token["type"] in ("StartTag", "EmptyTag"):
                attrs = OrderedDict()
                for name, value in sorted(token["data"].items(),
                                          key=lambda x: x[0]):
                    attrs[name] = value
                token["data"] = attrs
            yield token
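
# A minimal sketch of this filter in a tree-walker pipeline (in the vendored
# copy the import prefix would be pip._vendor.html5lib).
import html5lib
from html5lib.filters.alphabeticalattributes import Filter as AttrSortFilter

doc = html5lib.parse('<img src="a.png" alt="demo">')
for token in AttrSortFilter(html5lib.getTreeWalker("etree")(doc)):
    pass  # StartTag/EmptyTag tokens now carry alphabetically ordered attrs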
@ -0,0 +1,65 @@
from __future__ import absolute_import, division, unicode_literals

from . import _base


class Filter(_base.Filter):
    def __init__(self, source, encoding):
        _base.Filter.__init__(self, source)
        self.encoding = encoding

    def __iter__(self):
        state = "pre_head"
        meta_found = (self.encoding is None)
        pending = []

        for token in _base.Filter.__iter__(self):
            type = token["type"]
            if type == "StartTag":
                if token["name"].lower() == "head":
                    state = "in_head"

            elif type == "EmptyTag":
                if token["name"].lower() == "meta":
                    # replace charset with actual encoding
                    has_http_equiv_content_type = False
                    for (namespace, name), value in token["data"].items():
                        if namespace is not None:
                            continue
                        elif name.lower() == 'charset':
                            token["data"][(namespace, name)] = self.encoding
                            meta_found = True
                            break
                        elif name == 'http-equiv' and value.lower() == 'content-type':
                            has_http_equiv_content_type = True
                    else:
                        if has_http_equiv_content_type and (None, "content") in token["data"]:
                            token["data"][(None, "content")] = 'text/html; charset=%s' % self.encoding
                            meta_found = True

                elif token["name"].lower() == "head" and not meta_found:
                    # insert meta into empty head
                    yield {"type": "StartTag", "name": "head",
                           "data": token["data"]}
                    yield {"type": "EmptyTag", "name": "meta",
                           "data": {(None, "charset"): self.encoding}}
                    yield {"type": "EndTag", "name": "head"}
                    meta_found = True
                    continue

            elif type == "EndTag":
                if token["name"].lower() == "head" and pending:
                    # insert meta into head (if necessary) and flush pending queue
                    yield pending.pop(0)
                    if not meta_found:
                        yield {"type": "EmptyTag", "name": "meta",
                               "data": {(None, "charset"): self.encoding}}
                    while pending:
                        yield pending.pop(0)
                    meta_found = True
                    state = "post_head"

            if state == "in_head":
                pending.append(token)
            else:
                yield token
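
# A minimal sketch: rewrite a walked document so it declares utf-8.
import html5lib
from html5lib.filters.inject_meta_charset import Filter as MetaCharsetFilter

doc = html5lib.parse("<head></head><p>hi")
tokens = list(MetaCharsetFilter(html5lib.getTreeWalker("etree")(doc), "utf-8"))
# the head now contains an EmptyTag token for <meta charset="utf-8">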
@ -0,0 +1,93 @@
from __future__ import absolute_import, division, unicode_literals

from gettext import gettext
_ = gettext

from . import _base
from ..constants import cdataElements, rcdataElements, voidElements

from ..constants import spaceCharacters
spaceCharacters = "".join(spaceCharacters)


class LintError(Exception):
    pass


class Filter(_base.Filter):
    def __iter__(self):
        open_elements = []
        contentModelFlag = "PCDATA"
        for token in _base.Filter.__iter__(self):
            type = token["type"]
            if type in ("StartTag", "EmptyTag"):
                name = token["name"]
                if contentModelFlag != "PCDATA":
                    raise LintError(_("StartTag not in PCDATA content model flag: %(tag)s") % {"tag": name})
                if not isinstance(name, str):
                    raise LintError(_("Tag name is not a string: %(tag)r") % {"tag": name})
                if not name:
                    raise LintError(_("Empty tag name"))
                if type == "StartTag" and name in voidElements:
                    raise LintError(_("Void element reported as StartTag token: %(tag)s") % {"tag": name})
                elif type == "EmptyTag" and name not in voidElements:
                    raise LintError(_("Non-void element reported as EmptyTag token: %(tag)s") % {"tag": token["name"]})
                if type == "StartTag":
                    open_elements.append(name)
                for name, value in token["data"]:
                    if not isinstance(name, str):
                        raise LintError(_("Attribute name is not a string: %(name)r") % {"name": name})
                    if not name:
                        raise LintError(_("Empty attribute name"))
                    if not isinstance(value, str):
                        raise LintError(_("Attribute value is not a string: %(value)r") % {"value": value})
                if name in cdataElements:
                    contentModelFlag = "CDATA"
                elif name in rcdataElements:
                    contentModelFlag = "RCDATA"
                elif name == "plaintext":
                    contentModelFlag = "PLAINTEXT"

            elif type == "EndTag":
                name = token["name"]
                if not isinstance(name, str):
                    raise LintError(_("Tag name is not a string: %(tag)r") % {"tag": name})
                if not name:
                    raise LintError(_("Empty tag name"))
                if name in voidElements:
                    raise LintError(_("Void element reported as EndTag token: %(tag)s") % {"tag": name})
                start_name = open_elements.pop()
                if start_name != name:
                    raise LintError(_("EndTag (%(end)s) does not match StartTag (%(start)s)") % {"end": name, "start": start_name})
                contentModelFlag = "PCDATA"

            elif type == "Comment":
                if contentModelFlag != "PCDATA":
                    raise LintError(_("Comment not in PCDATA content model flag"))

            elif type in ("Characters", "SpaceCharacters"):
                data = token["data"]
                if not isinstance(data, str):
                    raise LintError(_("Character data is not a string: %(data)r") % {"data": data})
                if not data:
                    raise LintError(_("%(type)s token with empty data") % {"type": type})
                if type == "SpaceCharacters":
                    data = data.strip(spaceCharacters)
                    if data:
                        raise LintError(_("Non-space character(s) found in SpaceCharacters token: %(token)r") % {"token": data})

            elif type == "Doctype":
                name = token["name"]
                if contentModelFlag != "PCDATA":
                    raise LintError(_("Doctype not in PCDATA content model flag: %(name)s") % {"name": name})
                if not isinstance(name, str):
                    raise LintError(_("Tag name is not a string: %(tag)r") % {"tag": name})
                # XXX: what to do with token["data"] ?

            elif type in ("ParseError", "SerializeError"):
                pass

            else:
                raise LintError(_("Unknown token type: %(type)s") % {"type": type})

            yield token
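
# A minimal sketch: a consistent, hand-built token stream passes through the
# lint filter unchanged; malformed streams raise LintError.
from html5lib.filters.lint import Filter as LintFilter

tokens = [{"type": "StartTag", "name": "p", "data": {}},
          {"type": "Characters", "data": "ok"},
          {"type": "EndTag", "name": "p", "data": {}}]
assert list(LintFilter(tokens)) == tokens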
@ -0,0 +1,205 @@
from __future__ import absolute_import, division, unicode_literals

from . import _base


class Filter(_base.Filter):
    def slider(self):
        previous1 = previous2 = None
        for token in self.source:
            if previous1 is not None:
                yield previous2, previous1, token
            previous2 = previous1
            previous1 = token
        yield previous2, previous1, None

    def __iter__(self):
        for previous, token, next in self.slider():
            type = token["type"]
            if type == "StartTag":
                if (token["data"] or
                        not self.is_optional_start(token["name"], previous, next)):
                    yield token
            elif type == "EndTag":
                if not self.is_optional_end(token["name"], next):
                    yield token
            else:
                yield token

    def is_optional_start(self, tagname, previous, next):
        type = next and next["type"] or None
        if tagname == 'html':
            # An html element's start tag may be omitted if the first thing
            # inside the html element is not a space character or a comment.
            return type not in ("Comment", "SpaceCharacters")
        elif tagname == 'head':
            # A head element's start tag may be omitted if the first thing
            # inside the head element is an element.
            # XXX: we also omit the start tag if the head element is empty
            if type in ("StartTag", "EmptyTag"):
                return True
            elif type == "EndTag":
                return next["name"] == "head"
        elif tagname == 'body':
            # A body element's start tag may be omitted if the first thing
            # inside the body element is not a space character or a comment,
            # except if the first thing inside the body element is a script
            # or style element and the node immediately preceding the body
            # element is a head element whose end tag has been omitted.
            if type in ("Comment", "SpaceCharacters"):
                return False
            elif type == "StartTag":
                # XXX: we do not look at the preceding event, so we never omit
                # the body element's start tag if it's followed by a script or
                # a style element.
                return next["name"] not in ('script', 'style')
            else:
                return True
        elif tagname == 'colgroup':
            # A colgroup element's start tag may be omitted if the first thing
            # inside the colgroup element is a col element, and if the element
            # is not immediately preceded by another colgroup element whose
            # end tag has been omitted.
            if type in ("StartTag", "EmptyTag"):
                # XXX: we do not look at the preceding event, so instead we never
                # omit the colgroup element's end tag when it is immediately
                # followed by another colgroup element. See is_optional_end.
                return next["name"] == "col"
            else:
                return False
        elif tagname == 'tbody':
            # A tbody element's start tag may be omitted if the first thing
            # inside the tbody element is a tr element, and if the element is
            # not immediately preceded by a tbody, thead, or tfoot element
            # whose end tag has been omitted.
            if type == "StartTag":
                # omit the thead and tfoot elements' end tag when they are
                # immediately followed by a tbody element. See is_optional_end.
                if previous and previous['type'] == 'EndTag' and \
                        previous['name'] in ('tbody', 'thead', 'tfoot'):
                    return False
                return next["name"] == 'tr'
            else:
                return False
        return False

    def is_optional_end(self, tagname, next):
        type = next and next["type"] or None
        if tagname in ('html', 'head', 'body'):
            # An html element's end tag may be omitted if the html element
            # is not immediately followed by a space character or a comment.
            return type not in ("Comment", "SpaceCharacters")
        elif tagname in ('li', 'optgroup', 'tr'):
            # A li element's end tag may be omitted if the li element is
            # immediately followed by another li element or if there is
            # no more content in the parent element.
            # An optgroup element's end tag may be omitted if the optgroup
            # element is immediately followed by another optgroup element,
            # or if there is no more content in the parent element.
            # A tr element's end tag may be omitted if the tr element is
            # immediately followed by another tr element, or if there is
            # no more content in the parent element.
            if type == "StartTag":
                return next["name"] == tagname
            else:
                return type == "EndTag" or type is None
        elif tagname in ('dt', 'dd'):
            # A dt element's end tag may be omitted if the dt element is
            # immediately followed by another dt element or a dd element.
            # A dd element's end tag may be omitted if the dd element is
            # immediately followed by another dd element or a dt element,
            # or if there is no more content in the parent element.
            if type == "StartTag":
                return next["name"] in ('dt', 'dd')
            elif tagname == 'dd':
                return type == "EndTag" or type is None
            else:
                return False
        elif tagname == 'p':
            # A p element's end tag may be omitted if the p element is
            # immediately followed by an address, article, aside,
            # blockquote, datagrid, dialog, dir, div, dl, fieldset,
            # footer, form, h1, h2, h3, h4, h5, h6, header, hr, menu,
            # nav, ol, p, pre, section, table, or ul element, or if
            # there is no more content in the parent element.
            if type in ("StartTag", "EmptyTag"):
                return next["name"] in ('address', 'article', 'aside',
                                        'blockquote', 'datagrid', 'dialog',
                                        'dir', 'div', 'dl', 'fieldset', 'footer',
                                        'form', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
                                        'header', 'hr', 'menu', 'nav', 'ol',
                                        'p', 'pre', 'section', 'table', 'ul')
            else:
                return type == "EndTag" or type is None
        elif tagname == 'option':
            # An option element's end tag may be omitted if the option
            # element is immediately followed by another option element,
            # or if it is immediately followed by an optgroup
            # element, or if there is no more content in the parent
            # element.
            if type == "StartTag":
                return next["name"] in ('option', 'optgroup')
            else:
                return type == "EndTag" or type is None
        elif tagname in ('rt', 'rp'):
            # An rt element's end tag may be omitted if the rt element is
            # immediately followed by an rt or rp element, or if there is
            # no more content in the parent element.
            # An rp element's end tag may be omitted if the rp element is
            # immediately followed by an rt or rp element, or if there is
            # no more content in the parent element.
            if type == "StartTag":
                return next["name"] in ('rt', 'rp')
            else:
                return type == "EndTag" or type is None
        elif tagname == 'colgroup':
            # A colgroup element's end tag may be omitted if the colgroup
            # element is not immediately followed by a space character or
            # a comment.
            if type in ("Comment", "SpaceCharacters"):
                return False
            elif type == "StartTag":
                # XXX: we also look for an immediately following colgroup
                # element. See is_optional_start.
                return next["name"] != 'colgroup'
            else:
                return True
        elif tagname in ('thead', 'tbody'):
            # A thead element's end tag may be omitted if the thead element
            # is immediately followed by a tbody or tfoot element.
            # A tbody element's end tag may be omitted if the tbody element
            # is immediately followed by a tbody or tfoot element, or if
            # there is no more content in the parent element.
            # A tfoot element's end tag may be omitted if the tfoot element
            # is immediately followed by a tbody element, or if there is no
            # more content in the parent element.
            # XXX: we never omit the end tag when the following element is
            # a tbody. See is_optional_start.
            if type == "StartTag":
                return next["name"] in ['tbody', 'tfoot']
            elif tagname == 'tbody':
                return type == "EndTag" or type is None
            else:
                return False
        elif tagname == 'tfoot':
            # A tfoot element's end tag may be omitted if the tfoot element
            # is immediately followed by a tbody element, or if there is no
            # more content in the parent element.
            # XXX: we never omit the end tag when the following element is
            # a tbody. See is_optional_start.
            if type == "StartTag":
                return next["name"] == 'tbody'
            else:
                return type == "EndTag" or type is None
        elif tagname in ('td', 'th'):
            # A td element's end tag may be omitted if the td element is
            # immediately followed by a td or th element, or if there is
            # no more content in the parent element.
            # A th element's end tag may be omitted if the th element is
            # immediately followed by a td or th element, or if there is
            # no more content in the parent element.
            if type == "StartTag":
                return next["name"] in ('td', 'th')
            else:
                return type == "EndTag" or type is None
        return False
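
# A minimal sketch: dropping omissible tags from a token stream, which is
# what the serializer's omit_optional_tags option uses this filter for.
import html5lib
from html5lib.filters.optionaltags import Filter as OptionalTagsFilter

doc = html5lib.parse("<table><tr><td>1<td>2</table>")
stream = OptionalTagsFilter(html5lib.getTreeWalker("etree")(doc))
kept = [t["name"] for t in stream if t["type"] == "StartTag"]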
@ -0,0 +1,12 @@
from __future__ import absolute_import, division, unicode_literals

from . import _base
from ..sanitizer import HTMLSanitizerMixin


class Filter(_base.Filter, HTMLSanitizerMixin):
    def __iter__(self):
        for token in _base.Filter.__iter__(self):
            token = self.sanitize_token(token)
            if token:
                yield token
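
# A minimal sketch: in this version of html5lib, sanitization is usually
# wired in at the tokenizer rather than through this tree-walker filter.
import html5lib
from html5lib.sanitizer import HTMLSanitizer

parser = html5lib.HTMLParser(tokenizer=HTMLSanitizer)
doc = parser.parse('<script>alert(1)</script><b>kept</b>')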
@ -0,0 +1,38 @@
from __future__ import absolute_import, division, unicode_literals

import re

from . import _base
from ..constants import rcdataElements, spaceCharacters
spaceCharacters = "".join(spaceCharacters)

SPACES_REGEX = re.compile("[%s]+" % spaceCharacters)


class Filter(_base.Filter):

    spacePreserveElements = frozenset(["pre", "textarea"] + list(rcdataElements))

    def __iter__(self):
        preserve = 0
        for token in _base.Filter.__iter__(self):
            type = token["type"]
            if type == "StartTag" \
                    and (preserve or token["name"] in self.spacePreserveElements):
                preserve += 1

            elif type == "EndTag" and preserve:
                preserve -= 1

            elif not preserve and type == "SpaceCharacters" and token["data"]:
                # Test on token["data"] above to not introduce spaces where there were none
                token["data"] = " "

            elif not preserve and type == "Characters":
                token["data"] = collapse_spaces(token["data"])

            yield token


def collapse_spaces(text):
    return SPACES_REGEX.sub(' ', text)
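
# A minimal sketch: collapse_spaces() squeezes any run of HTML whitespace
# down to a single space.
from html5lib.filters.whitespace import collapse_spaces

assert collapse_spaces("a \t\n b") == "a b"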
File diff suppressed because it is too large
@ -0,0 +1,285 @@
from __future__ import absolute_import, division, unicode_literals

import re
import warnings

from .constants import DataLossWarning

baseChar = """
[#x0041-#x005A] | [#x0061-#x007A] | [#x00C0-#x00D6] | [#x00D8-#x00F6] |
[#x00F8-#x00FF] | [#x0100-#x0131] | [#x0134-#x013E] | [#x0141-#x0148] |
[#x014A-#x017E] | [#x0180-#x01C3] | [#x01CD-#x01F0] | [#x01F4-#x01F5] |
[#x01FA-#x0217] | [#x0250-#x02A8] | [#x02BB-#x02C1] | #x0386 |
[#x0388-#x038A] | #x038C | [#x038E-#x03A1] | [#x03A3-#x03CE] |
[#x03D0-#x03D6] | #x03DA | #x03DC | #x03DE | #x03E0 | [#x03E2-#x03F3] |
[#x0401-#x040C] | [#x040E-#x044F] | [#x0451-#x045C] | [#x045E-#x0481] |
[#x0490-#x04C4] | [#x04C7-#x04C8] | [#x04CB-#x04CC] | [#x04D0-#x04EB] |
[#x04EE-#x04F5] | [#x04F8-#x04F9] | [#x0531-#x0556] | #x0559 |
[#x0561-#x0586] | [#x05D0-#x05EA] | [#x05F0-#x05F2] | [#x0621-#x063A] |
[#x0641-#x064A] | [#x0671-#x06B7] | [#x06BA-#x06BE] | [#x06C0-#x06CE] |
[#x06D0-#x06D3] | #x06D5 | [#x06E5-#x06E6] | [#x0905-#x0939] | #x093D |
[#x0958-#x0961] | [#x0985-#x098C] | [#x098F-#x0990] | [#x0993-#x09A8] |
[#x09AA-#x09B0] | #x09B2 | [#x09B6-#x09B9] | [#x09DC-#x09DD] |
[#x09DF-#x09E1] | [#x09F0-#x09F1] | [#x0A05-#x0A0A] | [#x0A0F-#x0A10] |
[#x0A13-#x0A28] | [#x0A2A-#x0A30] | [#x0A32-#x0A33] | [#x0A35-#x0A36] |
[#x0A38-#x0A39] | [#x0A59-#x0A5C] | #x0A5E | [#x0A72-#x0A74] |
[#x0A85-#x0A8B] | #x0A8D | [#x0A8F-#x0A91] | [#x0A93-#x0AA8] |
[#x0AAA-#x0AB0] | [#x0AB2-#x0AB3] | [#x0AB5-#x0AB9] | #x0ABD | #x0AE0 |
[#x0B05-#x0B0C] | [#x0B0F-#x0B10] | [#x0B13-#x0B28] | [#x0B2A-#x0B30] |
[#x0B32-#x0B33] | [#x0B36-#x0B39] | #x0B3D | [#x0B5C-#x0B5D] |
[#x0B5F-#x0B61] | [#x0B85-#x0B8A] | [#x0B8E-#x0B90] | [#x0B92-#x0B95] |
[#x0B99-#x0B9A] | #x0B9C | [#x0B9E-#x0B9F] | [#x0BA3-#x0BA4] |
[#x0BA8-#x0BAA] | [#x0BAE-#x0BB5] | [#x0BB7-#x0BB9] | [#x0C05-#x0C0C] |
[#x0C0E-#x0C10] | [#x0C12-#x0C28] | [#x0C2A-#x0C33] | [#x0C35-#x0C39] |
[#x0C60-#x0C61] | [#x0C85-#x0C8C] | [#x0C8E-#x0C90] | [#x0C92-#x0CA8] |
[#x0CAA-#x0CB3] | [#x0CB5-#x0CB9] | #x0CDE | [#x0CE0-#x0CE1] |
[#x0D05-#x0D0C] | [#x0D0E-#x0D10] | [#x0D12-#x0D28] | [#x0D2A-#x0D39] |
[#x0D60-#x0D61] | [#x0E01-#x0E2E] | #x0E30 | [#x0E32-#x0E33] |
[#x0E40-#x0E45] | [#x0E81-#x0E82] | #x0E84 | [#x0E87-#x0E88] | #x0E8A |
#x0E8D | [#x0E94-#x0E97] | [#x0E99-#x0E9F] | [#x0EA1-#x0EA3] | #x0EA5 |
#x0EA7 | [#x0EAA-#x0EAB] | [#x0EAD-#x0EAE] | #x0EB0 | [#x0EB2-#x0EB3] |
#x0EBD | [#x0EC0-#x0EC4] | [#x0F40-#x0F47] | [#x0F49-#x0F69] |
[#x10A0-#x10C5] | [#x10D0-#x10F6] | #x1100 | [#x1102-#x1103] |
[#x1105-#x1107] | #x1109 | [#x110B-#x110C] | [#x110E-#x1112] | #x113C |
#x113E | #x1140 | #x114C | #x114E | #x1150 | [#x1154-#x1155] | #x1159 |
[#x115F-#x1161] | #x1163 | #x1165 | #x1167 | #x1169 | [#x116D-#x116E] |
[#x1172-#x1173] | #x1175 | #x119E | #x11A8 | #x11AB | [#x11AE-#x11AF] |
[#x11B7-#x11B8] | #x11BA | [#x11BC-#x11C2] | #x11EB | #x11F0 | #x11F9 |
[#x1E00-#x1E9B] | [#x1EA0-#x1EF9] | [#x1F00-#x1F15] | [#x1F18-#x1F1D] |
[#x1F20-#x1F45] | [#x1F48-#x1F4D] | [#x1F50-#x1F57] | #x1F59 | #x1F5B |
#x1F5D | [#x1F5F-#x1F7D] | [#x1F80-#x1FB4] | [#x1FB6-#x1FBC] | #x1FBE |
[#x1FC2-#x1FC4] | [#x1FC6-#x1FCC] | [#x1FD0-#x1FD3] | [#x1FD6-#x1FDB] |
[#x1FE0-#x1FEC] | [#x1FF2-#x1FF4] | [#x1FF6-#x1FFC] | #x2126 |
[#x212A-#x212B] | #x212E | [#x2180-#x2182] | [#x3041-#x3094] |
[#x30A1-#x30FA] | [#x3105-#x312C] | [#xAC00-#xD7A3]"""

ideographic = """[#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]"""

combiningCharacter = """
[#x0300-#x0345] | [#x0360-#x0361] | [#x0483-#x0486] | [#x0591-#x05A1] |
[#x05A3-#x05B9] | [#x05BB-#x05BD] | #x05BF | [#x05C1-#x05C2] | #x05C4 |
[#x064B-#x0652] | #x0670 | [#x06D6-#x06DC] | [#x06DD-#x06DF] |
[#x06E0-#x06E4] | [#x06E7-#x06E8] | [#x06EA-#x06ED] | [#x0901-#x0903] |
#x093C | [#x093E-#x094C] | #x094D | [#x0951-#x0954] | [#x0962-#x0963] |
[#x0981-#x0983] | #x09BC | #x09BE | #x09BF | [#x09C0-#x09C4] |
[#x09C7-#x09C8] | [#x09CB-#x09CD] | #x09D7 | [#x09E2-#x09E3] | #x0A02 |
#x0A3C | #x0A3E | #x0A3F | [#x0A40-#x0A42] | [#x0A47-#x0A48] |
[#x0A4B-#x0A4D] | [#x0A70-#x0A71] | [#x0A81-#x0A83] | #x0ABC |
[#x0ABE-#x0AC5] | [#x0AC7-#x0AC9] | [#x0ACB-#x0ACD] | [#x0B01-#x0B03] |
#x0B3C | [#x0B3E-#x0B43] | [#x0B47-#x0B48] | [#x0B4B-#x0B4D] |
[#x0B56-#x0B57] | [#x0B82-#x0B83] | [#x0BBE-#x0BC2] | [#x0BC6-#x0BC8] |
[#x0BCA-#x0BCD] | #x0BD7 | [#x0C01-#x0C03] | [#x0C3E-#x0C44] |
[#x0C46-#x0C48] | [#x0C4A-#x0C4D] | [#x0C55-#x0C56] | [#x0C82-#x0C83] |
[#x0CBE-#x0CC4] | [#x0CC6-#x0CC8] | [#x0CCA-#x0CCD] | [#x0CD5-#x0CD6] |
[#x0D02-#x0D03] | [#x0D3E-#x0D43] | [#x0D46-#x0D48] | [#x0D4A-#x0D4D] |
#x0D57 | #x0E31 | [#x0E34-#x0E3A] | [#x0E47-#x0E4E] | #x0EB1 |
[#x0EB4-#x0EB9] | [#x0EBB-#x0EBC] | [#x0EC8-#x0ECD] | [#x0F18-#x0F19] |
#x0F35 | #x0F37 | #x0F39 | #x0F3E | #x0F3F | [#x0F71-#x0F84] |
[#x0F86-#x0F8B] | [#x0F90-#x0F95] | #x0F97 | [#x0F99-#x0FAD] |
[#x0FB1-#x0FB7] | #x0FB9 | [#x20D0-#x20DC] | #x20E1 | [#x302A-#x302F] |
#x3099 | #x309A"""

digit = """
[#x0030-#x0039] | [#x0660-#x0669] | [#x06F0-#x06F9] | [#x0966-#x096F] |
[#x09E6-#x09EF] | [#x0A66-#x0A6F] | [#x0AE6-#x0AEF] | [#x0B66-#x0B6F] |
[#x0BE7-#x0BEF] | [#x0C66-#x0C6F] | [#x0CE6-#x0CEF] | [#x0D66-#x0D6F] |
[#x0E50-#x0E59] | [#x0ED0-#x0ED9] | [#x0F20-#x0F29]"""

extender = """
#x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 | #x0E46 | #x0EC6 | #x3005 |
[#x3031-#x3035] | [#x309D-#x309E] | [#x30FC-#x30FE]"""

letter = " | ".join([baseChar, ideographic])

# Without the
name = " | ".join([letter, digit, ".", "-", "_", combiningCharacter,
                   extender])
nameFirst = " | ".join([letter, "_"])

reChar = re.compile(r"#x([\d|A-F]{4,4})")
reCharRange = re.compile(r"\[#x([\d|A-F]{4,4})-#x([\d|A-F]{4,4})\]")


def charStringToList(chars):
    charRanges = [item.strip() for item in chars.split(" | ")]
    rv = []
    for item in charRanges:
        foundMatch = False
        for regexp in (reChar, reCharRange):
            match = regexp.match(item)
            if match is not None:
                rv.append([hexToInt(item) for item in match.groups()])
                if len(rv[-1]) == 1:
                    rv[-1] = rv[-1] * 2
                foundMatch = True
                break
        if not foundMatch:
            assert len(item) == 1

            rv.append([ord(item)] * 2)
    rv = normaliseCharList(rv)
    return rv


def normaliseCharList(charList):
    charList = sorted(charList)
    for item in charList:
        assert item[1] >= item[0]
    rv = []
    i = 0
    while i < len(charList):
        j = 1
        rv.append(charList[i])
        while i + j < len(charList) and charList[i + j][0] <= rv[-1][1] + 1:
            rv[-1][1] = charList[i + j][1]
            j += 1
        i += j
    return rv

# We don't really support characters above the BMP :(
max_unicode = int("FFFF", 16)


def missingRanges(charList):
    rv = []
    if charList[0] != 0:
        rv.append([0, charList[0][0] - 1])
    for i, item in enumerate(charList[:-1]):
        rv.append([item[1] + 1, charList[i + 1][0] - 1])
    if charList[-1][1] != max_unicode:
        rv.append([charList[-1][1] + 1, max_unicode])
    return rv


def listToRegexpStr(charList):
    rv = []
    for item in charList:
        if item[0] == item[1]:
            rv.append(escapeRegexp(chr(item[0])))
        else:
            rv.append(escapeRegexp(chr(item[0])) + "-" +
                      escapeRegexp(chr(item[1])))
    return "[%s]" % "".join(rv)


def hexToInt(hex_str):
    return int(hex_str, 16)


def escapeRegexp(string):
    specialCharacters = (".", "^", "$", "*", "+", "?", "{", "}",
                         "[", "]", "|", "(", ")", "-")
    for char in specialCharacters:
        string = string.replace(char, "\\" + char)

    return string

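# A minimal sketch of the pipeline above, rebuilding a forbidden-character
# class the same way the precomputed patterns below appear to have been
# generated (BMP-only, per max_unicode).
nonNameRegexp = re.compile(listToRegexpStr(missingRanges(charStringToList(name))))
assert nonNameRegexp.match("a") is None  # 'a' is a valid XML name character
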
# output from the above
nonXmlNameBMPRegexp = re.compile('[\x00-,/:-@\\[-\\^`\\{-\xb6\xb8-\xbf\xd7\xf7\u0132-\u0133\u013f-\u0140\u0149\u017f\u01c4-\u01cc\u01f1-\u01f3\u01f6-\u01f9\u0218-\u024f\u02a9-\u02ba\u02c2-\u02cf\u02d2-\u02ff\u0346-\u035f\u0362-\u0385\u038b\u038d\u03a2\u03cf\u03d7-\u03d9\u03db\u03dd\u03df\u03e1\u03f4-\u0400\u040d\u0450\u045d\u0482\u0487-\u048f\u04c5-\u04c6\u04c9-\u04ca\u04cd-\u04cf\u04ec-\u04ed\u04f6-\u04f7\u04fa-\u0530\u0557-\u0558\u055a-\u0560\u0587-\u0590\u05a2\u05ba\u05be\u05c0\u05c3\u05c5-\u05cf\u05eb-\u05ef\u05f3-\u0620\u063b-\u063f\u0653-\u065f\u066a-\u066f\u06b8-\u06b9\u06bf\u06cf\u06d4\u06e9\u06ee-\u06ef\u06fa-\u0900\u0904\u093a-\u093b\u094e-\u0950\u0955-\u0957\u0964-\u0965\u0970-\u0980\u0984\u098d-\u098e\u0991-\u0992\u09a9\u09b1\u09b3-\u09b5\u09ba-\u09bb\u09bd\u09c5-\u09c6\u09c9-\u09ca\u09ce-\u09d6\u09d8-\u09db\u09de\u09e4-\u09e5\u09f2-\u0a01\u0a03-\u0a04\u0a0b-\u0a0e\u0a11-\u0a12\u0a29\u0a31\u0a34\u0a37\u0a3a-\u0a3b\u0a3d\u0a43-\u0a46\u0a49-\u0a4a\u0a4e-\u0a58\u0a5d\u0a5f-\u0a65\u0a75-\u0a80\u0a84\u0a8c\u0a8e\u0a92\u0aa9\u0ab1\u0ab4\u0aba-\u0abb\u0ac6\u0aca\u0ace-\u0adf\u0ae1-\u0ae5\u0af0-\u0b00\u0b04\u0b0d-\u0b0e\u0b11-\u0b12\u0b29\u0b31\u0b34-\u0b35\u0b3a-\u0b3b\u0b44-\u0b46\u0b49-\u0b4a\u0b4e-\u0b55\u0b58-\u0b5b\u0b5e\u0b62-\u0b65\u0b70-\u0b81\u0b84\u0b8b-\u0b8d\u0b91\u0b96-\u0b98\u0b9b\u0b9d\u0ba0-\u0ba2\u0ba5-\u0ba7\u0bab-\u0bad\u0bb6\u0bba-\u0bbd\u0bc3-\u0bc5\u0bc9\u0bce-\u0bd6\u0bd8-\u0be6\u0bf0-\u0c00\u0c04\u0c0d\u0c11\u0c29\u0c34\u0c3a-\u0c3d\u0c45\u0c49\u0c4e-\u0c54\u0c57-\u0c5f\u0c62-\u0c65\u0c70-\u0c81\u0c84\u0c8d\u0c91\u0ca9\u0cb4\u0cba-\u0cbd\u0cc5\u0cc9\u0cce-\u0cd4\u0cd7-\u0cdd\u0cdf\u0ce2-\u0ce5\u0cf0-\u0d01\u0d04\u0d0d\u0d11\u0d29\u0d3a-\u0d3d\u0d44-\u0d45\u0d49\u0d4e-\u0d56\u0d58-\u0d5f\u0d62-\u0d65\u0d70-\u0e00\u0e2f\u0e3b-\u0e3f\u0e4f\u0e5a-\u0e80\u0e83\u0e85-\u0e86\u0e89\u0e8b-\u0e8c\u0e8e-\u0e93\u0e98\u0ea0\u0ea4\u0ea6\u0ea8-\u0ea9\u0eac\u0eaf\u0eba\u0ebe-\u0ebf\u0ec5\u0ec7\u0ece-\u0ecf\u0eda-\u0f17\u0f1a-\u0f1f\u0f2a-\u0f34\u0f36\u0f38\u0f3a-\u0f3d\u0f48\u0f6a-\u0f70\u0f85\u0f8c-\u0f8f\u0f96\u0f98\u0fae-\u0fb0\u0fb8\u0fba-\u109f\u10c6-\u10cf\u10f7-\u10ff\u1101\u1104\u1108\u110a\u110d\u1113-\u113b\u113d\u113f\u1141-\u114b\u114d\u114f\u1151-\u1153\u1156-\u1158\u115a-\u115e\u1162\u1164\u1166\u1168\u116a-\u116c\u116f-\u1171\u1174\u1176-\u119d\u119f-\u11a7\u11a9-\u11aa\u11ac-\u11ad\u11b0-\u11b6\u11b9\u11bb\u11c3-\u11ea\u11ec-\u11ef\u11f1-\u11f8\u11fa-\u1dff\u1e9c-\u1e9f\u1efa-\u1eff\u1f16-\u1f17\u1f1e-\u1f1f\u1f46-\u1f47\u1f4e-\u1f4f\u1f58\u1f5a\u1f5c\u1f5e\u1f7e-\u1f7f\u1fb5\u1fbd\u1fbf-\u1fc1\u1fc5\u1fcd-\u1fcf\u1fd4-\u1fd5\u1fdc-\u1fdf\u1fed-\u1ff1\u1ff5\u1ffd-\u20cf\u20dd-\u20e0\u20e2-\u2125\u2127-\u2129\u212c-\u212d\u212f-\u217f\u2183-\u3004\u3006\u3008-\u3020\u3030\u3036-\u3040\u3095-\u3098\u309b-\u309c\u309f-\u30a0\u30fb\u30ff-\u3104\u312d-\u4dff\u9fa6-\uabff\ud7a4-\uffff]')
nonXmlNameFirstBMPRegexp = re.compile('[\x00-@\\[-\\^`\\{-\xbf\xd7\xf7\u0132-\u0133\u013f-\u0140\u0149\u017f\u01c4-\u01cc\u01f1-\u01f3\u01f6-\u01f9\u0218-\u024f\u02a9-\u02ba\u02c2-\u0385\u0387\u038b\u038d\u03a2\u03cf\u03d7-\u03d9\u03db\u03dd\u03df\u03e1\u03f4-\u0400\u040d\u0450\u045d\u0482-\u048f\u04c5-\u04c6\u04c9-\u04ca\u04cd-\u04cf\u04ec-\u04ed\u04f6-\u04f7\u04fa-\u0530\u0557-\u0558\u055a-\u0560\u0587-\u05cf\u05eb-\u05ef\u05f3-\u0620\u063b-\u0640\u064b-\u0670\u06b8-\u06b9\u06bf\u06cf\u06d4\u06d6-\u06e4\u06e7-\u0904\u093a-\u093c\u093e-\u0957\u0962-\u0984\u098d-\u098e\u0991-\u0992\u09a9\u09b1\u09b3-\u09b5\u09ba-\u09db\u09de\u09e2-\u09ef\u09f2-\u0a04\u0a0b-\u0a0e\u0a11-\u0a12\u0a29\u0a31\u0a34\u0a37\u0a3a-\u0a58\u0a5d\u0a5f-\u0a71\u0a75-\u0a84\u0a8c\u0a8e\u0a92\u0aa9\u0ab1\u0ab4\u0aba-\u0abc\u0abe-\u0adf\u0ae1-\u0b04\u0b0d-\u0b0e\u0b11-\u0b12\u0b29\u0b31\u0b34-\u0b35\u0b3a-\u0b3c\u0b3e-\u0b5b\u0b5e\u0b62-\u0b84\u0b8b-\u0b8d\u0b91\u0b96-\u0b98\u0b9b\u0b9d\u0ba0-\u0ba2\u0ba5-\u0ba7\u0bab-\u0bad\u0bb6\u0bba-\u0c04\u0c0d\u0c11\u0c29\u0c34\u0c3a-\u0c5f\u0c62-\u0c84\u0c8d\u0c91\u0ca9\u0cb4\u0cba-\u0cdd\u0cdf\u0ce2-\u0d04\u0d0d\u0d11\u0d29\u0d3a-\u0d5f\u0d62-\u0e00\u0e2f\u0e31\u0e34-\u0e3f\u0e46-\u0e80\u0e83\u0e85-\u0e86\u0e89\u0e8b-\u0e8c\u0e8e-\u0e93\u0e98\u0ea0\u0ea4\u0ea6\u0ea8-\u0ea9\u0eac\u0eaf\u0eb1\u0eb4-\u0ebc\u0ebe-\u0ebf\u0ec5-\u0f3f\u0f48\u0f6a-\u109f\u10c6-\u10cf\u10f7-\u10ff\u1101\u1104\u1108\u110a\u110d\u1113-\u113b\u113d\u113f\u1141-\u114b\u114d\u114f\u1151-\u1153\u1156-\u1158\u115a-\u115e\u1162\u1164\u1166\u1168\u116a-\u116c\u116f-\u1171\u1174\u1176-\u119d\u119f-\u11a7\u11a9-\u11aa\u11ac-\u11ad\u11b0-\u11b6\u11b9\u11bb\u11c3-\u11ea\u11ec-\u11ef\u11f1-\u11f8\u11fa-\u1dff\u1e9c-\u1e9f\u1efa-\u1eff\u1f16-\u1f17\u1f1e-\u1f1f\u1f46-\u1f47\u1f4e-\u1f4f\u1f58\u1f5a\u1f5c\u1f5e\u1f7e-\u1f7f\u1fb5\u1fbd\u1fbf-\u1fc1\u1fc5\u1fcd-\u1fcf\u1fd4-\u1fd5\u1fdc-\u1fdf\u1fed-\u1ff1\u1ff5\u1ffd-\u2125\u2127-\u2129\u212c-\u212d\u212f-\u217f\u2183-\u3006\u3008-\u3020\u302a-\u3040\u3095-\u30a0\u30fb-\u3104\u312d-\u4dff\u9fa6-\uabff\ud7a4-\uffff]')
|
||||||
|
|
||||||
|
# Simpler things
|
||||||
|
nonPubidCharRegexp = re.compile("[^\x20\x0D\x0Aa-zA-Z0-9\-\'()+,./:=?;!*#@$_%]")
|
||||||
|
|
||||||
|
|
||||||
|
class InfosetFilter(object):
|
||||||
|
    replacementRegexp = re.compile(r"U[\dA-F]{5,5}")

    def __init__(self, replaceChars=None,
                 dropXmlnsLocalName=False,
                 dropXmlnsAttrNs=False,
                 preventDoubleDashComments=False,
                 preventDashAtCommentEnd=False,
                 replaceFormFeedCharacters=True,
                 preventSingleQuotePubid=False):

        self.dropXmlnsLocalName = dropXmlnsLocalName
        self.dropXmlnsAttrNs = dropXmlnsAttrNs

        self.preventDoubleDashComments = preventDoubleDashComments
        self.preventDashAtCommentEnd = preventDashAtCommentEnd

        self.replaceFormFeedCharacters = replaceFormFeedCharacters

        self.preventSingleQuotePubid = preventSingleQuotePubid

        self.replaceCache = {}

    def coerceAttribute(self, name, namespace=None):
        if self.dropXmlnsLocalName and name.startswith("xmlns:"):
            warnings.warn("Attributes cannot begin with xmlns", DataLossWarning)
            return None
        elif (self.dropXmlnsAttrNs and
              namespace == "http://www.w3.org/2000/xmlns/"):
            warnings.warn("Attributes cannot be in the xml namespace", DataLossWarning)
            return None
        else:
            return self.toXmlName(name)

    def coerceElement(self, name, namespace=None):
        return self.toXmlName(name)

    def coerceComment(self, data):
        if self.preventDoubleDashComments:
            while "--" in data:
                warnings.warn("Comments cannot contain adjacent dashes", DataLossWarning)
                data = data.replace("--", "- -")
        return data

    def coerceCharacters(self, data):
        if self.replaceFormFeedCharacters:
            for i in range(data.count("\x0C")):
                warnings.warn("Text cannot contain U+000C", DataLossWarning)
            data = data.replace("\x0C", " ")
        # Other non-xml characters
        return data

    def coercePubid(self, data):
        dataOutput = data
        for char in nonPubidCharRegexp.findall(data):
            warnings.warn("Coercing non-XML pubid", DataLossWarning)
            replacement = self.getReplacementCharacter(char)
            dataOutput = dataOutput.replace(char, replacement)
        if self.preventSingleQuotePubid and dataOutput.find("'") >= 0:
            warnings.warn("Pubid cannot contain single quote", DataLossWarning)
            dataOutput = dataOutput.replace("'", self.getReplacementCharacter("'"))
        return dataOutput

    def toXmlName(self, name):
        nameFirst = name[0]
        nameRest = name[1:]
        m = nonXmlNameFirstBMPRegexp.match(nameFirst)
        if m:
            warnings.warn("Coercing non-XML name", DataLossWarning)
            nameFirstOutput = self.getReplacementCharacter(nameFirst)
        else:
            nameFirstOutput = nameFirst

        nameRestOutput = nameRest
        replaceChars = set(nonXmlNameBMPRegexp.findall(nameRest))
        for char in replaceChars:
            warnings.warn("Coercing non-XML name", DataLossWarning)
            replacement = self.getReplacementCharacter(char)
            nameRestOutput = nameRestOutput.replace(char, replacement)
        return nameFirstOutput + nameRestOutput

    def getReplacementCharacter(self, char):
        if char in self.replaceCache:
            replacement = self.replaceCache[char]
        else:
            replacement = self.escapeChar(char)
        return replacement

    def fromXmlName(self, name):
        for item in set(self.replacementRegexp.findall(name)):
            name = name.replace(item, self.unescapeChar(item))
        return name

    def escapeChar(self, char):
        replacement = "U%05X" % ord(char)
        self.replaceCache[char] = replacement
        return replacement

    def unescapeChar(self, charcode):
        return chr(int(charcode[1:], 16))
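
    # Editor's note: a minimal sketch (not part of the vendored file) of the
    # round trip implemented above -- non-XML characters become "U" plus five
    # hex digits, and fromXmlName() reverses the mapping:
    #
    #   >>> f = InfosetFilter()
    #   >>> f.toXmlName("foo bar")        # emits a DataLossWarning
    #   'fooU00020bar'
    #   >>> f.fromXmlName('fooU00020bar')
    #   'foo bar'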
@@ -0,0 +1,886 @@
from __future__ import absolute_import, division, unicode_literals
from pip._vendor.six import text_type
from pip._vendor.six.moves import http_client

import codecs
import re

from .constants import EOF, spaceCharacters, asciiLetters, asciiUppercase
from .constants import encodings, ReparseException
from . import utils

from io import StringIO

try:
    from io import BytesIO
except ImportError:
    BytesIO = StringIO

try:
    from io import BufferedIOBase
except ImportError:
    class BufferedIOBase(object):
        pass

# Non-unicode versions of constants for use in the pre-parser
spaceCharactersBytes = frozenset([item.encode("ascii") for item in spaceCharacters])
asciiLettersBytes = frozenset([item.encode("ascii") for item in asciiLetters])
asciiUppercaseBytes = frozenset([item.encode("ascii") for item in asciiUppercase])
spacesAngleBrackets = spaceCharactersBytes | frozenset([b">", b"<"])

invalid_unicode_re = re.compile("[\u0001-\u0008\u000B\u000E-\u001F\u007F-\u009F\uD800-\uDFFF\uFDD0-\uFDEF\uFFFE\uFFFF\U0001FFFE\U0001FFFF\U0002FFFE\U0002FFFF\U0003FFFE\U0003FFFF\U0004FFFE\U0004FFFF\U0005FFFE\U0005FFFF\U0006FFFE\U0006FFFF\U0007FFFE\U0007FFFF\U0008FFFE\U0008FFFF\U0009FFFE\U0009FFFF\U000AFFFE\U000AFFFF\U000BFFFE\U000BFFFF\U000CFFFE\U000CFFFF\U000DFFFE\U000DFFFF\U000EFFFE\U000EFFFF\U000FFFFE\U000FFFFF\U0010FFFE\U0010FFFF]")

non_bmp_invalid_codepoints = set([0x1FFFE, 0x1FFFF, 0x2FFFE, 0x2FFFF, 0x3FFFE,
                                  0x3FFFF, 0x4FFFE, 0x4FFFF, 0x5FFFE, 0x5FFFF,
                                  0x6FFFE, 0x6FFFF, 0x7FFFE, 0x7FFFF, 0x8FFFE,
                                  0x8FFFF, 0x9FFFE, 0x9FFFF, 0xAFFFE, 0xAFFFF,
                                  0xBFFFE, 0xBFFFF, 0xCFFFE, 0xCFFFF, 0xDFFFE,
                                  0xDFFFF, 0xEFFFE, 0xEFFFF, 0xFFFFE, 0xFFFFF,
                                  0x10FFFE, 0x10FFFF])

ascii_punctuation_re = re.compile("[\u0009-\u000D\u0020-\u002F\u003A-\u0040\u005B-\u0060\u007B-\u007E]")

# Cache for charsUntil()
charsUntilRegEx = {}


class BufferedStream(object):
    """Buffering for streams that do not have buffering of their own

    The buffer is implemented as a list of chunks on the assumption that
    joining many strings will be slow since it is O(n**2)
    """

    def __init__(self, stream):
        self.stream = stream
        self.buffer = []
        self.position = [-1, 0]  # chunk number, offset

    def tell(self):
        pos = 0
        for chunk in self.buffer[:self.position[0]]:
            pos += len(chunk)
        pos += self.position[1]
        return pos

    def seek(self, pos):
        assert pos <= self._bufferedBytes()
        offset = pos
        i = 0
        while len(self.buffer[i]) < offset:
            offset -= len(self.buffer[i])
            i += 1
        self.position = [i, offset]

    def read(self, bytes):
        if not self.buffer:
            return self._readStream(bytes)
        elif (self.position[0] == len(self.buffer) and
              self.position[1] == len(self.buffer[-1])):
            return self._readStream(bytes)
        else:
            return self._readFromBuffer(bytes)

    def _bufferedBytes(self):
        return sum([len(item) for item in self.buffer])

    def _readStream(self, bytes):
        data = self.stream.read(bytes)
        self.buffer.append(data)
        self.position[0] += 1
        self.position[1] = len(data)
        return data

    def _readFromBuffer(self, bytes):
        remainingBytes = bytes
        rv = []
        bufferIndex = self.position[0]
        bufferOffset = self.position[1]
        while bufferIndex < len(self.buffer) and remainingBytes != 0:
            assert remainingBytes > 0
            bufferedData = self.buffer[bufferIndex]

            if remainingBytes <= len(bufferedData) - bufferOffset:
                bytesToRead = remainingBytes
                self.position = [bufferIndex, bufferOffset + bytesToRead]
            else:
                bytesToRead = len(bufferedData) - bufferOffset
                self.position = [bufferIndex, len(bufferedData)]
                bufferIndex += 1
            rv.append(bufferedData[bufferOffset:bufferOffset + bytesToRead])
            remainingBytes -= bytesToRead

            bufferOffset = 0

        if remainingBytes:
            rv.append(self._readStream(remainingBytes))

        return b"".join(rv)


def HTMLInputStream(source, encoding=None, parseMeta=True, chardet=True):
    if isinstance(source, http_client.HTTPResponse):
        # Work around Python bug #20007: read(0) closes the connection.
        # http://bugs.python.org/issue20007
        isUnicode = False
    elif hasattr(source, "read"):
        isUnicode = isinstance(source.read(0), text_type)
    else:
        isUnicode = isinstance(source, text_type)

    if isUnicode:
        if encoding is not None:
            raise TypeError("Cannot explicitly set an encoding with a unicode string")

        return HTMLUnicodeInputStream(source)
    else:
        return HTMLBinaryInputStream(source, encoding, parseMeta, chardet)
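
# Editor's note: the factory above only dispatches on the source type; a
# short sketch of the two paths (Python 3 semantics assumed):
#
#   >>> HTMLInputStream("<p>hi</p>")    # text in: no sniffing, encoding known
#   ... returns an HTMLUnicodeInputStream
#   >>> HTMLInputStream(b"<p>hi</p>")   # bytes in: BOM/meta/chardet sniffing
#   ... returns an HTMLBinaryInputStream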


class HTMLUnicodeInputStream(object):
    """Provides a unicode stream of characters to the HTMLTokenizer.

    This class takes care of character encoding and removing or replacing
    incorrect byte-sequences and also provides column and line tracking.

    """

    _defaultChunkSize = 10240

    def __init__(self, source):
        """Initialises the HTMLInputStream.

        HTMLInputStream(source, [encoding]) -> Normalized stream from source
        for use by html5lib.

        source can be either a file-object, local filename or a string.

        The optional encoding parameter must be a string that indicates
        the encoding. If specified, that encoding will be used,
        regardless of any BOM or later declaration (such as in a meta
        element)

        parseMeta - Look for a <meta> element containing encoding information

        """

        # Craziness
        if len("\U0010FFFF") == 1:
            self.reportCharacterErrors = self.characterErrorsUCS4
            self.replaceCharactersRegexp = re.compile("[\uD800-\uDFFF]")
        else:
            self.reportCharacterErrors = self.characterErrorsUCS2
            self.replaceCharactersRegexp = re.compile("([\uD800-\uDBFF](?![\uDC00-\uDFFF])|(?<![\uD800-\uDBFF])[\uDC00-\uDFFF])")

        # List of where new lines occur
        self.newLines = [0]

        self.charEncoding = ("utf-8", "certain")
        self.dataStream = self.openStream(source)

        self.reset()

    def reset(self):
        self.chunk = ""
        self.chunkSize = 0
        self.chunkOffset = 0
        self.errors = []

        # number of (complete) lines in previous chunks
        self.prevNumLines = 0
        # number of columns in the last line of the previous chunk
        self.prevNumCols = 0

        # Deal with CR LF and surrogates split over chunk boundaries
        self._bufferedCharacter = None

    def openStream(self, source):
        """Produces a file object from source.

        source can be either a file object, local filename or a string.

        """
        # Already a file object
        if hasattr(source, 'read'):
            stream = source
        else:
            stream = StringIO(source)

        return stream

    def _position(self, offset):
        chunk = self.chunk
        nLines = chunk.count('\n', 0, offset)
        positionLine = self.prevNumLines + nLines
        lastLinePos = chunk.rfind('\n', 0, offset)
        if lastLinePos == -1:
            positionColumn = self.prevNumCols + offset
        else:
            positionColumn = offset - (lastLinePos + 1)
        return (positionLine, positionColumn)

    def position(self):
        """Returns (line, col) of the current position in the stream."""
        line, col = self._position(self.chunkOffset)
        return (line + 1, col)

    def char(self):
        """ Read one character from the stream or queue if available. Return
            EOF when EOF is reached.
        """
        # Read a new chunk from the input stream if necessary
        if self.chunkOffset >= self.chunkSize:
            if not self.readChunk():
                return EOF

        chunkOffset = self.chunkOffset
        char = self.chunk[chunkOffset]
        self.chunkOffset = chunkOffset + 1

        return char

    def readChunk(self, chunkSize=None):
        if chunkSize is None:
            chunkSize = self._defaultChunkSize

        self.prevNumLines, self.prevNumCols = self._position(self.chunkSize)

        self.chunk = ""
        self.chunkSize = 0
        self.chunkOffset = 0

        data = self.dataStream.read(chunkSize)

        # Deal with CR LF and surrogates broken across chunks
        if self._bufferedCharacter:
            data = self._bufferedCharacter + data
            self._bufferedCharacter = None
        elif not data:
            # We have no more data, bye-bye stream
            return False

        if len(data) > 1:
            lastv = ord(data[-1])
            if lastv == 0x0D or 0xD800 <= lastv <= 0xDBFF:
                self._bufferedCharacter = data[-1]
                data = data[:-1]

        self.reportCharacterErrors(data)

        # Replace invalid characters
        # Note U+0000 is dealt with in the tokenizer
        data = self.replaceCharactersRegexp.sub("\ufffd", data)

        data = data.replace("\r\n", "\n")
        data = data.replace("\r", "\n")

        self.chunk = data
        self.chunkSize = len(data)

        return True

    def characterErrorsUCS4(self, data):
        for i in range(len(invalid_unicode_re.findall(data))):
            self.errors.append("invalid-codepoint")

    def characterErrorsUCS2(self, data):
        # Someone picked the wrong compile option
        # You lose
        skip = False
        for match in invalid_unicode_re.finditer(data):
            if skip:
                continue
            codepoint = ord(match.group())
            pos = match.start()
            # Pretty sure there should be endianness issues here
            if utils.isSurrogatePair(data[pos:pos + 2]):
                # We have a surrogate pair!
                char_val = utils.surrogatePairToCodepoint(data[pos:pos + 2])
                if char_val in non_bmp_invalid_codepoints:
                    self.errors.append("invalid-codepoint")
                skip = True
            elif (codepoint >= 0xD800 and codepoint <= 0xDFFF and
                  pos == len(data) - 1):
                self.errors.append("invalid-codepoint")
            else:
                skip = False
                self.errors.append("invalid-codepoint")

    def charsUntil(self, characters, opposite=False):
        """ Returns a string of characters from the stream up to but not
        including any character in 'characters' or EOF. 'characters' must be
        a container that supports the 'in' method and iteration over its
        characters.
        """

        # Use a cache of regexps to find the required characters
        try:
            chars = charsUntilRegEx[(characters, opposite)]
        except KeyError:
            if __debug__:
                for c in characters:
                    assert(ord(c) < 128)
            regex = "".join(["\\x%02x" % ord(c) for c in characters])
            if not opposite:
                regex = "^%s" % regex
            chars = charsUntilRegEx[(characters, opposite)] = re.compile("[%s]+" % regex)

        rv = []

        while True:
            # Find the longest matching prefix
            m = chars.match(self.chunk, self.chunkOffset)
            if m is None:
                # If nothing matched, and it wasn't because we ran out of chunk,
                # then stop
                if self.chunkOffset != self.chunkSize:
                    break
            else:
                end = m.end()
                # If not the whole chunk matched, return everything
                # up to the part that didn't match
                if end != self.chunkSize:
                    rv.append(self.chunk[self.chunkOffset:end])
                    self.chunkOffset = end
                    break
            # If the whole remainder of the chunk matched,
            # use it all and read the next chunk
            rv.append(self.chunk[self.chunkOffset:])
            if not self.readChunk():
                # Reached EOF
                break

        r = "".join(rv)
        return r

    def unget(self, char):
        # Only one character is allowed to be ungotten at once - it must
        # be consumed again before any further call to unget
        if char is not None:
            if self.chunkOffset == 0:
                # unget is called quite rarely, so it's a good idea to do
                # more work here if it saves a bit of work in the frequently
                # called char and charsUntil.
                # So, just prepend the ungotten character onto the current
                # chunk:
                self.chunk = char + self.chunk
                self.chunkSize += 1
            else:
                self.chunkOffset -= 1
                assert self.chunk[self.chunkOffset] == char
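
# Editor's note: a small usage sketch of the chunked reader above --
# char() consumes one character, charsUntil() consumes a run:
#
#   >>> s = HTMLUnicodeInputStream("ab cd")
#   >>> s.char()
#   'a'
#   >>> s.charsUntil(" ")    # everything up to, but not including, the space
#   'b'
#   >>> s.char()
#   ' '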


class HTMLBinaryInputStream(HTMLUnicodeInputStream):
    """Provides a unicode stream of characters to the HTMLTokenizer.

    This class takes care of character encoding and removing or replacing
    incorrect byte-sequences and also provides column and line tracking.

    """

    def __init__(self, source, encoding=None, parseMeta=True, chardet=True):
        """Initialises the HTMLInputStream.

        HTMLInputStream(source, [encoding]) -> Normalized stream from source
        for use by html5lib.

        source can be either a file-object, local filename or a string.

        The optional encoding parameter must be a string that indicates
        the encoding. If specified, that encoding will be used,
        regardless of any BOM or later declaration (such as in a meta
        element)

        parseMeta - Look for a <meta> element containing encoding information

        """
        # Raw Stream - for unicode objects this will encode to utf-8 and set
        # self.charEncoding as appropriate
        self.rawStream = self.openStream(source)

        HTMLUnicodeInputStream.__init__(self, self.rawStream)

        self.charEncoding = (codecName(encoding), "certain")

        # Encoding Information
        # Number of bytes to use when looking for a meta element with
        # encoding information
        self.numBytesMeta = 512
        # Number of bytes to use when using detecting encoding using chardet
        self.numBytesChardet = 100
        # Encoding to use if no other information can be found
        self.defaultEncoding = "windows-1252"

        # Detect encoding iff no explicit "transport level" encoding is supplied
        if (self.charEncoding[0] is None):
            self.charEncoding = self.detectEncoding(parseMeta, chardet)

        # Call superclass
        self.reset()

    def reset(self):
        self.dataStream = codecs.getreader(self.charEncoding[0])(self.rawStream,
                                                                 'replace')
        HTMLUnicodeInputStream.reset(self)

    def openStream(self, source):
        """Produces a file object from source.

        source can be either a file object, local filename or a string.

        """
        # Already a file object
        if hasattr(source, 'read'):
            stream = source
        else:
            stream = BytesIO(source)

        try:
            stream.seek(stream.tell())
        except:
            stream = BufferedStream(stream)

        return stream

    def detectEncoding(self, parseMeta=True, chardet=True):
        # First look for a BOM
        # This will also read past the BOM if present
        encoding = self.detectBOM()
        confidence = "certain"
        # If there is no BOM need to look for meta elements with encoding
        # information
        if encoding is None and parseMeta:
            encoding = self.detectEncodingMeta()
            confidence = "tentative"
        # Guess with chardet, if available
        if encoding is None and chardet:
            confidence = "tentative"
            try:
                try:
                    from charade.universaldetector import UniversalDetector
                except ImportError:
                    from chardet.universaldetector import UniversalDetector
                buffers = []
                detector = UniversalDetector()
                while not detector.done:
                    buffer = self.rawStream.read(self.numBytesChardet)
                    assert isinstance(buffer, bytes)
                    if not buffer:
                        break
                    buffers.append(buffer)
                    detector.feed(buffer)
                detector.close()
                encoding = detector.result['encoding']
                self.rawStream.seek(0)
            except ImportError:
                pass
        # If all else fails use the default encoding
        if encoding is None:
            confidence = "tentative"
            encoding = self.defaultEncoding

        # Substitute for equivalent encodings:
        encodingSub = {"iso-8859-1": "windows-1252"}

        if encoding.lower() in encodingSub:
            encoding = encodingSub[encoding.lower()]

        return encoding, confidence

    def changeEncoding(self, newEncoding):
        assert self.charEncoding[1] != "certain"
        newEncoding = codecName(newEncoding)
        if newEncoding in ("utf-16", "utf-16-be", "utf-16-le"):
            newEncoding = "utf-8"
        if newEncoding is None:
            return
        elif newEncoding == self.charEncoding[0]:
            self.charEncoding = (self.charEncoding[0], "certain")
        else:
            self.rawStream.seek(0)
            self.reset()
            self.charEncoding = (newEncoding, "certain")
            raise ReparseException("Encoding changed from %s to %s" % (self.charEncoding[0], newEncoding))

    def detectBOM(self):
        """Attempts to detect a BOM at the start of the stream. If
        an encoding can be determined from the BOM return the name of the
        encoding otherwise return None"""
        bomDict = {
            codecs.BOM_UTF8: 'utf-8',
            codecs.BOM_UTF16_LE: 'utf-16-le', codecs.BOM_UTF16_BE: 'utf-16-be',
            codecs.BOM_UTF32_LE: 'utf-32-le', codecs.BOM_UTF32_BE: 'utf-32-be'
        }

        # Go to beginning of file and read in 4 bytes
        string = self.rawStream.read(4)
        assert isinstance(string, bytes)

        # Try detecting the BOM using bytes from the string
        encoding = bomDict.get(string[:3])  # UTF-8
        seek = 3
        if not encoding:
            # Need to detect UTF-32 before UTF-16
            encoding = bomDict.get(string)  # UTF-32
            seek = 4
            if not encoding:
                encoding = bomDict.get(string[:2])  # UTF-16
                seek = 2

        # Set the read position past the BOM if one was found, otherwise
        # set it to the start of the stream
        self.rawStream.seek(encoding and seek or 0)

        return encoding

    def detectEncodingMeta(self):
        """Report the encoding declared by the meta element
        """
        buffer = self.rawStream.read(self.numBytesMeta)
        assert isinstance(buffer, bytes)
        parser = EncodingParser(buffer)
        self.rawStream.seek(0)
        encoding = parser.getEncoding()

        if encoding in ("utf-16", "utf-16-be", "utf-16-le"):
            encoding = "utf-8"

        return encoding
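
# Editor's note: a sketch of the BOM path through detectEncoding() above;
# a UTF-8 BOM yields a "certain" encoding and is skipped on read:
#
#   >>> stream = HTMLBinaryInputStream(codecs.BOM_UTF8 + b"<p>hi</p>")
#   >>> stream.charEncoding
#   ('utf-8', 'certain')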


class EncodingBytes(bytes):
    """String-like object with an associated position and various extra methods
    If the position is ever greater than the string length then an exception is
    raised"""
    def __new__(self, value):
        assert isinstance(value, bytes)
        return bytes.__new__(self, value.lower())

    def __init__(self, value):
        self._position = -1

    def __iter__(self):
        return self

    def __next__(self):
        p = self._position = self._position + 1
        if p >= len(self):
            raise StopIteration
        elif p < 0:
            raise TypeError
        return self[p:p + 1]

    def next(self):
        # Py2 compat
        return self.__next__()

    def previous(self):
        p = self._position
        if p >= len(self):
            raise StopIteration
        elif p < 0:
            raise TypeError
        self._position = p = p - 1
        return self[p:p + 1]

    def setPosition(self, position):
        if self._position >= len(self):
            raise StopIteration
        self._position = position

    def getPosition(self):
        if self._position >= len(self):
            raise StopIteration
        if self._position >= 0:
            return self._position
        else:
            return None

    position = property(getPosition, setPosition)

    def getCurrentByte(self):
        return self[self.position:self.position + 1]

    currentByte = property(getCurrentByte)

    def skip(self, chars=spaceCharactersBytes):
        """Skip past a list of characters"""
        p = self.position  # use property for the error-checking
        while p < len(self):
            c = self[p:p + 1]
            if c not in chars:
                self._position = p
                return c
            p += 1
        self._position = p
        return None

    def skipUntil(self, chars):
        p = self.position
        while p < len(self):
            c = self[p:p + 1]
            if c in chars:
                self._position = p
                return c
            p += 1
        self._position = p
        return None

    def matchBytes(self, bytes):
        """Look for a sequence of bytes at the start of a string. If the bytes
        are found return True and advance the position to the byte after the
        match. Otherwise return False and leave the position alone"""
        p = self.position
        data = self[p:p + len(bytes)]
        rv = data.startswith(bytes)
        if rv:
            self.position += len(bytes)
        return rv

    def jumpTo(self, bytes):
        """Look for the next sequence of bytes matching a given sequence. If
        a match is found advance the position to the last byte of the match"""
        newPosition = self[self.position:].find(bytes)
        if newPosition > -1:
            # XXX: This is ugly, but I can't see a nicer way to fix this.
            if self._position == -1:
                self._position = 0
            self._position += (newPosition + len(bytes) - 1)
            return True
        else:
            raise StopIteration
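
# Editor's note: EncodingBytes lowercases its input and tracks a read
# position, which is what the pre-parser below leans on. For example:
#
#   >>> b = EncodingBytes(b"<META charset=UTF-8>")   # stored lowercased
#   >>> next(b)
#   b'<'
#   >>> b.jumpTo(b"charset")    # advances to the last byte of the match
#   True
#   >>> b.currentByte
#   b't'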


class EncodingParser(object):
    """Mini parser for detecting character encoding from meta elements"""

    def __init__(self, data):
        """string - the data to work on for encoding detection"""
        self.data = EncodingBytes(data)
        self.encoding = None

    def getEncoding(self):
        methodDispatch = (
            (b"<!--", self.handleComment),
            (b"<meta", self.handleMeta),
            (b"</", self.handlePossibleEndTag),
            (b"<!", self.handleOther),
            (b"<?", self.handleOther),
            (b"<", self.handlePossibleStartTag))
        for byte in self.data:
            keepParsing = True
            for key, method in methodDispatch:
                if self.data.matchBytes(key):
                    try:
                        keepParsing = method()
                        break
                    except StopIteration:
                        keepParsing = False
                        break
            if not keepParsing:
                break

        return self.encoding

    def handleComment(self):
        """Skip over comments"""
        return self.data.jumpTo(b"-->")

    def handleMeta(self):
        if self.data.currentByte not in spaceCharactersBytes:
            # <meta is not followed by a space, so just keep going
            return True
        # We have a valid meta element; search for attributes
        hasPragma = False
        pendingEncoding = None
        while True:
            # Try to find the next attribute after the current position
            attr = self.getAttribute()
            if attr is None:
                return True
            else:
                if attr[0] == b"http-equiv":
                    hasPragma = attr[1] == b"content-type"
                    if hasPragma and pendingEncoding is not None:
                        self.encoding = pendingEncoding
                        return False
                elif attr[0] == b"charset":
                    tentativeEncoding = attr[1]
                    codec = codecName(tentativeEncoding)
                    if codec is not None:
                        self.encoding = codec
                        return False
                elif attr[0] == b"content":
                    contentParser = ContentAttrParser(EncodingBytes(attr[1]))
                    tentativeEncoding = contentParser.parse()
                    if tentativeEncoding is not None:
                        codec = codecName(tentativeEncoding)
                        if codec is not None:
                            if hasPragma:
                                self.encoding = codec
                                return False
                            else:
                                pendingEncoding = codec

    def handlePossibleStartTag(self):
        return self.handlePossibleTag(False)

    def handlePossibleEndTag(self):
        next(self.data)
        return self.handlePossibleTag(True)

    def handlePossibleTag(self, endTag):
        data = self.data
        if data.currentByte not in asciiLettersBytes:
            # If the next byte is not an ascii letter either ignore this
            # fragment (possible start tag case) or treat it according to
            # handleOther
            if endTag:
                data.previous()
                self.handleOther()
            return True

        c = data.skipUntil(spacesAngleBrackets)
        if c == b"<":
            # return to the first step in the overall "two step" algorithm
            # reprocessing the < byte
            data.previous()
        else:
            # Read all attributes
            attr = self.getAttribute()
            while attr is not None:
                attr = self.getAttribute()
        return True

    def handleOther(self):
        return self.data.jumpTo(b">")

    def getAttribute(self):
        """Return a name,value pair for the next attribute in the stream,
        if one is found, or None"""
        data = self.data
        # Step 1 (skip chars)
        c = data.skip(spaceCharactersBytes | frozenset([b"/"]))
        assert c is None or len(c) == 1
        # Step 2
        if c in (b">", None):
            return None
        # Step 3
        attrName = []
        attrValue = []
        # Step 4 attribute name
        while True:
            if c == b"=" and attrName:
                break
            elif c in spaceCharactersBytes:
                # Step 6!
                c = data.skip()
                break
            elif c in (b"/", b">"):
                return b"".join(attrName), b""
            elif c in asciiUppercaseBytes:
                attrName.append(c.lower())
            elif c is None:
                return None
            else:
                attrName.append(c)
            # Step 5
            c = next(data)
        # Step 7
        if c != b"=":
            data.previous()
            return b"".join(attrName), b""
        # Step 8
        next(data)
        # Step 9
        c = data.skip()
        # Step 10
        if c in (b"'", b'"'):
            # 10.1
            quoteChar = c
            while True:
                # 10.2
                c = next(data)
                # 10.3
                if c == quoteChar:
                    next(data)
                    return b"".join(attrName), b"".join(attrValue)
                # 10.4
                elif c in asciiUppercaseBytes:
                    attrValue.append(c.lower())
                # 10.5
                else:
                    attrValue.append(c)
        elif c == b">":
            return b"".join(attrName), b""
        elif c in asciiUppercaseBytes:
            attrValue.append(c.lower())
        elif c is None:
            return None
        else:
            attrValue.append(c)
        # Step 11
        while True:
            c = next(data)
            if c in spacesAngleBrackets:
                return b"".join(attrName), b"".join(attrValue)
            elif c in asciiUppercaseBytes:
                attrValue.append(c.lower())
            elif c is None:
                return None
            else:
                attrValue.append(c)
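
# Editor's note: end-to-end, the mini parser above turns raw head bytes into
# a codec name (or None). A minimal sketch:
#
#   >>> EncodingParser(b'<meta charset="utf-8">').getEncoding()
#   'utf-8'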


class ContentAttrParser(object):
    def __init__(self, data):
        assert isinstance(data, bytes)
        self.data = data

    def parse(self):
        try:
            # Check if the attr name is charset
            # otherwise return
            self.data.jumpTo(b"charset")
            self.data.position += 1
            self.data.skip()
            if not self.data.currentByte == b"=":
                # If there is no = sign keep looking for attrs
                return None
            self.data.position += 1
            self.data.skip()
            # Look for an encoding between matching quote marks
            if self.data.currentByte in (b'"', b"'"):
                quoteMark = self.data.currentByte
                self.data.position += 1
                oldPosition = self.data.position
                if self.data.jumpTo(quoteMark):
                    return self.data[oldPosition:self.data.position]
                else:
                    return None
            else:
                # Unquoted value
                oldPosition = self.data.position
                try:
                    self.data.skipUntil(spaceCharactersBytes)
                    return self.data[oldPosition:self.data.position]
                except StopIteration:
                    # Return the whole remaining value
                    return self.data[oldPosition:]
        except StopIteration:
            return None
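
# Editor's note: a sketch of the content-attribute path; the value comes
# back lowercased because it is wrapped in EncodingBytes:
#
#   >>> ContentAttrParser(EncodingBytes(b"text/html; charset=ISO-8859-1")).parse()
#   b'iso-8859-1'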


def codecName(encoding):
    """Return the python codec name corresponding to an encoding or None if the
    string doesn't correspond to a valid encoding."""
    if isinstance(encoding, bytes):
        try:
            encoding = encoding.decode("ascii")
        except UnicodeDecodeError:
            return None
    if encoding:
        canonicalName = ascii_punctuation_re.sub("", encoding).lower()
        return encodings.get(canonicalName, None)
    else:
        return None
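
# Editor's note: codecName() normalizes charset labels by stripping ASCII
# punctuation and lowercasing before the lookup, e.g.:
#
#   >>> codecName("UTF-8")
#   'utf-8'
#   >>> codecName("no-such-charset") is None
#   True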

@@ -0,0 +1,271 @@
from __future__ import absolute_import, division, unicode_literals

import re
from xml.sax.saxutils import escape, unescape

from .tokenizer import HTMLTokenizer
from .constants import tokenTypes


class HTMLSanitizerMixin(object):
    """ sanitization of XHTML+MathML+SVG and of inline style attributes."""

    acceptable_elements = ['a', 'abbr', 'acronym', 'address', 'area',
        'article', 'aside', 'audio', 'b', 'big', 'blockquote', 'br', 'button',
        'canvas', 'caption', 'center', 'cite', 'code', 'col', 'colgroup',
        'command', 'datagrid', 'datalist', 'dd', 'del', 'details', 'dfn',
        'dialog', 'dir', 'div', 'dl', 'dt', 'em', 'event-source', 'fieldset',
        'figcaption', 'figure', 'footer', 'font', 'form', 'header', 'h1',
        'h2', 'h3', 'h4', 'h5', 'h6', 'hr', 'i', 'img', 'input', 'ins',
        'keygen', 'kbd', 'label', 'legend', 'li', 'm', 'map', 'menu', 'meter',
        'multicol', 'nav', 'nextid', 'ol', 'output', 'optgroup', 'option',
        'p', 'pre', 'progress', 'q', 's', 'samp', 'section', 'select',
        'small', 'sound', 'source', 'spacer', 'span', 'strike', 'strong',
        'sub', 'sup', 'table', 'tbody', 'td', 'textarea', 'time', 'tfoot',
        'th', 'thead', 'tr', 'tt', 'u', 'ul', 'var', 'video']

    mathml_elements = ['maction', 'math', 'merror', 'mfrac', 'mi',
        'mmultiscripts', 'mn', 'mo', 'mover', 'mpadded', 'mphantom',
        'mprescripts', 'mroot', 'mrow', 'mspace', 'msqrt', 'mstyle', 'msub',
        'msubsup', 'msup', 'mtable', 'mtd', 'mtext', 'mtr', 'munder',
        'munderover', 'none']

    svg_elements = ['a', 'animate', 'animateColor', 'animateMotion',
        'animateTransform', 'clipPath', 'circle', 'defs', 'desc', 'ellipse',
        'font-face', 'font-face-name', 'font-face-src', 'g', 'glyph', 'hkern',
        'linearGradient', 'line', 'marker', 'metadata', 'missing-glyph',
        'mpath', 'path', 'polygon', 'polyline', 'radialGradient', 'rect',
        'set', 'stop', 'svg', 'switch', 'text', 'title', 'tspan', 'use']

    acceptable_attributes = ['abbr', 'accept', 'accept-charset', 'accesskey',
        'action', 'align', 'alt', 'autocomplete', 'autofocus', 'axis',
        'background', 'balance', 'bgcolor', 'bgproperties', 'border',
        'bordercolor', 'bordercolordark', 'bordercolorlight', 'bottompadding',
        'cellpadding', 'cellspacing', 'ch', 'challenge', 'char', 'charoff',
        'choff', 'charset', 'checked', 'cite', 'class', 'clear', 'color',
        'cols', 'colspan', 'compact', 'contenteditable', 'controls', 'coords',
        'data', 'datafld', 'datapagesize', 'datasrc', 'datetime', 'default',
        'delay', 'dir', 'disabled', 'draggable', 'dynsrc', 'enctype', 'end',
        'face', 'for', 'form', 'frame', 'galleryimg', 'gutter', 'headers',
        'height', 'hidefocus', 'hidden', 'high', 'href', 'hreflang', 'hspace',
        'icon', 'id', 'inputmode', 'ismap', 'keytype', 'label', 'leftspacing',
        'lang', 'list', 'longdesc', 'loop', 'loopcount', 'loopend',
        'loopstart', 'low', 'lowsrc', 'max', 'maxlength', 'media', 'method',
        'min', 'multiple', 'name', 'nohref', 'noshade', 'nowrap', 'open',
        'optimum', 'pattern', 'ping', 'point-size', 'poster', 'pqg', 'preload',
        'prompt', 'radiogroup', 'readonly', 'rel', 'repeat-max', 'repeat-min',
        'replace', 'required', 'rev', 'rightspacing', 'rows', 'rowspan',
        'rules', 'scope', 'selected', 'shape', 'size', 'span', 'src', 'start',
        'step', 'style', 'summary', 'suppress', 'tabindex', 'target',
        'template', 'title', 'toppadding', 'type', 'unselectable', 'usemap',
        'urn', 'valign', 'value', 'variable', 'volume', 'vspace', 'vrml',
        'width', 'wrap', 'xml:lang']

    mathml_attributes = ['actiontype', 'align', 'columnalign', 'columnalign',
        'columnalign', 'columnlines', 'columnspacing', 'columnspan', 'depth',
        'display', 'displaystyle', 'equalcolumns', 'equalrows', 'fence',
        'fontstyle', 'fontweight', 'frame', 'height', 'linethickness', 'lspace',
        'mathbackground', 'mathcolor', 'mathvariant', 'mathvariant', 'maxsize',
        'minsize', 'other', 'rowalign', 'rowalign', 'rowalign', 'rowlines',
        'rowspacing', 'rowspan', 'rspace', 'scriptlevel', 'selection',
        'separator', 'stretchy', 'width', 'width', 'xlink:href', 'xlink:show',
        'xlink:type', 'xmlns', 'xmlns:xlink']

    svg_attributes = ['accent-height', 'accumulate', 'additive', 'alphabetic',
        'arabic-form', 'ascent', 'attributeName', 'attributeType',
        'baseProfile', 'bbox', 'begin', 'by', 'calcMode', 'cap-height',
        'class', 'clip-path', 'color', 'color-rendering', 'content', 'cx',
        'cy', 'd', 'dx', 'dy', 'descent', 'display', 'dur', 'end', 'fill',
        'fill-opacity', 'fill-rule', 'font-family', 'font-size',
        'font-stretch', 'font-style', 'font-variant', 'font-weight', 'from',
        'fx', 'fy', 'g1', 'g2', 'glyph-name', 'gradientUnits', 'hanging',
        'height', 'horiz-adv-x', 'horiz-origin-x', 'id', 'ideographic', 'k',
        'keyPoints', 'keySplines', 'keyTimes', 'lang', 'marker-end',
        'marker-mid', 'marker-start', 'markerHeight', 'markerUnits',
        'markerWidth', 'mathematical', 'max', 'min', 'name', 'offset',
        'opacity', 'orient', 'origin', 'overline-position',
        'overline-thickness', 'panose-1', 'path', 'pathLength', 'points',
        'preserveAspectRatio', 'r', 'refX', 'refY', 'repeatCount',
        'repeatDur', 'requiredExtensions', 'requiredFeatures', 'restart',
        'rotate', 'rx', 'ry', 'slope', 'stemh', 'stemv', 'stop-color',
        'stop-opacity', 'strikethrough-position', 'strikethrough-thickness',
        'stroke', 'stroke-dasharray', 'stroke-dashoffset', 'stroke-linecap',
        'stroke-linejoin', 'stroke-miterlimit', 'stroke-opacity',
        'stroke-width', 'systemLanguage', 'target', 'text-anchor', 'to',
        'transform', 'type', 'u1', 'u2', 'underline-position',
        'underline-thickness', 'unicode', 'unicode-range', 'units-per-em',
        'values', 'version', 'viewBox', 'visibility', 'width', 'widths', 'x',
        'x-height', 'x1', 'x2', 'xlink:actuate', 'xlink:arcrole',
        'xlink:href', 'xlink:role', 'xlink:show', 'xlink:title', 'xlink:type',
        'xml:base', 'xml:lang', 'xml:space', 'xmlns', 'xmlns:xlink', 'y',
        'y1', 'y2', 'zoomAndPan']

    attr_val_is_uri = ['href', 'src', 'cite', 'action', 'longdesc', 'poster',
        'xlink:href', 'xml:base']

    svg_attr_val_allows_ref = ['clip-path', 'color-profile', 'cursor', 'fill',
        'filter', 'marker', 'marker-start', 'marker-mid', 'marker-end',
        'mask', 'stroke']

    svg_allow_local_href = ['altGlyph', 'animate', 'animateColor',
        'animateMotion', 'animateTransform', 'cursor', 'feImage', 'filter',
        'linearGradient', 'pattern', 'radialGradient', 'textpath', 'tref',
        'set', 'use']

    acceptable_css_properties = ['azimuth', 'background-color',
        'border-bottom-color', 'border-collapse', 'border-color',
        'border-left-color', 'border-right-color', 'border-top-color', 'clear',
        'color', 'cursor', 'direction', 'display', 'elevation', 'float', 'font',
        'font-family', 'font-size', 'font-style', 'font-variant', 'font-weight',
        'height', 'letter-spacing', 'line-height', 'overflow', 'pause',
        'pause-after', 'pause-before', 'pitch', 'pitch-range', 'richness',
        'speak', 'speak-header', 'speak-numeral', 'speak-punctuation',
        'speech-rate', 'stress', 'text-align', 'text-decoration', 'text-indent',
        'unicode-bidi', 'vertical-align', 'voice-family', 'volume',
        'white-space', 'width']

    acceptable_css_keywords = ['auto', 'aqua', 'black', 'block', 'blue',
        'bold', 'both', 'bottom', 'brown', 'center', 'collapse', 'dashed',
        'dotted', 'fuchsia', 'gray', 'green', '!important', 'italic', 'left',
        'lime', 'maroon', 'medium', 'none', 'navy', 'normal', 'nowrap', 'olive',
        'pointer', 'purple', 'red', 'right', 'solid', 'silver', 'teal', 'top',
        'transparent', 'underline', 'white', 'yellow']

    acceptable_svg_properties = ['fill', 'fill-opacity', 'fill-rule',
        'stroke', 'stroke-width', 'stroke-linecap', 'stroke-linejoin',
        'stroke-opacity']

    acceptable_protocols = ['ed2k', 'ftp', 'http', 'https', 'irc',
        'mailto', 'news', 'gopher', 'nntp', 'telnet', 'webcal',
        'xmpp', 'callto', 'feed', 'urn', 'aim', 'rsync', 'tag',
        'ssh', 'sftp', 'rtsp', 'afs']

    # subclasses may define their own versions of these constants
    allowed_elements = acceptable_elements + mathml_elements + svg_elements
    allowed_attributes = acceptable_attributes + mathml_attributes + svg_attributes
    allowed_css_properties = acceptable_css_properties
    allowed_css_keywords = acceptable_css_keywords
    allowed_svg_properties = acceptable_svg_properties
    allowed_protocols = acceptable_protocols

    # Sanitize the +html+, escaping all elements not in ALLOWED_ELEMENTS, and
    # stripping out all attributes not in ALLOWED_ATTRIBUTES. Style attributes
    # are parsed, and a restricted set, specified by ALLOWED_CSS_PROPERTIES
    # and ALLOWED_CSS_KEYWORDS, are allowed through. Attributes in
    # ATTR_VAL_IS_URI are scanned, and only URI schemes specified in
    # ALLOWED_PROTOCOLS are allowed.
    #
    #   sanitize_html('<script> do_nasty_stuff() </script>')
    #    => &lt;script> do_nasty_stuff() &lt;/script>
    #   sanitize_html('<a href="javascript: sucker();">Click here for $100</a>')
    #    => <a>Click here for $100</a>
    def sanitize_token(self, token):

        # accommodate filters which use token_type differently
        token_type = token["type"]
        if token_type in list(tokenTypes.keys()):
            token_type = tokenTypes[token_type]

        if token_type in (tokenTypes["StartTag"], tokenTypes["EndTag"],
                          tokenTypes["EmptyTag"]):
            if token["name"] in self.allowed_elements:
                return self.allowed_token(token, token_type)
            else:
                return self.disallowed_token(token, token_type)
        elif token_type == tokenTypes["Comment"]:
            pass
        else:
            return token

    def allowed_token(self, token, token_type):
        if "data" in token:
            attrs = dict([(name, val) for name, val in
                          token["data"][::-1]
                          if name in self.allowed_attributes])
            for attr in self.attr_val_is_uri:
                if attr not in attrs:
                    continue
                val_unescaped = re.sub("[`\000-\040\177-\240\s]+", '',
                                       unescape(attrs[attr])).lower()
                # remove replacement characters from unescaped characters
                val_unescaped = val_unescaped.replace("\ufffd", "")
                if (re.match("^[a-z0-9][-+.a-z0-9]*:", val_unescaped) and
                    (val_unescaped.split(':')[0] not in
                     self.allowed_protocols)):
                    del attrs[attr]
            for attr in self.svg_attr_val_allows_ref:
                if attr in attrs:
                    attrs[attr] = re.sub(r'url\s*\(\s*[^#\s][^)]+?\)',
                                         ' ',
                                         unescape(attrs[attr]))
            if (token["name"] in self.svg_allow_local_href and
                'xlink:href' in attrs and re.search('^\s*[^#\s].*',
                                                    attrs['xlink:href'])):
                del attrs['xlink:href']
            if 'style' in attrs:
                attrs['style'] = self.sanitize_css(attrs['style'])
            token["data"] = [[name, val] for name, val in list(attrs.items())]
        return token

    def disallowed_token(self, token, token_type):
        if token_type == tokenTypes["EndTag"]:
            token["data"] = "</%s>" % token["name"]
        elif token["data"]:
            attrs = ''.join([' %s="%s"' % (k, escape(v)) for k, v in token["data"]])
            token["data"] = "<%s%s>" % (token["name"], attrs)
        else:
            token["data"] = "<%s>" % token["name"]
        if token.get("selfClosing"):
            token["data"] = token["data"][:-1] + "/>"

        if token["type"] in list(tokenTypes.keys()):
            token["type"] = "Characters"
        else:
            token["type"] = tokenTypes["Characters"]

        del token["name"]
        return token

    def sanitize_css(self, style):
        # disallow urls
        style = re.compile('url\s*\(\s*[^\s)]+?\s*\)\s*').sub(' ', style)

        # gauntlet
        if not re.match("""^([:,;#%.\sa-zA-Z0-9!]|\w-\w|'[\s\w]+'|"[\s\w]+"|\([\d,\s]+\))*$""", style):
            return ''
        if not re.match("^\s*([-\w]+\s*:[^:;]*(;\s*|$))*$", style):
            return ''

        clean = []
        for prop, value in re.findall("([-\w]+)\s*:\s*([^:;]*)", style):
            if not value:
                continue
            if prop.lower() in self.allowed_css_properties:
                clean.append(prop + ': ' + value + ';')
            elif prop.split('-')[0].lower() in ['background', 'border', 'margin',
                                                'padding']:
                for keyword in value.split():
                    if not keyword in self.acceptable_css_keywords and \
                            not re.match("^(#[0-9a-f]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)$", keyword):
                        break
                else:
                    clean.append(prop + ': ' + value + ';')
            elif prop.lower() in self.allowed_svg_properties:
                clean.append(prop + ': ' + value + ';')

        return ' '.join(clean)
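
    # Editor's note: a quick sketch of sanitize_css() in isolation (the mixin
    # holds no state, so it can be instantiated directly):
    #
    #   >>> HTMLSanitizerMixin().sanitize_css('color: red; background: url(x.png)')
    #   'color: red;'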


class HTMLSanitizer(HTMLTokenizer, HTMLSanitizerMixin):
    def __init__(self, stream, encoding=None, parseMeta=True, useChardet=True,
                 lowercaseElementName=False, lowercaseAttrName=False, parser=None):
        # Change case matching defaults as we only output lowercase html anyway
        # This solution doesn't seem ideal...
        HTMLTokenizer.__init__(self, stream, encoding, parseMeta, useChardet,
                               lowercaseElementName, lowercaseAttrName, parser=parser)

    def __iter__(self):
        for token in HTMLTokenizer.__iter__(self):
            token = self.sanitize_token(token)
            if token:
                yield token
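
# Editor's note: a usage sketch; tokens stream through sanitize_token(), so
# unknown attributes vanish and disallowed markup is re-emitted as text:
#
#   >>> tokens = list(HTMLSanitizer('<b onclick="evil()">hi</b>'))
#   # the <b> StartTag survives, but its onclick attribute is dropped;
#   # a <script> element would instead come back as a Characters token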

@@ -0,0 +1,16 @@
from __future__ import absolute_import, division, unicode_literals

from .. import treewalkers

from .htmlserializer import HTMLSerializer


def serialize(input, tree="etree", format="html", encoding=None,
              **serializer_opts):
    # XXX: Should we cache this?
    walker = treewalkers.getTreeWalker(tree)
    if format == "html":
        s = HTMLSerializer(**serializer_opts)
    else:
        raise ValueError("type must be html")
    return s.render(walker(input), encoding)
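
# Editor's note: a minimal usage sketch, assuming the parent html5lib
# package is importable as vendored by pip:
#
#   >>> from pip._vendor import html5lib
#   >>> doc = html5lib.parse('<p class="x">Hi</p>')
#   >>> html5lib.serialize(doc)   # default options: optional tags omitted,
#   ...                           # attributes minimally quoted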

@@ -0,0 +1,320 @@
from __future__ import absolute_import, division, unicode_literals
from pip._vendor.six import text_type

import gettext
_ = gettext.gettext

try:
    from functools import reduce
except ImportError:
    pass

from ..constants import voidElements, booleanAttributes, spaceCharacters
from ..constants import rcdataElements, entities, xmlEntities
from .. import utils
from xml.sax.saxutils import escape

spaceCharacters = "".join(spaceCharacters)

try:
    from codecs import register_error, xmlcharrefreplace_errors
except ImportError:
    unicode_encode_errors = "strict"
else:
    unicode_encode_errors = "htmlentityreplace"

    encode_entity_map = {}
    is_ucs4 = len("\U0010FFFF") == 1
    for k, v in list(entities.items()):
        # skip multi-character entities
        if ((is_ucs4 and len(v) > 1) or
                (not is_ucs4 and len(v) > 2)):
            continue
        if v != "&":
            if len(v) == 2:
                v = utils.surrogatePairToCodepoint(v)
            else:
                v = ord(v)
            if not v in encode_entity_map or k.islower():
                # prefer &lt; over &LT; and similarly for &amp;, &gt;, etc.
                encode_entity_map[v] = k

    def htmlentityreplace_errors(exc):
        if isinstance(exc, (UnicodeEncodeError, UnicodeTranslateError)):
            res = []
            codepoints = []
            skip = False
            for i, c in enumerate(exc.object[exc.start:exc.end]):
                if skip:
                    skip = False
                    continue
                index = i + exc.start
                if utils.isSurrogatePair(exc.object[index:min([exc.end, index + 2])]):
                    codepoint = utils.surrogatePairToCodepoint(exc.object[index:index + 2])
                    skip = True
                else:
                    codepoint = ord(c)
                codepoints.append(codepoint)
            for cp in codepoints:
                e = encode_entity_map.get(cp)
                if e:
                    res.append("&")
                    res.append(e)
                    if not e.endswith(";"):
                        res.append(";")
                else:
                    res.append("&#x%s;" % (hex(cp)[2:]))
            return ("".join(res), exc.end)
        else:
            return xmlcharrefreplace_errors(exc)

    register_error(unicode_encode_errors, htmlentityreplace_errors)

    del register_error
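
    # Editor's note: once registered, the "htmlentityreplace" error handler
    # works like any codec error mode; a sketch:
    #
    #   >>> "\u00a9 2014".encode("ascii", "htmlentityreplace")
    #   b'&copy; 2014'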


class HTMLSerializer(object):

    # attribute quoting options
    quote_attr_values = False
    quote_char = '"'
    use_best_quote_char = True

    # tag syntax options
    omit_optional_tags = True
    minimize_boolean_attributes = True
    use_trailing_solidus = False
    space_before_trailing_solidus = True

    # escaping options
    escape_lt_in_attrs = False
    escape_rcdata = False
    resolve_entities = True

    # miscellaneous options
    alphabetical_attributes = False
    inject_meta_charset = True
    strip_whitespace = False
    sanitize = False

    options = ("quote_attr_values", "quote_char", "use_best_quote_char",
               "omit_optional_tags", "minimize_boolean_attributes",
               "use_trailing_solidus", "space_before_trailing_solidus",
               "escape_lt_in_attrs", "escape_rcdata", "resolve_entities",
               "alphabetical_attributes", "inject_meta_charset",
               "strip_whitespace", "sanitize")

    def __init__(self, **kwargs):
        """Initialize HTMLSerializer.

        Keyword options (default given first unless specified) include:

        inject_meta_charset=True|False
          Whether to insert a meta element defining the character set of the
          document.
        quote_attr_values=True|False
          Whether to quote attribute values that don't require quoting
          per HTML5 parsing rules.
        quote_char=u'"'|u"'"
          Use given quote character for attribute quoting. Default is to
          use double quote unless attribute value contains a double quote,
          in which case single quotes are used instead.
        escape_lt_in_attrs=False|True
          Whether to escape < in attribute values.
        escape_rcdata=False|True
          Whether to escape characters that need to be escaped within normal
          elements within rcdata elements such as style.
        resolve_entities=True|False
          Whether to resolve named character entities that appear in the
          source tree. The XML predefined entities &lt; &gt; &amp; &quot; &apos;
          are unaffected by this setting.
        strip_whitespace=False|True
          Whether to remove semantically meaningless whitespace. (This
          compresses all whitespace to a single space except within pre.)
        minimize_boolean_attributes=True|False
          Shortens boolean attributes to give just the attribute value,
          for example <input disabled="disabled"> becomes <input disabled>.
        use_trailing_solidus=False|True
          Includes a close-tag slash at the end of the start tag of void
          elements (empty elements whose end tag is forbidden). E.g. <hr/>.
        space_before_trailing_solidus=True|False
          Places a space immediately before the closing slash in a tag
          using a trailing solidus. E.g. <hr />. Requires use_trailing_solidus.
        sanitize=False|True
          Strip all unsafe or unknown constructs from output.
          See `html5lib user documentation`_
        omit_optional_tags=True|False
          Omit start/end tags that are optional.
        alphabetical_attributes=False|True
          Reorder attributes to be in alphabetical order.

        .. _html5lib user documentation: http://code.google.com/p/html5lib/wiki/UserDocumentation
        """
        if 'quote_char' in kwargs:
            self.use_best_quote_char = False
        for attr in self.options:
            setattr(self, attr, kwargs.get(attr, getattr(self, attr)))
        self.errors = []
        self.strict = False
    def encode(self, string):
        assert(isinstance(string, text_type))
        if self.encoding:
            return string.encode(self.encoding, unicode_encode_errors)
        else:
            return string

    def encodeStrict(self, string):
        assert(isinstance(string, text_type))
        if self.encoding:
            return string.encode(self.encoding, "strict")
        else:
            return string

    def serialize(self, treewalker, encoding=None):
        self.encoding = encoding
        in_cdata = False
        self.errors = []

        if encoding and self.inject_meta_charset:
            from ..filters.inject_meta_charset import Filter
            treewalker = Filter(treewalker, encoding)
        # WhitespaceFilter should be used before OptionalTagFilter
        # for maximum efficiency of this latter filter
        if self.strip_whitespace:
            from ..filters.whitespace import Filter
            treewalker = Filter(treewalker)
        if self.sanitize:
            from ..filters.sanitizer import Filter
            treewalker = Filter(treewalker)
        if self.omit_optional_tags:
            from ..filters.optionaltags import Filter
            treewalker = Filter(treewalker)
        # Alphabetical attributes must be last, as other filters
        # could add attributes and alter the order
        if self.alphabetical_attributes:
            from ..filters.alphabeticalattributes import Filter
            treewalker = Filter(treewalker)

        for token in treewalker:
            type = token["type"]
            if type == "Doctype":
                doctype = "<!DOCTYPE %s" % token["name"]

                if token["publicId"]:
                    doctype += ' PUBLIC "%s"' % token["publicId"]
                elif token["systemId"]:
                    doctype += " SYSTEM"
                if token["systemId"]:
                    if token["systemId"].find('"') >= 0:
                        if token["systemId"].find("'") >= 0:
                            self.serializeError(_("System identifier contains both single and double quote characters"))
                        quote_char = "'"
                    else:
                        quote_char = '"'
                    doctype += " %s%s%s" % (quote_char, token["systemId"], quote_char)

                doctype += ">"
                yield self.encodeStrict(doctype)

            elif type in ("Characters", "SpaceCharacters"):
                if type == "SpaceCharacters" or in_cdata:
                    if in_cdata and token["data"].find("</") >= 0:
                        self.serializeError(_("Unexpected </ in CDATA"))
                    yield self.encode(token["data"])
                else:
                    yield self.encode(escape(token["data"]))

            elif type in ("StartTag", "EmptyTag"):
                name = token["name"]
                yield self.encodeStrict("<%s" % name)
                if name in rcdataElements and not self.escape_rcdata:
                    in_cdata = True
                elif in_cdata:
                    self.serializeError(_("Unexpected child element of a CDATA element"))
                for (attr_namespace, attr_name), attr_value in token["data"].items():
                    # TODO: Add namespace support here
                    k = attr_name
                    v = attr_value
                    yield self.encodeStrict(' ')

                    yield self.encodeStrict(k)
                    if not self.minimize_boolean_attributes or \
                        (k not in booleanAttributes.get(name, tuple())
                         and k not in booleanAttributes.get("", tuple())):
                        yield self.encodeStrict("=")
                        if self.quote_attr_values or not v:
                            quote_attr = True
                        else:
                            quote_attr = reduce(lambda x, y: x or (y in v),
                                                spaceCharacters + ">\"'=", False)
                        v = v.replace("&", "&amp;")
                        if self.escape_lt_in_attrs:
                            v = v.replace("<", "&lt;")
                        if quote_attr:
                            quote_char = self.quote_char
                            if self.use_best_quote_char:
                                if "'" in v and '"' not in v:
                                    quote_char = '"'
                                elif '"' in v and "'" not in v:
                                    quote_char = "'"
                            if quote_char == "'":
                                v = v.replace("'", "&#39;")
                            else:
                                v = v.replace('"', "&quot;")
                            yield self.encodeStrict(quote_char)
                            yield self.encode(v)
                            yield self.encodeStrict(quote_char)
                        else:
                            yield self.encode(v)
                if name in voidElements and self.use_trailing_solidus:
                    if self.space_before_trailing_solidus:
                        yield self.encodeStrict(" /")
                    else:
                        yield self.encodeStrict("/")
                yield self.encode(">")

            elif type == "EndTag":
                name = token["name"]
                if name in rcdataElements:
                    in_cdata = False
                elif in_cdata:
                    self.serializeError(_("Unexpected child element of a CDATA element"))
|
||||||
|
yield self.encodeStrict("</%s>" % name)
|
||||||
|
|
||||||
|
elif type == "Comment":
|
||||||
|
data = token["data"]
|
||||||
|
if data.find("--") >= 0:
|
||||||
|
self.serializeError(_("Comment contains --"))
|
||||||
|
yield self.encodeStrict("<!--%s-->" % token["data"])
|
||||||
|
|
||||||
|
elif type == "Entity":
|
||||||
|
name = token["name"]
|
||||||
|
key = name + ";"
|
||||||
|
if not key in entities:
|
||||||
|
self.serializeError(_("Entity %s not recognized" % name))
|
||||||
|
if self.resolve_entities and key not in xmlEntities:
|
||||||
|
data = entities[key]
|
||||||
|
else:
|
||||||
|
data = "&%s;" % name
|
||||||
|
yield self.encodeStrict(data)
|
||||||
|
|
||||||
|
else:
|
||||||
|
self.serializeError(token["data"])
|
||||||
|
|
||||||
|
def render(self, treewalker, encoding=None):
|
||||||
|
if encoding:
|
||||||
|
return b"".join(list(self.serialize(treewalker, encoding)))
|
||||||
|
else:
|
||||||
|
return "".join(list(self.serialize(treewalker)))
|
||||||
|
|
||||||
|
def serializeError(self, data="XXX ERROR MESSAGE NEEDED"):
|
||||||
|
# XXX The idea is to make data mandatory.
|
||||||
|
self.errors.append(data)
|
||||||
|
if self.strict:
|
||||||
|
raise SerializeError
|
||||||
|
|
||||||
|
|
||||||
|
def SerializeError(Exception):
|
||||||
|
"""Error in serialized tree"""
|
||||||
|
pass
|
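For orientation, a hedged usage sketch (not part of the diff): this is how the serializer above is typically driven from a treewalker. It assumes the standalone html5lib package is importable; inside pip the same modules live under pip._vendor.html5lib.

import html5lib
from html5lib import treewalkers
from html5lib.serializer import HTMLSerializer

# Parse into the default etree-backed tree, then re-serialize it.
dom = html5lib.parse("<p CLASS=foo>Hello<br>world")
walker = treewalkers.getTreeWalker("etree")
serializer = HTMLSerializer(omit_optional_tags=False,
                            use_trailing_solidus=True)
print(serializer.render(walker(dom)))
# e.g. <html><head></head><body><p class=foo>Hello<br />world</p></body></html>
# (quote_attr_values defaults to False, so "foo" stays unquoted)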
File diff suppressed because it is too large
@@ -0,0 +1,44 @@
from __future__ import absolute_import, division, unicode_literals

from xml.sax.xmlreader import AttributesNSImpl

from ..constants import adjustForeignAttributes, unadjustForeignAttributes

prefix_mapping = {}
for prefix, localName, namespace in adjustForeignAttributes.values():
    if prefix is not None:
        prefix_mapping[prefix] = namespace


def to_sax(walker, handler):
    """Call SAX-like content handler based on treewalker walker"""
    handler.startDocument()
    for prefix, namespace in prefix_mapping.items():
        handler.startPrefixMapping(prefix, namespace)

    for token in walker:
        type = token["type"]
        if type == "Doctype":
            continue
        elif type in ("StartTag", "EmptyTag"):
            attrs = AttributesNSImpl(token["data"],
                                     unadjustForeignAttributes)
            handler.startElementNS((token["namespace"], token["name"]),
                                   token["name"],
                                   attrs)
            if type == "EmptyTag":
                handler.endElementNS((token["namespace"], token["name"]),
                                     token["name"])
        elif type == "EndTag":
            handler.endElementNS((token["namespace"], token["name"]),
                                 token["name"])
        elif type in ("Characters", "SpaceCharacters"):
            handler.characters(token["data"])
        elif type == "Comment":
            pass
        else:
            assert False, "Unknown token type"

    for prefix, namespace in prefix_mapping.items():
        handler.endPrefixMapping(prefix)
    handler.endDocument()
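A hedged sketch of the adapter above in use (not part of the diff). The import path html5lib.treeadapters.sax is inferred from the relative imports in this file; namespaceHTMLElements=False keeps the tokens free of the XHTML namespace, for which the stdlib XMLGenerator would have no prefix mapping.

from io import StringIO
from xml.sax.saxutils import XMLGenerator

import html5lib
from html5lib import treewalkers
from html5lib.treeadapters import sax

tree = html5lib.parse("<p class=x>Hello</p>", namespaceHTMLElements=False)
walker = treewalkers.getTreeWalker("etree")
out = StringIO()
# Drive the SAX content handler from the treewalker's token stream.
sax.to_sax(walker(tree), XMLGenerator(out))
print(out.getvalue())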
@@ -0,0 +1,76 @@
"""A collection of modules for building different kinds of tree from
HTML documents.

To create a treebuilder for a new type of tree, you need to
implement several things:

1) A set of classes for various types of elements: Document, Doctype,
Comment, Element. These must implement the interface of
_base.treebuilders.Node (although comment nodes have a different
signature for their constructor, see treebuilders.etree.Comment)
Textual content may also be implemented as another node type, or not, as
your tree implementation requires.

2) A treebuilder object (called TreeBuilder by convention) that
inherits from treebuilders._base.TreeBuilder. This has 4 required attributes:
documentClass - the class to use for the bottommost node of a document
elementClass - the class to use for HTML Elements
commentClass - the class to use for comments
doctypeClass - the class to use for doctypes
It also has one required method:
getDocument - Returns the root node of the complete document tree

3) If you wish to run the unit tests, you must also create a
testSerializer method on your treebuilder which accepts a node and
returns a string containing Node and its children serialized according
to the format used in the unittests
"""

from __future__ import absolute_import, division, unicode_literals

from ..utils import default_etree

treeBuilderCache = {}


def getTreeBuilder(treeType, implementation=None, **kwargs):
    """Get a TreeBuilder class for various types of tree with built-in support

    treeType - the name of the tree type required (case-insensitive). Supported
               values are:

               "dom" - A generic builder for DOM implementations, defaulting to
                       an xml.dom.minidom based implementation.
               "etree" - A generic builder for tree implementations exposing an
                         ElementTree-like interface, defaulting to
                         xml.etree.cElementTree if available and
                         xml.etree.ElementTree if not.
               "lxml" - An etree-based builder for lxml.etree, handling
                        limitations of lxml's implementation.

    implementation - (Currently applies to the "etree" and "dom" tree types). A
                     module implementing the tree type e.g.
                     xml.etree.ElementTree or xml.etree.cElementTree."""

    treeType = treeType.lower()
    if treeType not in treeBuilderCache:
        if treeType == "dom":
            from . import dom
            # Come up with a sane default (pref. from the stdlib)
            if implementation is None:
                from xml.dom import minidom
                implementation = minidom
            # NEVER cache here, caching is done in the dom submodule
            return dom.getDomModule(implementation, **kwargs).TreeBuilder
        elif treeType == "lxml":
            from . import etree_lxml
            treeBuilderCache[treeType] = etree_lxml.TreeBuilder
        elif treeType == "etree":
            from . import etree
            if implementation is None:
                implementation = default_etree
            # NEVER cache here, caching is done in the etree submodule
            return etree.getETreeModule(implementation, **kwargs).TreeBuilder
        else:
            raise ValueError("""Unrecognised treebuilder "%s" """ % treeType)
    return treeBuilderCache.get(treeType)
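Hedged usage sketch (not part of the diff): resolving a builder class by name and handing it to the parser explicitly, rather than relying on parse()'s default.

import html5lib
from html5lib import treebuilders

# Names are resolved case-insensitively; "etree" defaults to
# xml.etree.cElementTree when available, per the docstring above.
TreeBuilder = treebuilders.getTreeBuilder("etree")
parser = html5lib.HTMLParser(tree=TreeBuilder)
root = parser.parse("<!DOCTYPE html><p>hi")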
@@ -0,0 +1,377 @@
from __future__ import absolute_import, division, unicode_literals
from pip._vendor.six import text_type

from ..constants import scopingElements, tableInsertModeElements, namespaces

# The scope markers are inserted when entering object elements,
# marquees, table cells, and table captions, and are used to prevent formatting
# from "leaking" into tables, object elements, and marquees.
Marker = None

listElementsMap = {
    None: (frozenset(scopingElements), False),
    "button": (frozenset(scopingElements | set([(namespaces["html"], "button")])), False),
    "list": (frozenset(scopingElements | set([(namespaces["html"], "ol"),
                                              (namespaces["html"], "ul")])), False),
    "table": (frozenset([(namespaces["html"], "html"),
                         (namespaces["html"], "table")]), False),
    "select": (frozenset([(namespaces["html"], "optgroup"),
                          (namespaces["html"], "option")]), True)
}


class Node(object):
    def __init__(self, name):
        """Node representing an item in the tree.
        name - The tag name associated with the node
        parent - The parent of the current node (or None for the document node)
        value - The value of the current node (applies to text nodes and
        comments)
        attributes - a dict holding name, value pairs for attributes of the node
        childNodes - a list of child nodes of the current node. This must
        include all elements but not necessarily other node types
        _flags - A list of miscellaneous flags that can be set on the node
        """
        self.name = name
        self.parent = None
        self.value = None
        self.attributes = {}
        self.childNodes = []
        self._flags = []

    def __str__(self):
        attributesStr = " ".join(["%s=\"%s\"" % (name, value)
                                  for name, value in
                                  self.attributes.items()])
        if attributesStr:
            return "<%s %s>" % (self.name, attributesStr)
        else:
            return "<%s>" % (self.name)

    def __repr__(self):
        return "<%s>" % (self.name)

    def appendChild(self, node):
        """Insert node as a child of the current node
        """
        raise NotImplementedError

    def insertText(self, data, insertBefore=None):
        """Insert data as text in the current node, positioned before the
        start of node insertBefore or to the end of the node's text.
        """
        raise NotImplementedError

    def insertBefore(self, node, refNode):
        """Insert node as a child of the current node, before refNode in the
        list of child nodes. Raises ValueError if refNode is not a child of
        the current node"""
        raise NotImplementedError

    def removeChild(self, node):
        """Remove node from the children of the current node
        """
        raise NotImplementedError

    def reparentChildren(self, newParent):
        """Move all the children of the current node to newParent.
        This is needed so that trees that don't store text as nodes move the
        text in the correct way
        """
        # XXX - should this method be made more general?
        for child in self.childNodes:
            newParent.appendChild(child)
        self.childNodes = []

    def cloneNode(self):
        """Return a shallow copy of the current node i.e. a node with the same
        name and attributes but with no parent or child nodes
        """
        raise NotImplementedError

    def hasContent(self):
        """Return true if the node has children or text, false otherwise
        """
        raise NotImplementedError


class ActiveFormattingElements(list):
    def append(self, node):
        equalCount = 0
        if node != Marker:
            for element in self[::-1]:
                if element == Marker:
                    break
                if self.nodesEqual(element, node):
                    equalCount += 1
                if equalCount == 3:
                    self.remove(element)
                    break
        list.append(self, node)

    def nodesEqual(self, node1, node2):
        if not node1.nameTuple == node2.nameTuple:
            return False

        if not node1.attributes == node2.attributes:
            return False

        return True


class TreeBuilder(object):
    """Base treebuilder implementation
    documentClass - the class to use for the bottommost node of a document
    elementClass - the class to use for HTML Elements
    commentClass - the class to use for comments
    doctypeClass - the class to use for doctypes
    """

    # Document class
    documentClass = None

    # The class to use for creating a node
    elementClass = None

    # The class to use for creating comments
    commentClass = None

    # The class to use for creating doctypes
    doctypeClass = None

    # Fragment class
    fragmentClass = None

    def __init__(self, namespaceHTMLElements):
        if namespaceHTMLElements:
            self.defaultNamespace = "http://www.w3.org/1999/xhtml"
        else:
            self.defaultNamespace = None
        self.reset()

    def reset(self):
        self.openElements = []
        self.activeFormattingElements = ActiveFormattingElements()

        # XXX - rename these to headElement, formElement
        self.headPointer = None
        self.formPointer = None

        self.insertFromTable = False

        self.document = self.documentClass()

    def elementInScope(self, target, variant=None):

        # If we pass a node in we match that. If we pass a string
        # match any node with that name
        exactNode = hasattr(target, "nameTuple")

        listElements, invert = listElementsMap[variant]

        for node in reversed(self.openElements):
            if (node.name == target and not exactNode or
                    node == target and exactNode):
                return True
            elif (invert ^ (node.nameTuple in listElements)):
                return False

        assert False  # We should never reach this point

    def reconstructActiveFormattingElements(self):
        # Within this algorithm the order of steps described in the
        # specification is not quite the same as the order of steps in the
        # code. It should still do the same though.

        # Step 1: stop the algorithm when there's nothing to do.
        if not self.activeFormattingElements:
            return

        # Step 2 and step 3: we start with the last element. So i is -1.
        i = len(self.activeFormattingElements) - 1
        entry = self.activeFormattingElements[i]
        if entry == Marker or entry in self.openElements:
            return

        # Step 6
        while entry != Marker and entry not in self.openElements:
            if i == 0:
                # This will be reset to 0 below
                i = -1
                break
            i -= 1
            # Step 5: let entry be one earlier in the list.
            entry = self.activeFormattingElements[i]

        while True:
            # Step 7
            i += 1

            # Step 8
            entry = self.activeFormattingElements[i]
            clone = entry.cloneNode()  # Mainly to get a new copy of the attributes

            # Step 9
            element = self.insertElement({"type": "StartTag",
                                          "name": clone.name,
                                          "namespace": clone.namespace,
                                          "data": clone.attributes})

            # Step 10
            self.activeFormattingElements[i] = element

            # Step 11
            if element == self.activeFormattingElements[-1]:
                break

    def clearActiveFormattingElements(self):
        entry = self.activeFormattingElements.pop()
        while self.activeFormattingElements and entry != Marker:
            entry = self.activeFormattingElements.pop()

    def elementInActiveFormattingElements(self, name):
        """Check if an element exists between the end of the active
        formatting elements and the last marker. If it does, return it, else
        return false"""

        for item in self.activeFormattingElements[::-1]:
            # Check for Marker first because if it's a Marker it doesn't have a
            # name attribute.
            if item == Marker:
                break
            elif item.name == name:
                return item
        return False

    def insertRoot(self, token):
        element = self.createElement(token)
        self.openElements.append(element)
        self.document.appendChild(element)

    def insertDoctype(self, token):
        name = token["name"]
        publicId = token["publicId"]
        systemId = token["systemId"]

        doctype = self.doctypeClass(name, publicId, systemId)
        self.document.appendChild(doctype)

    def insertComment(self, token, parent=None):
        if parent is None:
            parent = self.openElements[-1]
        parent.appendChild(self.commentClass(token["data"]))

    def createElement(self, token):
        """Create an element but don't insert it anywhere"""
        name = token["name"]
        namespace = token.get("namespace", self.defaultNamespace)
        element = self.elementClass(name, namespace)
        element.attributes = token["data"]
        return element

    def _getInsertFromTable(self):
        return self._insertFromTable

    def _setInsertFromTable(self, value):
        """Switch the function used to insert an element from the
        normal one to the misnested table one and back again"""
        self._insertFromTable = value
        if value:
            self.insertElement = self.insertElementTable
        else:
            self.insertElement = self.insertElementNormal

    insertFromTable = property(_getInsertFromTable, _setInsertFromTable)

    def insertElementNormal(self, token):
        name = token["name"]
        assert isinstance(name, text_type), "Element %s not unicode" % name
        namespace = token.get("namespace", self.defaultNamespace)
        element = self.elementClass(name, namespace)
        element.attributes = token["data"]
        self.openElements[-1].appendChild(element)
        self.openElements.append(element)
        return element

    def insertElementTable(self, token):
        """Create an element and insert it into the tree"""
        element = self.createElement(token)
        if self.openElements[-1].name not in tableInsertModeElements:
            return self.insertElementNormal(token)
        else:
            # We should be in the InTable mode. This means we want to do
            # special magic element rearranging
            parent, insertBefore = self.getTableMisnestedNodePosition()
            if insertBefore is None:
                parent.appendChild(element)
            else:
                parent.insertBefore(element, insertBefore)
            self.openElements.append(element)
        return element

    def insertText(self, data, parent=None):
        """Insert text data."""
        if parent is None:
            parent = self.openElements[-1]

        if (not self.insertFromTable or (self.insertFromTable and
                                         self.openElements[-1].name
                                         not in tableInsertModeElements)):
            parent.insertText(data)
        else:
            # We should be in the InTable mode. This means we want to do
            # special magic element rearranging
            parent, insertBefore = self.getTableMisnestedNodePosition()
            parent.insertText(data, insertBefore)

    def getTableMisnestedNodePosition(self):
        """Get the foster parent element, and sibling to insert before
        (or None) when inserting a misnested table node"""
        # The foster parent element is the one which comes before the most
        # recently opened table element
        # XXX - this is really inelegant
        lastTable = None
        fosterParent = None
        insertBefore = None
        for elm in self.openElements[::-1]:
            if elm.name == "table":
                lastTable = elm
                break
        if lastTable:
            # XXX - we should really check that this parent is actually a
            # node here
            if lastTable.parent:
                fosterParent = lastTable.parent
                insertBefore = lastTable
            else:
                fosterParent = self.openElements[
                    self.openElements.index(lastTable) - 1]
        else:
            fosterParent = self.openElements[0]
        return fosterParent, insertBefore

    def generateImpliedEndTags(self, exclude=None):
        name = self.openElements[-1].name
        # XXX td, th and tr are not actually needed
        if (name in frozenset(("dd", "dt", "li", "option", "optgroup", "p", "rp", "rt"))
                and name != exclude):
            self.openElements.pop()
            # XXX This is not entirely what the specification says. We should
            # investigate it more closely.
            self.generateImpliedEndTags(exclude)

    def getDocument(self):
        "Return the final tree"
        return self.document

    def getFragment(self):
        "Return the final fragment"
        # assert self.innerHTML
        fragment = self.fragmentClass()
        self.openElements[0].reparentChildren(fragment)
        return fragment

    def testSerializer(self, node):
        """Serialize the subtree of node in the format required by unit tests
        node - the node from which to start serializing"""
        raise NotImplementedError
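One design choice above is worth calling out: insertFromTable is a property whose setter rebinds insertElement, so the foster-parenting check is paid once per mode switch instead of once per inserted element. A minimal standalone sketch of that pattern (illustrative only, with hypothetical names; not html5lib code):

class Inserter(object):
    def __init__(self):
        self.fast_path = True  # goes through the property setter below

    def _get_fast_path(self):
        return self._fast_path

    def _set_fast_path(self, value):
        # Rebind self.insert once here, so insert() itself never branches.
        self._fast_path = value
        self.insert = self._insert_normal if value else self._insert_checked

    fast_path = property(_get_fast_path, _set_fast_path)

    def _insert_normal(self, item):
        return "normal: %s" % item

    def _insert_checked(self, item):
        return "checked: %s" % item

inserter = Inserter()
assert inserter.insert("a") == "normal: a"
inserter.fast_path = False
assert inserter.insert("a") == "checked: a"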
@@ -0,0 +1,227 @@
from __future__ import absolute_import, division, unicode_literals


from xml.dom import minidom, Node
import weakref

from . import _base
from .. import constants
from ..constants import namespaces
from ..utils import moduleFactoryFactory


def getDomBuilder(DomImplementation):
    Dom = DomImplementation

    class AttrList(object):
        def __init__(self, element):
            self.element = element

        def __iter__(self):
            return list(self.element.attributes.items()).__iter__()

        def __setitem__(self, name, value):
            self.element.setAttribute(name, value)

        def __len__(self):
            return len(list(self.element.attributes.items()))

        def items(self):
            return [(item[0], item[1]) for item in
                    list(self.element.attributes.items())]

        def keys(self):
            return list(self.element.attributes.keys())

        def __getitem__(self, name):
            return self.element.getAttribute(name)

        def __contains__(self, name):
            if isinstance(name, tuple):
                raise NotImplementedError
            else:
                return self.element.hasAttribute(name)

    class NodeBuilder(_base.Node):
        def __init__(self, element):
            _base.Node.__init__(self, element.nodeName)
            self.element = element

        namespace = property(lambda self: hasattr(self.element, "namespaceURI")
                             and self.element.namespaceURI or None)

        def appendChild(self, node):
            node.parent = self
            self.element.appendChild(node.element)

        def insertText(self, data, insertBefore=None):
            text = self.element.ownerDocument.createTextNode(data)
            if insertBefore:
                self.element.insertBefore(text, insertBefore.element)
            else:
                self.element.appendChild(text)

        def insertBefore(self, node, refNode):
            self.element.insertBefore(node.element, refNode.element)
            node.parent = self

        def removeChild(self, node):
            if node.element.parentNode == self.element:
                self.element.removeChild(node.element)
            node.parent = None

        def reparentChildren(self, newParent):
            while self.element.hasChildNodes():
                child = self.element.firstChild
                self.element.removeChild(child)
                newParent.element.appendChild(child)
            self.childNodes = []

        def getAttributes(self):
            return AttrList(self.element)

        def setAttributes(self, attributes):
            if attributes:
                for name, value in list(attributes.items()):
                    if isinstance(name, tuple):
                        if name[0] is not None:
                            qualifiedName = (name[0] + ":" + name[1])
                        else:
                            qualifiedName = name[1]
                        self.element.setAttributeNS(name[2], qualifiedName,
                                                    value)
                    else:
                        self.element.setAttribute(
                            name, value)
        attributes = property(getAttributes, setAttributes)

        def cloneNode(self):
            return NodeBuilder(self.element.cloneNode(False))

        def hasContent(self):
            return self.element.hasChildNodes()

        def getNameTuple(self):
            if self.namespace is None:
                return namespaces["html"], self.name
            else:
                return self.namespace, self.name

        nameTuple = property(getNameTuple)

    class TreeBuilder(_base.TreeBuilder):
        def documentClass(self):
            self.dom = Dom.getDOMImplementation().createDocument(None, None, None)
            return weakref.proxy(self)

        def insertDoctype(self, token):
            name = token["name"]
            publicId = token["publicId"]
            systemId = token["systemId"]

            domimpl = Dom.getDOMImplementation()
            doctype = domimpl.createDocumentType(name, publicId, systemId)
            self.document.appendChild(NodeBuilder(doctype))
            if Dom == minidom:
                doctype.ownerDocument = self.dom

        def elementClass(self, name, namespace=None):
            if namespace is None and self.defaultNamespace is None:
                node = self.dom.createElement(name)
            else:
                node = self.dom.createElementNS(namespace, name)

            return NodeBuilder(node)

        def commentClass(self, data):
            return NodeBuilder(self.dom.createComment(data))

        def fragmentClass(self):
            return NodeBuilder(self.dom.createDocumentFragment())

        def appendChild(self, node):
            self.dom.appendChild(node.element)

        def testSerializer(self, element):
            return testSerializer(element)

        def getDocument(self):
            return self.dom

        def getFragment(self):
            return _base.TreeBuilder.getFragment(self).element

        def insertText(self, data, parent=None):
            if parent != self:
                _base.TreeBuilder.insertText(self, data, parent)
            else:
                # HACK: allow text nodes as children of the document node
                if hasattr(self.dom, '_child_node_types'):
                    if Node.TEXT_NODE not in self.dom._child_node_types:
                        self.dom._child_node_types = list(self.dom._child_node_types)
                        self.dom._child_node_types.append(Node.TEXT_NODE)
                self.dom.appendChild(self.dom.createTextNode(data))

        implementation = DomImplementation
        name = None

    def testSerializer(element):
        element.normalize()
        rv = []

        def serializeElement(element, indent=0):
            if element.nodeType == Node.DOCUMENT_TYPE_NODE:
                if element.name:
                    if element.publicId or element.systemId:
                        publicId = element.publicId or ""
                        systemId = element.systemId or ""
                        rv.append("""|%s<!DOCTYPE %s "%s" "%s">""" %
                                  (' ' * indent, element.name, publicId, systemId))
                    else:
                        rv.append("|%s<!DOCTYPE %s>" % (' ' * indent, element.name))
                else:
                    rv.append("|%s<!DOCTYPE >" % (' ' * indent,))
            elif element.nodeType == Node.DOCUMENT_NODE:
                rv.append("#document")
            elif element.nodeType == Node.DOCUMENT_FRAGMENT_NODE:
                rv.append("#document-fragment")
            elif element.nodeType == Node.COMMENT_NODE:
                rv.append("|%s<!-- %s -->" % (' ' * indent, element.nodeValue))
            elif element.nodeType == Node.TEXT_NODE:
                rv.append("|%s\"%s\"" % (' ' * indent, element.nodeValue))
            else:
                if (hasattr(element, "namespaceURI") and
                        element.namespaceURI is not None):
                    name = "%s %s" % (constants.prefixes[element.namespaceURI],
                                      element.nodeName)
                else:
                    name = element.nodeName
                rv.append("|%s<%s>" % (' ' * indent, name))
                if element.hasAttributes():
                    attributes = []
                    for i in range(len(element.attributes)):
                        attr = element.attributes.item(i)
                        name = attr.nodeName
                        value = attr.value
                        ns = attr.namespaceURI
                        if ns:
                            name = "%s %s" % (constants.prefixes[ns], attr.localName)
                        else:
                            name = attr.nodeName
                        attributes.append((name, value))

                    for name, value in sorted(attributes):
                        rv.append('|%s%s="%s"' % (' ' * (indent + 2), name, value))
            indent += 2
            for child in element.childNodes:
                serializeElement(child, indent)
        serializeElement(element, 0)

        return "\n".join(rv)

    return locals()


# The actual means to get a module!
getDomModule = moduleFactoryFactory(getDomBuilder)
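Hedged end-to-end sketch (not part of the diff): the "dom" backend built above, reached through html5lib.parse. Assumes the standalone html5lib package; under pip the module is pip._vendor.html5lib.

import html5lib

# parse() routes through getTreeBuilder("dom"), which defaults to minidom.
document = html5lib.parse("<title>demo</title>", treebuilder="dom")
title = document.getElementsByTagName("title")[0]
print(title.firstChild.data)  # -> demo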
@@ -0,0 +1,337 @@
from __future__ import absolute_import, division, unicode_literals
from pip._vendor.six import text_type

import re

from . import _base
from .. import ihatexml
from .. import constants
from ..constants import namespaces
from ..utils import moduleFactoryFactory

tag_regexp = re.compile("{([^}]*)}(.*)")


def getETreeBuilder(ElementTreeImplementation, fullTree=False):
    ElementTree = ElementTreeImplementation
    ElementTreeCommentType = ElementTree.Comment("asd").tag

    class Element(_base.Node):
        def __init__(self, name, namespace=None):
            self._name = name
            self._namespace = namespace
            self._element = ElementTree.Element(self._getETreeTag(name,
                                                                  namespace))
            if namespace is None:
                self.nameTuple = namespaces["html"], self._name
            else:
                self.nameTuple = self._namespace, self._name
            self.parent = None
            self._childNodes = []
            self._flags = []

        def _getETreeTag(self, name, namespace):
            if namespace is None:
                etree_tag = name
            else:
                etree_tag = "{%s}%s" % (namespace, name)
            return etree_tag

        def _setName(self, name):
            self._name = name
            self._element.tag = self._getETreeTag(self._name, self._namespace)

        def _getName(self):
            return self._name

        name = property(_getName, _setName)

        def _setNamespace(self, namespace):
            self._namespace = namespace
            self._element.tag = self._getETreeTag(self._name, self._namespace)

        def _getNamespace(self):
            return self._namespace

        namespace = property(_getNamespace, _setNamespace)

        def _getAttributes(self):
            return self._element.attrib

        def _setAttributes(self, attributes):
            # Delete existing attributes first
            # XXX - there may be a better way to do this...
            for key in list(self._element.attrib.keys()):
                del self._element.attrib[key]
            for key, value in attributes.items():
                if isinstance(key, tuple):
                    name = "{%s}%s" % (key[2], key[1])
                else:
                    name = key
                self._element.set(name, value)

        attributes = property(_getAttributes, _setAttributes)

        def _getChildNodes(self):
            return self._childNodes

        def _setChildNodes(self, value):
            del self._element[:]
            self._childNodes = []
            for element in value:
                self.insertChild(element)

        childNodes = property(_getChildNodes, _setChildNodes)

        def hasContent(self):
            """Return true if the node has children or text"""
            return bool(self._element.text or len(self._element))

        def appendChild(self, node):
            self._childNodes.append(node)
            self._element.append(node._element)
            node.parent = self

        def insertBefore(self, node, refNode):
            index = list(self._element).index(refNode._element)
            self._element.insert(index, node._element)
            node.parent = self

        def removeChild(self, node):
            self._element.remove(node._element)
            node.parent = None

        def insertText(self, data, insertBefore=None):
            if not(len(self._element)):
                if not self._element.text:
                    self._element.text = ""
                self._element.text += data
            elif insertBefore is None:
                # Insert the text as the tail of the last child element
                if not self._element[-1].tail:
                    self._element[-1].tail = ""
                self._element[-1].tail += data
            else:
                # Insert the text before the specified node
                children = list(self._element)
                index = children.index(insertBefore._element)
                if index > 0:
                    if not self._element[index - 1].tail:
                        self._element[index - 1].tail = ""
                    self._element[index - 1].tail += data
                else:
                    if not self._element.text:
                        self._element.text = ""
                    self._element.text += data

        def cloneNode(self):
            element = type(self)(self.name, self.namespace)
            for name, value in self.attributes.items():
                element.attributes[name] = value
            return element

        def reparentChildren(self, newParent):
            if newParent.childNodes:
                newParent.childNodes[-1]._element.tail += self._element.text
            else:
                if not newParent._element.text:
                    newParent._element.text = ""
                if self._element.text is not None:
                    newParent._element.text += self._element.text
            self._element.text = ""
            _base.Node.reparentChildren(self, newParent)

    class Comment(Element):
        def __init__(self, data):
            # Don't call the Element constructor: comments wrap an
            # ElementTree Comment directly, so set the properties here
            self._element = ElementTree.Comment(data)
            self.parent = None
            self._childNodes = []
            self._flags = []

        def _getData(self):
            return self._element.text

        def _setData(self, value):
            self._element.text = value

        data = property(_getData, _setData)

    class DocumentType(Element):
        def __init__(self, name, publicId, systemId):
            Element.__init__(self, "<!DOCTYPE>")
            self._element.text = name
            self.publicId = publicId
            self.systemId = systemId

        def _getPublicId(self):
            return self._element.get("publicId", "")

        def _setPublicId(self, value):
            if value is not None:
                self._element.set("publicId", value)

        publicId = property(_getPublicId, _setPublicId)

        def _getSystemId(self):
            return self._element.get("systemId", "")

        def _setSystemId(self, value):
            if value is not None:
                self._element.set("systemId", value)

        systemId = property(_getSystemId, _setSystemId)

    class Document(Element):
        def __init__(self):
            Element.__init__(self, "DOCUMENT_ROOT")

    class DocumentFragment(Element):
        def __init__(self):
            Element.__init__(self, "DOCUMENT_FRAGMENT")

    def testSerializer(element):
        rv = []

        def serializeElement(element, indent=0):
            if not(hasattr(element, "tag")):
                element = element.getroot()
            if element.tag == "<!DOCTYPE>":
                if element.get("publicId") or element.get("systemId"):
                    publicId = element.get("publicId") or ""
                    systemId = element.get("systemId") or ""
                    rv.append("""<!DOCTYPE %s "%s" "%s">""" %
                              (element.text, publicId, systemId))
                else:
                    rv.append("<!DOCTYPE %s>" % (element.text,))
            elif element.tag == "DOCUMENT_ROOT":
                rv.append("#document")
                if element.text is not None:
                    rv.append("|%s\"%s\"" % (' ' * (indent + 2), element.text))
                if element.tail is not None:
                    raise TypeError("Document node cannot have tail")
                if hasattr(element, "attrib") and len(element.attrib):
                    raise TypeError("Document node cannot have attributes")
            elif element.tag == ElementTreeCommentType:
                rv.append("|%s<!-- %s -->" % (' ' * indent, element.text))
            else:
                assert isinstance(element.tag, text_type), \
                    "Expected unicode, got %s, %s" % (type(element.tag), element.tag)
                nsmatch = tag_regexp.match(element.tag)

                if nsmatch is None:
                    name = element.tag
                else:
                    ns, name = nsmatch.groups()
                    prefix = constants.prefixes[ns]
                    name = "%s %s" % (prefix, name)
                rv.append("|%s<%s>" % (' ' * indent, name))

                if hasattr(element, "attrib"):
                    attributes = []
                    for name, value in element.attrib.items():
                        nsmatch = tag_regexp.match(name)
                        if nsmatch is not None:
                            ns, name = nsmatch.groups()
                            prefix = constants.prefixes[ns]
                            attr_string = "%s %s" % (prefix, name)
                        else:
                            attr_string = name
                        attributes.append((attr_string, value))

                    for name, value in sorted(attributes):
                        rv.append('|%s%s="%s"' % (' ' * (indent + 2), name, value))
                if element.text:
                    rv.append("|%s\"%s\"" % (' ' * (indent + 2), element.text))
            indent += 2
            for child in element:
                serializeElement(child, indent)
            if element.tail:
                rv.append("|%s\"%s\"" % (' ' * (indent - 2), element.tail))
        serializeElement(element, 0)

        return "\n".join(rv)

    def tostring(element):
        """Serialize an element and its child nodes to a string"""
        rv = []
        filter = ihatexml.InfosetFilter()

        def serializeElement(element):
            if isinstance(element, ElementTree.ElementTree):
                element = element.getroot()

            if element.tag == "<!DOCTYPE>":
                if element.get("publicId") or element.get("systemId"):
                    publicId = element.get("publicId") or ""
                    systemId = element.get("systemId") or ""
                    rv.append("""<!DOCTYPE %s PUBLIC "%s" "%s">""" %
                              (element.text, publicId, systemId))
                else:
                    rv.append("<!DOCTYPE %s>" % (element.text,))
            elif element.tag == "DOCUMENT_ROOT":
                if element.text is not None:
                    rv.append(element.text)
                if element.tail is not None:
                    raise TypeError("Document node cannot have tail")
                if hasattr(element, "attrib") and len(element.attrib):
                    raise TypeError("Document node cannot have attributes")

                for child in element:
                    serializeElement(child)

            elif element.tag == ElementTreeCommentType:
                rv.append("<!--%s-->" % (element.text,))
            else:
                # This is assumed to be an ordinary element
                if not element.attrib:
                    rv.append("<%s>" % (filter.fromXmlName(element.tag),))
                else:
                    attr = " ".join(["%s=\"%s\"" % (
                        filter.fromXmlName(name), value)
                        for name, value in element.attrib.items()])
                    rv.append("<%s %s>" % (element.tag, attr))
                if element.text:
                    rv.append(element.text)

                for child in element:
                    serializeElement(child)

                rv.append("</%s>" % (element.tag,))

            if element.tail:
                rv.append(element.tail)

        serializeElement(element)

        return "".join(rv)

    class TreeBuilder(_base.TreeBuilder):
        documentClass = Document
        doctypeClass = DocumentType
        elementClass = Element
        commentClass = Comment
        fragmentClass = DocumentFragment
        implementation = ElementTreeImplementation

        def testSerializer(self, element):
            return testSerializer(element)

        def getDocument(self):
            if fullTree:
                return self.document._element
            else:
                if self.defaultNamespace is not None:
                    return self.document._element.find(
                        "{%s}html" % self.defaultNamespace)
                else:
                    return self.document._element.find("html")

        def getFragment(self):
            return _base.TreeBuilder.getFragment(self)._element

    return locals()


getETreeModule = moduleFactoryFactory(getETreeBuilder)
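Hedged sketch (not part of the diff): getETreeModule wraps the locals() returned by getETreeBuilder into a cached module-like object, so the module-local tostring above is reachable as an attribute. namespaceHTMLElements=False is used here only to keep the serialized tag names readable.

import xml.etree.ElementTree as ElementTree

import html5lib
from html5lib.treebuilders import etree as etree_builders

etree_module = etree_builders.getETreeModule(ElementTree, fullTree=False)
parser = html5lib.HTMLParser(tree=etree_module.TreeBuilder,
                             namespaceHTMLElements=False)
root = parser.parse("<p>one</p><p>two</p>")  # the <html> element
print(etree_module.tostring(root))
# -> <html><head></head><body><p>one</p><p>two</p></body></html>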
@@ -0,0 +1,369 @@
"""Module for supporting the lxml.etree library. The idea here is to use as much
of the native library as possible, without using fragile hacks like custom element
names that break between releases. The downside of this is that we cannot represent
all possible trees; specifically the following are known to cause problems:

Text or comments as siblings of the root element
Doctypes with no name

When any of these things occur, we emit a DataLossWarning
"""

from __future__ import absolute_import, division, unicode_literals

import warnings
import re
import sys

from . import _base
from ..constants import DataLossWarning
from .. import constants
from . import etree as etree_builders
from .. import ihatexml

import lxml.etree as etree


fullTree = True
tag_regexp = re.compile("{([^}]*)}(.*)")

comment_type = etree.Comment("asd").tag


class DocumentType(object):
    def __init__(self, name, publicId, systemId):
        self.name = name
        self.publicId = publicId
        self.systemId = systemId


class Document(object):
    def __init__(self):
        self._elementTree = None
        self._childNodes = []

    def appendChild(self, element):
        self._elementTree.getroot().addnext(element._element)

    def _getChildNodes(self):
        return self._childNodes

    childNodes = property(_getChildNodes)


def testSerializer(element):
    rv = []
    finalText = None
    infosetFilter = ihatexml.InfosetFilter()

    def serializeElement(element, indent=0):
        if not hasattr(element, "tag"):
            if hasattr(element, "getroot"):
                # Full tree case
                rv.append("#document")
                if element.docinfo.internalDTD:
                    if not (element.docinfo.public_id or
                            element.docinfo.system_url):
                        dtd_str = "<!DOCTYPE %s>" % element.docinfo.root_name
                    else:
                        dtd_str = """<!DOCTYPE %s "%s" "%s">""" % (
                            element.docinfo.root_name,
                            element.docinfo.public_id,
                            element.docinfo.system_url)
                    rv.append("|%s%s" % (' ' * (indent + 2), dtd_str))
                next_element = element.getroot()
                while next_element.getprevious() is not None:
                    next_element = next_element.getprevious()
                while next_element is not None:
                    serializeElement(next_element, indent + 2)
                    next_element = next_element.getnext()
            elif isinstance(element, str) or isinstance(element, bytes):
                # Text in a fragment
                assert isinstance(element, str) or sys.version_info.major == 2
                rv.append("|%s\"%s\"" % (' ' * indent, element))
            else:
                # Fragment case
                rv.append("#document-fragment")
                for next_element in element:
                    serializeElement(next_element, indent + 2)
        elif element.tag == comment_type:
            rv.append("|%s<!-- %s -->" % (' ' * indent, element.text))
            if hasattr(element, "tail") and element.tail:
                rv.append("|%s\"%s\"" % (' ' * indent, element.tail))
        else:
            assert isinstance(element, etree._Element)
            nsmatch = etree_builders.tag_regexp.match(element.tag)
            if nsmatch is not None:
                ns = nsmatch.group(1)
                tag = nsmatch.group(2)
                prefix = constants.prefixes[ns]
                rv.append("|%s<%s %s>" % (' ' * indent, prefix,
                                          infosetFilter.fromXmlName(tag)))
            else:
                rv.append("|%s<%s>" % (' ' * indent,
                                       infosetFilter.fromXmlName(element.tag)))

            if hasattr(element, "attrib"):
                attributes = []
                for name, value in element.attrib.items():
                    nsmatch = tag_regexp.match(name)
                    if nsmatch is not None:
                        ns, name = nsmatch.groups()
                        name = infosetFilter.fromXmlName(name)
                        prefix = constants.prefixes[ns]
                        attr_string = "%s %s" % (prefix, name)
                    else:
                        attr_string = infosetFilter.fromXmlName(name)
                    attributes.append((attr_string, value))

                for name, value in sorted(attributes):
                    rv.append('|%s%s="%s"' % (' ' * (indent + 2), name, value))

            if element.text:
                rv.append("|%s\"%s\"" % (' ' * (indent + 2), element.text))
            indent += 2
            for child in element:
                serializeElement(child, indent)
            if hasattr(element, "tail") and element.tail:
                rv.append("|%s\"%s\"" % (' ' * (indent - 2), element.tail))
    serializeElement(element, 0)

    if finalText is not None:
        rv.append("|%s\"%s\"" % (' ' * 2, finalText))

    return "\n".join(rv)


def tostring(element):
    """Serialize an element and its child nodes to a string"""
    rv = []
    finalText = None

    def serializeElement(element):
        if not hasattr(element, "tag"):
            if element.docinfo.internalDTD:
                if element.docinfo.doctype:
                    dtd_str = element.docinfo.doctype
                else:
                    dtd_str = "<!DOCTYPE %s>" % element.docinfo.root_name
                rv.append(dtd_str)
            serializeElement(element.getroot())

        elif element.tag == comment_type:
            rv.append("<!--%s-->" % (element.text,))

        else:
            # This is assumed to be an ordinary element
            if not element.attrib:
                rv.append("<%s>" % (element.tag,))
            else:
                attr = " ".join(["%s=\"%s\"" % (name, value)
                                 for name, value in element.attrib.items()])
                rv.append("<%s %s>" % (element.tag, attr))
            if element.text:
                rv.append(element.text)

            for child in element:
                serializeElement(child)

            rv.append("</%s>" % (element.tag,))

        if hasattr(element, "tail") and element.tail:
            rv.append(element.tail)

    serializeElement(element)

    if finalText is not None:
        rv.append("%s\"%s\"" % (' ' * 2, finalText))

    return "".join(rv)


class TreeBuilder(_base.TreeBuilder):
    documentClass = Document
    doctypeClass = DocumentType
    elementClass = None
    commentClass = None
    fragmentClass = Document
    implementation = etree

    def __init__(self, namespaceHTMLElements, fullTree=False):
        builder = etree_builders.getETreeModule(etree, fullTree=fullTree)
        infosetFilter = self.infosetFilter = ihatexml.InfosetFilter()
        self.namespaceHTMLElements = namespaceHTMLElements

        class Attributes(dict):
            def __init__(self, element, value={}):
                self._element = element
                dict.__init__(self, value)
                for key, value in self.items():
                    if isinstance(key, tuple):
                        name = "{%s}%s" % (key[2], infosetFilter.coerceAttribute(key[1]))
                    else:
                        name = infosetFilter.coerceAttribute(key)
                    self._element._element.attrib[name] = value

            def __setitem__(self, key, value):
                dict.__setitem__(self, key, value)
                if isinstance(key, tuple):
                    name = "{%s}%s" % (key[2], infosetFilter.coerceAttribute(key[1]))
                else:
                    name = infosetFilter.coerceAttribute(key)
                self._element._element.attrib[name] = value

        class Element(builder.Element):
            def __init__(self, name, namespace):
                name = infosetFilter.coerceElement(name)
                builder.Element.__init__(self, name, namespace=namespace)
                self._attributes = Attributes(self)

            def _setName(self, name):
                self._name = infosetFilter.coerceElement(name)
                self._element.tag = self._getETreeTag(
                    self._name, self._namespace)

            def _getName(self):
                return infosetFilter.fromXmlName(self._name)

            name = property(_getName, _setName)

            def _getAttributes(self):
                return self._attributes

            def _setAttributes(self, attributes):
                self._attributes = Attributes(self, attributes)

            attributes = property(_getAttributes, _setAttributes)

            def insertText(self, data, insertBefore=None):
                data = infosetFilter.coerceCharacters(data)
                builder.Element.insertText(self, data, insertBefore)

            def appendChild(self, child):
                builder.Element.appendChild(self, child)

        class Comment(builder.Comment):
            def __init__(self, data):
                data = infosetFilter.coerceComment(data)
                builder.Comment.__init__(self, data)

            def _setData(self, data):
                data = infosetFilter.coerceComment(data)
                self._element.text = data

            def _getData(self):
                return self._element.text

            data = property(_getData, _setData)

        self.elementClass = Element
        self.commentClass = builder.Comment
        # self.fragmentClass = builder.DocumentFragment
        _base.TreeBuilder.__init__(self, namespaceHTMLElements)

    def reset(self):
        _base.TreeBuilder.reset(self)
        self.insertComment = self.insertCommentInitial
        self.initial_comments = []
        self.doctype = None

    def testSerializer(self, element):
        return testSerializer(element)

    def getDocument(self):
        if fullTree:
            return self.document._elementTree
        else:
            return self.document._elementTree.getroot()

    def getFragment(self):
        fragment = []
        element = self.openElements[0]._element
        if element.text:
            fragment.append(element.text)
        fragment.extend(list(element))
        if element.tail:
            fragment.append(element.tail)
        return fragment

    def insertDoctype(self, token):
        name = token["name"]
        publicId = token["publicId"]
        systemId = token["systemId"]

        if not name:
            warnings.warn("lxml cannot represent empty doctype", DataLossWarning)
            self.doctype = None
        else:
            coercedName = self.infosetFilter.coerceElement(name)
            if coercedName != name:
                warnings.warn("lxml cannot represent non-xml doctype", DataLossWarning)

            doctype = self.doctypeClass(coercedName, publicId, systemId)
            self.doctype = doctype

    def insertCommentInitial(self, data, parent=None):
        self.initial_comments.append(data)

    def insertCommentMain(self, data, parent=None):
        if (parent == self.document and
                self.document._elementTree.getroot()[-1].tag == comment_type):
            warnings.warn("lxml cannot represent adjacent comments beyond the root elements", DataLossWarning)
        super(TreeBuilder, self).insertComment(data, parent)

    def insertRoot(self, token):
        """Create the document root"""
        # Because of the way libxml2 works, it doesn't seem to be possible to
        # alter information like the doctype after the tree has been parsed.
        # Therefore we need to use the built-in parser to create our initial
        # tree, after which we can add elements like normal
        docStr = ""
        if self.doctype:
            assert self.doctype.name
            docStr += "<!DOCTYPE %s" % self.doctype.name
            if (self.doctype.publicId is not None or
                    self.doctype.systemId is not None):
                docStr += (' PUBLIC "%s" ' %
                           (self.infosetFilter.coercePubid(self.doctype.publicId or "")))
                if self.doctype.systemId:
                    sysid = self.doctype.systemId
                    if sysid.find("'") >= 0 and sysid.find('"') >= 0:
                        warnings.warn("DOCTYPE system cannot contain single and double quotes", DataLossWarning)
                        sysid = sysid.replace("'", 'U00027')
                    if sysid.find("'") >= 0:
                        docStr += '"%s"' % sysid
                    else:
                        docStr += "'%s'" % sysid
                else:
                    docStr += "''"
            docStr += ">"
            if self.doctype.name != token["name"]:
                warnings.warn("lxml cannot represent doctype with a different name to the root element", DataLossWarning)
        docStr += "<THIS_SHOULD_NEVER_APPEAR_PUBLICLY/>"
        root = etree.fromstring(docStr)

        # Append the initial comments:
        for comment_token in self.initial_comments:
            root.addprevious(etree.Comment(comment_token["data"]))

        # Create the root document and add the ElementTree to it
        self.document = self.documentClass()
        self.document._elementTree = root.getroottree()

        # Give the root element the right name
        name = token["name"]
        namespace = token.get("namespace", self.defaultNamespace)
        if namespace is None:
|
||||||
|
etree_tag = name
|
||||||
|
else:
|
||||||
|
etree_tag = "{%s}%s" % (namespace, name)
|
||||||
|
root.tag = etree_tag
|
||||||
|
|
||||||
|
# Add the root element to the internal child/open data structures
|
||||||
|
root_element = self.elementClass(name, namespace)
|
||||||
|
root_element._element = root
|
||||||
|
self.document._childNodes.append(root_element)
|
||||||
|
self.openElements.append(root_element)
|
||||||
|
|
||||||
|
# Reset to the default insert comment function
|
||||||
|
self.insertComment = self.insertCommentMain
|
|
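insertRoot above works around libxml2's parsed-doctype limitation by serializing the doctype plus a placeholder root element into a string, parsing that string, and then renaming the placeholder. A minimal, hypothetical sketch of the same trick (requires lxml; exact serialized output may vary by lxml version):

# Illustrative sketch of the insertRoot technique above (requires lxml).
from lxml import etree

root = etree.fromstring("<!DOCTYPE html><THIS_SHOULD_NEVER_APPEAR_PUBLICLY/>")
root.tag = "html"  # rename the placeholder to the real root element name
print(etree.tostring(root.getroottree()))
# roughly: b'<!DOCTYPE html>\n<html/>'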
@@ -0,0 +1,57 @@
"""A collection of modules for iterating through different kinds of
tree, generating tokens identical to those produced by the tokenizer
module.

To create a tree walker for a new type of tree, you need to
implement a tree walker object (called TreeWalker by convention) that
implements a 'serialize' method taking a tree as sole argument and
returning an iterator generating tokens.
"""

from __future__ import absolute_import, division, unicode_literals

import sys

from ..utils import default_etree

treeWalkerCache = {}


def getTreeWalker(treeType, implementation=None, **kwargs):
    """Get a TreeWalker class for various types of tree with built-in support

    treeType - the name of the tree type required (case-insensitive). Supported
               values are:

               "dom" - The xml.dom.minidom DOM implementation
               "pulldom" - The xml.dom.pulldom event stream
               "etree" - A generic walker for tree implementations exposing an
                         elementtree-like interface (known to work with
                         ElementTree, cElementTree and lxml.etree).
               "lxml" - Optimized walker for lxml.etree
               "genshi" - a Genshi stream

    implementation - (Currently applies to the "etree" tree type only). A module
                     implementing the tree type e.g. xml.etree.ElementTree or
                     cElementTree."""

    treeType = treeType.lower()
    if treeType not in treeWalkerCache:
        if treeType in ("dom", "pulldom"):
            name = "%s.%s" % (__name__, treeType)
            __import__(name)
            mod = sys.modules[name]
            treeWalkerCache[treeType] = mod.TreeWalker
        elif treeType == "genshi":
            from . import genshistream
            treeWalkerCache[treeType] = genshistream.TreeWalker
        elif treeType == "lxml":
            from . import lxmletree
            treeWalkerCache[treeType] = lxmletree.TreeWalker
        elif treeType == "etree":
            from . import etree
            if implementation is None:
                implementation = default_etree
            # XXX: NEVER cache here, caching is done in the etree submodule
            return etree.getETreeModule(implementation, **kwargs).TreeWalker
    return treeWalkerCache.get(treeType)
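The class returned by getTreeWalker is instantiated with a tree and iterated to produce token dicts. A minimal, hypothetical usage sketch (assuming the vendored import path pip._vendor.html5lib):

# Hypothetical usage of getTreeWalker; import path is an assumption.
from pip._vendor import html5lib
from pip._vendor.html5lib.treewalkers import getTreeWalker

doc = html5lib.parse("<p>Hello <b>world</b></p>")  # etree document by default
TreeWalker = getTreeWalker("etree")                # class is cached per tree type
for token in TreeWalker(doc):
    print(token["type"])  # StartTag, Characters, EndTag, ...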
@@ -0,0 +1,200 @@
from __future__ import absolute_import, division, unicode_literals
from pip._vendor.six import text_type, string_types

import gettext
_ = gettext.gettext

from xml.dom import Node

DOCUMENT = Node.DOCUMENT_NODE
DOCTYPE = Node.DOCUMENT_TYPE_NODE
TEXT = Node.TEXT_NODE
ELEMENT = Node.ELEMENT_NODE
COMMENT = Node.COMMENT_NODE
ENTITY = Node.ENTITY_NODE
UNKNOWN = "<#UNKNOWN#>"

from ..constants import voidElements, spaceCharacters
spaceCharacters = "".join(spaceCharacters)


def to_text(s, blank_if_none=True):
    """Wrapper around six.text_type to convert None to empty string"""
    if s is None:
        if blank_if_none:
            return ""
        else:
            return None
    elif isinstance(s, text_type):
        return s
    else:
        return text_type(s)


def is_text_or_none(string):
    """Wrapper around isinstance(string_types) or is None"""
    return string is None or isinstance(string, string_types)


class TreeWalker(object):
    def __init__(self, tree):
        self.tree = tree

    def __iter__(self):
        raise NotImplementedError

    def error(self, msg):
        return {"type": "SerializeError", "data": msg}

    def emptyTag(self, namespace, name, attrs, hasChildren=False):
        assert namespace is None or isinstance(namespace, string_types), type(namespace)
        assert isinstance(name, string_types), type(name)
        assert all((namespace is None or isinstance(namespace, string_types)) and
                   isinstance(name, string_types) and
                   isinstance(value, string_types)
                   for (namespace, name), value in attrs.items())

        yield {"type": "EmptyTag", "name": to_text(name, False),
               "namespace": to_text(namespace),
               "data": attrs}
        if hasChildren:
            yield self.error(_("Void element has children"))

    def startTag(self, namespace, name, attrs):
        assert namespace is None or isinstance(namespace, string_types), type(namespace)
        assert isinstance(name, string_types), type(name)
        assert all((namespace is None or isinstance(namespace, string_types)) and
                   isinstance(name, string_types) and
                   isinstance(value, string_types)
                   for (namespace, name), value in attrs.items())

        return {"type": "StartTag",
                "name": text_type(name),
                "namespace": to_text(namespace),
                "data": dict(((to_text(namespace, False), to_text(name)),
                              to_text(value, False))
                             for (namespace, name), value in attrs.items())}

    def endTag(self, namespace, name):
        assert namespace is None or isinstance(namespace, string_types), type(namespace)
        assert isinstance(name, string_types), type(name)

        return {"type": "EndTag",
                "name": to_text(name, False),
                "namespace": to_text(namespace),
                "data": {}}

    def text(self, data):
        assert isinstance(data, string_types), type(data)

        data = to_text(data)
        middle = data.lstrip(spaceCharacters)
        left = data[:len(data) - len(middle)]
        if left:
            yield {"type": "SpaceCharacters", "data": left}
        data = middle
        middle = data.rstrip(spaceCharacters)
        right = data[len(middle):]
        if middle:
            yield {"type": "Characters", "data": middle}
        if right:
            yield {"type": "SpaceCharacters", "data": right}

    def comment(self, data):
        assert isinstance(data, string_types), type(data)

        return {"type": "Comment", "data": text_type(data)}

    def doctype(self, name, publicId=None, systemId=None, correct=True):
        assert is_text_or_none(name), type(name)
        assert is_text_or_none(publicId), type(publicId)
        assert is_text_or_none(systemId), type(systemId)

        return {"type": "Doctype",
                "name": to_text(name),
                "publicId": to_text(publicId),
                "systemId": to_text(systemId),
                "correct": to_text(correct)}

    def entity(self, name):
        assert isinstance(name, string_types), type(name)

        return {"type": "Entity", "name": text_type(name)}

    def unknown(self, nodeType):
        return self.error(_("Unknown node type: ") + nodeType)


class NonRecursiveTreeWalker(TreeWalker):
    def getNodeDetails(self, node):
        raise NotImplementedError

    def getFirstChild(self, node):
        raise NotImplementedError

    def getNextSibling(self, node):
        raise NotImplementedError

    def getParentNode(self, node):
        raise NotImplementedError

    def __iter__(self):
        currentNode = self.tree
        while currentNode is not None:
            details = self.getNodeDetails(currentNode)
            type, details = details[0], details[1:]
            hasChildren = False

            if type == DOCTYPE:
                yield self.doctype(*details)

            elif type == TEXT:
                for token in self.text(*details):
                    yield token

            elif type == ELEMENT:
                namespace, name, attributes, hasChildren = details
                if name in voidElements:
                    for token in self.emptyTag(namespace, name, attributes,
                                               hasChildren):
                        yield token
                    hasChildren = False
                else:
                    yield self.startTag(namespace, name, attributes)

            elif type == COMMENT:
                yield self.comment(details[0])

            elif type == ENTITY:
                yield self.entity(details[0])

            elif type == DOCUMENT:
                hasChildren = True

            else:
                yield self.unknown(details[0])

            if hasChildren:
                firstChild = self.getFirstChild(currentNode)
            else:
                firstChild = None

            if firstChild is not None:
                currentNode = firstChild
            else:
                while currentNode is not None:
                    details = self.getNodeDetails(currentNode)
                    type, details = details[0], details[1:]
                    if type == ELEMENT:
                        namespace, name, attributes, hasChildren = details
                        if name not in voidElements:
                            yield self.endTag(namespace, name)
                    if self.tree is currentNode:
                        currentNode = None
                        break
                    nextSibling = self.getNextSibling(currentNode)
                    if nextSibling is not None:
                        currentNode = nextSibling
                        break
                    else:
                        currentNode = self.getParentNode(currentNode)
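The text() helper above splits a string into leading-space, middle, and trailing-space tokens so serializers can treat inter-tag whitespace specially. A standalone sketch of the same splitting logic (the names and the spaceCharacters value here are illustrative, not part of the module):

# Standalone sketch of the whitespace splitting done by TreeWalker.text().
spaceCharacters = " \t\n\f\r"  # assumption: HTML space characters

def split_text(data):
    middle = data.lstrip(spaceCharacters)
    left = data[:len(data) - len(middle)]   # leading whitespace run
    if left:
        yield {"type": "SpaceCharacters", "data": left}
    data = middle
    middle = data.rstrip(spaceCharacters)
    right = data[len(middle):]              # trailing whitespace run
    if middle:
        yield {"type": "Characters", "data": middle}
    if right:
        yield {"type": "SpaceCharacters", "data": right}

assert list(split_text("  hi ")) == [
    {"type": "SpaceCharacters", "data": "  "},
    {"type": "Characters", "data": "hi"},
    {"type": "SpaceCharacters", "data": " "},
]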
@@ -0,0 +1,46 @@
from __future__ import absolute_import, division, unicode_literals

from xml.dom import Node

import gettext
_ = gettext.gettext

from . import _base


class TreeWalker(_base.NonRecursiveTreeWalker):
    def getNodeDetails(self, node):
        if node.nodeType == Node.DOCUMENT_TYPE_NODE:
            return _base.DOCTYPE, node.name, node.publicId, node.systemId

        elif node.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE):
            return _base.TEXT, node.nodeValue

        elif node.nodeType == Node.ELEMENT_NODE:
            attrs = {}
            for attr in list(node.attributes.keys()):
                attr = node.getAttributeNode(attr)
                if attr.namespaceURI:
                    attrs[(attr.namespaceURI, attr.localName)] = attr.value
                else:
                    attrs[(None, attr.name)] = attr.value
            return (_base.ELEMENT, node.namespaceURI, node.nodeName,
                    attrs, node.hasChildNodes())

        elif node.nodeType == Node.COMMENT_NODE:
            return _base.COMMENT, node.nodeValue

        elif node.nodeType in (Node.DOCUMENT_NODE, Node.DOCUMENT_FRAGMENT_NODE):
            return (_base.DOCUMENT,)

        else:
            return _base.UNKNOWN, node.nodeType

    def getFirstChild(self, node):
        return node.firstChild

    def getNextSibling(self, node):
        return node.nextSibling

    def getParentNode(self, node):
        return node.parentNode
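A minimal, hypothetical usage of the DOM walker with xml.dom.minidom (vendored import path assumed):

# Hypothetical usage of the DOM walker.
from xml.dom.minidom import parseString
from pip._vendor.html5lib.treewalkers import getTreeWalker

dom = parseString("<p>Hello <b>world</b></p>")
TreeWalker = getTreeWalker("dom")
types = [token["type"] for token in TreeWalker(dom)]
# roughly: StartTag, Characters, SpaceCharacters, StartTag, Characters, EndTag, EndTag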
@@ -0,0 +1,138 @@
from __future__ import absolute_import, division, unicode_literals

try:
    from collections import OrderedDict
except ImportError:
    try:
        from ordereddict import OrderedDict
    except ImportError:
        OrderedDict = dict
import gettext
_ = gettext.gettext

import re

from pip._vendor.six import text_type

from . import _base
from ..utils import moduleFactoryFactory

tag_regexp = re.compile("{([^}]*)}(.*)")


def getETreeBuilder(ElementTreeImplementation):
    ElementTree = ElementTreeImplementation
    ElementTreeCommentType = ElementTree.Comment("asd").tag

    class TreeWalker(_base.NonRecursiveTreeWalker):
        """Given the particular ElementTree representation, this implementation,
        to avoid using recursion, returns "nodes" as tuples with the following
        content:

        1. The current element

        2. The index of the element relative to its parent

        3. A stack of ancestor elements

        4. A flag "text", "tail" or None to indicate if the current node is a
           text node; either the text or tail of the current element (1)
        """
        def getNodeDetails(self, node):
            if isinstance(node, tuple):  # It might be the root Element
                elt, key, parents, flag = node
                if flag in ("text", "tail"):
                    return _base.TEXT, getattr(elt, flag)
                else:
                    node = elt

            if not hasattr(node, "tag"):
                node = node.getroot()

            if node.tag in ("DOCUMENT_ROOT", "DOCUMENT_FRAGMENT"):
                return (_base.DOCUMENT,)

            elif node.tag == "<!DOCTYPE>":
                return (_base.DOCTYPE, node.text,
                        node.get("publicId"), node.get("systemId"))

            elif node.tag == ElementTreeCommentType:
                return _base.COMMENT, node.text

            else:
                assert type(node.tag) == text_type, type(node.tag)
                # This is assumed to be an ordinary element
                match = tag_regexp.match(node.tag)
                if match:
                    namespace, tag = match.groups()
                else:
                    namespace = None
                    tag = node.tag
                attrs = OrderedDict()
                for name, value in list(node.attrib.items()):
                    match = tag_regexp.match(name)
                    if match:
                        attrs[(match.group(1), match.group(2))] = value
                    else:
                        attrs[(None, name)] = value
                return (_base.ELEMENT, namespace, tag,
                        attrs, len(node) or node.text)

        def getFirstChild(self, node):
            if isinstance(node, tuple):
                element, key, parents, flag = node
            else:
                element, key, parents, flag = node, None, [], None

            if flag in ("text", "tail"):
                return None
            else:
                if element.text:
                    return element, key, parents, "text"
                elif len(element):
                    parents.append(element)
                    return element[0], 0, parents, None
                else:
                    return None

        def getNextSibling(self, node):
            if isinstance(node, tuple):
                element, key, parents, flag = node
            else:
                return None

            if flag == "text":
                if len(element):
                    parents.append(element)
                    return element[0], 0, parents, None
                else:
                    return None
            else:
                if element.tail and flag != "tail":
                    return element, key, parents, "tail"
                elif key < len(parents[-1]) - 1:
                    return parents[-1][key + 1], key + 1, parents, None
                else:
                    return None

        def getParentNode(self, node):
            if isinstance(node, tuple):
                element, key, parents, flag = node
            else:
                return None

            if flag == "text":
                if not parents:
                    return element
                else:
                    return element, key, parents, None
            else:
                parent = parents.pop()
                if not parents:
                    return parent
                else:
                    return parent, list(parents[-1]).index(parent), parents, None

    return locals()


getETreeModule = moduleFactoryFactory(getETreeBuilder)
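The tuple "nodes" described in the docstring let the walker visit an element's .text and .tail strings as separate traversal positions. A hypothetical walk over a plain ElementTree element (vendored import path assumed):

# "a" is visited via the ("text") flag on <div>, "c" via the ("tail") flag on <span>.
import xml.etree.ElementTree as ET
from pip._vendor.html5lib.treewalkers import getTreeWalker

root = ET.fromstring("<div>a<span>b</span>c</div>")
TreeWalker = getTreeWalker("etree", implementation=ET)
for token in TreeWalker(root):
    print(token)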
@@ -0,0 +1,69 @@
from __future__ import absolute_import, division, unicode_literals

from genshi.core import QName
from genshi.core import START, END, XML_NAMESPACE, DOCTYPE, TEXT
from genshi.core import START_NS, END_NS, START_CDATA, END_CDATA, PI, COMMENT

from . import _base

from ..constants import voidElements, namespaces


class TreeWalker(_base.TreeWalker):
    def __iter__(self):
        # Buffer the events so we can pass in the following one
        previous = None
        for event in self.tree:
            if previous is not None:
                for token in self.tokens(previous, event):
                    yield token
            previous = event

        # Don't forget the final event!
        if previous is not None:
            for token in self.tokens(previous, None):
                yield token

    def tokens(self, event, next):
        kind, data, pos = event
        if kind == START:
            tag, attribs = data
            name = tag.localname
            namespace = tag.namespace
            converted_attribs = {}
            for k, v in attribs:
                if isinstance(k, QName):
                    converted_attribs[(k.namespace, k.localname)] = v
                else:
                    converted_attribs[(None, k)] = v

            if namespace == namespaces["html"] and name in voidElements:
                for token in self.emptyTag(namespace, name, converted_attribs,
                                           not next or next[0] != END
                                           or next[1] != tag):
                    yield token
            else:
                yield self.startTag(namespace, name, converted_attribs)

        elif kind == END:
            name = data.localname
            namespace = data.namespace
            if name not in voidElements:
                yield self.endTag(namespace, name)

        elif kind == COMMENT:
            yield self.comment(data)

        elif kind == TEXT:
            for token in self.text(data):
                yield token

        elif kind == DOCTYPE:
            yield self.doctype(*data)

        elif kind in (XML_NAMESPACE, DOCTYPE, START_NS, END_NS,
                      START_CDATA, END_CDATA, PI):
            pass

        else:
            yield self.unknown(kind)
@@ -0,0 +1,204 @@
from __future__ import absolute_import, division, unicode_literals
from pip._vendor.six import text_type

from lxml import etree
from ..treebuilders.etree import tag_regexp

from gettext import gettext
_ = gettext

from . import _base

from .. import ihatexml


def ensure_str(s):
    if s is None:
        return None
    elif isinstance(s, text_type):
        return s
    else:
        return s.decode("utf-8", "strict")


class Root(object):
    def __init__(self, et):
        self.elementtree = et
        self.children = []
        if et.docinfo.internalDTD:
            self.children.append(Doctype(self,
                                         ensure_str(et.docinfo.root_name),
                                         ensure_str(et.docinfo.public_id),
                                         ensure_str(et.docinfo.system_url)))
        root = et.getroot()
        node = root

        while node.getprevious() is not None:
            node = node.getprevious()
        while node is not None:
            self.children.append(node)
            node = node.getnext()

        self.text = None
        self.tail = None

    def __getitem__(self, key):
        return self.children[key]

    def getnext(self):
        return None

    def __len__(self):
        return 1


class Doctype(object):
    def __init__(self, root_node, name, public_id, system_id):
        self.root_node = root_node
        self.name = name
        self.public_id = public_id
        self.system_id = system_id

        self.text = None
        self.tail = None

    def getnext(self):
        return self.root_node.children[1]


class FragmentRoot(Root):
    def __init__(self, children):
        self.children = [FragmentWrapper(self, child) for child in children]
        self.text = self.tail = None

    def getnext(self):
        return None


class FragmentWrapper(object):
    def __init__(self, fragment_root, obj):
        self.root_node = fragment_root
        self.obj = obj
        if hasattr(self.obj, 'text'):
            self.text = ensure_str(self.obj.text)
        else:
            self.text = None
        if hasattr(self.obj, 'tail'):
            self.tail = ensure_str(self.obj.tail)
        else:
            self.tail = None

    def __getattr__(self, name):
        return getattr(self.obj, name)

    def getnext(self):
        siblings = self.root_node.children
        idx = siblings.index(self)
        if idx < len(siblings) - 1:
            return siblings[idx + 1]
        else:
            return None

    def __getitem__(self, key):
        return self.obj[key]

    def __bool__(self):
        return bool(self.obj)

    def getparent(self):
        return None

    def __str__(self):
        return str(self.obj)

    def __unicode__(self):
        return str(self.obj)

    def __len__(self):
        return len(self.obj)


class TreeWalker(_base.NonRecursiveTreeWalker):
    def __init__(self, tree):
        if hasattr(tree, "getroot"):
            tree = Root(tree)
        elif isinstance(tree, list):
            tree = FragmentRoot(tree)
        _base.NonRecursiveTreeWalker.__init__(self, tree)
        self.filter = ihatexml.InfosetFilter()

    def getNodeDetails(self, node):
        if isinstance(node, tuple):  # Text node
            node, key = node
            assert key in ("text", "tail"), _("Text nodes are text or tail, found %s") % key
            return _base.TEXT, ensure_str(getattr(node, key))

        elif isinstance(node, Root):
            return (_base.DOCUMENT,)

        elif isinstance(node, Doctype):
            return _base.DOCTYPE, node.name, node.public_id, node.system_id

        elif isinstance(node, FragmentWrapper) and not hasattr(node, "tag"):
            return _base.TEXT, node.obj

        elif node.tag == etree.Comment:
            return _base.COMMENT, ensure_str(node.text)

        elif node.tag == etree.Entity:
            return _base.ENTITY, ensure_str(node.text)[1:-1]  # strip &;

        else:
            # This is assumed to be an ordinary element
            match = tag_regexp.match(ensure_str(node.tag))
            if match:
                namespace, tag = match.groups()
            else:
                namespace = None
                tag = ensure_str(node.tag)
            attrs = {}
            for name, value in list(node.attrib.items()):
                name = ensure_str(name)
                value = ensure_str(value)
                match = tag_regexp.match(name)
                if match:
                    attrs[(match.group(1), match.group(2))] = value
                else:
                    attrs[(None, name)] = value
            return (_base.ELEMENT, namespace, self.filter.fromXmlName(tag),
                    attrs, len(node) > 0 or node.text)

    def getFirstChild(self, node):
        assert not isinstance(node, tuple), _("Text nodes have no children")

        assert len(node) or node.text, "Node has no children"
        if node.text:
            return (node, "text")
        else:
            return node[0]

    def getNextSibling(self, node):
        if isinstance(node, tuple):  # Text node
            node, key = node
            assert key in ("text", "tail"), _("Text nodes are text or tail, found %s") % key
            if key == "text":
                # XXX: we cannot use a "bool(node) and node[0] or None" construct here
                # because node[0] might evaluate to False if it has no child element
                if len(node):
                    return node[0]
                else:
                    return None
            else:  # tail
                return node.getnext()

        return (node, "tail") if node.tail else node.getnext()

    def getParentNode(self, node):
        if isinstance(node, tuple):  # Text node
            node, key = node
            assert key in ("text", "tail"), _("Text nodes are text or tail, found %s") % key
            if key == "text":
                return node
            # else: fallback to "normal" processing

        return node.getparent()
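Passing an lxml ElementTree to this walker wraps it in Root, which exposes the doctype and any pre-root comments as ordinary children. A hypothetical usage sketch (requires lxml; vendored import path assumed):

# Hypothetical usage of the lxml-optimized walker.
from lxml import etree as lxml_etree
from pip._vendor.html5lib.treewalkers import getTreeWalker

tree = lxml_etree.fromstring("<div>hi</div>").getroottree()
TreeWalker = getTreeWalker("lxml")
tokens = list(TreeWalker(tree))  # StartTag/Characters/EndTag dicts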
@@ -0,0 +1,63 @@
from __future__ import absolute_import, division, unicode_literals

from xml.dom.pulldom import START_ELEMENT, END_ELEMENT, \
    COMMENT, IGNORABLE_WHITESPACE, CHARACTERS

from . import _base

from ..constants import voidElements


class TreeWalker(_base.TreeWalker):
    def __iter__(self):
        ignore_until = None
        previous = None
        for event in self.tree:
            if previous is not None and \
                    (ignore_until is None or previous[1] is ignore_until):
                if previous[1] is ignore_until:
                    ignore_until = None
                for token in self.tokens(previous, event):
                    yield token
                    if token["type"] == "EmptyTag":
                        ignore_until = previous[1]
            previous = event
        if ignore_until is None or previous[1] is ignore_until:
            for token in self.tokens(previous, None):
                yield token
        elif ignore_until is not None:
            raise ValueError("Illformed DOM event stream: void element without END_ELEMENT")

    def tokens(self, event, next):
        type, node = event
        if type == START_ELEMENT:
            name = node.nodeName
            namespace = node.namespaceURI
            attrs = {}
            for attr in list(node.attributes.keys()):
                attr = node.getAttributeNode(attr)
                attrs[(attr.namespaceURI, attr.localName)] = attr.value
            if name in voidElements:
                for token in self.emptyTag(namespace,
                                           name,
                                           attrs,
                                           not next or next[1] is not node):
                    yield token
            else:
                yield self.startTag(namespace, name, attrs)

        elif type == END_ELEMENT:
            name = node.nodeName
            namespace = node.namespaceURI
            if name not in voidElements:
                yield self.endTag(namespace, name)

        elif type == COMMENT:
            yield self.comment(node.nodeValue)

        elif type in (IGNORABLE_WHITESPACE, CHARACTERS):
            for token in self.text(node.nodeValue):
                yield token

        else:
            yield self.unknown(type)
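The __iter__ above buffers one event of lookahead so a void element can be emitted as a single EmptyTag token, then skipped until its END_ELEMENT arrives. A hypothetical usage over a pulldom event stream (vendored import path assumed):

# Hypothetical usage of the pulldom walker.
from xml.dom.pulldom import parseString
from pip._vendor.html5lib.treewalkers import getTreeWalker

TreeWalker = getTreeWalker("pulldom")
events = parseString("<p>hi<br/></p>")
types = [token["type"] for token in TreeWalker(events)]
# Note: the stream's START_DOCUMENT/END_DOCUMENT events hit the else branch
# above and surface as SerializeError tokens.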
@@ -0,0 +1,12 @@
from __future__ import absolute_import, division, unicode_literals

from .py import Trie as PyTrie

Trie = PyTrie

try:
    from .datrie import Trie as DATrie
except ImportError:
    pass
else:
    Trie = DATrie
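The import dance above amounts to: use the C-backed datrie if installed, else the pure-Python Trie. A minimal usage sketch of the resulting facade (behaviour shown is common to both backends; vendored import path assumed):

# Hypothetical usage of the Trie facade.
from pip._vendor.html5lib.trie import Trie

t = Trie({"abc": 1, "abd": 2})
assert "abc" in t and t["abd"] == 2
assert t.has_keys_with_prefix("ab")
assert t.longest_prefix("abcdef") == "abc"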
@@ -0,0 +1,37 @@
from __future__ import absolute_import, division, unicode_literals

from collections import Mapping


class Trie(Mapping):
    """Abstract base class for tries"""

    def keys(self, prefix=None):
        keys = super(Trie, self).keys()

        if prefix is None:
            return set(keys)

        # Python 2.6: no set comprehensions
        return set([x for x in keys if x.startswith(prefix)])

    def has_keys_with_prefix(self, prefix):
        for key in self.keys():
            if key.startswith(prefix):
                return True

        return False

    def longest_prefix(self, prefix):
        if prefix in self:
            return prefix

        for i in range(1, len(prefix) + 1):
            if prefix[:-i] in self:
                return prefix[:-i]

        raise KeyError(prefix)

    def longest_prefix_item(self, prefix):
        lprefix = self.longest_prefix(prefix)
        return (lprefix, self[lprefix])
@@ -0,0 +1,44 @@
from __future__ import absolute_import, division, unicode_literals

from datrie import Trie as DATrie
from pip._vendor.six import text_type

from ._base import Trie as ABCTrie


class Trie(ABCTrie):
    def __init__(self, data):
        chars = set()
        for key in data.keys():
            if not isinstance(key, text_type):
                raise TypeError("All keys must be strings")
            for char in key:
                chars.add(char)

        self._data = DATrie("".join(chars))
        for key, value in data.items():
            self._data[key] = value

    def __contains__(self, key):
        return key in self._data

    def __len__(self):
        return len(self._data)

    def __iter__(self):
        raise NotImplementedError()

    def __getitem__(self, key):
        return self._data[key]

    def keys(self, prefix=None):
        return self._data.keys(prefix)

    def has_keys_with_prefix(self, prefix):
        return self._data.has_keys_with_prefix(prefix)

    def longest_prefix(self, prefix):
        return self._data.longest_prefix(prefix)

    def longest_prefix_item(self, prefix):
        return self._data.longest_prefix_item(prefix)
@@ -0,0 +1,67 @@
from __future__ import absolute_import, division, unicode_literals
from pip._vendor.six import text_type

from bisect import bisect_left

from ._base import Trie as ABCTrie


class Trie(ABCTrie):
    def __init__(self, data):
        if not all(isinstance(x, text_type) for x in data.keys()):
            raise TypeError("All keys must be strings")

        self._data = data
        self._keys = sorted(data.keys())
        self._cachestr = ""
        self._cachepoints = (0, len(data))

    def __contains__(self, key):
        return key in self._data

    def __len__(self):
        return len(self._data)

    def __iter__(self):
        return iter(self._data)

    def __getitem__(self, key):
        return self._data[key]

    def keys(self, prefix=None):
        if prefix is None or prefix == "" or not self._keys:
            return set(self._keys)

        if prefix.startswith(self._cachestr):
            lo, hi = self._cachepoints
            start = i = bisect_left(self._keys, prefix, lo, hi)
        else:
            start = i = bisect_left(self._keys, prefix)

        keys = set()
        if start == len(self._keys):
            return keys

        while i < len(self._keys) and self._keys[i].startswith(prefix):
            keys.add(self._keys[i])
            i += 1

        self._cachestr = prefix
        self._cachepoints = (start, i)

        return keys

    def has_keys_with_prefix(self, prefix):
        if prefix in self._data:
            return True

        if prefix.startswith(self._cachestr):
            lo, hi = self._cachepoints
            i = bisect_left(self._keys, prefix, lo, hi)
        else:
            i = bisect_left(self._keys, prefix)

        if i == len(self._keys):
            return False

        return self._keys[i].startswith(prefix)
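keys() and has_keys_with_prefix() above exploit the fact that in a sorted key list, every key sharing a prefix sits in one contiguous run starting at bisect_left(keys, prefix); the (start, i) window is then cached for the next, longer prefix. A standalone sketch of that scan (names are illustrative):

# Standalone sketch of the bisect scan used by Trie.keys().
from bisect import bisect_left

keys = sorted(["ab", "abc", "abd", "b"])
prefix = "ab"
i = bisect_left(keys, prefix)          # first candidate index (here: 0)
run = set()
while i < len(keys) and keys[i].startswith(prefix):
    run.add(keys[i])
    i += 1
assert run == {"ab", "abc", "abd"}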
@@ -0,0 +1,82 @@
from __future__ import absolute_import, division, unicode_literals

from types import ModuleType

try:
    import xml.etree.cElementTree as default_etree
except ImportError:
    import xml.etree.ElementTree as default_etree


__all__ = ["default_etree", "MethodDispatcher", "isSurrogatePair",
           "surrogatePairToCodepoint", "moduleFactoryFactory"]


class MethodDispatcher(dict):
    """Dict with 2 special properties:

    On initiation, keys that are lists, sets or tuples are converted to
    multiple keys so accessing any one of the items in the original
    list-like object returns the matching value

    md = MethodDispatcher({("foo", "bar"): "baz"}.items())
    md["foo"] == "baz"

    A default value which can be set through the default attribute.
    """

    def __init__(self, items=()):
        # Using _dictEntries instead of directly assigning to self is about
        # twice as fast. Please do careful performance testing before changing
        # anything here.
        _dictEntries = []
        for name, value in items:
            if type(name) in (list, tuple, frozenset, set):
                for item in name:
                    _dictEntries.append((item, value))
            else:
                _dictEntries.append((name, value))
        dict.__init__(self, _dictEntries)
        self.default = None

    def __getitem__(self, key):
        return dict.get(self, key, self.default)


# Some utility functions to deal with weirdness around UCS2 vs UCS4
# python builds

def isSurrogatePair(data):
    return (len(data) == 2 and
            ord(data[0]) >= 0xD800 and ord(data[0]) <= 0xDBFF and
            ord(data[1]) >= 0xDC00 and ord(data[1]) <= 0xDFFF)


def surrogatePairToCodepoint(data):
    char_val = (0x10000 + (ord(data[0]) - 0xD800) * 0x400 +
                (ord(data[1]) - 0xDC00))
    return char_val


# Module Factory Factory (no, this isn't Java, I know)
# Here to stop this being duplicated all over the place.

def moduleFactoryFactory(factory):
    moduleCache = {}

    def moduleFactory(baseModule, *args, **kwargs):
        if isinstance(ModuleType.__name__, type("")):
            name = "_%s_factory" % baseModule.__name__
        else:
            name = b"_%s_factory" % baseModule.__name__

        if name in moduleCache:
            return moduleCache[name]
        else:
            mod = ModuleType(name)
            objs = factory(baseModule, *args, **kwargs)
            mod.__dict__.update(objs)
            moduleCache[name] = mod
            return mod

    return moduleFactory
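A minimal, hypothetical usage of the helpers above (vendored import path assumed):

# Hypothetical usage of MethodDispatcher and the surrogate-pair helpers.
from pip._vendor.html5lib.utils import (MethodDispatcher, isSurrogatePair,
                                        surrogatePairToCodepoint)

md = MethodDispatcher([(("foo", "bar"), "baz")])
assert md["foo"] == md["bar"] == "baz"
assert md["missing"] is None           # unknown keys fall back to .default

pair = "\ud83d\ude00"                  # surrogate pair for U+1F600
assert isSurrogatePair(pair)
assert surrogatePairToCodepoint(pair) == 0x1F600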
File diff suppressed because it is too large.
Some files were not shown because too many files have changed in this diff.