Pythonの__xxx__の集計

(追記、まとめはこちら Pythonの__xxx__まとめ)

Python2.7.3のコードから__xxx__の形の文字列が何回でてくるかを集計した。まずは*.cと*.hの中を集計したもの。C言語レベルで使われている物もまざっている。__LP64__とか__GNUC__とか。Pythonコードに対する集計はこのリストの下。

__doc__ 1278
__LP64__ 317
__in_len__ 206
__in__ 183
__UNUSED__ 180
__len__ 169
__attribute__ 77
__dict__ 74
__init__ 70
__format__ 68
__reduce__ 62
__out__ 61
__GNUC__ 55
__new__ 51
__name__ 49
__APPLE__ 48
__builtin__ 41
__class__ 40
__error__ 40
__mode__ 36
__CYGWIN__ 34
__getitem__ 34
__LINE__ 34
__main__ 31
__bases__ 30
__ppc64__ 29
__FILE__ 28
__x86_64__ 27
__sizeof__ 26
__asm__ 25
__module__ 25
__del__ 25
__BORLANDC__ 25
__ppc__ 25
__builtins__ 23
__contains__ 22
__SH4__ 21
__setstate__ 21
__FreeBSD__ 21
__BEOS__ 20
__i386__ 20
__NetBSD__ 19
__buf__ 18
__trunc__ 17
__int__ 15
__str__ 15
__cmp__ 15
__call__ 15
__getattr__ 15
__length_hint__ 15
__coerce__ 15
__version__ 15
__eq__ 14
__QNX__ 14
__enter__ 13
__exit__ 13
__file__ 13
__getstate__ 13
__copy__ 13
__import__ 13
__path__ 12
__delitem__ 12
__setitem__ 12
__mul__ 12
__future__ 12
__WATCOMC__ 12
__index__ 12
__pow__ 11
__hash__ 11
__SI__ 10
__DI__ 10
__s390x__ 10
__abstractmethods__ 9
__unicode__ 9
__getslice__ 9
__getnewargs__ 9
__members__ 9
__self__ 9
__reduce_ex__ 9
__add__ 9
__delattr__ 8
__setattr__ 8
__weakref__ 8
__STDC__ 8
__HI__ 8
__repr__ 8
__QI__ 8
__package__ 8
__newobj__ 8
__reversed__ 7
__nonzero__ 7
__LITTLE_ENDIAN__ 7
__setslice__ 7
__getattribute__ 7
__OpenBSD__ 7
__slots__ 7
__methods__ 7
__delslice__ 7
__getinitargs__ 7
__lt__ 6
__LONG_LONG_MAX__ 6
__unused__ 6
__proc__ 6
__ne__ 6
__missing__ 6
__rpow__ 6
__iter__ 6
__subclasscheck__ 6
__DragonFly__ 6
__TURBOC__ 6
__GNUC_MINOR__ 6
__dir__ 6
__CHAR_UNSIGNED__ 5
__get__ 5
__deepcopy__ 5
__gt__ 5
__iadd__ 5
__MVS__ 5
__instancecheck__ 5
__osf__ 5
__setformat__ 5
___hello__ 5
__imul__ 5
__le__ 5
__ge__ 5
__ipow__ 5
__debug__ 5
__ctypes_from_outparam__ 4
__volatile__ 4
__MWERKS__ 4
__oct__ 4
__CRIS__ 4
__long__ 4
__SC__ 4
__all__ 4
__rmul__ 4
__complex__ 4
__hex__ 4
__delete__ 4
__float__ 4
__set__ 4
__abs__ 4
__invert__ 4
__OS400__ 4
__IBMC__ 4
__ctype_be__ 4
__getformat__ 4
__visibility__ 4
__pos__ 4
__STDC_VERSION__ 4
__GLIBC__ 4
__ctype_le__ 4
__neg__ 4
__irshift__ 3
__stderr__ 3
__rshift__ 3
__alignof__ 3
__getinitargs___ 3
__dict___ 3
__amigaos4__ 3
__metaclass__ 3
__MyCompanyName__ 3
__truediv__ 3
__STDC_SECURE_LIB__ 3
__imod__ 3
__stdout__ 3
__iand__ 3
__MIPSEB__ 3
__idiv__ 3
__ifloordiv__ 3
__main___ 3
__name___ 3
_____ 3
__xor__ 3
__ior__ 3
__and__ 3
__func__ 3
__loader__ 3
__ilshift__ 3
__class___ 3
__ixor__ 3
__isub__ 3
__author__ 3
__lshift__ 3
__mod__ 3
__itruediv__ 3
__about__ 3
__floordiv__ 3
__sub__ 3
__radd__ 3
__OS2__ 3
__or__ 3
__FRV_FDPIC__ 3
__stdin__ 3
__BUILTIN___ 3
__div__ 3
__NO_FPRS__ 3
__LDBL_MANT_DIG__ 2
__ror__ 2
__HOS_AIX__ 2
__objclass__ 2
__rdiv__ 2
__code__ 2
__powerpc64__ 2
__linux__ 2
__TOS_OS2__ 2
__LCC__ 2
__FLAT__ 2
__defaults__ 2
__reduce___ 2
__LONG_DOUBLE_128__ 2
__BIG_ENDIAN__ 2
__TIME__ 2
__rsub__ 2
__rrshift__ 2
__rlshift__ 2
__MACTYPES__ 2
__WIN32__ 2
__SMALL__ 2
__rmod__ 2
__warningregistry__ 2
__conform__ 2
__phello__ 2
__MSDOS__ 2
__getstate___ 2
__excepthook__ 2
__rxor__ 2
__MACH__ 2
__divmod__ 2
__386__ 2
__reduce_ex___ 2
__VERSION__ 2
__subclasses__ 2
__foo__ 2
__INSURE__ 2
__rand__ 2
___subclasscheck__ 2
__alloc__ 2
__________________________________ 2
__SCO_VERSION__ 2
__exit__doc__ 2
______ 2
__rdivmod__ 2
__hello__ 2
__USLC__ 2
__isdir__doc__ 2
__rtruediv__ 2
__itemsize__ 2
__name__doc__ 2
__adapt__ 2
__MEDIUM__ 2
___instancecheck__ 2
___init__ 2
___import__ 2
__PROC__ 2
__alpha__ 2
__MINGW32__ 2
__arch64__ 2
__rfloordiv__ 2
__DATE__ 2
__setstate___ 2
__displayhook__ 2
__powerpc__ 2
___new__ 2
__IBMCPP__ 2
__concat__ 1
__HITACHI__ 1
__noinline__ 1
__builtin___ 1
__hppa__ 1
__inv__ 1
__COMPACT__ 1
______________________________________________________________ 1
__gnu_linux__ 1
__iconcat__ 1
__weakrefoffset__ 1
__id__ 1
__copyright__ 1
______________________ 1
__subclasshook__ 1
__basicsize__ 1
__EXTENSIONS__ 1
__repeat__ 1
__whatever__ 1
__deprecated__ 1
__mro__ 1
__next__ 1
__64BIT__ 1
__DJGPP__ 1
__lltrace__ 1
_______________________________________________________________________ 1
__not__ 1
__________________________________________________________________ 1
_______________________________ 1
__sun__ 1
__LARGE__ 1
__flags__ 1
__EMX__ 1
__KAME__ 1
__closure__ 1
___main__ 1
__hpux__ 1
__slotnames__ 1
__irepeat__ 1
__svr4__ 1
__digital__ 1
__xx__ 1
____________ 1
__base__ 1
__rtems__ 1
__ELF__ 1
__globals__ 1
__unix__ 1
__dictoffset__ 1
__MULTI__ 1
__self_class__ 1
________ 1
__thisclass__ 1
__FUNCTION__ 1
__OPTIMIZE__ 1
_________ 1

py

一方で*.pyに対して同じ事をやったものはこちら。こっちのほうが期待していたものに近いな。コンストラクタ__init__が一番多く、ライブラリとしてインポートされたかスクリプトとして実行されたかを判断するのに使う__name__と__main__のセットが次に多い(__name__の方が540件ほど多いってことはそれ以外の使い方をしているケースが540件ほどあるってことだな)

あるインスタンスがどのクラスであるか(__class__)や、そのインスタンスがどんなメンバを持っているか(__dict__)は、Pythonを学び始めの人にすぐ必要にはならないかなぁ。

__init__ 3058
__name__ 1729
__main__ 1191
__class__ 564
__dict__ 524
__repr__ 392
__getitem__ 378
__file__ 341
__all__ 299
__doc__ 255
__str__ 217
__iter__ 206
__hash__ 195
__len__ 175
__slots__ 175
__builtin__ 166
__eq__ 164
__call__ 161
__new__ 160
__cmp__ 157
__future__ 157
__del__ 154
__exit__ 150
__setitem__ 148
__getattr__ 138
__contains__ 133
__format__ 131
__import__ 125
__enter__ 119
__version__ 112
__add__ 107
__metaclass__ 101
__module__ 100
__bases__ 95
__debug__ 83
__delitem__ 82
__path__ 81
__setstate__ 70
__get__ 66
__mod__ 60
__int__ 58
__ne__ 58
__reduce__ 58
__coerce__ 57
__getattribute__ 57
__mul__ 55
__builtins__ 52
__setattr__ 50
__revision__ 48
__lt__ 47
__loader__ 47
__float__ 47
__getstate__ 45
__getslice__ 45
__mro__ 45
__sub__ 43
__nonzero__ 41
__package__ 40
__c__ 40
__b__ 40
__unicode__ 40
__le__ 40
__a__ 40
__missing__ 39
__test__ 39
__stderr__ 35
__gt__ 35
__complex__ 35
__floordiv__ 35
__radd__ 35
__index__ 35
__next__ 34
__out__ 33
__ge__ 33
__rmul__ 32
__ctype_be__ 32
__ctype_le__ 32
__setslice__ 31
__iadd__ 31
__author__ 30
__long__ 29
__truediv__ 29
__pow__ 28
__div__ 28
__weakref__ 26
__attribute__ 26
__rfloordiv__ 25
__set__ 25
__getformat__ 24
__imul__ 23
__aepack__ 23
__warningregistry__ 23
__code__ 22
__rsub__ 22
__stdout__ 21
__rpow__ 20
__in__ 20
__delattr__ 19
__or__ 19
__rdiv__ 17
__trunc__ 17
__and__ 17
__delslice__ 17
__reversed__ 16
__abs__ 16
__LINE__ 15
__reduce_ex__ 15
__setformat__ 15
__divmod__ 15
__subclasscheck__ 14
__rmod__ 14
__deepcopy__ 13
__getnewargs__ 13
__phello__ 13
__copy__ 13
__xor__ 13
__in_len__ 13
__getinitargs__ 13
__isub__ 12
__LP64__ 11
__iand__ 11
__credits__ 11
__GNUC__ 11
__self__ 11
__rtruediv__ 11
__CONCAT__ 11
__neg__ 11
__pos__ 10
__decimal_context__ 10
__abstractmethods__ 10
__newobj__ 10
__ior__ 10
__ixor__ 10
__delete__ 10
__trace_output__ 9
__date__ 9
__safe_for_unpickling__ 9
__spam__ 9
__GNUCLIKE___ 9
__methods__ 9
__annotations__ 9
__ipow__ 9
__ror__ 8
__rshift__ 8
__subclasshook__ 8
__rxor__ 8
__callable__ 8
__ifloordiv__ 8
__length_hint__ 8
__rdivmod__ 8
__isabstractmethod__ 8
__lshift__ 8
__rand__ 8
__idiv__ 8
__objclass__ 7
__irshift__ 7
__NETLIB_VERSION__ 7
__copyright__ 7
__instancecheck__ 7
__UNDEF__ 7
__rlshift__ 7
__op__ 7
__ispkg__ 7
__rop__ 7
__rrshift__ 7
__ilshift__ 7
__imod__ 7
__format_arg__ 6
__bool__ 6
__oct__ 6
__section__ 6
__attribute_format_arg__ 6
__importer__ 6
__itruediv__ 6
__aligned__ 6
__trace_call__ 6
__members__ 6
____C__ 6
__GLIBC__ 6
__hello__ 6
__GLIBC_MINOR__ 6
__GNU_LIBRARY__ 5
__STDC_IEC_559__ 5
__error__ 5
__return__ 5
__hex__ 5
__subclasses__ 5
__invert__ 5
__STDC_IEC_559_COMPLEX__ 5
__26__ 5
__unittest_skip__ 5
__dir__ 5
__flags__ 5
__USER_LABEL_PREFIX__ 5
__helperinit__ 4
__bootstrap__ 4
____________________________________________________________ 4
__stdin__ 4
__as_immutable__ 4
__kp__ 4
_____ 4
__setup__ 4
__func__ 4
__rawmain__ 4
__methodwrapper__ 4
__helper__ 4
__STDC_ISO_10646__ 3
__nonnull__ 3
__formalclass__ 3
___str__ 3
__BIT_TYPES_DEFINED__ 3
__basicsize__ 3
__CC_SUPPORTS___ 3
__proc__ 3
__as_temporarily_immutable__ 3
__CC_SUPPORTS___INLINE__ 3
__alloc__ 3
__realdict__ 3
______ 3
__rcmp__ 3
__runpy_pkg__ 3
__unittest_skip_why__ 3
__lock__ 3
__CC_SUPPORTS___FUNC__ 3
__pkgdir__ 2
__buf__ 2
__conform__ 2
__slotnames__ 2
__args__ 2
__test_name__ 2
__iop__ 2
__console__ 2
__temp__ 2
__subclass__ 2
__usergetattr__ 2
__value__ 2
__dummy__ 2
__attributes__ 2
__GL_GET_H__ 2
__subclassing__ 2
__meta_init__ 2
__klass__ 2
___file__ 2
__sizeof__ 2
__not_mangled__ 2
__cause__ 2
__namespace__ 2
__parents_main__ 2
__displayhook__ 2
__iconcat__ 1
__docformat__ 1
__base__ 1
__xyz__ 1
__email__ 1
__cached__ 1
__ceil__ 1
__non__zero__ 1
__license__ 1
____________________ 1
__excepthook__ 1
__traceback__ 1
___contains__ 1
___all__ 1
__dialog__ 1
__NETLIB_BASE_VERSION__ 1
__TABLE_NAMES__ 1
__irepeat__ 1
__class__with__dict__ 1
__about__ 1
__itemsize__ 1
_______________ 1
__inst__ 1
__context__ 1
__________________________ 1
__floor__ 1
__MSL__ 1
________________________ 1
__exception__ 1
__foo__ 1
__status__ 1
__cvsid__ 1
__testdir__ 1
__x__ 1

*.pyの集計に使ったスクリプトは以下の通り。

import re
import os
from collections import Counter

# Count every ``__xxx__``-style token found in the *.py files below the
# current directory and print the tokens from most to least frequent,
# one "token count" pair per line.

# Bytes pattern: files are scanned as raw bytes, so ``\w`` only matches ASCII
# letters, digits and "_" on both Python 2 and Python 3.  Because ``\w``
# includes "_", pure underscore runs such as ``_____`` are counted as well.
DUNDER_RE = re.compile(br"__\w+__")

stat = Counter()

for root, dirs, files in os.walk('.'):
    for f in files:
        if not f.endswith('.py'):
            continue
        # Read in binary mode inside ``with`` so the handle is always closed
        # and files saved in unusual source encodings cannot raise a decode
        # error while being read.
        with open(os.path.join(root, f), 'rb') as source:
            data = source.read()
        # Every match is pure ASCII, so decoding to text is always safe.
        stat.update(word.decode('ascii') for word in DUNDER_RE.findall(data))

# Parenthesised single-argument print works on Python 2.7 and Python 3 alike.
for word, count in stat.most_common():
    print("%s %d" % (word, count))