Skip to content
Snippets Groups Projects
  • Martin Bauer's avatar
    Separated modules into subfolders with own setup.py · 1e02cdc7
    Martin Bauer authored
    This restructuring allows for easier separation of modules into
    separate repositories later. Also, `pip install` with a repository URL
    can now be used.
    
    The setup.py files have also been updated to correctly reference each
    other. Module versions are now extracted from git state.
    1e02cdc7
default_machine_file.yaml 12.78 KiB
# Identification and topology of the described machine.
# Topology: 2 sockets x 8 cores per socket = 16 cores; with 2 threads per
# core that is 32 hardware threads. Each socket is a single NUMA domain
# (NUMA domains per socket: 1) that holds all 8 of its cores.
# NOTE(review): values with a unit suffix (`2.7 GHz`, `64 B`) are plain
# scalars; presumably the consumer parses the unit itself, so quoting them
# could change how they load -- confirm before reformatting.
# NOTE(review): `kerncraft version` presumably names the kerncraft release
# whose machine-file schema this file follows -- TODO confirm.
kerncraft version: 0.7.3
# Matches the nominal frequency given in `model name` below (2.70GHz).
clock: 2.7 GHz
cores per socket: 8
cores per NUMA domain: 8
NUMA domains per socket: 1
model type: Intel Core SandyBridge EP processor
model name: Intel(R) Xeon(R) CPU E5-2680 0 @ 2.70GHz
sockets: 2
threads per core: 2
cacheline size: 64 B
# Compiler name -> command-line flags, stored as an ordered mapping: the
# `!!omap` tag keeps the listed order (icc, clang, gcc).
# NOTE(review): presumably the order expresses preference when several
# compilers are available -- confirm against the consumer.
# All three flag sets use -O3, target AVX (`-xAVX` / `-march=corei7-avx`)
# and enable OpenMP (`-qopenmp` / `-fopenmp`).
compiler:
    !!omap
    - icc: -O3 -xAVX -fno-alias -qopenmp
    - clang: -O3 -march=corei7-avx -mtune=corei7-avx -D_POSIX_C_SOURCE=200112L -fopenmp
    - gcc: -O3 -march=corei7-avx -D_POSIX_C_SOURCE=200112L -fopenmp
# Short micro-architecture name; `model type` above names SandyBridge EP.
micro-architecture: SNB
# Peak floating-point operations per cycle. In both rows total = ADD + MUL
# (16 = 8 + 8 and 8 = 4 + 4).
# NOTE(review): SP/DP presumably mean single/double precision and the
# figures are presumably per core -- confirm.
FLOPs per cycle:
    SP: {total: 16, ADD: 8, MUL: 8}
    DP: {total: 8, ADD: 4, MUL: 4}
# Overlapping part of the execution model: the ports attributed to it and
# the hardware-counter expression used to measure it.
# The metric is the maximum of the uops dispatched on ports 0, 1, 4 and 5.
# NOTE(review): ports 2 and 3 are listed under `ports` but do not appear in
# the metric, and "0DV" is presumably the divider unit on port 0 -- confirm.
# The metric value is a multi-line plain scalar: YAML folds its line breaks
# into single spaces, so it loads as one line. A ` #` comment cannot be
# placed inside it, because a comment ends a plain scalar.
overlapping model:
    ports: ["0", "0DV", "1", "2", "3", "4", "5"]
    performance counter metric:
        Max(UOPS_DISPATCHED_PORT_PORT_0:PMC[0-3],
            UOPS_DISPATCHED_PORT_PORT_1:PMC[0-3],
            UOPS_DISPATCHED_PORT_PORT_4:PMC[0-3],
            UOPS_DISPATCHED_PORT_PORT_5:PMC[0-3])
# Non-overlapping part of the execution model: ports 2D and 3D, with a
# metric that is the sum of four T_* terms.
# NOTE(review): T_OL, T_L1L2, T_L2L3 and T_L3MEM are not defined in this
# file; presumably they are time contributions computed by the consumer
# (overlapping part plus the L1-L2, L2-L3 and L3-memory transfers) --
# confirm.
non-overlapping model:
    ports: ["2D", "3D"]
    performance counter metric: T_OL + T_L1L2 + T_L2L3 + T_L3MEM
# Machine-wide write-allocate flag. Each cache level under
# `memory hierarchy` also sets 'write_allocate': True in its own
# `cache per group` mapping.
write-allocate: True
memory hierarchy:
    # L1 cache level: one cache per core, shared by that core's 2 hardware
    # threads; 16 groups x 1 core covers the machine's 16 cores.
    - level: L1
      # Cache geometry and policy for one group.
      # NOTE(review): presumably handed to a cache simulator as-is, with
      # 'load_from'/'store_to' naming the level that serves misses and
      # receives evicted lines -- confirm.
      cache per group: {
         'sets': 64, 'ways': 8, 'cl_size': 64, # 64 sets x 8 ways x 64 B = 32 KiB
         'replacement_policy': 'LRU',
         'write_allocate': True, 'write_back': True,
         'load_from': 'L2', 'store_to': 'L2'}
      cores per group: 1
      threads per group: 2
      groups: 16
      # Hardware-counter events used to measure this level's accesses,
      # misses and evictions.
      # NOTE(review): `PMC[0-3]` presumably selects the counter registers
      # the event may be scheduled on -- confirm.
      performance counter metrics:
          accesses: MEM_UOPS_RETIRED_LOADS:PMC[0-3]
          misses: L1D_REPLACEMENT:PMC[0-3]
          evicts: L1D_M_EVICT:PMC[0-3]
    # L2 cache level: one cache per core, shared by that core's 2 hardware
    # threads; 16 groups x 1 core covers the machine's 16 cores.
    - level: L2
      # Cache geometry and policy for one group.
      # NOTE(review): presumably handed to a cache simulator as-is, with
      # 'load_from'/'store_to' naming the level that serves misses and
      # receives evicted lines -- confirm.
      cache per group: {
         'sets': 512, 'ways': 8, 'cl_size': 64, # 512 sets x 8 ways x 64 B = 256 KiB
         'replacement_policy': 'LRU',
         'write_allocate': True, 'write_back': True,
         'load_from': 'L3', 'store_to': 'L3'}
      cores per group: 1
      threads per group: 2
      groups: 16
      # NOTE(review): presumably the bandwidth of the link towards L1, with
      # 'half-duplex' meaning loads and stores share it -- confirm. The
      # `32 B/cy` value is a plain scalar with a unit; keep it unquoted
      # unless the loader is confirmed to accept a quoted string.
      non-overlap upstream throughput: [32 B/cy, 'half-duplex']
      # Hardware-counter events for this level. `accesses` uses the same
      # event (L1D_REPLACEMENT) that the L1 entry uses for its misses.
      performance counter metrics:
          accesses: L1D_REPLACEMENT:PMC[0-3]
          misses: L2_LINES_IN_ALL:PMC[0-3]
          evicts: L2_TRANS_L2_WB:PMC[0-3]
    - level: L3
      cache per group: {
         'sets': 20480, 'ways': 16, 'cl_size': 64, # 20 MB
         'replacement_policy': 'LRU',
         'write_allocate': True, 'write_back': True}
      cores per group: 8
      threads per group: 16
      groups: 2
      non-overlap upstream throughput: [32 B/cy, 'half-duplex']
      performance counter metrics:
          accesses: L2_LINES_IN_ALL:PMC[0-3]
          misses: (CAS_COUNT_RD:MBOX0C[01] + CAS_COUNT_RD:MBOX1C[01] +