目录

2019-中关村网络与信息安全领域专项赛

src_leak

题目给了源码如下:

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
#include<iostream>    
using namespace std;    
    
typedef unsigned int uint;


/// Compile-time type selector: IfElse<Flag, A, B>::ResultType names A when
/// Flag is true and B when Flag is false.
template <bool Flag, class MaybeA, class MaybeB> class IfElse;

template <class MaybeA, class MaybeB>
class IfElse<true, MaybeA, MaybeB> {
public:
	using ResultType = MaybeA;
};

template <class MaybeA, class MaybeB>
class IfElse<false, MaybeA, MaybeB> {
public:
	using ResultType = MaybeB;
};

/// Compile-time binary search over [L, R] for the largest value whose square
/// does not exceed N; the answer is exposed as func1<N, L, R>::result.
template <uint N, uint L, uint R> struct func1 {
	// Upper midpoint, written as L + (R - L + 1) / 2 so the sum cannot wrap
	// around for large bounds (same value as (L + R + 1) / 2 otherwise).
	enum { mid = L + (R - L + 1) / 2 };

	// Square in 64 bits: mid * mid does not fit in 32 bits once mid exceeds
	// 65535, and overflowing inside a template argument is either ill-formed
	// or silently selects the wrong half of the range.
	using ResultType = typename IfElse<
		(static_cast<unsigned long long>(N) <
			static_cast<unsigned long long>(mid) * mid),
		func1<N, L, mid - 1>, func1<N, mid, R> >::ResultType;

	enum { result = ResultType::result };
};

/// Search range collapsed to a single candidate: that candidate is the answer.
template <uint N, uint L> struct func1<N, L, L> { enum { result = L }; };

/// Entry point: _func1<N>::result is floor(sqrt(N)), searched over [1, N].
template <uint N> struct _func1 { enum { result = func1<N, 1, N>::result }; };

/// N == 0 would start the search on the empty range [1, 0]; answer it directly.
template <> struct _func1<0> { enum { result = 0 }; };
    
    
// func2<Value>: number of 1 bits in the binary representation of Value,
// computed by peeling off the lowest bit and recursing on the remaining bits.
template<size_t Value>
constexpr size_t func2 = func2< (Value >> 1) > + (Value & 1u);

// Recursion anchor: zero has no set bits.
template<>
constexpr size_t func2<0> = 0;

// func3<Value>: parity of Value (1 when odd, 0 when even).
template<size_t Value>
constexpr size_t func3 = Value & 1u;
    
// One trial-division step on the dividend: becomes 0 as soon as m divides n,
// otherwise n is carried forward unchanged.
template<uint n, uint m> struct NEXTN {
	static const uint value = (n % m == 0) ? 0 : n;
};

// One trial-division step on the divisor: becomes 0 once m * m exceeds n
// (no more candidates to try), otherwise advances to m + 1.
template<uint n, uint m> struct NEXTM {
	static const uint value = (m * m > n) ? 0 : (m + 1);
};

// Repeats the two steps above until one of the stop states below is reached.
template<uint n, uint m> struct TEST {
	static const uint value = TEST<NEXTN<n, m>::value, NEXTM<n, m>::value>::value;
};

// Dividend collapsed to 0: a divisor was found.
template<uint m> struct TEST<0, m> {
	static const uint value = 0;
};

// Divisor collapsed to 0: candidates ran out without finding a divisor.
template<uint n> struct TEST<n, 0> {
	static const uint value = 1;
};

// func4<n>::value is 1 when trial division from 2 finds no divisor of n.
template<uint n> struct func4 {
	static const uint value = TEST<n, 2>::value;
};

// 1 and 2 are answered directly instead of going through TEST.
template<> struct func4<1> {
	static const uint value = 0;
};
template<> struct func4<2> {
	static const uint value = 1;
};
    
    
// Challenge statement. x1..x6 and count are not declared anywhere in this
// listing, so main() describes the puzzle rather than a program that compiles
// as given: the solver must find the values that satisfy the comments below.
int main(int argc, char**argv) {
	// Input: five uint numbers x1, x2, x3, x4, x5.
	// Among all valid choices, their sum must be the minimum.


	// Constraint 1: func3< func2<x> > is printed for each input.
	cout << func3< func2<x1> > << endl;
	cout << func3< func2<x2> > << endl;
	cout << func3< func2<x3> > << endl;
	cout << func3< func2<x4> > << endl;
	cout << func3< func2<x5> > << endl;

	// Expected output: 1 1 1 1 1


	// Constraint 2: _func1<x>::result is printed for each input.
	cout << _func1<x1>::result << endl;
	cout << _func1<x2>::result << endl;
	cout << _func1<x3>::result << endl;
	cout << _func1<x4>::result << endl;
	cout << _func1<x5>::result << endl;

	// Expected output: 963 4396 6666 1999 3141

	// x6: how many of func4<1>, func4<2>, func4<3>, ..., func4<10000> have
	// ::value equal to 1?
	x6 = count;


	// The flag is flag{x1-x2-x3-x4-x5-x6}.
	// Example: if x1=1, x2=2, x3=3, x4=4, x5=5, x6=6
	// then the flag is flag{1-2-3-4-5-6}.

	return 0;
}

模板:
https://zh.cppreference.com/w/cpp/language/template_specialization
模板特化:
https://blog.csdn.net/gatieme/article/details/50953564

  • _func1求参数的平方根,向下取整(二分法)
  • func2求参数的二进制数中1的个数
  • func3求参数的奇偶
  • func4判断参数是否为素数(检查sqrt(n)以内的数是否是n的因数)

所以题目要求: x1-x5的二进制表示中1的个数都是奇数(func3<func2<x>>输出1),向下取整的平方根分别为963 4396 6666 1999 3141,并且五个数之和要最小; x6是10000以内素数的个数。

pyc

使用010Editor的相应模板打开,并修改pyc.bt中的以下部分:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
// 16-bit magic values matched against the start of a .pyc header by the
// pyc.bt template. Per the accompanying write-up these two bytes depend on
// the Python version that compiled the file; the PY_xx names presumably
// encode that version -- TODO confirm against CPython's Python/import.c.
enum <uint16> MagicValue {
    PY_24a0 = 62041,
    PY_24a3 = 62051,
    PY_24b1 = 62061,
    PY_25a0_1 = 62071,
    PY_25a0_2 = 62081,
    PY_25a0_3 = 62091,
    PY_25a0_4 = 62092,
    PY_25b3_1 = 62101,
    PY_25b3_2 = 62111,
    PY_25c1 = 62121,
    PY_25c2 = 62131,
    PY_26a0 = 62151,
    PY_26a1 = 62161,
    PY_27a0_1 = 62171,
    PY_27a0_2 = 62181,
    PY_27a0_a = 62211,
};
  • Magic Number(四字节)
    /images/9978a4a673035e941d972171ca2bd329/11884068-27fc58dc2c1ff56d.png
    前两个字节是可变的,它和编译 python 文件的 python 版本有关,接下来两个字节是固定的 0D0A,转换成 ASCII 码就是 \r\n
  • mtime(四字节)
    这个字段表示该 pyc 文件的编译日期,用 unix 时间戳来表示
  • PyCodeObject
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
//Include/code.h
/* In-memory layout of a compiled code object, as quoted from CPython's
   Include/code.h; each field is described by its trailing comment. */
typedef struct {
    PyObject_HEAD // marks the start of a PyCodeObject
    int co_argcount;        /* #arguments, except *args */
    int co_nlocals;     /* #local variables */
    int co_stacksize;       /* #entries needed for evaluation stack */
    int co_flags;       /* CO_..., see below */
    PyObject *co_code;      /* instruction opcodes */
    PyObject *co_consts;    /* list (constants used) */
    PyObject *co_names;     /* list of strings (names used) */
    PyObject *co_varnames;  /* tuple of strings (local variable names) */
    PyObject *co_freevars;  /* tuple of strings (free variable names) */
    PyObject *co_cellvars;      /* tuple of strings (cell variable names) */
    /* The rest doesn't count for hash/cmp */
    PyObject *co_filename;  /* string (where it was loaded from) */
    PyObject *co_name;      /* string (name, for reference) */
    int co_firstlineno;     /* first source line number */
    PyObject *co_lnotab;    /* string (encoding addr<->lineno mapping) See
                Objects/lnotab_notes.txt for details. */
    void *co_zombieframe;     /* for optimization only (see frameobject.c) */
    PyObject *co_weakreflist;   /* to support weakrefs to code objects */
} PyCodeObject;

各个字段的含义如下:

1
2
3
4
5
6
7
argcount  参数个数    
nlocals   局部变量个数    
stacksize 栈空间大小    
flags     N/A    
TYPE_STRING 表示字节码开始    
r_long n  字节码的数量    
struct Instruction inst[] 字节码序列    

PyObject 的序列化在 python/marshal.c 内实现,一般是先写入一个 byte 来标识此 PyObject 的类型,每种 PyObject 对应的类型也在 Python/marshal.c 内定义:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
/* Type tags used by the marshal serializer (quoted from Python/marshal.c).
   Per the accompanying write-up, a single tag byte is written first to
   identify the kind of PyObject that follows; e.g. 'c' (decimal 99) marks a
   code object. */
#define TYPE_NULL               '0'
#define TYPE_NONE               'N'
#define TYPE_FALSE              'F'
#define TYPE_TRUE               'T'
#define TYPE_STOPITER           'S'
#define TYPE_ELLIPSIS           '.'
#define TYPE_INT                'i'
#define TYPE_INT64              'I'
#define TYPE_FLOAT              'f'
#define TYPE_BINARY_FLOAT       'g'
#define TYPE_COMPLEX            'x'
#define TYPE_BINARY_COMPLEX     'y'
#define TYPE_LONG               'l'
#define TYPE_STRING             's'
#define TYPE_INTERNED           't'
#define TYPE_STRINGREF          'R'
#define TYPE_TUPLE              '('
#define TYPE_LIST               '['
#define TYPE_DICT               '{'
#define TYPE_CODE               'c'
#define TYPE_UNICODE            'u'
#define TYPE_UNKNOWN            '?'
#define TYPE_SET                '<'
#define TYPE_FROZENSET          '>'

/images/9978a4a673035e941d972171ca2bd329/11884068-efc60ce7950d8041.png
如图type等于99就是TYPE_CODE类型,PyCodeObject 的第一个部分肯定是 TYPE_CODE,表示字节码区块
使用dis模块反汇编字节码区块部分:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
import dis


class read():
    """Minimal wrapper that opens a file and hands out raw byte chunks."""

    def __init__(self, filename, mode):
        # print filename, mode
        self.file = open(filename, mode)

    def read(self, num):
        """Return the next `num` bytes of the underlying file."""
        return self.file.read(num)


pyc = read("test.pyc", "rb")
# Skip everything in front of the raw bytecode. NOTE(review): 30 presumably is
# the 8-byte header (magic + mtime) plus the TYPE_CODE tag, the four int
# fields and the TYPE_STRING tag with its length -- confirm for your file.
pyc.read(30)
# 0x31 bytes of bytecode are disassembled (Python 2: dis.dis accepts a raw
# code string).
target = pyc.read(0x31)
pyc.file.close()
# print bytes(target)
dis.dis(target)

结果:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
 0 LOAD_CONST          0 (0)    # 读取常量列表中的 0 号常量    
 3 MAKE_FUNCTION       0      # 制作一个函数    
 6 STORE_NAME          0 (0)  # 函数命名为字符串列表中的 0     
 9 LOAD_CONST          1 (1)    
12 MAKE_FUNCTION       0    
15 STORE_NAME          1 (1)    
18 LOAD_NAME           0 (0)  # 取出函数 1    
21 CALL_FUNCTION       0     # 调用    
24 STORE_NAME          2 (2) # 存储返回值    
27 LOAD_NAME           1 (1)     
30 CALL_FUNCTION       0    
33 STORE_NAME          3 (3)    
36 LOAD_NAME           2 (2)   # 取出两个返回值    
39 LOAD_NAME           3 (3)    
42 BINARY_ADD                 # 相加    
43 PRINT_ITEM                 # 打印结果    
44 PRINT_NEWLINE    
45 LOAD_CONST          2 (2)    
48 RETURN_VALUE    

/images/9978a4a673035e941d972171ca2bd329/11884068-bcba931ae7ce9c12.png
混淆的一般思路:
/images/9978a4a673035e941d972171ca2bd329/11884068-409d19f3223da194.png
uncompyle 的工作原理和一般的反编译器类似,它会尽力去匹配每一条指令,尝试将所有指令都覆盖到,但是在解析上面的代码时,碰到 load 不存在的常量时就会出错,无法继续反编译。
所有的指令可以分为两类:不需要参数的和需要参数的。Python字节码在设计的时候故意把没有参数的指令分配在了编号的低位,高位都是有参数的,以Include/opcode.h中的HAVE_ARGUMENT为分界。它们在二进制级别上的组织是这样的:

[指令] 不需要参数的指令只占用一个字节
[指令] [参数低字节] [参数高字节] 需要参数的指令占用三个字节,一个字节指令,两个字节参数
本题中经过混淆的字节码:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
>>> list(map(ord,code.co_code[:9]))    
[113, 158, 2, 136, 104, 110, 126, 58, 140]    
>>> dis.opname[113]    
'JUMP_ABSOLUTE'    
>>> 2*256+158    
670    
>>> dis.opname[136]    
'LOAD_DEREF'    
>>> 110*256+104    
28264    

从上面可以看到,第一条指令是JUMP_ABSOLUTE 670,这个offset的指令是真实存在的,所以指令合法。但是第二条指令应该是LOAD_DEREF 28264,这个index的对象并不存在,在dis尝试解析的时候就会崩溃。

实际上因为之前的跳转指令所以第二条的非法指令并不会被真实执行到,所以pyc文件作者是故意加入不影响执行的非法指令触发分析软件崩溃,阻碍对该pyc文件的分析。

去混淆

用Apeng师傅的方法可以去掉混淆

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
# python2 disasm_anti.py py.pyc    
import dis, marshal, struct, sys, time, types    
from opcode import *    
    
def ana_branch(code, i, hits):
    """Collect the offsets of every instruction reachable from offset `i`.

    code -- raw bytecode as a (Python 2) str
    i    -- offset at which to start following control flow
    hits -- list that receives each reached instruction offset exactly once;
            offsets already present are treated as visited

    Both successors of a conditional/relative branch are followed, only the
    target of an unconditional jump is followed, and every other instruction
    falls through to the next one. Opcode numbers are the CPython 2.7 ones
    (110 JUMP_FORWARD, 113 JUMP_ABSOLUTE, 93 FOR_ITER, 120 SETUP_LOOP, ...).

    The walk uses an explicit stack instead of one recursive call per
    instruction, so long bytecode cannot hit the interpreter recursion limit,
    and offsets at or past the end of `code` are ignored instead of raising
    IndexError.
    """
    n = len(code)
    seen = set(hits)
    pending = [i]
    while pending:
        i = pending.pop()
        if i >= n or i in seen:
            continue
        seen.add(i)
        hits.append(i)
        op = ord(code[i])
        if op < HAVE_ARGUMENT:
            pending.append(i + 1)
            continue
        if i + 2 >= n:
            # The two operand bytes are cut off by the end of the code:
            # nothing sensible can follow this instruction.
            continue
        oparg = ord(code[i+1]) + ord(code[i+2])*256
        if op == 111 or op == 112 or op == 114 or op == 115 or op == 120 or op == 93:
            target = oparg
            if op == 120 or op == 93:
                # These two carry an offset relative to the next instruction.
                target += i + 3
            # Push the fall-through first so the branch target is explored
            # first, matching the original depth-first visiting order.
            pending.append(i + 3)
            pending.append(target)
        elif op == 110:
            pending.append(i + oparg + 3)
        elif op == 113:
            pending.append(oparg)
        else:
            pending.append(i + 3)
    
def findlinestarts(code):
    """Yield (offset, lineno) pairs marking where source lines start.

    `code.co_lnotab` is read as a sequence of (bytecode increment, line
    increment) byte pairs, starting from offset 0 and `code.co_firstlineno`.
    A pair is reported only when the bytecode offset is about to advance and
    the line number differs from the last one reported.
    """
    table = [ord(ch) for ch in code.co_lnotab]

    prev_line = None
    cur_line = code.co_firstlineno
    offset = 0
    # Walk the table two entries at a time; a dangling last byte is ignored.
    for k in range(0, len(table) - 1, 2):
        addr_step = table[k]
        line_step = table[k + 1]
        if addr_step:
            if cur_line != prev_line:
                yield (offset, cur_line)
                prev_line = cur_line
            offset += addr_step
        cur_line += line_step
    if cur_line != prev_line:
        yield (offset, cur_line)
    
def findhits(code):
    """Return the sorted offsets of all instructions reachable from offset 0."""
    reached = []
    ana_branch(code, 0, reached)
    return sorted(reached)
    
def anti_findlabels(code):
    """Return the jump-target offsets found among reachable instructions.

    Bytes that findhits() does not report are skipped one at a time, so
    operands of never-executed instructions cannot produce labels.
    """
    reachable = findhits(code)
    targets = []
    size = len(code)
    pos = 0
    while pos < size:
        if pos not in reachable:
            pos += 1
            continue
        opcode_byte = ord(code[pos])
        pos += 1
        if opcode_byte < HAVE_ARGUMENT:
            continue
        # Two-byte little-endian operand follows the opcode.
        arg = ord(code[pos]) + ord(code[pos+1])*256
        pos += 2
        if opcode_byte in hasjrel:
            dest = pos + arg
        elif opcode_byte in hasjabs:
            dest = arg
        else:
            dest = -1
        if dest >= 0 and dest not in targets:
            targets.append(dest)
    return targets
    
def dis_anti_obf(co, lasti = -1):    
    """Disassemble a code object, anti obf"""    
    anti_code = ""    
    code = co.co_code    
    hits = findhits(code)    
    labels = anti_findlabels(code)    
    linestarts = dict(findlinestarts(co))    
    n = len(code)    
    i = 0    
    i = 0    
    extended_arg = 0    
    free = None    
    while i < n:    
        if i not in hits:    
            i+=1    
            anti_code+="\x09"    
            continue    
        c = code[i]    
        op = ord(c)    
        if i in linestarts:    
            if i > 0:    
                print    
            print "%3d" % linestarts[i],    
        else:    
            print '   ',    
          
        if i == lasti: print '-->',    
        else: print '   ',    
        if i in labels: print '>>',    
        else: print '  ',    
        print repr(i).rjust(4),    
        print opname[op].ljust(20),    
        anti_code += code[i]    
        i = i+1    
        if op >= HAVE_ARGUMENT:    
            oparg = ord(code[i]) + ord(code[i+1])*256 + extended_arg    
            extended_arg = 0    
            anti_code+=code[i]    
            anti_code+=code[i+1]    
            i = i+2    
            if op == EXTENDED_ARG:    
                extended_arg = oparg*65536L    
            print repr(oparg).rjust(5),    
            if op in hasconst:    
                print '(' + repr(co.co_consts[oparg]) + ')',    
            elif op in hasname:    
                print '(' + co.co_names[oparg] + ')',    
            elif op in hasjrel:    
                print '(to ' + repr(i + oparg) + ')',    
            elif op in haslocal:    
                print '(' + co.co_varnames[oparg] + ')',    
            elif op in hascompare:    
                print '(' + cmp_op[oparg] + ')',    
            elif op in hasfree:    
                if free is None:    
                    free = co.co_cellvars + co.co_freevars    
                print '(' + free[oparg] + ')',    
        print    
print "patch code:"    
print(anti_code.encode("hex"))    
    
    
    
    
def show_file(fname):    
    f = open(fname, "rb")    
    magic = f.read(4)    
    moddate = f.read(4)    
    modtime = time.asctime(time.localtime(struct.unpack('L', moddate)[0]))    
    print "magic %s" % (magic.encode('hex'))    
    print "moddate %s (%s)" % (moddate.encode('hex'), modtime)    
    code = marshal.load(f)    
    show_code(code)    
      
    def show_code(code, indent=''):    
    print "%scode" % indent    
    indent += '   '    
    print "%sargcount %d" % (indent, code.co_argcount)    
    print "%snlocals %d" % (indent, code.co_nlocals)    
    print "%sstacksize %d" % (indent, code.co_stacksize)    
    print "%sflags %04x" % (indent, code.co_flags)    
    show_hex("code", code.co_code, indent=indent)    
    dis_anti_obf(code)    
    print "%sconsts" % indent    
    for const in code.co_consts:    
        if type(const) == types.CodeType:    
            show_code(const, indent+'   ')    
        else:    
            print "   %s%r" % (indent, const)    
    print "%snames %r" % (indent, code.co_names)    
    print "%svarnames %r" % (indent, code.co_varnames)    
    print "%sfreevars %r" % (indent, code.co_freevars)    
    print "%scellvars %r" % (indent, code.co_cellvars)    
    print "%sfilename %r" % (indent, code.co_filename)    
    print "%sname %r" % (indent, code.co_name)    
    print "%sfirstlineno %d" % (indent, code.co_firstlineno)    
    show_hex("lnotab", code.co_lnotab, indent=indent)    
     
def show_hex(label, h, indent):    
h = h.encode('hex')    
if len(h) < 60:    
    print "%s%s %s" % (indent, label, h)    
else:    
    print "%s%s" % (indent, label)    
    for i in range(0, len(h), 60):    
        print "%s   %s" % (indent, h[i:i+60])    
    
show_file(sys.argv[1])    

或者使用FXTi师傅的方法将不合法的指令转换为nop:

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
import marshal, sys, opcode, types, dis    
    
# Opcode numbers used by the walker below.
NOP = 9

# Opcodes at or above this value are followed by a two-byte operand.
HAVE_ARGUMENT = 90

JUMP_FORWARD = 110
JUMP_IF_FALSE_OR_POP = 111
JUMP_IF_TRUE_OR_POP = 112
JUMP_ABSOLUTE = 113
POP_JUMP_IF_FALSE = 114
POP_JUMP_IF_TRUE = 115

CONTINUE_LOOP = 119
FOR_ITER = 93

RETURN_VALUE = 83

# Offsets (opcode and operand bytes) found to belong to executed instructions.
used_set = set()

def deconf_inner(code, now):
    """Mark every byte reachable from offset `now` in the global `used_set`.

    code -- bytecode as a list of ints
    now  -- offset at which to start walking

    Unconditional jumps are followed in place; for conditional jumps,
    CONTINUE_LOOP and FOR_ITER the jump target is explored recursively and the
    walk then continues with the next instruction. The walk stops at
    RETURN_VALUE, at an offset that is already marked, or when it runs past
    the end of `code` (instead of raising IndexError).
    """
    global used_set
    size = len(code)

    while now < size and code[now] != RETURN_VALUE:
        if now in used_set:
            break
        used_set.add(now)
        op = code[now]
        if op >= HAVE_ARGUMENT:
            used_set.add(now+1)
            used_set.add(now+2)
            if now + 2 >= size:
                # The operand is cut off by the end of the code; there is
                # nothing left to decode.
                return
            arg = code[now+2] << 8 | code[now+1]

        #print(str(now) + " " + opcode.opname[op])

        if op == JUMP_FORWARD:
            now += arg + 3
            continue

        elif op == JUMP_ABSOLUTE:
            now = arg
            continue

        elif op in (JUMP_IF_TRUE_OR_POP, JUMP_IF_FALSE_OR_POP,
                    POP_JUMP_IF_TRUE, POP_JUMP_IF_FALSE, CONTINUE_LOOP):
            deconf_inner(code, arg)

        elif op == FOR_ITER:
            # Relative operand: counted from the following instruction.
            deconf_inner(code, now + arg + 3)

        if op < HAVE_ARGUMENT:
            now += 1
        else:
            now += 3

    # Mark the instruction the walk stopped on (normally RETURN_VALUE).
    if now < size:
        used_set.add(now)
        if code[now] >= HAVE_ARGUMENT:
            used_set.add(now+1)
            used_set.add(now+2)
    
def deconf(code):
    """Return `code` with every byte not marked by deconf_inner() set to NOP.

    code -- raw bytecode as a (Python 2) str; the result has the same length,
            so jump offsets inside it stay valid.
    """
    global used_set

    used_set = set() #Remember to clean up used_set for every target function

    cod = list(map(ord, code))
    deconf_inner(cod, 0)

    for i in range(len(cod)):
        if i not in used_set:
            cod[i] = NOP

    return "".join(list(map(chr, cod)))
    
# Read the 8-byte header and the top-level code object of the input .pyc.
with open(sys.argv[1], 'rb') as f:
    header = f.read(8)
    code = marshal.load(f)


def _rebuild_code(co, new_consts):
    """Return a copy of code object `co` whose bytecode went through deconf()
    and whose constants are `new_consts`; all other fields are kept as-is."""
    return types.CodeType(co.co_argcount,
        # co.co_kwonlyargcount,  Add this in Python3
        co.co_nlocals,
        co.co_stacksize,
        co.co_flags,
        deconf(co.co_code),
        new_consts,
        co.co_names,
        co.co_varnames,
        co.co_filename,
        co.co_name,
        co.co_firstlineno,
        co.co_lnotab,   # In general, You should adjust this
        co.co_freevars,
        co.co_cellvars)


# Debugging aid, kept disabled:
# print(code.co_consts[3].co_name)
# print(dis.dis(deconf(code.co_consts[3].co_code)))

# Clean the code objects stored directly in the module's constants; anything
# that is not a code object is copied through unchanged.
consts = list()

for const in code.co_consts:
    if hasattr(const, 'co_code'):
        consts.append(_rebuild_code(const, const.co_consts))
    else:
        consts.append(const)

mode = _rebuild_code(code, tuple(consts))

# Write "<input>.mod"; the with-block guarantees the file is flushed and
# closed even if marshalling fails.
with open(sys.argv[1]+".mod", 'wb') as f:
    f.write(header)
    marshal.dump(mode, f)

NOP指令去除与块的合并

fxti师傅给了一个工具:
https://github.com/extremecoders-re/bytecode_simplifier
利用这个工具可以去除上面去混淆以后生成的全是NOP指令的块,以及合并一些块
完成之后使用uncompyle6可以反汇编一下(反编译还是会失败)

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
 L.   1         0  LOAD_CONST               2694209818L    
            3  STORE_FAST            0  'DIVIDER'    
            6  JUMP_FORWARD          0  'to 9'    
          9_0  COME_FROM             6  '6'    
            9  LOAD_CONST               4130330538L    
    
 L.   3        12  LOAD_FAST             0  'DIVIDER'    
           15  COMPARE_OP            2  ==    
           18  POP_JUMP_IF_TRUE    366  'to 366'    
    
 L.   9        21  LOAD_CONST               3168701571L    
           24  LOAD_FAST             0  'DIVIDER'    
           27  COMPARE_OP            2  ==    
    
 L.  14        30  POP_JUMP_IF_TRUE    345  'to 345'    
           33  LOAD_CONST               3715947653L    
           36  LOAD_FAST             0  'DIVIDER'    
    
 L.  17        39  COMPARE_OP            2  ==    
           42  POP_JUMP_IF_TRUE    324  'to 324'    
           45  LOAD_CONST               2694209818L    
    
 L.  20        48  LOAD_FAST             0  'DIVIDER'    
           51  COMPARE_OP            2  ==    
           54  POP_JUMP_IF_TRUE    306  'to 306'    
    
 L.  38        57  LOAD_CONST               651787064    
           60  LOAD_FAST             0  'DIVIDER'    
           63  COMPARE_OP            2  ==    
           66  POP_JUMP_IF_TRUE    285  'to 285'    
           69  LOAD_CONST               3521152606L    
           72  LOAD_FAST             0  'DIVIDER'    
           75  COMPARE_OP            2  ==    
           78  POP_JUMP_IF_TRUE    264  'to 264'    
           81  LOAD_CONST               2730391645L    
           84  LOAD_FAST             0  'DIVIDER'    
           87  COMPARE_OP            2  ==    
           90  POP_JUMP_IF_TRUE    246  'to 246'    
           93  LOAD_CONST               4084147187L    
           96  LOAD_FAST             0  'DIVIDER'    
           99  COMPARE_OP            2  ==    
          102  POP_JUMP_IF_TRUE    222  'to 222'    
          105  LOAD_CONST               1860581437    
          108  LOAD_FAST             0  'DIVIDER'    
          111  COMPARE_OP            2  ==    
          114  POP_JUMP_IF_TRUE    204  'to 204'    
          117  LOAD_CONST               3816944324L    
          120  LOAD_FAST             0  'DIVIDER'    
          123  COMPARE_OP            2  ==    
          126  POP_JUMP_IF_TRUE    203  'to 203'    
          129  LOAD_CONST               394367122    
          132  LOAD_FAST             0  'DIVIDER'    
          135  COMPARE_OP            2  ==    
          138  POP_JUMP_IF_TRUE    181  'to 181'    
          141  LOAD_CONST               1627830889    
          144  LOAD_FAST             0  'DIVIDER'    
          147  COMPARE_OP            2  ==    
          150  POP_JUMP_IF_TRUE    157  'to 157'    
          153  LOAD_CONST               None    
          156  RETURN_END_IF        
        157_0  COME_FROM           150  '150'    
          157  STORE_NAME            0  'sys'    
          160  LOAD_CODE                <code_object str2hex>    
          163  MAKE_FUNCTION_0       0  None    
          166  STORE_NAME            1  'str2hex'    
          169  LOAD_CODE                <code_object hex2str>    
          172  LOAD_CONST               3715947653L    
          175  STORE_FAST            0  'DIVIDER'    
          178  JUMP_BACK             9  'to 9'    
          181  LOAD_NAME            10  'flag'    
          184  CALL_FUNCTION_1       1  None    
          187  CALL_FUNCTION_1       1  None    
          190  POP_TOP              
          191  LOAD_CONST               None    
          194  LOAD_CONST               3816944324L    
          197  STORE_FAST            0  'DIVIDER'    
          200  JUMP_BACK             9  'to 9'    
          203  RETURN_END_IF        
        204_0  COME_FROM           114  '114'    
          204  LOAD_CONST               97    
          207  LOAD_CONST               103    
          210  LOAD_CONST               58    
          213  LOAD_CONST               2730391645L    
          216  STORE_FAST            0  'DIVIDER'    
          219  JUMP_BACK             9  'to 9'    
          222  LOAD_ATTR             6  'stdout'    
          225  LOAD_ATTR             7  'write'    
          228  LOAD_NAME             2  'hex2str'    
          231  LOAD_CONST               102    
          234  LOAD_CONST               108    
          237  LOAD_CONST               1860581437    
          240  STORE_FAST            0  'DIVIDER'    
          243  JUMP_BACK             9  'to 9'    
          246  BUILD_LIST_5          5     
          249  CALL_FUNCTION_1       1  None    
          252  CALL_FUNCTION_1       1  None    
          255  LOAD_CONST               4130330538L    
          258  STORE_FAST            0  'DIVIDER'    
          261  JUMP_BACK             9  'to 9'    
          264  CALL_FUNCTION_1       1  None    
          267  STORE_NAME           10  'flag'    
          270  LOAD_NAME             5  'count'    
          273  LOAD_NAME             1  'str2hex'    
          276  LOAD_CONST               394367122    
          279  STORE_FAST            0  'DIVIDER'    
          282  JUMP_BACK             9  'to 9'    
          285  STORE_NAME            3  'p_s'    
          288  LOAD_CODE                <code_object p_f>    
          291  MAKE_FUNCTION_0       0  None    
          294  STORE_NAME            4  'p_f'    
          297  LOAD_CONST               3168701571L    
          300  STORE_FAST            0  'DIVIDER'    
          303  JUMP_BACK             9  'to 9'    
          306  LOAD_CONST               -1    
          309  LOAD_CONST               None    
          312  IMPORT_NAME           0  'sys'    
          315  LOAD_CONST               1627830889    
          318  STORE_FAST            0  'DIVIDER'    
          321  JUMP_BACK             9  'to 9'    
          324  MAKE_FUNCTION_0       0  None    
          327  STORE_NAME            2  'hex2str'    
          330  LOAD_CODE                <code_object p_s>    
          333  MAKE_FUNCTION_0       0  None    
          336  LOAD_CONST               651787064    
          339  STORE_FAST            0  'DIVIDER'    
          342  JUMP_BACK             9  'to 9'    
          345  LOAD_CODE                <code_object count>    
          348  MAKE_FUNCTION_0       0  None    
          351  STORE_NAME            5  'count'    
          354  LOAD_NAME             0  'sys'    
          357  LOAD_CONST               4084147187L    
          360  STORE_FAST            0  'DIVIDER'    
          363  JUMP_BACK             9  'to 9'    
          366  POP_TOP              
          367  LOAD_NAME             0  'sys'    
          370  LOAD_ATTR             8  'stdin'    
          373  LOAD_ATTR             9  'read'    
          376  LOAD_CONST               38    
          379  LOAD_CONST               3521152606L    
          382  STORE_FAST            0  'DIVIDER'    
          385  JUMP_BACK             9  'to 9'    

然后我暂时只会看这个字节码,fxti师傅那个自己去混淆的方案没开源,我打算自己仔细研究一下这个反编译的代码,看能不能按照那个思路复现一下。

参考链接

https://apeng.fun/2019/08/16/2019zgc_quals/
PYC 文件的简单分析
PYC文件格式分析
Python字节码解混淆
Python字节码解混淆之反控制流扁平化