* Reinterpreting the compiler source code
@ 2014-09-04 17:33 Ian Grant
[not found] ` <CAKFjmdx4Zm2_HVEUre-PYvcZrm41gKv-2z_EsGehjv6NbVxBAw-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2014-09-05 1:51 ` Richard Stallman
0 siblings, 2 replies; 7+ messages in thread
From: Ian Grant @ 2014-09-04 17:33 UTC (permalink / raw)
To: Richard Stallman, Markus Kuhn, Theo deRaadt, Linus Torvalds,
guile-devel, lightning, schellr
[-- Attachment #1: Type: text/plain, Size: 17514 bytes --]
Perhaps people would enjoy reading about how one could inject trojan
semantics into a compiler in such a way that they have some resilience
to all but quite major structural changes to the target source code?
This is not just for the purposes of frightening the birds. It will
show how useful tools which automatically analyse and transform
program syntax could be. Imagine this in a 'whole program' IDE, so
that one could query certain classes of variable assignment in an
Operating System kernel, say, and program global re-writes based on
pattern matching of whole syntactic structures, rather than just the
sub-lexical matching which regular expressions offer.
Here is an implementation of a Prolog interpreter. The interpreter
itself is only 88 lines of Standard ML. Most of it is written using
list functionals like map, exists, find, partition etc. So it should
be clear that it could be quite easily translated into a fairly
efficient C program.
(* (Apart from the != predicate) This is a transliteration of the
Prolog interpreter in John Harrison's Cambridge lecture notes. *)
datatype term =
Fn of string * term list
| Var of string
datatype outcome =
Yes of (string * term) list
| No
local open GrammarSyntax
fun parser startsymbol =
let fun parse file stream lexbuf =
let val expr = startsymbol PrologLexer.Token lexbuf
in Parsing.clearParser();
expr
end handle exn => (Parsing.clearParser(); raise exn)
in parse end
fun processString pfn = fn s =>
pfn "string" s (Lexing.createLexerString s)
fun qlToString l =
let fun iter r [] = r
| iter r ((QUOTE s)::fs) = iter (r^s) fs
| iter r ((ANTIQUOTE s)::fs) = iter (r^s) fs
in iter "" l
end
val parserulesq = processString (parser PrologParser.File) o qlToString
val parsetermq = processString (parser PrologParser.OneTerm) o qlToString
fun fails s t = (printTree t;raise Fail (s^": no case."))
fun mkTerm (NonTerm("atom",
[NonTerm("constant",
[Term(name)]),
l as NonTerm("term-list",_)]))
= Fn(name,mkArgs l)
| mkTerm (NonTerm("atom",[NonTerm("constant",[Term(name)])]))
= Fn(name,[])
| mkTerm (NonTerm("atom",[NonTerm("variable",[Term(name)])]))
= Var(name)
| mkTerm (NonTerm("atom",[l as NonTerm("list",_)]))
= mkList(l)
| mkTerm t = fails "mkTerm" t
and mkList (NonTerm("list",[a as NonTerm("atom",_),
l as NonTerm("list",_)]))
= Fn(".",[mkTerm a,mkList l])
| mkList (NonTerm("list",[a as NonTerm("atom",_)]))
= Fn(".",[mkTerm a,Fn("[]",[])])
| mkList (NonTerm("list",[a as NonTerm("atom",_),
a' as NonTerm("atom",_)]))
= Fn(".",[mkTerm a,mkTerm a'])
| mkList (NonTerm("list",[]))
= Fn("[]",[])
| mkList t = fails "mkList" t
and mkArgs (NonTerm("term-list",[a as NonTerm("atom",_),
l as NonTerm("term-list",_)]))
= (mkTerm a)::(mkArgs l)
| mkArgs (NonTerm("term-list",[a as NonTerm("atom",_)]))
= [mkTerm a]
| mkArgs t = fails "mkArgs" t
fun mkTerms (NonTerm("terms",[a as NonTerm("atom",_),
ts as NonTerm("terms",_)]))
= (mkTerm a)::(mkTerms ts)
| mkTerms (NonTerm("terms",[a as NonTerm("atom",_)]))
= [mkTerm a]
| mkTerms t = fails "mkTerms" t
and mkRule (NonTerm("rule",[a as NonTerm("atom",_),
ts as NonTerm("terms",_)]))
= (mkTerm a, mkTerms ts)
| mkRule (NonTerm("rule",[a as NonTerm("atom",_)]))
= (mkTerm a,[])
| mkRule t = fails "mkRule" t
and mkRules (NonTerm("rule-list",[l as NonTerm("rule-list",_),
r as NonTerm("rule",_)]))
= (mkRule(r))::(mkRules l)
| mkRules (NonTerm("rule-list",[r as NonTerm("rule",_)]))
= [mkRule(r)]
| mkRules t = fails "mkRules" t
in
val rules = List.rev o mkRules o parserulesq
val goal = mkTerm o parsetermq
end
local
fun occurs_in x =
fn (Var y) => x = y
| (Fn(_,l)) => List.exists (occurs_in x) l
fun assoc i =
(Option.map (fn (_,v) => v)) o (List.find (fn (k,_) => k = i))
fun subs insts =
fn (tm as (Var y)) =>
(case assoc y insts of NONE => tm | SOME v => v)
| (Fn(s,l)) =>
Fn(s,List.map (subs insts) l)
fun augment1 theta (x,s) =
let val s' = subs theta s
in if occurs_in x s andalso not (s = Var(x))
then raise Fail "Occurs check."
else (x,s')
end
fun raw_augment p insts = p::(List.map (augment1 [p]) insts)
fun augment (v,t) insts =
let val t' = subs insts t
in case t'
of Var (w) =>
if w <= v
then if w = v
then insts
else raw_augment (v,t') insts
else raw_augment (w,Var(v)) insts
| _ => if occurs_in v t'
then raise Fail "Occurs check."
else raw_augment (v,t') insts
end
fun itlist2 f =
let fun iter [] [] = (fn b => b)
| iter (h::t) (h'::t') = (fn b => f h h' (itlist2 f t t' b))
| iter _ _ = raise Fail "Arity."
in iter
end
fun unify tm1 tm2 insts =
case tm1
of Var(x) =>
(case assoc x insts
of NONE => augment (x,tm2) insts
| SOME tm1' => unify tm1' tm2 insts)
| Fn(f1,args1) =>
(case tm2
of (Var(y)) =>
(case assoc y insts
of NONE => augment (y,tm1) insts
| SOME tm2' => unify tm1 tm2' insts)
| Fn(f2,args2) =>
if f1 = f2 then itlist2 unify args1 args2 insts
else raise Fail ("Constants: mismatch:
"^f1^"<>"^f2))
fun rename s =
fn (Var v) => Var("~"^v^s)
| (Fn(f,args)) => Fn(f,List.map (rename s) args)
fun rename_rule s (conc,assums) =
(rename s conc,List.map (rename s) assums)
fun expand n rules insts goals =
let fun first f =
(fn [] => raise Fail "No rules apply."
| (h::t) => (f h handle Fail _ => first f t))
fun search rule =
if goals = []
then insts
else
let fun eqgoal (Fn("!=",[a,b])) = not (a = b)
| eqgoal _ = false
val (conc,assums) = rename_rule (Int.toString n) rule
val goal = hd goals
val insts' = if eqgoal goal
then insts (* CHECK! This is
probably wrong. *)
else unify conc goal insts
fun occurs (v,_) = occurs_in v conc
orelse List.exists (occurs_in v) assums
val (loc,glob) = List.partition occurs insts'
val goals' = (List.map (subs loc) assums) @ (tl
goals)
in expand (n+1) rules glob goals'
end
in
first search rules
end
in
fun prolog rules goal =
let val insts = expand 0 rules [] [goal]
in Yes (List.filter (fn (v,_) => occurs_in v goal) insts)
end handle Fail _ => No
end
Here is an example of what it can do (a rather tired old one, but bear
with us, because we will shortly reinterpret this as something
more interesting):
val () = load "Prolog";
val () = Meta.quotation := true;
val rs = Prolog.rules `
male(albert).
male(edward).
female(alice).
female(victoria).
father_of(albert,edward).
father_of(albert,alice).
mother_of(victoria,edward).
mother_of(victoria,alice).
parents(X,M,F):-mother_of(M,X),father_of(F,X).
sister_of(X,Y):-
female(X),
parents(X,M,F),
parents(Y,M,F).
brother_of(X,Y):-
!=(X,Y),
male(X),
parents(X,M,F),
parents(Y,M,F).
`;
This defines a database of facts, the so-called ground terms, which
are those which don't contain any logical variables (which in Prolog
are typically all-upper-case identifiers). Then there are _rules_
which define relations (in the set-theoretical sense of the term, but
here also in the genealogical sense).
Note that the two-place predicate sister_of is indirectly defined in
terms another rule, which defines the parents relation. But these 88
lines of code will resolve the indirection and so they can correctly
deduce, for example:
val g = Prolog.goal `sister_of(alice,edward)`
val r = Prolog.prolog rs g
giving
val r = Prolog.Yes [] : outcome
Now look at the database of facts, and note that it is really just an
abstract syntax representation of a collection of statements in an
arbitrary language (i.e. a language with undefined syntax). Therefore
one can apply Prolog deductions, quite trivially, to sentences in a
defined programming language such as C. Now the C compiler, which we
imagine we are attacking, already has all the code to analyse and
manipulate abstract syntax representations of C programs, because that
is its job! So I hope people can see, without my having to install it
into GCC 4.9, that one can easily implement a Prolog interpreter in
very little code, which can decide a quite general class of predicates
concerning a program, the structure of which is represented in the
compiler as abstract syntax.
Now note that the predicates need not have a fixed 'depth.' Here are
two rules defining a path in an arbitrary directed graph, which is a
non-wellfounded structure capable of representing almost anything,
including the sentences of any language defined by a context-free
grammar.
Here the ground terms represent a graph in three disjoint parts, with
two cycles.
val rs'' = Prolog.rules `
edge(g,h).
edge(e,f).
edge(f,e).
edge(a,b).
edge(b,c).
edge(c,d).
edge(d,a).
path(A,B):-edge(A,B).
path(A,B):-edge(A,C),path(C,B).
`;
Prolog can determine whether or not there is a path from one vertex to
another, no matter how far apart they may be:
val g2 = Prolog.goal `path(a,d)`
val r2 = Prolog.prolog rs'' g2
It prints
val r2 = Prolog.Yes [] : outcome
And it can tell which vertices are reachable from a given vertex:
val g3 = Prolog.goal `path(b,X)`
val r3 = Prolog.prolog rs'' g3
The result is
val r3 = Prolog.Yes [("X", Prolog.Fn("c", []))] : outcome
In this implementation it only prints the first such edge, but it
could find them all.
So it should not require any great stretching of people's credulity to
see that a very few lines of intelligently written code can recognise
large-scale structure in abstract syntax, even when quite extensive
changes may have been made to the leaves.
Now we need to show it could re-write those programs. This is easy,
because in fact a C compiler already does this. It has to, because the
syntax of C is not sufficient to determine what is a well-formed C
program and what is not. Here is an example of programmed re-writing
of arbitrary fragments of a C program. What this does is split a
compound typedef statement (any compound typedef statement whatsoever)
into separate parts:
val rewrite_typedef =
rewriteq `
⟦declaration
⌜w:declaration-specifiers⌝
(init-declarator-list
⌜x:init-declarator-list⌝
⌜y:init-declarator⌝)⟧ =
(declaration-list
(declaration-list
⟦declaration ⌜w⌝ ⌜x⌝⟧)
(declaration ⌜w⌝ (init-declarator-list ⌜y⌝)))`
So taking this typedef:
val td = absyntq `typedef unsigned int ui, *pui, (*funcp) (int i);`
we produce this rewritten abstract syntax with three separate
typedef declarations:
external-declarations:
external-declaration:
declaration-list:
declaration-list:
declaration-list:
declaration-list:
declaration:
declaration-specifiers:
storage-class-specifier: typedef
declaration-specifiers:
type-specifier: unsigned
declaration-specifiers:
type-specifier: int
init-declarator-list:
init-declarator:
declarator:
direct-declarator:
identifier: ui
declaration:
declaration-specifiers:
storage-class-specifier: typedef
declaration-specifiers:
type-specifier: unsigned
declaration-specifiers:
type-specifier: int
init-declarator-list:
init-declarator:
declarator:
pointer:
direct-declarator:
identifier: pui
declaration:
declaration-specifiers:
storage-class-specifier: typedef
declaration-specifiers:
type-specifier: unsigned
declaration-specifiers:
type-specifier: int
init-declarator-list:
init-declarator:
declarator:
direct-declarator:
direct-declarator:
declarator:
pointer:
direct-declarator:
identifier: funcp
parameter-type-list:
parameter-list:
parameter-declaration:
declaration-specifiers:
type-specifier: int
declarator:
direct-declarator
identifier i
The general system, capable of handling extremely complex rewrites
like this, which can be nested and called (conditionally and/or
recursively) one from another, with parameters, is implemented in
about 400 lines of rather badly written Standard ML. It is not Prolog,
but it uses a similar unification algorithm to identify certain
large-scale structures independently of many details of particular
instances, which can vary quite significantly.
These programs, the Prolog interpreter and the Rewrite engine, compile
to 15,533 and 24,535+28,944 bytes respectively of CAML bytecode. I
would not expect a C implementation of similar algorithms to be much
more than this. If anyone doubts someone's chances of effectively
hiding this much code in a C compiler binary, then they should
consider that on my system, between GCC 4.5 and GCC 4.9, the combined
compiler binaries: gcc, cpp, cc1, as and ld grew from 10 MB to over 70
MB. So I think it would not be too difficult to hide an extra 80k of
object code in a binary. That represents an increase of around one
tenth of one percent over the total 70MB (which I consider quite
ridiculous, by the way. One possible explanation is that GCHQ and the
NSA are playing core-wars in the GCC binaries.)
Now those still reading this will appreciate that the rewrite engine,
and the Prolog interpreter, are quite abstract in the sense that they
interpret _arbitrary_ rules, and so they could easily be implemented
in such a way that the rules can be changed after the fact, by coding
new rules into the compiler's source code. The coding could be in
comments, or it could be in the choice of variable names, or it could
be in terms of the number of spaces appearing at the ends of lines
which are certain successive prime multiples of lines apart. Then, by
applying a publicly scrutinised patch to the compiler source, the
penetrators could reprogram the trap door already in all the binaries
of the previous versions of the new compiler, so that they recognise
and effectively compromise the new compiler's source structure. And
while they're at it, they may as well change the coding system used to
transmit new rules, so that no more than one instance of any encrypted
message ever exists.
This is only the tip of the ice-burg; and the less evidence I see that
people are properly addressing this 'issue', the more of these
'suggestions' I will publish over the next few days. And I think that
after not so very long, people will be less inclined to make unfounded
claims about the relative improbability of such an attack. I'm looking
forward to it: some of them are really, really whacky! And needless to
say, none of them are original. This last is just a standard instance
of the teletype string trigger trap door that Karg and Schell
described 40 years ago in the Multics Security Evaluation:
Vulnerability Analysis. That document is well worth a careful
read. The First World Information War started decades ago, we think,
but some people apparently haven't cottoned-on yet.
Happy hacking, everyone!
Ian
[-- Attachment #2: Type: text/html, Size: 22098 bytes --]
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: Reinterpreting the compiler source code
[not found] ` <CAKFjmdx4Zm2_HVEUre-PYvcZrm41gKv-2z_EsGehjv6NbVxBAw-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2014-09-04 20:57 ` Bruno Loff
[not found] ` <CAGOfsMgZE==zCC2OmEPtbH_ph1g1-AhWikioSdZJFep8crP3vQ-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2014-09-05 0:13 ` William ML Leslie
1 sibling, 1 reply; 7+ messages in thread
From: Bruno Loff @ 2014-09-04 20:57 UTC (permalink / raw)
To: Ian Grant
Cc: Richard Stallman, guile-devel-mXXj517/zsQ, Theo deRaadt,
Markus Kuhn, Linus Torvalds, lightning, schellr-EkmVulN54Sk
[-- Attachment #1.1: Type: text/plain, Size: 19677 bytes --]
Even if the threat you describe is real, why couldn't it be resolved in a
simpler way?
I'm thinking certificates and signatures...
I mean, for each distribution in each architecture, there is basically a
single build configuration which needs to be certified as "trojan-free".
And this is basically already implemented, provided that I get my compiler
package from a clean source which signed the package, all that is necessary
is that the clean source took special care in ensuring that their version
is in fact bug-free...
That solves 99% of the use-cases, when you use a standard compiler provided
by a clean source. For the remaining .99%, when you use a custom-built
compiler, say, you could start with a compiler from a clean source and use
that to compile your custom-build... No bug injection would happen then.
Wouldn't that be enough? Formal certification of the cleanliness of the
compiler (if I understood correctly and this is what you propose) seems
over-the-top to me.
Happy hacking :-)
On 4 September 2014 18:33, Ian Grant <ian.a.n.grant-gM/Ye1E23mwN+BqQ9rBEUg@public.gmane.org> wrote:
> Perhaps people would enjoy reading about how one could inject trojan
> semantics into a compiler in such a way that they have some resilience
> to all but quite major structural changes to the target source code?
>
> This is not just for the purposes of frightening the birds. It will
> show how useful tools which automatically analyse and transform
> program syntax could be. Imagine this in a 'whole program' IDE, so
> that one could query certain classes of variable assignment in an
> Operating System kernel, say, and program global re-writes based on
> pattern matching of whole syntactic structures, rather than just the
> sub-lexical matching which regular expressions offer.
>
> Here is an implementation of a Prolog interpreter. The interpreter
> itself is only 88 lines of Standard ML. Most of it is written using
> list functionals like map, exists, find, partition etc. So it should
> be clear that it could be quite easily translated into a fairly
> efficient C program.
>
> (* (Apart from the != predicate) This is a transliteration of the
> Prolog interpreter in John Harrison's Cambridge lecture notes. *)
>
> datatype term =
> Fn of string * term list
> | Var of string
>
> datatype outcome =
> Yes of (string * term) list
> | No
>
> local open GrammarSyntax
> fun parser startsymbol =
> let fun parse file stream lexbuf =
> let val expr = startsymbol PrologLexer.Token lexbuf
> in Parsing.clearParser();
> expr
> end handle exn => (Parsing.clearParser(); raise exn)
> in parse end
> fun processString pfn = fn s =>
> pfn "string" s (Lexing.createLexerString s)
> fun qlToString l =
> let fun iter r [] = r
> | iter r ((QUOTE s)::fs) = iter (r^s) fs
> | iter r ((ANTIQUOTE s)::fs) = iter (r^s) fs
> in iter "" l
> end
> val parserulesq = processString (parser PrologParser.File) o qlToString
> val parsetermq = processString (parser PrologParser.OneTerm) o
> qlToString
> fun fails s t = (printTree t;raise Fail (s^": no case."))
> fun mkTerm (NonTerm("atom",
> [NonTerm("constant",
> [Term(name)]),
> l as NonTerm("term-list",_)]))
> = Fn(name,mkArgs l)
> | mkTerm (NonTerm("atom",[NonTerm("constant",[Term(name)])]))
> = Fn(name,[])
> | mkTerm (NonTerm("atom",[NonTerm("variable",[Term(name)])]))
> = Var(name)
> | mkTerm (NonTerm("atom",[l as NonTerm("list",_)]))
> = mkList(l)
> | mkTerm t = fails "mkTerm" t
> and mkList (NonTerm("list",[a as NonTerm("atom",_),
> l as NonTerm("list",_)]))
> = Fn(".",[mkTerm a,mkList l])
> | mkList (NonTerm("list",[a as NonTerm("atom",_)]))
> = Fn(".",[mkTerm a,Fn("[]",[])])
> | mkList (NonTerm("list",[a as NonTerm("atom",_),
> a' as NonTerm("atom",_)]))
> = Fn(".",[mkTerm a,mkTerm a'])
> | mkList (NonTerm("list",[]))
> = Fn("[]",[])
> | mkList t = fails "mkList" t
> and mkArgs (NonTerm("term-list",[a as NonTerm("atom",_),
> l as NonTerm("term-list",_)]))
> = (mkTerm a)::(mkArgs l)
> | mkArgs (NonTerm("term-list",[a as NonTerm("atom",_)]))
> = [mkTerm a]
> | mkArgs t = fails "mkArgs" t
> fun mkTerms (NonTerm("terms",[a as NonTerm("atom",_),
> ts as NonTerm("terms",_)]))
> = (mkTerm a)::(mkTerms ts)
> | mkTerms (NonTerm("terms",[a as NonTerm("atom",_)]))
> = [mkTerm a]
> | mkTerms t = fails "mkTerms" t
> and mkRule (NonTerm("rule",[a as NonTerm("atom",_),
> ts as NonTerm("terms",_)]))
> = (mkTerm a, mkTerms ts)
> | mkRule (NonTerm("rule",[a as NonTerm("atom",_)]))
> = (mkTerm a,[])
> | mkRule t = fails "mkRule" t
> and mkRules (NonTerm("rule-list",[l as NonTerm("rule-list",_),
> r as NonTerm("rule",_)]))
> = (mkRule(r))::(mkRules l)
> | mkRules (NonTerm("rule-list",[r as NonTerm("rule",_)]))
> = [mkRule(r)]
> | mkRules t = fails "mkRules" t
> in
> val rules = List.rev o mkRules o parserulesq
> val goal = mkTerm o parsetermq
> end
>
> local
> fun occurs_in x =
> fn (Var y) => x = y
> | (Fn(_,l)) => List.exists (occurs_in x) l
> fun assoc i =
> (Option.map (fn (_,v) => v)) o (List.find (fn (k,_) => k = i))
> fun subs insts =
> fn (tm as (Var y)) =>
> (case assoc y insts of NONE => tm | SOME v => v)
> | (Fn(s,l)) =>
> Fn(s,List.map (subs insts) l)
> fun augment1 theta (x,s) =
> let val s' = subs theta s
> in if occurs_in x s andalso not (s = Var(x))
> then raise Fail "Occurs check."
> else (x,s')
> end
> fun raw_augment p insts = p::(List.map (augment1 [p]) insts)
> fun augment (v,t) insts =
> let val t' = subs insts t
> in case t'
> of Var (w) =>
> if w <= v
> then if w = v
> then insts
> else raw_augment (v,t') insts
> else raw_augment (w,Var(v)) insts
> | _ => if occurs_in v t'
> then raise Fail "Occurs check."
> else raw_augment (v,t') insts
> end
> fun itlist2 f =
> let fun iter [] [] = (fn b => b)
> | iter (h::t) (h'::t') = (fn b => f h h' (itlist2 f t t' b))
> | iter _ _ = raise Fail "Arity."
> in iter
> end
> fun unify tm1 tm2 insts =
> case tm1
> of Var(x) =>
> (case assoc x insts
> of NONE => augment (x,tm2) insts
> | SOME tm1' => unify tm1' tm2 insts)
> | Fn(f1,args1) =>
> (case tm2
> of (Var(y)) =>
> (case assoc y insts
> of NONE => augment (y,tm1) insts
> | SOME tm2' => unify tm1 tm2' insts)
> | Fn(f2,args2) =>
> if f1 = f2 then itlist2 unify args1 args2 insts
> else raise Fail ("Constants: mismatch:
> "^f1^"<>"^f2))
> fun rename s =
> fn (Var v) => Var("~"^v^s)
> | (Fn(f,args)) => Fn(f,List.map (rename s) args)
> fun rename_rule s (conc,assums) =
> (rename s conc,List.map (rename s) assums)
> fun expand n rules insts goals =
> let fun first f =
> (fn [] => raise Fail "No rules apply."
> | (h::t) => (f h handle Fail _ => first f t))
> fun search rule =
> if goals = []
> then insts
> else
> let fun eqgoal (Fn("!=",[a,b])) = not (a = b)
> | eqgoal _ = false
> val (conc,assums) = rename_rule (Int.toString n)
> rule
> val goal = hd goals
> val insts' = if eqgoal goal
> then insts (* CHECK! This is
> probably wrong. *)
> else unify conc goal insts
> fun occurs (v,_) = occurs_in v conc
> orelse List.exists (occurs_in v) assums
> val (loc,glob) = List.partition occurs insts'
> val goals' = (List.map (subs loc) assums) @ (tl
> goals)
> in expand (n+1) rules glob goals'
> end
> in
> first search rules
> end
> in
> fun prolog rules goal =
> let val insts = expand 0 rules [] [goal]
> in Yes (List.filter (fn (v,_) => occurs_in v goal) insts)
> end handle Fail _ => No
> end
>
> Here is an example of what it can do (a rather tired old one, but bear
> with us, because we will shortly reinterpret this as something
> more interesting):
>
> val () = load "Prolog";
>
> val () = Meta.quotation := true;
>
> val rs = Prolog.rules `
> male(albert).
> male(edward).
>
> female(alice).
> female(victoria).
>
> father_of(albert,edward).
> father_of(albert,alice).
>
> mother_of(victoria,edward).
> mother_of(victoria,alice).
>
> parents(X,M,F):-mother_of(M,X),father_of(F,X).
>
> sister_of(X,Y):-
> female(X),
> parents(X,M,F),
> parents(Y,M,F).
>
> brother_of(X,Y):-
> !=(X,Y),
> male(X),
> parents(X,M,F),
> parents(Y,M,F).
> `;
>
> This defines a database of facts, the so-called ground terms, which
> are those which don't contain any logical variables (which in Prolog
> are typically all-upper-case identifiers). Then there are _rules_
> which define relations (in the set-theoretical sense of the term, but
> here also in the genealogical sense).
>
> Note that the two-place predicate sister_of is indirectly defined in
> terms another rule, which defines the parents relation. But these 88
> lines of code will resolve the indirection and so they can correctly
> deduce, for example:
>
> val g = Prolog.goal `sister_of(alice,edward)`
> val r = Prolog.prolog rs g
>
> giving
>
> val r = Prolog.Yes [] : outcome
>
> Now look at the database of facts, and note that it is really just an
> abstract syntax representation of a collection of statements in an
> arbitrary language (i.e. a language with undefined syntax). Therefore
> one can apply Prolog deductions, quite trivially, to sentences in a
> defined programming language such as C. Now the C compiler, which we
> imagine we are attacking, already has all the code to analyse and
> manipulate abstract syntax representations of C programs, because that
> is its job! So I hope people can see, without my having to install it
> into GCC 4.9, that one can easily implement a Prolog interpreter in
> very little code, which can decide a quite general class of predicates
> concerning a program, the structure of which is represented in the
> compiler as abstract syntax.
>
> Now note that the predicates need not have a fixed 'depth.' Here are
> two rules defining a path in an arbitrary directed graph, which is a
> non-wellfounded structure capable of representing almost anything,
> including the sentences of any language defined by a context-free
> grammar.
>
> Here the ground terms represent a graph in three disjoint parts, with
> two cycles.
>
> val rs'' = Prolog.rules `
> edge(g,h).
> edge(e,f).
> edge(f,e).
> edge(a,b).
> edge(b,c).
> edge(c,d).
> edge(d,a).
> path(A,B):-edge(A,B).
> path(A,B):-edge(A,C),path(C,B).
> `;
>
> Prolog can determine whether or not there is a path from one vertex to
> another, no matter how far apart they may be:
>
> val g2 = Prolog.goal `path(a,d)`
> val r2 = Prolog.prolog rs'' g2
>
> It prints
>
> val r2 = Prolog.Yes [] : outcome
>
> And it can tell which vertices are reachable from a given vertex:
>
> val g3 = Prolog.goal `path(b,X)`
> val r3 = Prolog.prolog rs'' g3
>
> The result is
>
> val r3 = Prolog.Yes [("X", Prolog.Fn("c", []))] : outcome
>
> In this implementation it only prints the first such edge, but it
> could find them all.
>
> So it should not require any great stretching of people's credulity to
> see that a very few lines of intelligently written code can recognise
> large-scale structure in abstract syntax, even when quite extensive
> changes may have been made to the leaves.
>
> Now we need to show it could re-write those programs. This is easy,
> because in fact a C compiler already does this. It has to, because the
> syntax of C is not sufficient to determine what is a well-formed C
> program and what is not. Here is an example of programmed re-writing
> of arbitrary fragments of a C program. What this does is split a
> compound typedef statement (any compound typedef statement whatsoever)
> into separate parts:
>
> val rewrite_typedef =
> rewriteq `
> ⟦declaration
> ⌜w:declaration-specifiers⌝
> (init-declarator-list
> ⌜x:init-declarator-list⌝
> ⌜y:init-declarator⌝)⟧ =
> (declaration-list
> (declaration-list
> ⟦declaration ⌜w⌝ ⌜x⌝⟧)
> (declaration ⌜w⌝ (init-declarator-list ⌜y⌝)))`
>
> So taking this typedef:
>
> val td = absyntq `typedef unsigned int ui, *pui, (*funcp) (int i);`
>
> we produce this rewritten abstract syntax with three separate
> typedef declarations:
>
> external-declarations:
> external-declaration:
> declaration-list:
> declaration-list:
> declaration-list:
> declaration-list:
> declaration:
> declaration-specifiers:
> storage-class-specifier: typedef
> declaration-specifiers:
> type-specifier: unsigned
> declaration-specifiers:
> type-specifier: int
> init-declarator-list:
> init-declarator:
> declarator:
> direct-declarator:
> identifier: ui
> declaration:
> declaration-specifiers:
> storage-class-specifier: typedef
> declaration-specifiers:
> type-specifier: unsigned
> declaration-specifiers:
> type-specifier: int
> init-declarator-list:
> init-declarator:
> declarator:
> pointer:
> direct-declarator:
> identifier: pui
> declaration:
> declaration-specifiers:
> storage-class-specifier: typedef
> declaration-specifiers:
> type-specifier: unsigned
> declaration-specifiers:
> type-specifier: int
> init-declarator-list:
> init-declarator:
> declarator:
> direct-declarator:
> direct-declarator:
> declarator:
> pointer:
> direct-declarator:
> identifier: funcp
> parameter-type-list:
> parameter-list:
> parameter-declaration:
> declaration-specifiers:
> type-specifier: int
> declarator:
> direct-declarator
> identifier i
>
> The general system, capable of handling extremely complex rewrites
> like this, which can be nested and called (conditionally and/or
> recursively) one from another, with parameters, is implemented in
> about 400 lines of rather badly written Standard ML. It is not Prolog,
> but it uses a similar unification algorithm to identify certain
> large-scale structures independently of many details of particular
> instances, which can vary quite significantly.
>
> These programs, the Prolog interpreter and the Rewrite engine, compile
> to 15,533 and 24,535+28,944 bytes respectively of CAML bytecode. I
> would not expect a C implementation of similar algorithms to be much
> more than this. If anyone doubts someone's chances of effectively
> hiding this much code in a C compiler binary, then they should
> consider that on my system, between GCC 4.5 and GCC 4.9, the combined
> compiler binaries: gcc, cpp, cc1, as and ld grew from 10 MB to over 70
> MB. So I think it would not be too difficult to hide an extra 80k of
> object code in a binary. That represents an increase of around one
> tenth of one percent over the total 70MB (which I consider quite
> ridiculous, by the way. One possible explanation is that GCHQ and the
> NSA are playing core-wars in the GCC binaries.)
>
> Now those still reading this will appreciate that the rewrite engine,
> and the Prolog interpreter, are quite abstract in the sense that they
> interpret _arbitrary_ rules, and so they could easily be implemented
> in such a way that the rules can be changed after the fact, by coding
> new rules into the compiler's source code. The coding could be in
> comments, or it could be in the choice of variable names, or it could
> be in terms of the number of spaces appearing at the ends of lines
> which are certain successive prime multiples of lines apart. Then, by
> applying a publicly scrutinised patch to the compiler source, the
> penetrators could reprogram the trap door already in all the binaries
> of the previous versions of the new compiler, so that they recognise
> and effectively compromise the new compiler's source structure. And
> while they're at it, they may as well change the coding system used to
> transmit new rules, so that no more than one instance of any encrypted
> message ever exists.
>
> This is only the tip of the ice-burg; and the less evidence I see that
> people are properly addressing this 'issue', the more of these
> 'suggestions' I will publish over the next few days. And I think that
> after not so very long, people will be less inclined to make unfounded
> claims about the relative improbability of such an attack. I'm looking
> forward to it: some of them are really, really whacky! And needless to
> say, none of them are original. This last is just a standard instance
> of the teletype string trigger trap door that Karg and Schell
> described 40 years ago in the Multics Security Evaluation:
> Vulnerability Analysis. That document is well worth a careful
> read. The First World Information War started decades ago, we think,
> but some people apparently haven't cottoned-on yet.
>
> Happy hacking, everyone!
>
> Ian
>
>
> _______________________________________________
> Lightning mailing list
> Lightning-mXXj517/zsQ@public.gmane.org
> https://lists.gnu.org/mailman/listinfo/lightning
>
>
[-- Attachment #1.2: Type: text/html, Size: 24162 bytes --]
[-- Attachment #2: Type: text/plain, Size: 159 bytes --]
_______________________________________________
Lightning mailing list
Lightning-mXXj517/zsQ@public.gmane.org
https://lists.gnu.org/mailman/listinfo/lightning
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: Reinterpreting the compiler source code
[not found] ` <CAKFjmdx4Zm2_HVEUre-PYvcZrm41gKv-2z_EsGehjv6NbVxBAw-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2014-09-04 20:57 ` Bruno Loff
@ 2014-09-05 0:13 ` William ML Leslie
1 sibling, 0 replies; 7+ messages in thread
From: William ML Leslie @ 2014-09-05 0:13 UTC (permalink / raw)
To: Ian Grant; +Cc: lightning, guile-devel
On 5 September 2014 03:33, Ian Grant <ian.a.n.grant-gM/Ye1E23mwN+BqQ9rBEUg@public.gmane.org> wrote:
> This is only the tip of the ice-burg; and the less evidence I see that
> people are properly addressing this 'issue', the more of these
> 'suggestions' I will publish over the next few days.
I would be much more interested in hearing about how you intend to
solve this 'issue'.
Are you building an ELF inspection and decompilation tool? Are you
rewriting GCC and the exec server as a bisimulation in ATS, Idris, or
Coq?
--
William Leslie
Notice:
Likely much of this email is, by the nature of copyright, covered
under copyright law. You absolutely MAY reproduce any part of it in
accordance with the copyright law of the nation you are reading this
in. Any attempt to DENY YOU THOSE RIGHTS would be illegal without
prior contractual agreement.
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: Reinterpreting the compiler source code
[not found] ` <CAGOfsMgZE==zCC2OmEPtbH_ph1g1-AhWikioSdZJFep8crP3vQ-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2014-09-05 0:44 ` Ian Grant
0 siblings, 0 replies; 7+ messages in thread
From: Ian Grant @ 2014-09-05 0:44 UTC (permalink / raw)
To: Bruno Loff, guile-devel-mXXj517/zsQ, lightning, Richard Stallman,
Theo deRaadt, Markus Kuhn, Linus Torvalds, schellr-EkmVulN54Sk
[-- Attachment #1.1: Type: text/plain, Size: 22538 bytes --]
On Thu, Sep 4, 2014 at 4:57 PM, Bruno Loff <bruno.loff-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org> wrote:
> Even if the threat you describe is real, why couldn't it be resolved in a
> simpler way?
>
First, here's a slightly philosophical question: do you accept that a
threat can be a real threat, even if it's not carried out? So if I say to
you, "You will do what I say, otherwise I'll destroy your project and ruin
your whole life!" Then that is a threat, whether I carry it out or not,
because it restricts your freedom: so it is real because it is an immediate
cause of your acting in a way that you would not otherwise act. If not,
then all threats would be what Richard calls "imponderables" you wouldn't
be able to know anything about them until they were carried out.
>
> I'm thinking certificates and signatures...
>
Certificates and signatures are simpler, but only if you forget how it is
you actually know they are secure. The so-called "mathematical" basis of
all public-key cryptography that I know of is the assumption that some
one-way "trapdoor" function is easy to compute, but its inverse is
computationally infeasible. Yet the knowledge this represents is not
knowledge. It's certainly nothing to do with mathematics. It is simply the
absence of knowledge to the contrary.
So these protocols are secure solely because of _some_ people's complete
ignorance of any evidence that they are not! But if someone had such
contrary evidence, would you really expect them to tell the world? I am not
suggesting for one minute that the NSA, say, have a polynomial-time time
solution to NP-complete problems: that is not even a necessary condition
for these algorithms being insecure. I am just saying that there is not
even any remotely plausible justification for the belief that the ignorance
we are assuming exists is actually real!!! We are ignorant of the ignorance
as well as the knowledge! This is really funny. People should laugh about
it a lot more than they do.
> I mean, for each distribution in each architecture, there is basically a
> single build configuration which needs to be certified as "trojan-free".
> And this is basically already implemented, provided that I get my compiler
> package from a clean source which signed the package, all that is necessary
> is that the clean source took special care in ensuring that their version
> is in fact bug-free...
>
> That solves 99% of the use-cases, when you use a standard compiler
> provided by a clean source. For the remaining .99%, when you use a
> custom-built compiler, say, you could start with a compiler from a clean
> source and use that to compile your custom-build... No bug injection would
> happen then.
>
How do you know that? If you can't know what the C compiler did when it
compiled the LibreTLS code that Apache runs, or the OpenSSL CA signing app
on the OpenBSD box in the offices of the company who sold you a
certificate?.
> Wouldn't that be enough? Formal certification of the cleanliness of the
> compiler (if I understood correctly and this is what you propose) seems
> over-the-top to me.
>
Formal semantics are not as boring as they sound. It's all about writing
real code that works, and which interprets languages and does stuff. It's
fun. If you try it, then I think you'll believe me.
> Happy hacking :-)
>
Thank you! You too.
Ian
>
>
>
> On 4 September 2014 18:33, Ian Grant <ian.a.n.grant-gM/Ye1E23mwN+BqQ9rBEUg@public.gmane.org> wrote:
>
>> Perhaps people would enjoy reading about how one could inject trojan
>> semantics into a compiler in such a way that they have some resilience
>> to all but quite major structural changes to the target source code?
>>
>> This is not just for the purposes of frightening the birds. It will
>> show how useful tools which automatically analyse and transform
>> program syntax could be. Imagine this in a 'whole program' IDE, so
>> that one could query certain classes of variable assignment in an
>> Operating System kernel, say, and program global re-writes based on
>> pattern matching of whole syntactic structures, rather than just the
>> sub-lexical matching which regular expressions offer.
>>
>> Here is an implementation of a Prolog interpreter. The interpreter
>> itself is only 88 lines of Standard ML. Most of it is written using
>> list functionals like map, exists, find, partition etc. So it should
>> be clear that it could be quite easily translated into a fairly
>> efficient C program.
>>
>> (* (Apart from the != predicate) This is a transliteration of the
>> Prolog interpreter in John Harrison's Cambridge lecture notes. *)
>>
>> datatype term =
>> Fn of string * term list
>> | Var of string
>>
>> datatype outcome =
>> Yes of (string * term) list
>> | No
>>
>> local open GrammarSyntax
>> fun parser startsymbol =
>> let fun parse file stream lexbuf =
>> let val expr = startsymbol PrologLexer.Token lexbuf
>> in Parsing.clearParser();
>> expr
>> end handle exn => (Parsing.clearParser(); raise exn)
>> in parse end
>> fun processString pfn = fn s =>
>> pfn "string" s (Lexing.createLexerString s)
>> fun qlToString l =
>> let fun iter r [] = r
>> | iter r ((QUOTE s)::fs) = iter (r^s) fs
>> | iter r ((ANTIQUOTE s)::fs) = iter (r^s) fs
>> in iter "" l
>> end
>> val parserulesq = processString (parser PrologParser.File) o qlToString
>> val parsetermq = processString (parser PrologParser.OneTerm) o
>> qlToString
>> fun fails s t = (printTree t;raise Fail (s^": no case."))
>> fun mkTerm (NonTerm("atom",
>> [NonTerm("constant",
>> [Term(name)]),
>> l as NonTerm("term-list",_)]))
>> = Fn(name,mkArgs l)
>> | mkTerm (NonTerm("atom",[NonTerm("constant",[Term(name)])]))
>> = Fn(name,[])
>> | mkTerm (NonTerm("atom",[NonTerm("variable",[Term(name)])]))
>> = Var(name)
>> | mkTerm (NonTerm("atom",[l as NonTerm("list",_)]))
>> = mkList(l)
>> | mkTerm t = fails "mkTerm" t
>> and mkList (NonTerm("list",[a as NonTerm("atom",_),
>> l as NonTerm("list",_)]))
>> = Fn(".",[mkTerm a,mkList l])
>> | mkList (NonTerm("list",[a as NonTerm("atom",_)]))
>> = Fn(".",[mkTerm a,Fn("[]",[])])
>> | mkList (NonTerm("list",[a as NonTerm("atom",_),
>> a' as NonTerm("atom",_)]))
>> = Fn(".",[mkTerm a,mkTerm a'])
>> | mkList (NonTerm("list",[]))
>> = Fn("[]",[])
>> | mkList t = fails "mkList" t
>> and mkArgs (NonTerm("term-list",[a as NonTerm("atom",_),
>> l as NonTerm("term-list",_)]))
>> = (mkTerm a)::(mkArgs l)
>> | mkArgs (NonTerm("term-list",[a as NonTerm("atom",_)]))
>> = [mkTerm a]
>> | mkArgs t = fails "mkArgs" t
>> fun mkTerms (NonTerm("terms",[a as NonTerm("atom",_),
>> ts as NonTerm("terms",_)]))
>> = (mkTerm a)::(mkTerms ts)
>> | mkTerms (NonTerm("terms",[a as NonTerm("atom",_)]))
>> = [mkTerm a]
>> | mkTerms t = fails "mkTerms" t
>> and mkRule (NonTerm("rule",[a as NonTerm("atom",_),
>> ts as NonTerm("terms",_)]))
>> = (mkTerm a, mkTerms ts)
>> | mkRule (NonTerm("rule",[a as NonTerm("atom",_)]))
>> = (mkTerm a,[])
>> | mkRule t = fails "mkRule" t
>> and mkRules (NonTerm("rule-list",[l as NonTerm("rule-list",_),
>> r as NonTerm("rule",_)]))
>> = (mkRule(r))::(mkRules l)
>> | mkRules (NonTerm("rule-list",[r as NonTerm("rule",_)]))
>> = [mkRule(r)]
>> | mkRules t = fails "mkRules" t
>> in
>> val rules = List.rev o mkRules o parserulesq
>> val goal = mkTerm o parsetermq
>> end
>>
>> local
>> fun occurs_in x =
>> fn (Var y) => x = y
>> | (Fn(_,l)) => List.exists (occurs_in x) l
>> fun assoc i =
>> (Option.map (fn (_,v) => v)) o (List.find (fn (k,_) => k = i))
>> fun subs insts =
>> fn (tm as (Var y)) =>
>> (case assoc y insts of NONE => tm | SOME v => v)
>> | (Fn(s,l)) =>
>> Fn(s,List.map (subs insts) l)
>> fun augment1 theta (x,s) =
>> let val s' = subs theta s
>> in if occurs_in x s andalso not (s = Var(x))
>> then raise Fail "Occurs check."
>> else (x,s')
>> end
>> fun raw_augment p insts = p::(List.map (augment1 [p]) insts)
>> fun augment (v,t) insts =
>> let val t' = subs insts t
>> in case t'
>> of Var (w) =>
>> if w <= v
>> then if w = v
>> then insts
>> else raw_augment (v,t') insts
>> else raw_augment (w,Var(v)) insts
>> | _ => if occurs_in v t'
>> then raise Fail "Occurs check."
>> else raw_augment (v,t') insts
>> end
>> fun itlist2 f =
>> let fun iter [] [] = (fn b => b)
>> | iter (h::t) (h'::t') = (fn b => f h h' (itlist2 f t t' b))
>> | iter _ _ = raise Fail "Arity."
>> in iter
>> end
>> fun unify tm1 tm2 insts =
>> case tm1
>> of Var(x) =>
>> (case assoc x insts
>> of NONE => augment (x,tm2) insts
>> | SOME tm1' => unify tm1' tm2 insts)
>> | Fn(f1,args1) =>
>> (case tm2
>> of (Var(y)) =>
>> (case assoc y insts
>> of NONE => augment (y,tm1) insts
>> | SOME tm2' => unify tm1 tm2' insts)
>> | Fn(f2,args2) =>
>> if f1 = f2 then itlist2 unify args1 args2 insts
>> else raise Fail ("Constants: mismatch:
>> "^f1^"<>"^f2))
>> fun rename s =
>> fn (Var v) => Var("~"^v^s)
>> | (Fn(f,args)) => Fn(f,List.map (rename s) args)
>> fun rename_rule s (conc,assums) =
>> (rename s conc,List.map (rename s) assums)
>> fun expand n rules insts goals =
>> let fun first f =
>> (fn [] => raise Fail "No rules apply."
>> | (h::t) => (f h handle Fail _ => first f t))
>> fun search rule =
>> if goals = []
>> then insts
>> else
>> let fun eqgoal (Fn("!=",[a,b])) = not (a = b)
>> | eqgoal _ = false
>> val (conc,assums) = rename_rule (Int.toString n)
>> rule
>> val goal = hd goals
>> val insts' = if eqgoal goal
>> then insts (* CHECK! This is
>> probably wrong. *)
>> else unify conc goal insts
>> fun occurs (v,_) = occurs_in v conc
>> orelse List.exists (occurs_in v) assums
>> val (loc,glob) = List.partition occurs insts'
>> val goals' = (List.map (subs loc) assums) @ (tl
>> goals)
>> in expand (n+1) rules glob goals'
>> end
>> in
>> first search rules
>> end
>> in
>> fun prolog rules goal =
>> let val insts = expand 0 rules [] [goal]
>> in Yes (List.filter (fn (v,_) => occurs_in v goal) insts)
>> end handle Fail _ => No
>> end
>>
>> Here is an example of what it can do (a rather tired old one, but bear
>> with us, because we will shortly reinterpret this as something
>> more interesting):
>>
>> val () = load "Prolog";
>>
>> val () = Meta.quotation := true;
>>
>> val rs = Prolog.rules `
>> male(albert).
>> male(edward).
>>
>> female(alice).
>> female(victoria).
>>
>> father_of(albert,edward).
>> father_of(albert,alice).
>>
>> mother_of(victoria,edward).
>> mother_of(victoria,alice).
>>
>> parents(X,M,F):-mother_of(M,X),father_of(F,X).
>>
>> sister_of(X,Y):-
>> female(X),
>> parents(X,M,F),
>> parents(Y,M,F).
>>
>> brother_of(X,Y):-
>> !=(X,Y),
>> male(X),
>> parents(X,M,F),
>> parents(Y,M,F).
>> `;
>>
>> This defines a database of facts, the so-called ground terms, which
>> are those which don't contain any logical variables (which in Prolog
>> are typically all-upper-case identifiers). Then there are _rules_
>> which define relations (in the set-theoretical sense of the term, but
>> here also in the genealogical sense).
>>
>> Note that the two-place predicate sister_of is indirectly defined in
>> terms another rule, which defines the parents relation. But these 88
>> lines of code will resolve the indirection and so they can correctly
>> deduce, for example:
>>
>> val g = Prolog.goal `sister_of(alice,edward)`
>> val r = Prolog.prolog rs g
>>
>> giving
>>
>> val r = Prolog.Yes [] : outcome
>>
>> Now look at the database of facts, and note that it is really just an
>> abstract syntax representation of a collection of statements in an
>> arbitrary language (i.e. a language with undefined syntax). Therefore
>> one can apply Prolog deductions, quite trivially, to sentences in a
>> defined programming language such as C. Now the C compiler, which we
>> imagine we are attacking, already has all the code to analyse and
>> manipulate abstract syntax representations of C programs, because that
>> is its job! So I hope people can see, without my having to install it
>> into GCC 4.9, that one can easily implement a Prolog interpreter in
>> very little code, which can decide a quite general class of predicates
>> concerning a program, the structure of which is represented in the
>> compiler as abstract syntax.
>>
>> Now note that the predicates need not have a fixed 'depth.' Here are
>> two rules defining a path in an arbitrary directed graph, which is a
>> non-wellfounded structure capable of representing almost anything,
>> including the sentences of any language defined by a context-free
>> grammar.
>>
>> Here the ground terms represent a graph in three disjoint parts, with
>> two cycles.
>>
>> val rs'' = Prolog.rules `
>> edge(g,h).
>> edge(e,f).
>> edge(f,e).
>> edge(a,b).
>> edge(b,c).
>> edge(c,d).
>> edge(d,a).
>> path(A,B):-edge(A,B).
>> path(A,B):-edge(A,C),path(C,B).
>> `;
>>
>> Prolog can determine whether or not there is a path from one vertex to
>> another, no matter how far apart they may be:
>>
>> val g2 = Prolog.goal `path(a,d)`
>> val r2 = Prolog.prolog rs'' g2
>>
>> It prints
>>
>> val r2 = Prolog.Yes [] : outcome
>>
>> And it can tell which vertices are reachable from a given vertex:
>>
>> val g3 = Prolog.goal `path(b,X)`
>> val r3 = Prolog.prolog rs'' g3
>>
>> The result is
>>
>> val r3 = Prolog.Yes [("X", Prolog.Fn("c", []))] : outcome
>>
>> In this implementation it only prints the first such edge, but it
>> could find them all.
>>
>> So it should not require any great stretching of people's credulity to
>> see that a very few lines of intelligently written code can recognise
>> large-scale structure in abstract syntax, even when quite extensive
>> changes may have been made to the leaves.
>>
>> Now we need to show it could re-write those programs. This is easy,
>> because in fact a C compiler already does this. It has to, because the
>> syntax of C is not sufficient to determine what is a well-formed C
>> program and what is not. Here is an example of programmed re-writing
>> of arbitrary fragments of a C program. What this does is split a
>> compound typedef statement (any compound typedef statement whatsoever)
>> into separate parts:
>>
>> val rewrite_typedef =
>> rewriteq `
>> ⟦declaration
>> ⌜w:declaration-specifiers⌝
>> (init-declarator-list
>> ⌜x:init-declarator-list⌝
>> ⌜y:init-declarator⌝)⟧ =
>> (declaration-list
>> (declaration-list
>> ⟦declaration ⌜w⌝ ⌜x⌝⟧)
>> (declaration ⌜w⌝ (init-declarator-list ⌜y⌝)))`
>>
>> So taking this typedef:
>>
>> val td = absyntq `typedef unsigned int ui, *pui, (*funcp) (int i);`
>>
>> we produce this rewritten abstract syntax with three separate
>> typedef declarations:
>>
>> external-declarations:
>> external-declaration:
>> declaration-list:
>> declaration-list:
>> declaration-list:
>> declaration-list:
>> declaration:
>> declaration-specifiers:
>> storage-class-specifier: typedef
>> declaration-specifiers:
>> type-specifier: unsigned
>> declaration-specifiers:
>> type-specifier: int
>> init-declarator-list:
>> init-declarator:
>> declarator:
>> direct-declarator:
>> identifier: ui
>> declaration:
>> declaration-specifiers:
>> storage-class-specifier: typedef
>> declaration-specifiers:
>> type-specifier: unsigned
>> declaration-specifiers:
>> type-specifier: int
>> init-declarator-list:
>> init-declarator:
>> declarator:
>> pointer:
>> direct-declarator:
>> identifier: pui
>> declaration:
>> declaration-specifiers:
>> storage-class-specifier: typedef
>> declaration-specifiers:
>> type-specifier: unsigned
>> declaration-specifiers:
>> type-specifier: int
>> init-declarator-list:
>> init-declarator:
>> declarator:
>> direct-declarator:
>> direct-declarator:
>> declarator:
>> pointer:
>> direct-declarator:
>> identifier: funcp
>> parameter-type-list:
>> parameter-list:
>> parameter-declaration:
>> declaration-specifiers:
>> type-specifier: int
>> declarator:
>> direct-declarator
>> identifier i
>>
>> The general system, capable of handling extremely complex rewrites
>> like this, which can be nested and called (conditionally and/or
>> recursively) one from another, with parameters, is implemented in
>> about 400 lines of rather badly written Standard ML. It is not Prolog,
>> but it uses a similar unification algorithm to identify certain
>> large-scale structures independently of many details of particular
>> instances, which can vary quite significantly.
>>
>> These programs, the Prolog interpreter and the Rewrite engine, compile
>> to 15,533 and 24,535+28,944 bytes respectively of CAML bytecode. I
>> would not expect a C implementation of similar algorithms to be much
>> more than this. If anyone doubts someone's chances of effectively
>> hiding this much code in a C compiler binary, then they should
>> consider that on my system, between GCC 4.5 and GCC 4.9, the combined
>> compiler binaries: gcc, cpp, cc1, as and ld grew from 10 MB to over 70
>> MB. So I think it would not be too difficult to hide an extra 80k of
>> object code in a binary. That represents an increase of around one
>> tenth of one percent over the total 70MB (which I consider quite
>> ridiculous, by the way. One possible explanation is that GCHQ and the
>> NSA are playing core-wars in the GCC binaries.)
>>
>> Now those still reading this will appreciate that the rewrite engine,
>> and the Prolog interpreter, are quite abstract in the sense that they
>> interpret _arbitrary_ rules, and so they could easily be implemented
>> in such a way that the rules can be changed after the fact, by coding
>> new rules into the compiler's source code. The coding could be in
>> comments, or it could be in the choice of variable names, or it could
>> be in terms of the number of spaces appearing at the ends of lines
>> which are certain successive prime multiples of lines apart. Then, by
>> applying a publicly scrutinised patch to the compiler source, the
>> penetrators could reprogram the trap door already in all the binaries
>> of the previous versions of the new compiler, so that they recognise
>> and effectively compromise the new compiler's source structure. And
>> while they're at it, they may as well change the coding system used to
>> transmit new rules, so that no more than one instance of any encrypted
>> message ever exists.
>>
>> This is only the tip of the ice-burg; and the less evidence I see that
>> people are properly addressing this 'issue', the more of these
>> 'suggestions' I will publish over the next few days. And I think that
>> after not so very long, people will be less inclined to make unfounded
>> claims about the relative improbability of such an attack. I'm looking
>> forward to it: some of them are really, really whacky! And needless to
>> say, none of them are original. This last is just a standard instance
>> of the teletype string trigger trap door that Karg and Schell
>> described 40 years ago in the Multics Security Evaluation:
>> Vulnerability Analysis. That document is well worth a careful
>> read. The First World Information War started decades ago, we think,
>> but some people apparently haven't cottoned-on yet.
>>
>> Happy hacking, everyone!
>>
>> Ian
>>
>>
>> _______________________________________________
>> Lightning mailing list
>> Lightning-mXXj517/zsQ@public.gmane.org
>> https://lists.gnu.org/mailman/listinfo/lightning
>>
>>
>
[-- Attachment #1.2: Type: text/html, Size: 27862 bytes --]
[-- Attachment #2: Type: text/plain, Size: 159 bytes --]
_______________________________________________
Lightning mailing list
Lightning-mXXj517/zsQ@public.gmane.org
https://lists.gnu.org/mailman/listinfo/lightning
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: Reinterpreting the compiler source code
2014-09-04 17:33 Reinterpreting the compiler source code Ian Grant
[not found] ` <CAKFjmdx4Zm2_HVEUre-PYvcZrm41gKv-2z_EsGehjv6NbVxBAw-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2014-09-05 1:51 ` Richard Stallman
[not found] ` <E1XPigS-000528-41-iW7gFb+/I3LZHJUXO5efmti2O/JbrIOy@public.gmane.org>
1 sibling, 1 reply; 7+ messages in thread
From: Richard Stallman @ 2014-09-05 1:51 UTC (permalink / raw)
To: Ian Grant; +Cc: guile-devel, deraadt, Markus.Kuhn, torvalds, lightning, schellr
[[[ To any NSA and FBI agents reading my email: please consider ]]]
[[[ whether defending the US Constitution against all enemies, ]]]
[[[ foreign or domestic, requires you to follow Snowden's example. ]]]
I don't know prolog, and even if I did, reading so much code would
take a lot of time. I don't see a point, because all that example can
prove is that some subtle of sabotage is _possible_. I'd rather just
agree that it is possible. (I already did.)
I think our community's distributed build practices would make it
difficult for such a sabotage to hit the whole community. Many GCC
developers and redistributors have been bootstrapping for decades
using their old versions.
However, this suggests to me a way of investigating whether such
sabotage is present in our tools. It would be much less work than
replacing the system with new "simple" software, but it would be
a substantial job. I think it would need funding. I don't know
how to get such funding, but maybe someone else does.
--
Dr Richard Stallman
President, Free Software Foundation
51 Franklin St
Boston MA 02110
USA
www.fsf.org www.gnu.org
Skype: No way! That's nonfree (freedom-denying) software.
Use Ekiga or an ordinary phone call.
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: Reinterpreting the compiler source code
[not found] ` <E1XPigS-000528-41-iW7gFb+/I3LZHJUXO5efmti2O/JbrIOy@public.gmane.org>
@ 2014-09-05 2:23 ` Ian Grant
2014-09-06 22:16 ` Richard Stallman
0 siblings, 1 reply; 7+ messages in thread
From: Ian Grant @ 2014-09-05 2:23 UTC (permalink / raw)
To: Richard Stallman, guile-devel-mXXj517/zsQ, Theo deRaadt,
Markus Kuhn, lightning, Linus Torvalds
[-- Attachment #1.1: Type: text/plain, Size: 2341 bytes --]
Well, I don't know Prolog either, which is why I typed in that code. I
don't know about available funding either, I have been trying for over two
years now to get people interested in this, and I ran out of financial
resources a while ago. I have been living by extorting money from my
friends and family. It's not an ideal lifestyle, but I don't know what else
I can do.
There was some money in the Linux Foundation after that Bleeding Heart
fiasco, and I thought perhaps we could go to them and say: "This is just
treating the symptoms, not curing the disease. We would do better to cure
the disease, so why don't you give us just 10% of what you would have
spent, and see if we can't do 90% of the work." But if you're only
interested in treating the symptoms too, then you are competing directly
with the OpenSSL people, and I'm not sure how much better you will fare.
I'm really tired now, and I will try and reply more constructively tomorrow.
Ian
On Thu, Sep 4, 2014 at 9:51 PM, Richard Stallman <rms-mXXj517/zsQ@public.gmane.org> wrote:
> [[[ To any NSA and FBI agents reading my email: please consider ]]]
> [[[ whether defending the US Constitution against all enemies, ]]]
> [[[ foreign or domestic, requires you to follow Snowden's example. ]]]
>
> I don't know prolog, and even if I did, reading so much code would
> take a lot of time. I don't see a point, because all that example can
> prove is that some subtle of sabotage is _possible_. I'd rather just
> agree that it is possible. (I already did.)
>
> I think our community's distributed build practices would make it
> difficult for such a sabotage to hit the whole community. Many GCC
> developers and redistributors have been bootstrapping for decades
> using their old versions.
>
> However, this suggests to me a way of investigating whether such
> sabotage is present in our tools. It would be much less work than
> replacing the system with new "simple" software, but it would be
> a substantial job. I think it would need funding. I don't know
> how to get such funding, but maybe someone else does.
>
> --
> Dr Richard Stallman
> President, Free Software Foundation
> 51 Franklin St
> Boston MA 02110
> USA
> www.fsf.org www.gnu.org
> Skype: No way! That's nonfree (freedom-denying) software.
> Use Ekiga or an ordinary phone call.
>
>
[-- Attachment #1.2: Type: text/html, Size: 3095 bytes --]
[-- Attachment #2: Type: text/plain, Size: 159 bytes --]
_______________________________________________
Lightning mailing list
Lightning-mXXj517/zsQ@public.gmane.org
https://lists.gnu.org/mailman/listinfo/lightning
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: Reinterpreting the compiler source code
2014-09-05 2:23 ` Ian Grant
@ 2014-09-06 22:16 ` Richard Stallman
0 siblings, 0 replies; 7+ messages in thread
From: Richard Stallman @ 2014-09-06 22:16 UTC (permalink / raw)
To: Ian Grant; +Cc: torvalds, lightning, deraadt, Markus.Kuhn, guile-devel
[[[ To any NSA and FBI agents reading my email: please consider ]]]
[[[ whether defending the US Constitution against all enemies, ]]]
[[[ foreign or domestic, requires you to follow Snowden's example. ]]]
I can speak in favor of any serious effort to try to verify that
our binaries match our souce code.
> What we need is a language with a simple semantics for which we can write
> interpreters from scratch. It will be slow, but that doesn't matter. All we
> need it for is to generate the reference compiler that we know is secure,
> and the reference tools that we use to verify that the object code produced
> by the full 740 MB of GCC source when compiled by the 74MB gcc binaries, is
> the same object code our reference compiler produces.
I did not understand, until now, that this was meant as a way to verify GCC.
I thought you meant we should stop using our existing tools and program
in this language instead. I was not interested in that.
However, as a scheme to verify our tools and keep using them,
it might make sense. I can't judge how effective this sort of proof
might be, but I won't reject the idea.
--
Dr Richard Stallman
President, Free Software Foundation
51 Franklin St
Boston MA 02110
USA
www.fsf.org www.gnu.org
Skype: No way! That's nonfree (freedom-denying) software.
Use Ekiga or an ordinary phone call.
^ permalink raw reply [flat|nested] 7+ messages in thread
end of thread, other threads:[~2014-09-06 22:16 UTC | newest]
Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2014-09-04 17:33 Reinterpreting the compiler source code Ian Grant
[not found] ` <CAKFjmdx4Zm2_HVEUre-PYvcZrm41gKv-2z_EsGehjv6NbVxBAw-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2014-09-04 20:57 ` Bruno Loff
[not found] ` <CAGOfsMgZE==zCC2OmEPtbH_ph1g1-AhWikioSdZJFep8crP3vQ-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2014-09-05 0:44 ` Ian Grant
2014-09-05 0:13 ` William ML Leslie
2014-09-05 1:51 ` Richard Stallman
[not found] ` <E1XPigS-000528-41-iW7gFb+/I3LZHJUXO5efmti2O/JbrIOy@public.gmane.org>
2014-09-05 2:23 ` Ian Grant
2014-09-06 22:16 ` Richard Stallman
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).